diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7c26719a4e5196628a5ecca01ec5c2bbb0d832ba
--- /dev/null
+++ b/README.md
@@ -0,0 +1,136 @@
+# DPO Chinese Error Correction Model
+使用DPO訓練之中文糾錯模型。
+
+### Usage
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer, BloomForCausalLM
+
+# Hub repository id, shared by the model and tokenizer loaders below.
+model_id = "p208p2002/bloom-1b1-zh-error-correction-dpo"
+model: BloomForCausalLM = AutoModelForCausalLM.from_pretrained(model_id)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+test_texts = [
+    "為了潔約能源請隨守關閉沒有使用的電器",
+    "今天新情很好",
+    "你快樂我也很高心",
+    "但不再算再找實習生了",
+    "今天太陽很大要注意篩傷",
+    "你要不要和我依起去台北",
+    "清晨六點終太陽會升起",
+    "傾城六點鐘太陽會升起",
+    "鍋馬路時你應該要注意虹綠燈",
+    "他正在學學彈吉他",
+    "下樓梯請注意階梯",
+    "此信件為系統自動發送之通知",
+    "此信件為系統自動發送知通知",
+    "如為誤傳也請立即刪除本郵件並通知寄件者"
+]
+for text in test_texts:
+    # Prompt layout: bos + source text + " " + eos + "\n " + bos; generation continues from there.
+    inputs = tokenizer(
+        f"{tokenizer.bos_token}{text} {tokenizer.eos_token}\n {tokenizer.bos_token}",
+        return_tensors="pt",
+        add_special_tokens=False
+    )["input_ids"]
+
+    out = model.generate(inputs, max_new_tokens=20)
+    decode_out = tokenizer.decode(out[0])
+
+    # The first newline separates the echoed input from the generated correction.
+    # partition() never raises, even if the decoded text has zero or several newlines.
+    input_text, _, output_text = decode_out.partition("\n")
+    input_text = input_text.strip()
+    output_text = output_text.strip()
+
+    print("input :",input_text)
+    print("output:",output_text)
+    print('-'*30)
+```
+```
+input: 為了潔約能源請隨守關閉沒有使用的電器
+output: 為了節約能源請隨時關閉沒有使用的電器
+------------------------------
+input: 今天新情很好
+output: 今天心情很好
+------------------------------
+input: 你快樂我也很高心
+output: 你快樂我也很高興
+------------------------------
+input: 但不再算再找實習生了
+output: 但不再去找實習生了
+------------------------------
+input: 今天太陽很大要注意篩傷
+output: 今天太陽很大要注意一下
+------------------------------
+input: 你要不要和我依起去台北
+output: 你要不要和我一起去台北
+------------------------------
+input: 清晨六點終太陽會升起
+output: 清晨六點鐘太陽會升起
+------------------------------
+input: 傾城六點鐘太陽會升起
+output: 凌晨六點鐘太陽會升起
+------------------------------
+input: 鍋馬路時你應該要注意虹綠燈
+output: 過馬路時你應該要注意紅綠燈
+------------------------------
+input: 他正在學學彈吉他
+output: 他正在學習彈吉他
+------------------------------
+input: 下樓梯請注意階梯
+output: 下樓梯請注意階梯
+------------------------------
+input: 此信件為系統自動發送之通知
+output: 此信件為系統自動發送之通知
+------------------------------
+input: 此信件為系統自動發送知通知
+output: 此信件為系統自動發送通知
+------------------------------
+input: 如為誤傳也請立即刪除本郵件並通知寄件者
+output: 如為誤傳也請立即刪除本郵件並通知寄件者
+------------------------------
+$ python test_model.py dpo_trainer/checkpoint-250
+input : 為了潔約能源請隨守關閉沒有使用的電器
+output: 為了節約能源請隨時關閉沒有使用的電器
+------------------------------
+input : 今天新情很好
+output: 今天心情很好
+------------------------------
+input : 你快樂我也很高心
+output: 你快樂我也很高興
+------------------------------
+input : 但不再算再找實習生了
+output: 但不再去找實習生了
+------------------------------
+input : 今天太陽很大要注意篩傷
+output: 今天太陽很大要注意一下
+------------------------------
+input : 你要不要和我依起去台北
+output: 你要不要和我一起去台北
+------------------------------
+input : 清晨六點終太陽會升起
+output: 清晨六點鐘太陽會升起
+------------------------------
+input : 傾城六點鐘太陽會升起
+output: 凌晨六點鐘太陽會升起
+------------------------------
+input : 鍋馬路時你應該要注意虹綠燈
+output: 過馬路時你應該要注意紅綠燈
+------------------------------
+input : 他正在學學彈吉他
+output: 他正在學習彈吉他
+------------------------------
+input : 下樓梯請注意階梯
+output: 下樓梯請注意階梯
+------------------------------
+input : 此信件為系統自動發送之通知
+output: 此信件為系統自動發送之通知
+------------------------------
+input : 此信件為系統自動發送知通知
+output: 此信件為系統自動發送通知
+------------------------------
+input : 如為誤傳也請立即刪除本郵件並通知寄件者
+output: 如為誤傳也請立即刪除本郵件並通知寄件者
+------------------------------
+```
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..53800af7e02e2618cd95b9dd69c321b0f21b93b2
--- /dev/null
+++ b/config.json
@@ -0,0 +1,32 @@
+{
+ "_name_or_path": "sft_trainer/checkpoint-4500/",
+ "apply_residual_connection_post_layernorm": false,
+ "architectures": [
+ "BloomForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "attention_softmax_in_fp32": true,
+ "bias_dropout_fusion": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_dropout": 0.0,
+ "hidden_size": 1536,
+ "initializer_range": 0.02,
+ "layer_norm_epsilon": 1e-05,
+ "masked_softmax_fusion": true,
+ "model_type": "bloom",
+ "n_head": 16,
+ "n_inner": null,
+ "n_layer": 24,
+ "offset_alibi": 100,
+ "pad_token_id": 3,
+ "pretraining_tp": 1,
+ "skip_bias_add": true,
+ "skip_bias_add_qkv": false,
+ "slow_but_exact": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.37.2",
+ "unk_token_id": 0,
+ "use_cache": true,
+ "vocab_size": 250880
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4029c6dec46810283a25fe61ed113b9ac898f18d
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,7 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "pad_token_id": 3,
+ "transformers_version": "4.37.2"
+}
diff --git a/model.safetensors b/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1914303d8e34c639502cbfb686daee1ef6849bf7
--- /dev/null
+++ b/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a4d934b57e15c85fabeee1c80fc1ba3fb58d9bd959865a102d1fedd35b0ebcd
+size 4261291440
diff --git a/optimizer.pt b/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2443140345520f2fc557f856b1cdbe5a692cebd7
--- /dev/null
+++ b/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e82e1971b8b37f9437ead50ede64293d81ecf954e006d50246065f3b12a49f5
+size 8522768386
diff --git a/rng_state.pth b/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..33cefe6919222ddfa3c3946df69b8e5c5a17a0fc
--- /dev/null
+++ b/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ff264f99d31b522cc7e2a4eac9d38606d0c58a34c0adc74d71e0ca8b371dc36
+size 14244
diff --git a/scheduler.pt b/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b2d9a05c9a4e0fffa3fa1ae3694b978ec3124ae7
--- /dev/null
+++ b/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f3ec4f70580d870f44b786edc3a8bc0395e2f10d51f478622a7a57d30160892
+size 1064
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,24 @@
+{
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "<pad>",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..dbf002cafbd4818dcff2abc9156c088d681b4533
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17a208233d2ee8d8c83b23bc214df737c44806a1919f444e89b31e586cd956ba
+size 14500471
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..2f84072fa5c9d9d99596e59928f088460a213082
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,49 @@
+{
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "max_length": 256,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "<pad>",
+ "padding_side": "right",
+ "stride": 0,
+ "tokenizer_class": "BloomTokenizer",
+ "truncation_side": "right",
+ "truncation_strategy": "longest_first",
+ "unk_token": "<unk>"
+}
diff --git a/trainer_state.json b/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..81868c6ad193fd5bb13017f755e8e4f82e341f1f
--- /dev/null
+++ b/trainer_state.json
@@ -0,0 +1,3521 @@
+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.0625,
+ "eval_steps": 500,
+ "global_step": 250,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.99e-07,
+ "logits/chosen": -9.150251388549805,
+ "logits/rejected": -8.951294898986816,
+ "logps/chosen": -39.82106399536133,
+ "logps/rejected": -51.287376403808594,
+ "loss": 0.6931,
+ "rewards/accuracies": 0.0,
+ "rewards/chosen": 0.0,
+ "rewards/margins": 0.0,
+ "rewards/rejected": 0.0,
+ "step": 1
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.979999999999999e-07,
+ "logits/chosen": -8.701302528381348,
+ "logits/rejected": -8.59887981414795,
+ "logps/chosen": -38.8251953125,
+ "logps/rejected": -50.281246185302734,
+ "loss": 0.6902,
+ "rewards/accuracies": 0.9000000953674316,
+ "rewards/chosen": 0.005459675099700689,
+ "rewards/margins": 0.005827327724546194,
+ "rewards/rejected": -0.00036765271215699613,
+ "step": 2
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.97e-07,
+ "logits/chosen": -8.877432823181152,
+ "logits/rejected": -8.745807647705078,
+ "logps/chosen": -39.213958740234375,
+ "logps/rejected": -53.814178466796875,
+ "loss": 0.6868,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": 0.009096940979361534,
+ "rewards/margins": 0.012745475396513939,
+ "rewards/rejected": -0.003648536978289485,
+ "step": 3
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.96e-07,
+ "logits/chosen": -8.850730895996094,
+ "logits/rejected": -8.655380249023438,
+ "logps/chosen": -41.56839370727539,
+ "logps/rejected": -49.78559112548828,
+ "loss": 0.685,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": 0.010362102650105953,
+ "rewards/margins": 0.016426045447587967,
+ "rewards/rejected": -0.006063942797482014,
+ "step": 4
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.95e-07,
+ "logits/chosen": -9.169770240783691,
+ "logits/rejected": -9.068281173706055,
+ "logps/chosen": -39.02599334716797,
+ "logps/rejected": -54.13230514526367,
+ "loss": 0.6802,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.017099833115935326,
+ "rewards/margins": 0.026187023147940636,
+ "rewards/rejected": -0.009087189100682735,
+ "step": 5
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.94e-07,
+ "logits/chosen": -8.943150520324707,
+ "logits/rejected": -8.760871887207031,
+ "logps/chosen": -40.129783630371094,
+ "logps/rejected": -51.28352737426758,
+ "loss": 0.6759,
+ "rewards/accuracies": 0.8799999952316284,
+ "rewards/chosen": 0.018004529178142548,
+ "rewards/margins": 0.03507697209715843,
+ "rewards/rejected": -0.017072444781661034,
+ "step": 6
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.93e-07,
+ "logits/chosen": -8.973767280578613,
+ "logits/rejected": -8.846033096313477,
+ "logps/chosen": -38.954933166503906,
+ "logps/rejected": -52.8114128112793,
+ "loss": 0.6694,
+ "rewards/accuracies": 0.9199999570846558,
+ "rewards/chosen": 0.01600685343146324,
+ "rewards/margins": 0.048318009823560715,
+ "rewards/rejected": -0.032311152666807175,
+ "step": 7
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.92e-07,
+ "logits/chosen": -9.100973129272461,
+ "logits/rejected": -8.930387496948242,
+ "logps/chosen": -36.695037841796875,
+ "logps/rejected": -51.42365646362305,
+ "loss": 0.6664,
+ "rewards/accuracies": 0.9200000762939453,
+ "rewards/chosen": 0.028073444962501526,
+ "rewards/margins": 0.054737381637096405,
+ "rewards/rejected": -0.026663940399885178,
+ "step": 8
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.909999999999999e-07,
+ "logits/chosen": -8.858081817626953,
+ "logits/rejected": -8.691803932189941,
+ "logps/chosen": -39.423805236816406,
+ "logps/rejected": -53.124473571777344,
+ "loss": 0.6663,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.006643497850745916,
+ "rewards/margins": 0.05475940182805061,
+ "rewards/rejected": -0.04811590164899826,
+ "step": 9
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.9e-07,
+ "logits/chosen": -8.932714462280273,
+ "logits/rejected": -8.752630233764648,
+ "logps/chosen": -40.90498733520508,
+ "logps/rejected": -53.95857620239258,
+ "loss": 0.6667,
+ "rewards/accuracies": 0.9000000953674316,
+ "rewards/chosen": 0.022381644695997238,
+ "rewards/margins": 0.054132066667079926,
+ "rewards/rejected": -0.031750429421663284,
+ "step": 10
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.89e-07,
+ "logits/chosen": -8.926645278930664,
+ "logits/rejected": -8.795331954956055,
+ "logps/chosen": -39.50680923461914,
+ "logps/rejected": -49.81561279296875,
+ "loss": 0.6657,
+ "rewards/accuracies": 0.8600000143051147,
+ "rewards/chosen": 0.02121429704129696,
+ "rewards/margins": 0.05642315000295639,
+ "rewards/rejected": -0.035208847373723984,
+ "step": 11
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.879999999999999e-07,
+ "logits/chosen": -9.18893814086914,
+ "logits/rejected": -9.020618438720703,
+ "logps/chosen": -38.57902908325195,
+ "logps/rejected": -54.4058952331543,
+ "loss": 0.6514,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.03454957157373428,
+ "rewards/margins": 0.0861399695277214,
+ "rewards/rejected": -0.05159040167927742,
+ "step": 12
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.87e-07,
+ "logits/chosen": -8.865549087524414,
+ "logits/rejected": -8.72575569152832,
+ "logps/chosen": -37.333641052246094,
+ "logps/rejected": -50.77653884887695,
+ "loss": 0.6508,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": 0.045416563749313354,
+ "rewards/margins": 0.08835401386022568,
+ "rewards/rejected": -0.04293745011091232,
+ "step": 13
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.86e-07,
+ "logits/chosen": -9.011914253234863,
+ "logits/rejected": -8.871015548706055,
+ "logps/chosen": -37.863914489746094,
+ "logps/rejected": -51.89360427856445,
+ "loss": 0.6456,
+ "rewards/accuracies": 0.8799999952316284,
+ "rewards/chosen": 0.05132318660616875,
+ "rewards/margins": 0.09992541372776031,
+ "rewards/rejected": -0.048602234572172165,
+ "step": 14
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.85e-07,
+ "logits/chosen": -9.177026748657227,
+ "logits/rejected": -9.040088653564453,
+ "logps/chosen": -38.882564544677734,
+ "logps/rejected": -49.73377227783203,
+ "loss": 0.647,
+ "rewards/accuracies": 0.9199999570846558,
+ "rewards/chosen": 0.021302934736013412,
+ "rewards/margins": 0.09643281996250153,
+ "rewards/rejected": -0.07512988150119781,
+ "step": 15
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.839999999999999e-07,
+ "logits/chosen": -8.8854398727417,
+ "logits/rejected": -8.71996784210205,
+ "logps/chosen": -41.88127517700195,
+ "logps/rejected": -53.972862243652344,
+ "loss": 0.6432,
+ "rewards/accuracies": 0.8400000333786011,
+ "rewards/chosen": 0.01929306425154209,
+ "rewards/margins": 0.10526645183563232,
+ "rewards/rejected": -0.08597338944673538,
+ "step": 16
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.83e-07,
+ "logits/chosen": -8.87810230255127,
+ "logits/rejected": -8.729606628417969,
+ "logps/chosen": -37.907958984375,
+ "logps/rejected": -54.153953552246094,
+ "loss": 0.6297,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.026595568284392357,
+ "rewards/margins": 0.13360002636909485,
+ "rewards/rejected": -0.10700444877147675,
+ "step": 17
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.82e-07,
+ "logits/chosen": -9.304192543029785,
+ "logits/rejected": -9.190086364746094,
+ "logps/chosen": -38.377891540527344,
+ "logps/rejected": -51.592796325683594,
+ "loss": 0.6391,
+ "rewards/accuracies": 0.8399999737739563,
+ "rewards/chosen": 0.06439777463674545,
+ "rewards/margins": 0.11593715101480484,
+ "rewards/rejected": -0.051539380103349686,
+ "step": 18
+ },
+ {
+ "epoch": 0.0,
+ "learning_rate": 4.809999999999999e-07,
+ "logits/chosen": -8.877577781677246,
+ "logits/rejected": -8.728302001953125,
+ "logps/chosen": -36.82625198364258,
+ "logps/rejected": -50.772274017333984,
+ "loss": 0.6317,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": 0.04051102325320244,
+ "rewards/margins": 0.13047367334365845,
+ "rewards/rejected": -0.08996264636516571,
+ "step": 19
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.8e-07,
+ "logits/chosen": -8.93675422668457,
+ "logits/rejected": -8.837154388427734,
+ "logps/chosen": -38.22481918334961,
+ "logps/rejected": -51.28339385986328,
+ "loss": 0.6327,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": 0.024285469204187393,
+ "rewards/margins": 0.12898896634578705,
+ "rewards/rejected": -0.10470350831747055,
+ "step": 20
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.79e-07,
+ "logits/chosen": -8.971104621887207,
+ "logits/rejected": -8.809735298156738,
+ "logps/chosen": -38.27422332763672,
+ "logps/rejected": -51.61455154418945,
+ "loss": 0.6212,
+ "rewards/accuracies": 0.8600000143051147,
+ "rewards/chosen": 0.04925350099802017,
+ "rewards/margins": 0.1548921763896942,
+ "rewards/rejected": -0.10563866049051285,
+ "step": 21
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.779999999999999e-07,
+ "logits/chosen": -8.943137168884277,
+ "logits/rejected": -8.724119186401367,
+ "logps/chosen": -39.740211486816406,
+ "logps/rejected": -53.67293167114258,
+ "loss": 0.6185,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": 0.031043654307723045,
+ "rewards/margins": 0.15891726315021515,
+ "rewards/rejected": -0.12787359952926636,
+ "step": 22
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.769999999999999e-07,
+ "logits/chosen": -8.783563613891602,
+ "logits/rejected": -8.568138122558594,
+ "logps/chosen": -40.75530242919922,
+ "logps/rejected": -55.012939453125,
+ "loss": 0.6142,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": 0.022519828751683235,
+ "rewards/margins": 0.1669926643371582,
+ "rewards/rejected": -0.14447283744812012,
+ "step": 23
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.76e-07,
+ "logits/chosen": -8.948726654052734,
+ "logits/rejected": -8.741633415222168,
+ "logps/chosen": -38.42156982421875,
+ "logps/rejected": -54.19482421875,
+ "loss": 0.6141,
+ "rewards/accuracies": 0.8600000143051147,
+ "rewards/chosen": 0.042123936116695404,
+ "rewards/margins": 0.16957491636276245,
+ "rewards/rejected": -0.12745098769664764,
+ "step": 24
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.7499999999999995e-07,
+ "logits/chosen": -8.800437927246094,
+ "logits/rejected": -8.649993896484375,
+ "logps/chosen": -39.37318420410156,
+ "logps/rejected": -50.02665710449219,
+ "loss": 0.6253,
+ "rewards/accuracies": 0.8400000333786011,
+ "rewards/chosen": 0.004943800158798695,
+ "rewards/margins": 0.14754608273506165,
+ "rewards/rejected": -0.14260227978229523,
+ "step": 25
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.7399999999999993e-07,
+ "logits/chosen": -9.165999412536621,
+ "logits/rejected": -9.073251724243164,
+ "logps/chosen": -37.4705696105957,
+ "logps/rejected": -50.39186477661133,
+ "loss": 0.613,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.005681462120264769,
+ "rewards/margins": 0.17087292671203613,
+ "rewards/rejected": -0.16519147157669067,
+ "step": 26
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.7299999999999996e-07,
+ "logits/chosen": -8.994885444641113,
+ "logits/rejected": -8.7987642288208,
+ "logps/chosen": -37.67160415649414,
+ "logps/rejected": -54.15082931518555,
+ "loss": 0.5968,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.03374551981687546,
+ "rewards/margins": 0.20702290534973145,
+ "rewards/rejected": -0.1732773780822754,
+ "step": 27
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.7199999999999994e-07,
+ "logits/chosen": -8.93022346496582,
+ "logits/rejected": -8.841963768005371,
+ "logps/chosen": -37.873985290527344,
+ "logps/rejected": -56.12858200073242,
+ "loss": 0.6008,
+ "rewards/accuracies": 0.8399999737739563,
+ "rewards/chosen": 0.00800693966448307,
+ "rewards/margins": 0.2039312869310379,
+ "rewards/rejected": -0.19592434167861938,
+ "step": 28
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.7099999999999997e-07,
+ "logits/chosen": -9.13399600982666,
+ "logits/rejected": -8.960123062133789,
+ "logps/chosen": -37.81399917602539,
+ "logps/rejected": -53.7081298828125,
+ "loss": 0.5955,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.03130952641367912,
+ "rewards/margins": 0.21175985038280487,
+ "rewards/rejected": -0.24306941032409668,
+ "step": 29
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.6999999999999995e-07,
+ "logits/chosen": -8.995954513549805,
+ "logits/rejected": -8.84068775177002,
+ "logps/chosen": -38.8903923034668,
+ "logps/rejected": -57.47145462036133,
+ "loss": 0.5892,
+ "rewards/accuracies": 0.9199999570846558,
+ "rewards/chosen": 0.044602252542972565,
+ "rewards/margins": 0.22832927107810974,
+ "rewards/rejected": -0.18372702598571777,
+ "step": 30
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.689999999999999e-07,
+ "logits/chosen": -8.951233863830566,
+ "logits/rejected": -8.74404525756836,
+ "logps/chosen": -39.165775299072266,
+ "logps/rejected": -53.60399627685547,
+ "loss": 0.5901,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.005719953216612339,
+ "rewards/margins": 0.2242022305727005,
+ "rewards/rejected": -0.22992220520973206,
+ "step": 31
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.68e-07,
+ "logits/chosen": -8.605962753295898,
+ "logits/rejected": -8.464166641235352,
+ "logps/chosen": -39.067291259765625,
+ "logps/rejected": -56.53247833251953,
+ "loss": 0.565,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.03435752913355827,
+ "rewards/margins": 0.28634151816368103,
+ "rewards/rejected": -0.2519839406013489,
+ "step": 32
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.67e-07,
+ "logits/chosen": -8.780204772949219,
+ "logits/rejected": -8.59669017791748,
+ "logps/chosen": -39.499412536621094,
+ "logps/rejected": -55.1195068359375,
+ "loss": 0.5906,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": 0.012880785390734673,
+ "rewards/margins": 0.22619274258613586,
+ "rewards/rejected": -0.21331194043159485,
+ "step": 33
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.66e-07,
+ "logits/chosen": -9.019442558288574,
+ "logits/rejected": -8.858607292175293,
+ "logps/chosen": -39.38768768310547,
+ "logps/rejected": -56.47705841064453,
+ "loss": 0.5818,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": 0.029814431443810463,
+ "rewards/margins": 0.24373717606067657,
+ "rewards/rejected": -0.21392273902893066,
+ "step": 34
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.65e-07,
+ "logits/chosen": -8.972381591796875,
+ "logits/rejected": -8.825788497924805,
+ "logps/chosen": -39.450408935546875,
+ "logps/rejected": -54.7730827331543,
+ "loss": 0.5931,
+ "rewards/accuracies": 0.9200000762939453,
+ "rewards/chosen": 0.01736309379339218,
+ "rewards/margins": 0.22288131713867188,
+ "rewards/rejected": -0.2055182158946991,
+ "step": 35
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.64e-07,
+ "logits/chosen": -8.794334411621094,
+ "logits/rejected": -8.642388343811035,
+ "logps/chosen": -38.96186447143555,
+ "logps/rejected": -52.460182189941406,
+ "loss": 0.5874,
+ "rewards/accuracies": 0.8600000143051147,
+ "rewards/chosen": 0.03622301667928696,
+ "rewards/margins": 0.23951515555381775,
+ "rewards/rejected": -0.20329216122627258,
+ "step": 36
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.63e-07,
+ "logits/chosen": -8.897015571594238,
+ "logits/rejected": -8.766530990600586,
+ "logps/chosen": -37.4925537109375,
+ "logps/rejected": -50.289085388183594,
+ "loss": 0.5946,
+ "rewards/accuracies": 0.9200000762939453,
+ "rewards/chosen": 0.008345548994839191,
+ "rewards/margins": 0.21366062760353088,
+ "rewards/rejected": -0.20531506836414337,
+ "step": 37
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.62e-07,
+ "logits/chosen": -9.160964965820312,
+ "logits/rejected": -9.01344108581543,
+ "logps/chosen": -39.84162139892578,
+ "logps/rejected": -50.35344696044922,
+ "loss": 0.5878,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.03968983516097069,
+ "rewards/margins": 0.23294827342033386,
+ "rewards/rejected": -0.19325841963291168,
+ "step": 38
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.61e-07,
+ "logits/chosen": -8.879249572753906,
+ "logits/rejected": -8.736249923706055,
+ "logps/chosen": -39.56095504760742,
+ "logps/rejected": -55.347694396972656,
+ "loss": 0.5662,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.02437666617333889,
+ "rewards/margins": 0.28601738810539246,
+ "rewards/rejected": -0.261640727519989,
+ "step": 39
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.6e-07,
+ "logits/chosen": -9.063352584838867,
+ "logits/rejected": -8.963701248168945,
+ "logps/chosen": -38.563880920410156,
+ "logps/rejected": -51.63567352294922,
+ "loss": 0.5769,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.02472488023340702,
+ "rewards/margins": 0.2595635950565338,
+ "rewards/rejected": -0.2842884659767151,
+ "step": 40
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.59e-07,
+ "logits/chosen": -9.278203964233398,
+ "logits/rejected": -9.08998966217041,
+ "logps/chosen": -39.864532470703125,
+ "logps/rejected": -54.92973709106445,
+ "loss": 0.5737,
+ "rewards/accuracies": 0.9199999570846558,
+ "rewards/chosen": 0.020174704492092133,
+ "rewards/margins": 0.26886457204818726,
+ "rewards/rejected": -0.24868984520435333,
+ "step": 41
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.58e-07,
+ "logits/chosen": -8.939000129699707,
+ "logits/rejected": -8.744054794311523,
+ "logps/chosen": -37.10688400268555,
+ "logps/rejected": -54.631141662597656,
+ "loss": 0.5495,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.04707593470811844,
+ "rewards/margins": 0.3288743495941162,
+ "rewards/rejected": -0.28179842233657837,
+ "step": 42
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.57e-07,
+ "logits/chosen": -8.78071403503418,
+ "logits/rejected": -8.645574569702148,
+ "logps/chosen": -38.05216598510742,
+ "logps/rejected": -54.2934684753418,
+ "loss": 0.5586,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": 0.01882551982998848,
+ "rewards/margins": 0.3048241138458252,
+ "rewards/rejected": -0.2859985828399658,
+ "step": 43
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.56e-07,
+ "logits/chosen": -8.890421867370605,
+ "logits/rejected": -8.79121208190918,
+ "logps/chosen": -36.806209564208984,
+ "logps/rejected": -54.96057891845703,
+ "loss": 0.5378,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.09500815719366074,
+ "rewards/margins": 0.3511715531349182,
+ "rewards/rejected": -0.2561633884906769,
+ "step": 44
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.55e-07,
+ "logits/chosen": -8.712895393371582,
+ "logits/rejected": -8.633960723876953,
+ "logps/chosen": -38.36237335205078,
+ "logps/rejected": -53.05036163330078,
+ "loss": 0.5671,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.10673508793115616,
+ "rewards/margins": 0.28717535734176636,
+ "rewards/rejected": -0.1804402768611908,
+ "step": 45
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.54e-07,
+ "logits/chosen": -9.085457801818848,
+ "logits/rejected": -8.948368072509766,
+ "logps/chosen": -40.31169128417969,
+ "logps/rejected": -56.712364196777344,
+ "loss": 0.5538,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": 0.05474279075860977,
+ "rewards/margins": 0.32782307267189026,
+ "rewards/rejected": -0.2730802297592163,
+ "step": 46
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.53e-07,
+ "logits/chosen": -8.925232887268066,
+ "logits/rejected": -8.793390274047852,
+ "logps/chosen": -40.80510711669922,
+ "logps/rejected": -53.9893798828125,
+ "loss": 0.5603,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.004703240934759378,
+ "rewards/margins": 0.2981411814689636,
+ "rewards/rejected": -0.30284440517425537,
+ "step": 47
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.5199999999999997e-07,
+ "logits/chosen": -8.936556816101074,
+ "logits/rejected": -8.795175552368164,
+ "logps/chosen": -39.4090690612793,
+ "logps/rejected": -56.091827392578125,
+ "loss": 0.5701,
+ "rewards/accuracies": 0.8799999952316284,
+ "rewards/chosen": -0.02163396216928959,
+ "rewards/margins": 0.28110483288764954,
+ "rewards/rejected": -0.3027387261390686,
+ "step": 48
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.51e-07,
+ "logits/chosen": -8.785165786743164,
+ "logits/rejected": -8.663257598876953,
+ "logps/chosen": -39.04924011230469,
+ "logps/rejected": -55.55643844604492,
+ "loss": 0.5291,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": 0.07937277853488922,
+ "rewards/margins": 0.3831271529197693,
+ "rewards/rejected": -0.3037543296813965,
+ "step": 49
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.5e-07,
+ "logits/chosen": -8.918477058410645,
+ "logits/rejected": -8.69984245300293,
+ "logps/chosen": -39.793434143066406,
+ "logps/rejected": -51.4716911315918,
+ "loss": 0.5581,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": -0.030612414702773094,
+ "rewards/margins": 0.3040314316749573,
+ "rewards/rejected": -0.3346438705921173,
+ "step": 50
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.49e-07,
+ "logits/chosen": -8.830556869506836,
+ "logits/rejected": -8.661957740783691,
+ "logps/chosen": -39.756195068359375,
+ "logps/rejected": -55.29694747924805,
+ "loss": 0.5275,
+ "rewards/accuracies": 0.9199999570846558,
+ "rewards/chosen": 0.03386972099542618,
+ "rewards/margins": 0.3903976380825043,
+ "rewards/rejected": -0.3565279543399811,
+ "step": 51
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.48e-07,
+ "logits/chosen": -8.969281196594238,
+ "logits/rejected": -8.82741641998291,
+ "logps/chosen": -37.20234298706055,
+ "logps/rejected": -59.197296142578125,
+ "loss": 0.5128,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": 0.04513595253229141,
+ "rewards/margins": 0.429348886013031,
+ "rewards/rejected": -0.3842129111289978,
+ "step": 52
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.4699999999999997e-07,
+ "logits/chosen": -8.737951278686523,
+ "logits/rejected": -8.606011390686035,
+ "logps/chosen": -38.3763542175293,
+ "logps/rejected": -58.59581756591797,
+ "loss": 0.5162,
+ "rewards/accuracies": 0.9199999570846558,
+ "rewards/chosen": 0.030918415635824203,
+ "rewards/margins": 0.41947564482688904,
+ "rewards/rejected": -0.3885572552680969,
+ "step": 53
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.46e-07,
+ "logits/chosen": -8.750028610229492,
+ "logits/rejected": -8.646293640136719,
+ "logps/chosen": -38.137672424316406,
+ "logps/rejected": -56.8802490234375,
+ "loss": 0.5144,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.08223019540309906,
+ "rewards/margins": 0.42382898926734924,
+ "rewards/rejected": -0.3415988087654114,
+ "step": 54
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.45e-07,
+ "logits/chosen": -9.04769515991211,
+ "logits/rejected": -8.884363174438477,
+ "logps/chosen": -39.094200134277344,
+ "logps/rejected": -56.6423454284668,
+ "loss": 0.5296,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.01863374561071396,
+ "rewards/margins": 0.3876304030418396,
+ "rewards/rejected": -0.36899662017822266,
+ "step": 55
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.44e-07,
+ "logits/chosen": -8.93348503112793,
+ "logits/rejected": -8.77459716796875,
+ "logps/chosen": -38.1727294921875,
+ "logps/rejected": -51.39598846435547,
+ "loss": 0.5394,
+ "rewards/accuracies": 0.9199999570846558,
+ "rewards/chosen": 0.08033865690231323,
+ "rewards/margins": 0.3627088963985443,
+ "rewards/rejected": -0.2823702096939087,
+ "step": 56
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.43e-07,
+ "logits/chosen": -8.88620376586914,
+ "logits/rejected": -8.79748821258545,
+ "logps/chosen": -38.02767562866211,
+ "logps/rejected": -53.46161651611328,
+ "loss": 0.5248,
+ "rewards/accuracies": 0.8400000333786011,
+ "rewards/chosen": 0.07710711658000946,
+ "rewards/margins": 0.404231458902359,
+ "rewards/rejected": -0.32712429761886597,
+ "step": 57
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.4199999999999996e-07,
+ "logits/chosen": -9.048208236694336,
+ "logits/rejected": -8.921117782592773,
+ "logps/chosen": -39.349849700927734,
+ "logps/rejected": -57.04978561401367,
+ "loss": 0.525,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": 0.005890236236155033,
+ "rewards/margins": 0.3883860111236572,
+ "rewards/rejected": -0.38249582052230835,
+ "step": 58
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.41e-07,
+ "logits/chosen": -9.050989151000977,
+ "logits/rejected": -8.864224433898926,
+ "logps/chosen": -38.37465286254883,
+ "logps/rejected": -54.69602584838867,
+ "loss": 0.5161,
+ "rewards/accuracies": 0.9199999570846558,
+ "rewards/chosen": -0.00012345091090537608,
+ "rewards/margins": 0.41539669036865234,
+ "rewards/rejected": -0.4155201017856598,
+ "step": 59
+ },
+ {
+ "epoch": 0.01,
+ "learning_rate": 4.3999999999999997e-07,
+ "logits/chosen": -8.77641773223877,
+ "logits/rejected": -8.65733528137207,
+ "logps/chosen": -37.439788818359375,
+ "logps/rejected": -56.8023681640625,
+ "loss": 0.491,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": 0.10770513862371445,
+ "rewards/margins": 0.4838793873786926,
+ "rewards/rejected": -0.37617427110671997,
+ "step": 60
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.39e-07,
+ "logits/chosen": -8.84074878692627,
+ "logits/rejected": -8.694551467895508,
+ "logps/chosen": -40.056434631347656,
+ "logps/rejected": -60.27661895751953,
+ "loss": 0.5015,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.010883894748985767,
+ "rewards/margins": 0.4609171748161316,
+ "rewards/rejected": -0.4500332772731781,
+ "step": 61
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.38e-07,
+ "logits/chosen": -8.841463088989258,
+ "logits/rejected": -8.691165924072266,
+ "logps/chosen": -38.9443473815918,
+ "logps/rejected": -56.7674446105957,
+ "loss": 0.5052,
+ "rewards/accuracies": 0.9199999570846558,
+ "rewards/chosen": 0.058075837790966034,
+ "rewards/margins": 0.45668134093284607,
+ "rewards/rejected": -0.39860549569129944,
+ "step": 62
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.3699999999999996e-07,
+ "logits/chosen": -8.556544303894043,
+ "logits/rejected": -8.511371612548828,
+ "logps/chosen": -37.55242919921875,
+ "logps/rejected": -52.5374755859375,
+ "loss": 0.5261,
+ "rewards/accuracies": 0.9199999570846558,
+ "rewards/chosen": 0.05054298788309097,
+ "rewards/margins": 0.3981621265411377,
+ "rewards/rejected": -0.3476191461086273,
+ "step": 63
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.36e-07,
+ "logits/chosen": -8.824804306030273,
+ "logits/rejected": -8.721410751342773,
+ "logps/chosen": -39.77140808105469,
+ "logps/rejected": -60.0277099609375,
+ "loss": 0.4751,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.01679672673344612,
+ "rewards/margins": 0.5294073820114136,
+ "rewards/rejected": -0.5462040901184082,
+ "step": 64
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.3499999999999996e-07,
+ "logits/chosen": -8.601284980773926,
+ "logits/rejected": -8.539445877075195,
+ "logps/chosen": -40.41975402832031,
+ "logps/rejected": -55.65443801879883,
+ "loss": 0.5237,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": -0.10446874797344208,
+ "rewards/margins": 0.39701706171035767,
+ "rewards/rejected": -0.5014857649803162,
+ "step": 65
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.34e-07,
+ "logits/chosen": -9.016075134277344,
+ "logits/rejected": -8.911613464355469,
+ "logps/chosen": -36.6504020690918,
+ "logps/rejected": -52.8062629699707,
+ "loss": 0.4991,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": 0.03334439545869827,
+ "rewards/margins": 0.46417728066444397,
+ "rewards/rejected": -0.4308328628540039,
+ "step": 66
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.3299999999999997e-07,
+ "logits/chosen": -9.052962303161621,
+ "logits/rejected": -8.949380874633789,
+ "logps/chosen": -37.03533172607422,
+ "logps/rejected": -52.54149627685547,
+ "loss": 0.5236,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": 0.004350895527750254,
+ "rewards/margins": 0.41073599457740784,
+ "rewards/rejected": -0.4063850939273834,
+ "step": 67
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.3199999999999995e-07,
+ "logits/chosen": -9.015048027038574,
+ "logits/rejected": -8.912318229675293,
+ "logps/chosen": -38.99870300292969,
+ "logps/rejected": -52.52238082885742,
+ "loss": 0.517,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.050432223826646805,
+ "rewards/margins": 0.4209260046482086,
+ "rewards/rejected": -0.3704938292503357,
+ "step": 68
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.31e-07,
+ "logits/chosen": -8.882599830627441,
+ "logits/rejected": -8.721563339233398,
+ "logps/chosen": -39.24352264404297,
+ "logps/rejected": -57.102867126464844,
+ "loss": 0.4889,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": 0.05120646953582764,
+ "rewards/margins": 0.5033482909202576,
+ "rewards/rejected": -0.4521418511867523,
+ "step": 69
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.2999999999999996e-07,
+ "logits/chosen": -8.721124649047852,
+ "logits/rejected": -8.638886451721191,
+ "logps/chosen": -40.13719940185547,
+ "logps/rejected": -56.29268264770508,
+ "loss": 0.5076,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.024597348645329475,
+ "rewards/margins": 0.4424295425415039,
+ "rewards/rejected": -0.46702688932418823,
+ "step": 70
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.29e-07,
+ "logits/chosen": -8.832576751708984,
+ "logits/rejected": -8.6920747756958,
+ "logps/chosen": -39.40597152709961,
+ "logps/rejected": -57.85718536376953,
+ "loss": 0.4844,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": 0.0307625625282526,
+ "rewards/margins": 0.5312424302101135,
+ "rewards/rejected": -0.5004798769950867,
+ "step": 71
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.2799999999999997e-07,
+ "logits/chosen": -8.825450897216797,
+ "logits/rejected": -8.670401573181152,
+ "logps/chosen": -39.127418518066406,
+ "logps/rejected": -58.1724967956543,
+ "loss": 0.4951,
+ "rewards/accuracies": 0.9199999570846558,
+ "rewards/chosen": 0.04789305105805397,
+ "rewards/margins": 0.48660898208618164,
+ "rewards/rejected": -0.43871593475341797,
+ "step": 72
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.2699999999999995e-07,
+ "logits/chosen": -9.031660079956055,
+ "logits/rejected": -8.88580322265625,
+ "logps/chosen": -40.26874542236328,
+ "logps/rejected": -59.174354553222656,
+ "loss": 0.4813,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": 0.056357551366090775,
+ "rewards/margins": 0.5150011777877808,
+ "rewards/rejected": -0.4586435854434967,
+ "step": 73
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.26e-07,
+ "logits/chosen": -9.249466896057129,
+ "logits/rejected": -9.104633331298828,
+ "logps/chosen": -38.35686492919922,
+ "logps/rejected": -58.423255920410156,
+ "loss": 0.4694,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.003165402915328741,
+ "rewards/margins": 0.5658671855926514,
+ "rewards/rejected": -0.5627016425132751,
+ "step": 74
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.2499999999999995e-07,
+ "logits/chosen": -8.971678733825684,
+ "logits/rejected": -8.806716918945312,
+ "logps/chosen": -40.63257598876953,
+ "logps/rejected": -58.69512939453125,
+ "loss": 0.4714,
+ "rewards/accuracies": 0.8600000143051147,
+ "rewards/chosen": -0.003097705077379942,
+ "rewards/margins": 0.5738621950149536,
+ "rewards/rejected": -0.5769599676132202,
+ "step": 75
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.24e-07,
+ "logits/chosen": -8.751283645629883,
+ "logits/rejected": -8.613395690917969,
+ "logps/chosen": -41.808475494384766,
+ "logps/rejected": -57.4010009765625,
+ "loss": 0.5206,
+ "rewards/accuracies": 0.9200000762939453,
+ "rewards/chosen": -0.018361693248152733,
+ "rewards/margins": 0.4181668758392334,
+ "rewards/rejected": -0.4365285038948059,
+ "step": 76
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.2299999999999996e-07,
+ "logits/chosen": -8.919511795043945,
+ "logits/rejected": -8.767281532287598,
+ "logps/chosen": -38.50531005859375,
+ "logps/rejected": -56.9246826171875,
+ "loss": 0.5066,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": -0.08143829554319382,
+ "rewards/margins": 0.46956175565719604,
+ "rewards/rejected": -0.5510000586509705,
+ "step": 77
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.2199999999999994e-07,
+ "logits/chosen": -9.05775260925293,
+ "logits/rejected": -8.882523536682129,
+ "logps/chosen": -37.908966064453125,
+ "logps/rejected": -57.79386520385742,
+ "loss": 0.4638,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": 0.06249593570828438,
+ "rewards/margins": 0.5957901477813721,
+ "rewards/rejected": -0.5332942008972168,
+ "step": 78
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.2099999999999997e-07,
+ "logits/chosen": -9.013116836547852,
+ "logits/rejected": -8.926069259643555,
+ "logps/chosen": -37.548606872558594,
+ "logps/rejected": -54.40301513671875,
+ "loss": 0.4867,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": 0.08341957628726959,
+ "rewards/margins": 0.5038079023361206,
+ "rewards/rejected": -0.4203883111476898,
+ "step": 79
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.1999999999999995e-07,
+ "logits/chosen": -8.759903907775879,
+ "logits/rejected": -8.721986770629883,
+ "logps/chosen": -38.83586120605469,
+ "logps/rejected": -56.38386154174805,
+ "loss": 0.4749,
+ "rewards/accuracies": 0.9000000953674316,
+ "rewards/chosen": 0.03295533359050751,
+ "rewards/margins": 0.5568066239356995,
+ "rewards/rejected": -0.5238512754440308,
+ "step": 80
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.19e-07,
+ "logits/chosen": -8.818650245666504,
+ "logits/rejected": -8.679798126220703,
+ "logps/chosen": -40.744956970214844,
+ "logps/rejected": -56.096473693847656,
+ "loss": 0.4843,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.10699313879013062,
+ "rewards/margins": 0.49514955282211304,
+ "rewards/rejected": -0.6021426916122437,
+ "step": 81
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.1799999999999996e-07,
+ "logits/chosen": -8.959874153137207,
+ "logits/rejected": -8.873147010803223,
+ "logps/chosen": -38.775062561035156,
+ "logps/rejected": -56.94916534423828,
+ "loss": 0.4637,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": 0.026896214112639427,
+ "rewards/margins": 0.5791338682174683,
+ "rewards/rejected": -0.5522376298904419,
+ "step": 82
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.17e-07,
+ "logits/chosen": -9.120010375976562,
+ "logits/rejected": -8.946874618530273,
+ "logps/chosen": -41.79666519165039,
+ "logps/rejected": -59.282958984375,
+ "loss": 0.469,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": -0.009585860185325146,
+ "rewards/margins": 0.5570787191390991,
+ "rewards/rejected": -0.5666645169258118,
+ "step": 83
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.1599999999999997e-07,
+ "logits/chosen": -8.929158210754395,
+ "logits/rejected": -8.834013938903809,
+ "logps/chosen": -40.36653137207031,
+ "logps/rejected": -54.375205993652344,
+ "loss": 0.5029,
+ "rewards/accuracies": 0.9200000762939453,
+ "rewards/chosen": -0.10653682053089142,
+ "rewards/margins": 0.4618176519870758,
+ "rewards/rejected": -0.5683544874191284,
+ "step": 84
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.1499999999999994e-07,
+ "logits/chosen": -9.015276908874512,
+ "logits/rejected": -8.931076049804688,
+ "logps/chosen": -40.707420349121094,
+ "logps/rejected": -57.53357696533203,
+ "loss": 0.4619,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.034116435796022415,
+ "rewards/margins": 0.5871976613998413,
+ "rewards/rejected": -0.6213140487670898,
+ "step": 85
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.14e-07,
+ "logits/chosen": -8.926106452941895,
+ "logits/rejected": -8.83165168762207,
+ "logps/chosen": -37.86637878417969,
+ "logps/rejected": -57.00706100463867,
+ "loss": 0.4474,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": 0.026764903217554092,
+ "rewards/margins": 0.6376525163650513,
+ "rewards/rejected": -0.6108875870704651,
+ "step": 86
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.1299999999999995e-07,
+ "logits/chosen": -8.976417541503906,
+ "logits/rejected": -8.816550254821777,
+ "logps/chosen": -40.38107681274414,
+ "logps/rejected": -58.815818786621094,
+ "loss": 0.4547,
+ "rewards/accuracies": 0.9200000762939453,
+ "rewards/chosen": -0.023364685475826263,
+ "rewards/margins": 0.6099767088890076,
+ "rewards/rejected": -0.6333414316177368,
+ "step": 87
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.12e-07,
+ "logits/chosen": -8.921110153198242,
+ "logits/rejected": -8.771123886108398,
+ "logps/chosen": -36.119667053222656,
+ "logps/rejected": -54.26972198486328,
+ "loss": 0.4564,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.024796072393655777,
+ "rewards/margins": 0.5949214100837708,
+ "rewards/rejected": -0.6197174787521362,
+ "step": 88
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.1099999999999996e-07,
+ "logits/chosen": -8.972981452941895,
+ "logits/rejected": -8.839695930480957,
+ "logps/chosen": -37.952171325683594,
+ "logps/rejected": -53.723838806152344,
+ "loss": 0.4678,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.0343460738658905,
+ "rewards/margins": 0.5734801888465881,
+ "rewards/rejected": -0.5391340851783752,
+ "step": 89
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.0999999999999994e-07,
+ "logits/chosen": -8.822635650634766,
+ "logits/rejected": -8.72290325164795,
+ "logps/chosen": -40.56664276123047,
+ "logps/rejected": -58.175697326660156,
+ "loss": 0.4662,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.11275799572467804,
+ "rewards/margins": 0.5608721971511841,
+ "rewards/rejected": -0.6736301779747009,
+ "step": 90
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.0899999999999997e-07,
+ "logits/chosen": -8.966471672058105,
+ "logits/rejected": -8.847415924072266,
+ "logps/chosen": -39.2136344909668,
+ "logps/rejected": -55.99798583984375,
+ "loss": 0.4806,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": 0.03947858139872551,
+ "rewards/margins": 0.5527079701423645,
+ "rewards/rejected": -0.5132293701171875,
+ "step": 91
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.0799999999999995e-07,
+ "logits/chosen": -8.708093643188477,
+ "logits/rejected": -8.622647285461426,
+ "logps/chosen": -40.248023986816406,
+ "logps/rejected": -60.658241271972656,
+ "loss": 0.4263,
+ "rewards/accuracies": 0.9199999570846558,
+ "rewards/chosen": 0.06861706078052521,
+ "rewards/margins": 0.7151986360549927,
+ "rewards/rejected": -0.6465815305709839,
+ "step": 92
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.07e-07,
+ "logits/chosen": -8.927497863769531,
+ "logits/rejected": -8.764131546020508,
+ "logps/chosen": -41.07722473144531,
+ "logps/rejected": -57.40480422973633,
+ "loss": 0.4604,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.10462639480829239,
+ "rewards/margins": 0.577288806438446,
+ "rewards/rejected": -0.6819152235984802,
+ "step": 93
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.06e-07,
+ "logits/chosen": -8.988286018371582,
+ "logits/rejected": -8.856369018554688,
+ "logps/chosen": -39.973121643066406,
+ "logps/rejected": -55.97014236450195,
+ "loss": 0.4614,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": -0.0598948672413826,
+ "rewards/margins": 0.6045216917991638,
+ "rewards/rejected": -0.664416491985321,
+ "step": 94
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.05e-07,
+ "logits/chosen": -8.822972297668457,
+ "logits/rejected": -8.72160816192627,
+ "logps/chosen": -40.39838409423828,
+ "logps/rejected": -58.671974182128906,
+ "loss": 0.4388,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": 0.012630686163902283,
+ "rewards/margins": 0.6781344413757324,
+ "rewards/rejected": -0.6655037999153137,
+ "step": 95
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.04e-07,
+ "logits/chosen": -8.826386451721191,
+ "logits/rejected": -8.73621940612793,
+ "logps/chosen": -38.85763168334961,
+ "logps/rejected": -61.4788818359375,
+ "loss": 0.4197,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.0026855766773223877,
+ "rewards/margins": 0.7139507532119751,
+ "rewards/rejected": -0.7112652063369751,
+ "step": 96
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.03e-07,
+ "logits/chosen": -9.003509521484375,
+ "logits/rejected": -8.840544700622559,
+ "logps/chosen": -39.192012786865234,
+ "logps/rejected": -59.52681350708008,
+ "loss": 0.4507,
+ "rewards/accuracies": 0.9199999570846558,
+ "rewards/chosen": -0.04670552909374237,
+ "rewards/margins": 0.62091064453125,
+ "rewards/rejected": -0.6676161885261536,
+ "step": 97
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.02e-07,
+ "logits/chosen": -9.014259338378906,
+ "logits/rejected": -8.890603065490723,
+ "logps/chosen": -39.755069732666016,
+ "logps/rejected": -62.2213134765625,
+ "loss": 0.4409,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.07653121650218964,
+ "rewards/margins": 0.6743559837341309,
+ "rewards/rejected": -0.7508872151374817,
+ "step": 98
+ },
+ {
+ "epoch": 0.02,
+ "learning_rate": 4.01e-07,
+ "logits/chosen": -8.92739486694336,
+ "logits/rejected": -8.727579116821289,
+ "logps/chosen": -38.63489532470703,
+ "logps/rejected": -56.489234924316406,
+ "loss": 0.4253,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.06340552121400833,
+ "rewards/margins": 0.6787285804748535,
+ "rewards/rejected": -0.7421342134475708,
+ "step": 99
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 4e-07,
+ "logits/chosen": -8.823177337646484,
+ "logits/rejected": -8.733776092529297,
+ "logps/chosen": -39.35440444946289,
+ "logps/rejected": -59.88597869873047,
+ "loss": 0.4156,
+ "rewards/accuracies": 0.9200000762939453,
+ "rewards/chosen": 0.06192043423652649,
+ "rewards/margins": 0.7568650245666504,
+ "rewards/rejected": -0.6949446201324463,
+ "step": 100
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.99e-07,
+ "logits/chosen": -8.744458198547363,
+ "logits/rejected": -8.607200622558594,
+ "logps/chosen": -39.50025177001953,
+ "logps/rejected": -59.96971893310547,
+ "loss": 0.4466,
+ "rewards/accuracies": 0.8600000143051147,
+ "rewards/chosen": -0.02974173054099083,
+ "rewards/margins": 0.6540297269821167,
+ "rewards/rejected": -0.6837714910507202,
+ "step": 101
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.98e-07,
+ "logits/chosen": -8.990362167358398,
+ "logits/rejected": -8.814451217651367,
+ "logps/chosen": -41.06502914428711,
+ "logps/rejected": -59.0604133605957,
+ "loss": 0.439,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.11213630437850952,
+ "rewards/margins": 0.681516170501709,
+ "rewards/rejected": -0.7936524152755737,
+ "step": 102
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.97e-07,
+ "logits/chosen": -8.999361038208008,
+ "logits/rejected": -8.890585899353027,
+ "logps/chosen": -36.496822357177734,
+ "logps/rejected": -61.975677490234375,
+ "loss": 0.4139,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": 0.07788603752851486,
+ "rewards/margins": 0.7719110250473022,
+ "rewards/rejected": -0.6940250992774963,
+ "step": 103
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.96e-07,
+ "logits/chosen": -8.933026313781738,
+ "logits/rejected": -8.826833724975586,
+ "logps/chosen": -40.48444366455078,
+ "logps/rejected": -55.9661865234375,
+ "loss": 0.449,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.10673530399799347,
+ "rewards/margins": 0.6240180730819702,
+ "rewards/rejected": -0.730753481388092,
+ "step": 104
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.95e-07,
+ "logits/chosen": -9.035877227783203,
+ "logits/rejected": -8.826123237609863,
+ "logps/chosen": -38.494232177734375,
+ "logps/rejected": -58.50568771362305,
+ "loss": 0.4465,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.05296442657709122,
+ "rewards/margins": 0.677048921585083,
+ "rewards/rejected": -0.7300133109092712,
+ "step": 105
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.94e-07,
+ "logits/chosen": -8.989307403564453,
+ "logits/rejected": -8.91016674041748,
+ "logps/chosen": -41.43378448486328,
+ "logps/rejected": -57.25815963745117,
+ "loss": 0.4938,
+ "rewards/accuracies": 0.8799999952316284,
+ "rewards/chosen": -0.20345261693000793,
+ "rewards/margins": 0.5115060806274414,
+ "rewards/rejected": -0.7149587869644165,
+ "step": 106
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.93e-07,
+ "logits/chosen": -8.774269104003906,
+ "logits/rejected": -8.619407653808594,
+ "logps/chosen": -39.817054748535156,
+ "logps/rejected": -54.45206832885742,
+ "loss": 0.4652,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": -0.1449589878320694,
+ "rewards/margins": 0.5843663811683655,
+ "rewards/rejected": -0.7293254137039185,
+ "step": 107
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.92e-07,
+ "logits/chosen": -8.860220909118652,
+ "logits/rejected": -8.775630950927734,
+ "logps/chosen": -38.79396438598633,
+ "logps/rejected": -58.30022430419922,
+ "loss": 0.4283,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": 0.04749782010912895,
+ "rewards/margins": 0.7379117012023926,
+ "rewards/rejected": -0.6904138922691345,
+ "step": 108
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.91e-07,
+ "logits/chosen": -8.848630905151367,
+ "logits/rejected": -8.661413192749023,
+ "logps/chosen": -40.61771774291992,
+ "logps/rejected": -60.2691764831543,
+ "loss": 0.4042,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.08625830709934235,
+ "rewards/margins": 0.773119330406189,
+ "rewards/rejected": -0.8593775629997253,
+ "step": 109
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.8999999999999997e-07,
+ "logits/chosen": -8.671426773071289,
+ "logits/rejected": -8.512123107910156,
+ "logps/chosen": -40.18708038330078,
+ "logps/rejected": -61.94392776489258,
+ "loss": 0.426,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": 0.004405555315315723,
+ "rewards/margins": 0.7397416234016418,
+ "rewards/rejected": -0.7353360056877136,
+ "step": 110
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.89e-07,
+ "logits/chosen": -8.962957382202148,
+ "logits/rejected": -8.918905258178711,
+ "logps/chosen": -37.032073974609375,
+ "logps/rejected": -58.812171936035156,
+ "loss": 0.3985,
+ "rewards/accuracies": 0.9200000762939453,
+ "rewards/chosen": 0.1496596783399582,
+ "rewards/margins": 0.8128288984298706,
+ "rewards/rejected": -0.663169264793396,
+ "step": 111
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.88e-07,
+ "logits/chosen": -8.809492111206055,
+ "logits/rejected": -8.77208137512207,
+ "logps/chosen": -40.58615493774414,
+ "logps/rejected": -61.23163604736328,
+ "loss": 0.3824,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.0648830309510231,
+ "rewards/margins": 0.8464531898498535,
+ "rewards/rejected": -0.9113362431526184,
+ "step": 112
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.87e-07,
+ "logits/chosen": -9.049692153930664,
+ "logits/rejected": -8.90321159362793,
+ "logps/chosen": -37.798988342285156,
+ "logps/rejected": -56.5068244934082,
+ "loss": 0.4262,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.009292450733482838,
+ "rewards/margins": 0.7117542624473572,
+ "rewards/rejected": -0.7210468053817749,
+ "step": 113
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.86e-07,
+ "logits/chosen": -9.263845443725586,
+ "logits/rejected": -9.075590133666992,
+ "logps/chosen": -38.95356750488281,
+ "logps/rejected": -57.786277770996094,
+ "loss": 0.4206,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.047295115888118744,
+ "rewards/margins": 0.7150000333786011,
+ "rewards/rejected": -0.6677049398422241,
+ "step": 114
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.8499999999999997e-07,
+ "logits/chosen": -8.880139350891113,
+ "logits/rejected": -8.713644981384277,
+ "logps/chosen": -37.136085510253906,
+ "logps/rejected": -57.32819747924805,
+ "loss": 0.4066,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.023683354258537292,
+ "rewards/margins": 0.790765643119812,
+ "rewards/rejected": -0.7670822143554688,
+ "step": 115
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.84e-07,
+ "logits/chosen": -9.125120162963867,
+ "logits/rejected": -8.953531265258789,
+ "logps/chosen": -40.43872833251953,
+ "logps/rejected": -63.04737091064453,
+ "loss": 0.3631,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.0034645921550691128,
+ "rewards/margins": 0.9286357164382935,
+ "rewards/rejected": -0.9251710772514343,
+ "step": 116
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.83e-07,
+ "logits/chosen": -9.222723960876465,
+ "logits/rejected": -9.075920104980469,
+ "logps/chosen": -37.43045425415039,
+ "logps/rejected": -54.45465087890625,
+ "loss": 0.4351,
+ "rewards/accuracies": 0.9000000953674316,
+ "rewards/chosen": 0.03504835441708565,
+ "rewards/margins": 0.7053635120391846,
+ "rewards/rejected": -0.670315146446228,
+ "step": 117
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.82e-07,
+ "logits/chosen": -8.868099212646484,
+ "logits/rejected": -8.783113479614258,
+ "logps/chosen": -38.179161071777344,
+ "logps/rejected": -59.50090408325195,
+ "loss": 0.4082,
+ "rewards/accuracies": 0.8799999356269836,
+ "rewards/chosen": -0.0526953861117363,
+ "rewards/margins": 0.7886097431182861,
+ "rewards/rejected": -0.8413050770759583,
+ "step": 118
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.81e-07,
+ "logits/chosen": -8.88872241973877,
+ "logits/rejected": -8.763318061828613,
+ "logps/chosen": -42.87889862060547,
+ "logps/rejected": -62.076698303222656,
+ "loss": 0.4254,
+ "rewards/accuracies": 0.9200000762939453,
+ "rewards/chosen": -0.18682220578193665,
+ "rewards/margins": 0.7382919788360596,
+ "rewards/rejected": -0.9251142740249634,
+ "step": 119
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.7999999999999996e-07,
+ "logits/chosen": -8.93816089630127,
+ "logits/rejected": -8.88119888305664,
+ "logps/chosen": -38.15512466430664,
+ "logps/rejected": -58.470611572265625,
+ "loss": 0.4107,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": 0.04111450910568237,
+ "rewards/margins": 0.7645248770713806,
+ "rewards/rejected": -0.7234103679656982,
+ "step": 120
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.79e-07,
+ "logits/chosen": -8.973804473876953,
+ "logits/rejected": -8.872992515563965,
+ "logps/chosen": -39.51805114746094,
+ "logps/rejected": -55.87736129760742,
+ "loss": 0.4504,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": -0.1321883648633957,
+ "rewards/margins": 0.6513045430183411,
+ "rewards/rejected": -0.783492922782898,
+ "step": 121
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.7799999999999997e-07,
+ "logits/chosen": -8.803823471069336,
+ "logits/rejected": -8.616742134094238,
+ "logps/chosen": -40.673221588134766,
+ "logps/rejected": -61.703948974609375,
+ "loss": 0.3922,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.07538942992687225,
+ "rewards/margins": 0.8220105171203613,
+ "rewards/rejected": -0.89739990234375,
+ "step": 122
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.77e-07,
+ "logits/chosen": -9.03860855102539,
+ "logits/rejected": -8.868653297424316,
+ "logps/chosen": -37.68621063232422,
+ "logps/rejected": -58.667076110839844,
+ "loss": 0.3545,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.06506671011447906,
+ "rewards/margins": 0.9463762044906616,
+ "rewards/rejected": -0.8813096284866333,
+ "step": 123
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.76e-07,
+ "logits/chosen": -8.842554092407227,
+ "logits/rejected": -8.731310844421387,
+ "logps/chosen": -40.247703552246094,
+ "logps/rejected": -59.82563018798828,
+ "loss": 0.3838,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.1266755610704422,
+ "rewards/margins": 0.8307477831840515,
+ "rewards/rejected": -0.9574233889579773,
+ "step": 124
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.75e-07,
+ "logits/chosen": -9.012685775756836,
+ "logits/rejected": -8.93365478515625,
+ "logps/chosen": -39.423152923583984,
+ "logps/rejected": -66.47831726074219,
+ "loss": 0.3284,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.07251543551683426,
+ "rewards/margins": 1.0880799293518066,
+ "rewards/rejected": -1.0155645608901978,
+ "step": 125
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.74e-07,
+ "logits/chosen": -8.971677780151367,
+ "logits/rejected": -8.89514446258545,
+ "logps/chosen": -39.17797088623047,
+ "logps/rejected": -61.886260986328125,
+ "loss": 0.3578,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.00424486119300127,
+ "rewards/margins": 0.9471753239631653,
+ "rewards/rejected": -0.9514201879501343,
+ "step": 126
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.7299999999999997e-07,
+ "logits/chosen": -8.827725410461426,
+ "logits/rejected": -8.715534210205078,
+ "logps/chosen": -42.28189468383789,
+ "logps/rejected": -63.66273880004883,
+ "loss": 0.3951,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.21217112243175507,
+ "rewards/margins": 0.8158702850341797,
+ "rewards/rejected": -1.0280416011810303,
+ "step": 127
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.72e-07,
+ "logits/chosen": -8.811687469482422,
+ "logits/rejected": -8.690886497497559,
+ "logps/chosen": -39.99266815185547,
+ "logps/rejected": -60.4193229675293,
+ "loss": 0.4187,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": -0.16783051192760468,
+ "rewards/margins": 0.767844557762146,
+ "rewards/rejected": -0.9356750249862671,
+ "step": 128
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.71e-07,
+ "logits/chosen": -8.954986572265625,
+ "logits/rejected": -8.789628982543945,
+ "logps/chosen": -38.853965759277344,
+ "logps/rejected": -64.25423431396484,
+ "loss": 0.3561,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.12899711728096008,
+ "rewards/margins": 1.0408105850219727,
+ "rewards/rejected": -0.9118132591247559,
+ "step": 129
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.7e-07,
+ "logits/chosen": -8.90104866027832,
+ "logits/rejected": -8.75663948059082,
+ "logps/chosen": -38.79196548461914,
+ "logps/rejected": -59.23478317260742,
+ "loss": 0.3979,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.045634448528289795,
+ "rewards/margins": 0.8061555027961731,
+ "rewards/rejected": -0.7605210542678833,
+ "step": 130
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.69e-07,
+ "logits/chosen": -9.070549011230469,
+ "logits/rejected": -8.883820533752441,
+ "logps/chosen": -40.20603942871094,
+ "logps/rejected": -57.377357482910156,
+ "loss": 0.4072,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.19748033583164215,
+ "rewards/margins": 0.7769988775253296,
+ "rewards/rejected": -0.9744793772697449,
+ "step": 131
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.6799999999999996e-07,
+ "logits/chosen": -8.954313278198242,
+ "logits/rejected": -8.846368789672852,
+ "logps/chosen": -39.513771057128906,
+ "logps/rejected": -60.5933723449707,
+ "loss": 0.3613,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.09566650539636612,
+ "rewards/margins": 0.9306944012641907,
+ "rewards/rejected": -1.0263609886169434,
+ "step": 132
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.67e-07,
+ "logits/chosen": -8.816751480102539,
+ "logits/rejected": -8.633028030395508,
+ "logps/chosen": -39.54310989379883,
+ "logps/rejected": -58.71696090698242,
+ "loss": 0.3859,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": -0.03110024333000183,
+ "rewards/margins": 0.9095786809921265,
+ "rewards/rejected": -0.9406788945198059,
+ "step": 133
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.6599999999999997e-07,
+ "logits/chosen": -8.940773963928223,
+ "logits/rejected": -8.746129035949707,
+ "logps/chosen": -40.17658233642578,
+ "logps/rejected": -62.3738899230957,
+ "loss": 0.3541,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.16021467745304108,
+ "rewards/margins": 0.984930694103241,
+ "rewards/rejected": -1.145145297050476,
+ "step": 134
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.65e-07,
+ "logits/chosen": -8.982457160949707,
+ "logits/rejected": -8.930691719055176,
+ "logps/chosen": -41.17652893066406,
+ "logps/rejected": -63.67726516723633,
+ "loss": 0.3454,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.1305641233921051,
+ "rewards/margins": 0.996552586555481,
+ "rewards/rejected": -1.1271167993545532,
+ "step": 135
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.64e-07,
+ "logits/chosen": -9.113368034362793,
+ "logits/rejected": -9.021788597106934,
+ "logps/chosen": -40.26515579223633,
+ "logps/rejected": -61.51102828979492,
+ "loss": 0.3919,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": -0.07528214156627655,
+ "rewards/margins": 0.8362929224967957,
+ "rewards/rejected": -0.9115751385688782,
+ "step": 136
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.6299999999999995e-07,
+ "logits/chosen": -9.021696090698242,
+ "logits/rejected": -8.881583213806152,
+ "logps/chosen": -39.594642639160156,
+ "logps/rejected": -62.238922119140625,
+ "loss": 0.3868,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.08284278213977814,
+ "rewards/margins": 0.8705474138259888,
+ "rewards/rejected": -0.9533903002738953,
+ "step": 137
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.62e-07,
+ "logits/chosen": -9.039748191833496,
+ "logits/rejected": -8.940320014953613,
+ "logps/chosen": -40.2382926940918,
+ "logps/rejected": -62.49633026123047,
+ "loss": 0.3742,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.19811776280403137,
+ "rewards/margins": 0.9347583651542664,
+ "rewards/rejected": -1.1328761577606201,
+ "step": 138
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 3.6099999999999996e-07,
+ "logits/chosen": -8.731492042541504,
+ "logits/rejected": -8.617894172668457,
+ "logps/chosen": -40.858238220214844,
+ "logps/rejected": -62.641571044921875,
+ "loss": 0.3566,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.1721496880054474,
+ "rewards/margins": 0.9204368591308594,
+ "rewards/rejected": -1.0925863981246948,
+ "step": 139
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.6e-07,
+ "logits/chosen": -9.012569427490234,
+ "logits/rejected": -8.815531730651855,
+ "logps/chosen": -42.314231872558594,
+ "logps/rejected": -64.81603240966797,
+ "loss": 0.35,
+ "rewards/accuracies": 0.8799999952316284,
+ "rewards/chosen": -0.15320369601249695,
+ "rewards/margins": 1.030665397644043,
+ "rewards/rejected": -1.1838690042495728,
+ "step": 140
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.5899999999999997e-07,
+ "logits/chosen": -8.839929580688477,
+ "logits/rejected": -8.719411849975586,
+ "logps/chosen": -40.46647262573242,
+ "logps/rejected": -61.498626708984375,
+ "loss": 0.3717,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": 0.006344547960907221,
+ "rewards/margins": 0.8955361247062683,
+ "rewards/rejected": -0.8891915082931519,
+ "step": 141
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.5799999999999995e-07,
+ "logits/chosen": -8.899998664855957,
+ "logits/rejected": -8.761857986450195,
+ "logps/chosen": -39.248046875,
+ "logps/rejected": -64.58299255371094,
+ "loss": 0.3387,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.021908091381192207,
+ "rewards/margins": 1.0876431465148926,
+ "rewards/rejected": -1.109551191329956,
+ "step": 142
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.57e-07,
+ "logits/chosen": -8.721105575561523,
+ "logits/rejected": -8.539088249206543,
+ "logps/chosen": -38.27935028076172,
+ "logps/rejected": -60.71025848388672,
+ "loss": 0.3689,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.12697605788707733,
+ "rewards/margins": 0.9274777173995972,
+ "rewards/rejected": -1.054453730583191,
+ "step": 143
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.5599999999999996e-07,
+ "logits/chosen": -8.694182395935059,
+ "logits/rejected": -8.604917526245117,
+ "logps/chosen": -41.218135833740234,
+ "logps/rejected": -63.59687042236328,
+ "loss": 0.3785,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": -0.21460440754890442,
+ "rewards/margins": 0.9406296610832214,
+ "rewards/rejected": -1.1552340984344482,
+ "step": 144
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.55e-07,
+ "logits/chosen": -9.010725021362305,
+ "logits/rejected": -8.792009353637695,
+ "logps/chosen": -44.04743576049805,
+ "logps/rejected": -63.77995681762695,
+ "loss": 0.3911,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": -0.310243159532547,
+ "rewards/margins": 0.9037361145019531,
+ "rewards/rejected": -1.2139792442321777,
+ "step": 145
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.5399999999999997e-07,
+ "logits/chosen": -8.800936698913574,
+ "logits/rejected": -8.641168594360352,
+ "logps/chosen": -40.73430252075195,
+ "logps/rejected": -59.47686004638672,
+ "loss": 0.4198,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": -0.12597987055778503,
+ "rewards/margins": 0.8163179159164429,
+ "rewards/rejected": -0.9422977566719055,
+ "step": 146
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.5299999999999994e-07,
+ "logits/chosen": -8.81165885925293,
+ "logits/rejected": -8.621085166931152,
+ "logps/chosen": -41.21516418457031,
+ "logps/rejected": -65.77284240722656,
+ "loss": 0.3477,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": 0.02493276819586754,
+ "rewards/margins": 1.0518163442611694,
+ "rewards/rejected": -1.026883602142334,
+ "step": 147
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.52e-07,
+ "logits/chosen": -9.05104923248291,
+ "logits/rejected": -8.940778732299805,
+ "logps/chosen": -39.09772872924805,
+ "logps/rejected": -63.58698272705078,
+ "loss": 0.3292,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.06748518347740173,
+ "rewards/margins": 1.0447824001312256,
+ "rewards/rejected": -1.1122677326202393,
+ "step": 148
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.5099999999999995e-07,
+ "logits/chosen": -8.909757614135742,
+ "logits/rejected": -8.688767433166504,
+ "logps/chosen": -40.91362762451172,
+ "logps/rejected": -60.848472595214844,
+ "loss": 0.3958,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.20450308918952942,
+ "rewards/margins": 0.8363680839538574,
+ "rewards/rejected": -1.0408711433410645,
+ "step": 149
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.5e-07,
+ "logits/chosen": -9.037310600280762,
+ "logits/rejected": -8.962252616882324,
+ "logps/chosen": -40.28340530395508,
+ "logps/rejected": -63.94322967529297,
+ "loss": 0.3284,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.07565836608409882,
+ "rewards/margins": 1.125253438949585,
+ "rewards/rejected": -1.2009117603302002,
+ "step": 150
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.4899999999999996e-07,
+ "logits/chosen": -8.9218168258667,
+ "logits/rejected": -8.84290599822998,
+ "logps/chosen": -39.55026626586914,
+ "logps/rejected": -61.443641662597656,
+ "loss": 0.3476,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.02470378205180168,
+ "rewards/margins": 0.9961774945259094,
+ "rewards/rejected": -1.020881175994873,
+ "step": 151
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.4799999999999994e-07,
+ "logits/chosen": -8.936076164245605,
+ "logits/rejected": -8.845673561096191,
+ "logps/chosen": -38.409576416015625,
+ "logps/rejected": -61.9953727722168,
+ "loss": 0.3381,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.11342154443264008,
+ "rewards/margins": 0.9978846311569214,
+ "rewards/rejected": -1.111306071281433,
+ "step": 152
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.4699999999999997e-07,
+ "logits/chosen": -8.993753433227539,
+ "logits/rejected": -8.88867473602295,
+ "logps/chosen": -41.41925811767578,
+ "logps/rejected": -64.46873474121094,
+ "loss": 0.3345,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.1233917623758316,
+ "rewards/margins": 1.042170763015747,
+ "rewards/rejected": -1.165562629699707,
+ "step": 153
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.4599999999999995e-07,
+ "logits/chosen": -8.77087688446045,
+ "logits/rejected": -8.657720565795898,
+ "logps/chosen": -40.0390510559082,
+ "logps/rejected": -60.32958984375,
+ "loss": 0.3967,
+ "rewards/accuracies": 0.9200000762939453,
+ "rewards/chosen": -0.19076868891716003,
+ "rewards/margins": 0.9012584686279297,
+ "rewards/rejected": -1.0920270681381226,
+ "step": 154
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.45e-07,
+ "logits/chosen": -8.897802352905273,
+ "logits/rejected": -8.744150161743164,
+ "logps/chosen": -40.92519760131836,
+ "logps/rejected": -65.74849700927734,
+ "loss": 0.3411,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.15756723284721375,
+ "rewards/margins": 1.0892850160598755,
+ "rewards/rejected": -1.2468522787094116,
+ "step": 155
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.4399999999999996e-07,
+ "logits/chosen": -8.871163368225098,
+ "logits/rejected": -8.717877388000488,
+ "logps/chosen": -43.01222229003906,
+ "logps/rejected": -70.48704528808594,
+ "loss": 0.3188,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.3022673428058624,
+ "rewards/margins": 1.1483441591262817,
+ "rewards/rejected": -1.4506114721298218,
+ "step": 156
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.43e-07,
+ "logits/chosen": -8.949113845825195,
+ "logits/rejected": -8.905904769897461,
+ "logps/chosen": -39.99720001220703,
+ "logps/rejected": -62.5020866394043,
+ "loss": 0.3482,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.06127683073282242,
+ "rewards/margins": 1.05168616771698,
+ "rewards/rejected": -1.1129629611968994,
+ "step": 157
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.42e-07,
+ "logits/chosen": -8.867097854614258,
+ "logits/rejected": -8.736467361450195,
+ "logps/chosen": -41.989192962646484,
+ "logps/rejected": -61.25897216796875,
+ "loss": 0.3882,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.35165315866470337,
+ "rewards/margins": 0.877507209777832,
+ "rewards/rejected": -1.2291605472564697,
+ "step": 158
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.41e-07,
+ "logits/chosen": -8.919662475585938,
+ "logits/rejected": -8.849380493164062,
+ "logps/chosen": -42.1957893371582,
+ "logps/rejected": -66.24508666992188,
+ "loss": 0.3071,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.30500465631484985,
+ "rewards/margins": 1.1364095211029053,
+ "rewards/rejected": -1.4414143562316895,
+ "step": 159
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.4000000000000003e-07,
+ "logits/chosen": -8.94130802154541,
+ "logits/rejected": -8.818609237670898,
+ "logps/chosen": -40.54155731201172,
+ "logps/rejected": -59.704978942871094,
+ "loss": 0.3546,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.13368672132492065,
+ "rewards/margins": 0.9669790267944336,
+ "rewards/rejected": -1.100665807723999,
+ "step": 160
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.39e-07,
+ "logits/chosen": -8.80152416229248,
+ "logits/rejected": -8.622817039489746,
+ "logps/chosen": -41.8613166809082,
+ "logps/rejected": -63.264976501464844,
+ "loss": 0.3418,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": -0.29719024896621704,
+ "rewards/margins": 1.0231988430023193,
+ "rewards/rejected": -1.3203891515731812,
+ "step": 161
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.38e-07,
+ "logits/chosen": -8.761874198913574,
+ "logits/rejected": -8.586349487304688,
+ "logps/chosen": -41.71952438354492,
+ "logps/rejected": -68.82637023925781,
+ "loss": 0.2946,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": -0.21405473351478577,
+ "rewards/margins": 1.3066158294677734,
+ "rewards/rejected": -1.5206706523895264,
+ "step": 162
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.37e-07,
+ "logits/chosen": -8.828725814819336,
+ "logits/rejected": -8.663387298583984,
+ "logps/chosen": -41.91630935668945,
+ "logps/rejected": -65.08641052246094,
+ "loss": 0.341,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.24001702666282654,
+ "rewards/margins": 1.04341721534729,
+ "rewards/rejected": -1.2834341526031494,
+ "step": 163
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.36e-07,
+ "logits/chosen": -8.988265991210938,
+ "logits/rejected": -8.813672065734863,
+ "logps/chosen": -41.581756591796875,
+ "logps/rejected": -64.73905944824219,
+ "loss": 0.3243,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.14427520334720612,
+ "rewards/margins": 1.099812388420105,
+ "rewards/rejected": -1.2440874576568604,
+ "step": 164
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.35e-07,
+ "logits/chosen": -9.024198532104492,
+ "logits/rejected": -8.907011985778809,
+ "logps/chosen": -40.02942657470703,
+ "logps/rejected": -65.40174865722656,
+ "loss": 0.3156,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.17336931824684143,
+ "rewards/margins": 1.1412004232406616,
+ "rewards/rejected": -1.3145698308944702,
+ "step": 165
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.34e-07,
+ "logits/chosen": -8.859591484069824,
+ "logits/rejected": -8.78995132446289,
+ "logps/chosen": -37.986610412597656,
+ "logps/rejected": -62.487632751464844,
+ "loss": 0.3413,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.03164323419332504,
+ "rewards/margins": 1.038362741470337,
+ "rewards/rejected": -1.0700057744979858,
+ "step": 166
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.33e-07,
+ "logits/chosen": -8.95915412902832,
+ "logits/rejected": -8.862491607666016,
+ "logps/chosen": -39.90531539916992,
+ "logps/rejected": -68.55361938476562,
+ "loss": 0.2611,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.09153137356042862,
+ "rewards/margins": 1.3365256786346436,
+ "rewards/rejected": -1.4280569553375244,
+ "step": 167
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.32e-07,
+ "logits/chosen": -8.84840202331543,
+ "logits/rejected": -8.720226287841797,
+ "logps/chosen": -39.774906158447266,
+ "logps/rejected": -63.923797607421875,
+ "loss": 0.2924,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.15840193629264832,
+ "rewards/margins": 1.228715181350708,
+ "rewards/rejected": -1.3871170282363892,
+ "step": 168
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.31e-07,
+ "logits/chosen": -8.834859848022461,
+ "logits/rejected": -8.646490097045898,
+ "logps/chosen": -40.75969696044922,
+ "logps/rejected": -68.73313903808594,
+ "loss": 0.289,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.2145496904850006,
+ "rewards/margins": 1.282721757888794,
+ "rewards/rejected": -1.4972714185714722,
+ "step": 169
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.3e-07,
+ "logits/chosen": -9.14974594116211,
+ "logits/rejected": -9.094747543334961,
+ "logps/chosen": -38.71310806274414,
+ "logps/rejected": -64.7855453491211,
+ "loss": 0.3276,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.10842056572437286,
+ "rewards/margins": 1.1217743158340454,
+ "rewards/rejected": -1.2301948070526123,
+ "step": 170
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.29e-07,
+ "logits/chosen": -8.805329322814941,
+ "logits/rejected": -8.60169506072998,
+ "logps/chosen": -44.541385650634766,
+ "logps/rejected": -67.90604400634766,
+ "loss": 0.3276,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.33684179186820984,
+ "rewards/margins": 1.0830628871917725,
+ "rewards/rejected": -1.4199049472808838,
+ "step": 171
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.28e-07,
+ "logits/chosen": -9.00810432434082,
+ "logits/rejected": -8.874260902404785,
+ "logps/chosen": -42.70119857788086,
+ "logps/rejected": -66.67572784423828,
+ "loss": 0.3219,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.28449511528015137,
+ "rewards/margins": 1.1121952533721924,
+ "rewards/rejected": -1.3966902494430542,
+ "step": 172
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.27e-07,
+ "logits/chosen": -9.011045455932617,
+ "logits/rejected": -8.86266803741455,
+ "logps/chosen": -40.55815887451172,
+ "logps/rejected": -65.76628875732422,
+ "loss": 0.3108,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.15313352644443512,
+ "rewards/margins": 1.2283756732940674,
+ "rewards/rejected": -1.3815090656280518,
+ "step": 173
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.26e-07,
+ "logits/chosen": -8.745307922363281,
+ "logits/rejected": -8.638575553894043,
+ "logps/chosen": -40.17422866821289,
+ "logps/rejected": -61.25606155395508,
+ "loss": 0.3363,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.26441556215286255,
+ "rewards/margins": 1.0637372732162476,
+ "rewards/rejected": -1.3281527757644653,
+ "step": 174
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.25e-07,
+ "logits/chosen": -8.917795181274414,
+ "logits/rejected": -8.75191593170166,
+ "logps/chosen": -40.356971740722656,
+ "logps/rejected": -65.25245666503906,
+ "loss": 0.3272,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.21726162731647491,
+ "rewards/margins": 1.1349842548370361,
+ "rewards/rejected": -1.3522460460662842,
+ "step": 175
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.24e-07,
+ "logits/chosen": -9.057024002075195,
+ "logits/rejected": -9.007242202758789,
+ "logps/chosen": -41.4436149597168,
+ "logps/rejected": -59.33845138549805,
+ "loss": 0.3784,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.20186138153076172,
+ "rewards/margins": 0.8808485865592957,
+ "rewards/rejected": -1.082709789276123,
+ "step": 176
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.23e-07,
+ "logits/chosen": -9.150751113891602,
+ "logits/rejected": -8.990005493164062,
+ "logps/chosen": -41.529197692871094,
+ "logps/rejected": -64.6326904296875,
+ "loss": 0.3474,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.1397334188222885,
+ "rewards/margins": 1.0998492240905762,
+ "rewards/rejected": -1.2395826578140259,
+ "step": 177
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.22e-07,
+ "logits/chosen": -8.985628128051758,
+ "logits/rejected": -8.924480438232422,
+ "logps/chosen": -39.99455261230469,
+ "logps/rejected": -62.32940673828125,
+ "loss": 0.3442,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.21177975833415985,
+ "rewards/margins": 1.0999975204467773,
+ "rewards/rejected": -1.311777114868164,
+ "step": 178
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.21e-07,
+ "logits/chosen": -9.033208847045898,
+ "logits/rejected": -8.933832168579102,
+ "logps/chosen": -42.78688049316406,
+ "logps/rejected": -65.41902160644531,
+ "loss": 0.3323,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.42959874868392944,
+ "rewards/margins": 1.0489178895950317,
+ "rewards/rejected": -1.478516697883606,
+ "step": 179
+ },
+ {
+ "epoch": 0.04,
+ "learning_rate": 3.2e-07,
+ "logits/chosen": -8.921372413635254,
+ "logits/rejected": -8.761728286743164,
+ "logps/chosen": -41.17156982421875,
+ "logps/rejected": -68.08998107910156,
+ "loss": 0.2719,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.20495982468128204,
+ "rewards/margins": 1.340659737586975,
+ "rewards/rejected": -1.5456194877624512,
+ "step": 180
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.19e-07,
+ "logits/chosen": -8.87382698059082,
+ "logits/rejected": -8.771355628967285,
+ "logps/chosen": -41.562965393066406,
+ "logps/rejected": -64.83119201660156,
+ "loss": 0.2963,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.17605173587799072,
+ "rewards/margins": 1.220849633216858,
+ "rewards/rejected": -1.3969013690948486,
+ "step": 181
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.18e-07,
+ "logits/chosen": -8.82695198059082,
+ "logits/rejected": -8.737783432006836,
+ "logps/chosen": -39.56995391845703,
+ "logps/rejected": -62.3057746887207,
+ "loss": 0.3402,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.20497837662696838,
+ "rewards/margins": 1.1743295192718506,
+ "rewards/rejected": -1.3793079853057861,
+ "step": 182
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.17e-07,
+ "logits/chosen": -9.018879890441895,
+ "logits/rejected": -8.862287521362305,
+ "logps/chosen": -42.85093688964844,
+ "logps/rejected": -72.0836181640625,
+ "loss": 0.3212,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": -0.2954815924167633,
+ "rewards/margins": 1.268796682357788,
+ "rewards/rejected": -1.5642781257629395,
+ "step": 183
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.1599999999999997e-07,
+ "logits/chosen": -8.963022232055664,
+ "logits/rejected": -8.82475471496582,
+ "logps/chosen": -38.94888687133789,
+ "logps/rejected": -68.8856201171875,
+ "loss": 0.2643,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.0330241322517395,
+ "rewards/margins": 1.4156899452209473,
+ "rewards/rejected": -1.4487141370773315,
+ "step": 184
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.15e-07,
+ "logits/chosen": -9.073816299438477,
+ "logits/rejected": -8.94584846496582,
+ "logps/chosen": -44.071678161621094,
+ "logps/rejected": -70.720947265625,
+ "loss": 0.2948,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.4320756494998932,
+ "rewards/margins": 1.216040849685669,
+ "rewards/rejected": -1.6481164693832397,
+ "step": 185
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.14e-07,
+ "logits/chosen": -9.04081916809082,
+ "logits/rejected": -8.882933616638184,
+ "logps/chosen": -39.92203140258789,
+ "logps/rejected": -66.04690551757812,
+ "loss": 0.3199,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": -0.12839896976947784,
+ "rewards/margins": 1.1672937870025635,
+ "rewards/rejected": -1.2956926822662354,
+ "step": 186
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.13e-07,
+ "logits/chosen": -8.965912818908691,
+ "logits/rejected": -8.788244247436523,
+ "logps/chosen": -40.17821502685547,
+ "logps/rejected": -65.7179946899414,
+ "loss": 0.2863,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.1306789070367813,
+ "rewards/margins": 1.2615444660186768,
+ "rewards/rejected": -1.3922232389450073,
+ "step": 187
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.12e-07,
+ "logits/chosen": -8.968905448913574,
+ "logits/rejected": -8.836429595947266,
+ "logps/chosen": -40.19866180419922,
+ "logps/rejected": -65.63804626464844,
+ "loss": 0.2997,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.29229894280433655,
+ "rewards/margins": 1.2268226146697998,
+ "rewards/rejected": -1.5191216468811035,
+ "step": 188
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.1099999999999997e-07,
+ "logits/chosen": -8.949649810791016,
+ "logits/rejected": -8.85619068145752,
+ "logps/chosen": -41.16625213623047,
+ "logps/rejected": -67.67311096191406,
+ "loss": 0.3123,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.18751493096351624,
+ "rewards/margins": 1.2116743326187134,
+ "rewards/rejected": -1.3991893529891968,
+ "step": 189
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.1e-07,
+ "logits/chosen": -9.069915771484375,
+ "logits/rejected": -8.9552640914917,
+ "logps/chosen": -44.67123031616211,
+ "logps/rejected": -69.6094970703125,
+ "loss": 0.3179,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.36990422010421753,
+ "rewards/margins": 1.1963659524917603,
+ "rewards/rejected": -1.566270112991333,
+ "step": 190
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.09e-07,
+ "logits/chosen": -8.953503608703613,
+ "logits/rejected": -8.81824779510498,
+ "logps/chosen": -43.4686279296875,
+ "logps/rejected": -65.56145477294922,
+ "loss": 0.2937,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": -0.3817153573036194,
+ "rewards/margins": 1.2739273309707642,
+ "rewards/rejected": -1.6556425094604492,
+ "step": 191
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.08e-07,
+ "logits/chosen": -8.856042861938477,
+ "logits/rejected": -8.653620719909668,
+ "logps/chosen": -40.48377990722656,
+ "logps/rejected": -65.58942413330078,
+ "loss": 0.3144,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": -0.21695208549499512,
+ "rewards/margins": 1.223385214805603,
+ "rewards/rejected": -1.4403371810913086,
+ "step": 192
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.07e-07,
+ "logits/chosen": -8.951238632202148,
+ "logits/rejected": -8.893243789672852,
+ "logps/chosen": -38.89543914794922,
+ "logps/rejected": -65.236572265625,
+ "loss": 0.2907,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.14968658983707428,
+ "rewards/margins": 1.2427290678024292,
+ "rewards/rejected": -1.3924156427383423,
+ "step": 193
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.0599999999999996e-07,
+ "logits/chosen": -9.175559043884277,
+ "logits/rejected": -9.138572692871094,
+ "logps/chosen": -41.56648635864258,
+ "logps/rejected": -62.5894889831543,
+ "loss": 0.3235,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.3439091444015503,
+ "rewards/margins": 1.1237269639968872,
+ "rewards/rejected": -1.467635989189148,
+ "step": 194
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.05e-07,
+ "logits/chosen": -9.081843376159668,
+ "logits/rejected": -8.967177391052246,
+ "logps/chosen": -48.0760383605957,
+ "logps/rejected": -72.55915069580078,
+ "loss": 0.3347,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": -0.599833607673645,
+ "rewards/margins": 1.1599971055984497,
+ "rewards/rejected": -1.7598304748535156,
+ "step": 195
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.0399999999999997e-07,
+ "logits/chosen": -8.88502025604248,
+ "logits/rejected": -8.776427268981934,
+ "logps/chosen": -38.89107894897461,
+ "logps/rejected": -62.98077392578125,
+ "loss": 0.3103,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.08193059265613556,
+ "rewards/margins": 1.204545021057129,
+ "rewards/rejected": -1.2864755392074585,
+ "step": 196
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.03e-07,
+ "logits/chosen": -8.958200454711914,
+ "logits/rejected": -8.758167266845703,
+ "logps/chosen": -42.52730941772461,
+ "logps/rejected": -68.24816131591797,
+ "loss": 0.2818,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.2780629098415375,
+ "rewards/margins": 1.3868484497070312,
+ "rewards/rejected": -1.6649112701416016,
+ "step": 197
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.02e-07,
+ "logits/chosen": -9.064251899719238,
+ "logits/rejected": -9.055684089660645,
+ "logps/chosen": -40.26599884033203,
+ "logps/rejected": -68.03285217285156,
+ "loss": 0.2815,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.21159246563911438,
+ "rewards/margins": 1.3151299953460693,
+ "rewards/rejected": -1.5267223119735718,
+ "step": 198
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3.0099999999999996e-07,
+ "logits/chosen": -8.8607177734375,
+ "logits/rejected": -8.776520729064941,
+ "logps/chosen": -44.09022521972656,
+ "logps/rejected": -67.35237884521484,
+ "loss": 0.2963,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.3733174204826355,
+ "rewards/margins": 1.302872896194458,
+ "rewards/rejected": -1.6761903762817383,
+ "step": 199
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 3e-07,
+ "logits/chosen": -9.326008796691895,
+ "logits/rejected": -9.195959091186523,
+ "logps/chosen": -42.66015625,
+ "logps/rejected": -69.38017272949219,
+ "loss": 0.29,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.3082619607448578,
+ "rewards/margins": 1.301923394203186,
+ "rewards/rejected": -1.6101853847503662,
+ "step": 200
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.9899999999999996e-07,
+ "logits/chosen": -8.926416397094727,
+ "logits/rejected": -8.793083190917969,
+ "logps/chosen": -42.86346435546875,
+ "logps/rejected": -69.28981018066406,
+ "loss": 0.2735,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.30893146991729736,
+ "rewards/margins": 1.3181147575378418,
+ "rewards/rejected": -1.6270462274551392,
+ "step": 201
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.98e-07,
+ "logits/chosen": -9.045461654663086,
+ "logits/rejected": -8.975128173828125,
+ "logps/chosen": -41.371551513671875,
+ "logps/rejected": -68.18250274658203,
+ "loss": 0.3091,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.3913578391075134,
+ "rewards/margins": 1.3062489032745361,
+ "rewards/rejected": -1.6976070404052734,
+ "step": 202
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.9699999999999997e-07,
+ "logits/chosen": -8.808774948120117,
+ "logits/rejected": -8.637701034545898,
+ "logps/chosen": -41.52623748779297,
+ "logps/rejected": -65.2273941040039,
+ "loss": 0.3081,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": -0.23487603664398193,
+ "rewards/margins": 1.2760727405548096,
+ "rewards/rejected": -1.510948896408081,
+ "step": 203
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.9599999999999995e-07,
+ "logits/chosen": -8.662897109985352,
+ "logits/rejected": -8.514671325683594,
+ "logps/chosen": -40.43410873413086,
+ "logps/rejected": -63.4935188293457,
+ "loss": 0.3236,
+ "rewards/accuracies": 0.9199999570846558,
+ "rewards/chosen": -0.180062934756279,
+ "rewards/margins": 1.2099792957305908,
+ "rewards/rejected": -1.3900420665740967,
+ "step": 204
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.95e-07,
+ "logits/chosen": -9.048240661621094,
+ "logits/rejected": -8.950238227844238,
+ "logps/chosen": -38.42674255371094,
+ "logps/rejected": -70.53987121582031,
+ "loss": 0.2272,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.2043372094631195,
+ "rewards/margins": 1.5515215396881104,
+ "rewards/rejected": -1.7558587789535522,
+ "step": 205
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.9399999999999996e-07,
+ "logits/chosen": -8.96391487121582,
+ "logits/rejected": -8.855020523071289,
+ "logps/chosen": -41.72402572631836,
+ "logps/rejected": -68.75076293945312,
+ "loss": 0.2608,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.2568601965904236,
+ "rewards/margins": 1.3788115978240967,
+ "rewards/rejected": -1.6356719732284546,
+ "step": 206
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.93e-07,
+ "logits/chosen": -8.95437240600586,
+ "logits/rejected": -8.87646484375,
+ "logps/chosen": -43.06665802001953,
+ "logps/rejected": -68.59609985351562,
+ "loss": 0.2886,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.3358538746833801,
+ "rewards/margins": 1.31618332862854,
+ "rewards/rejected": -1.652037262916565,
+ "step": 207
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.9199999999999997e-07,
+ "logits/chosen": -8.841405868530273,
+ "logits/rejected": -8.739511489868164,
+ "logps/chosen": -42.112754821777344,
+ "logps/rejected": -64.5459976196289,
+ "loss": 0.322,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.3095633387565613,
+ "rewards/margins": 1.1418513059616089,
+ "rewards/rejected": -1.4514145851135254,
+ "step": 208
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.9099999999999995e-07,
+ "logits/chosen": -9.11125373840332,
+ "logits/rejected": -9.027490615844727,
+ "logps/chosen": -41.227901458740234,
+ "logps/rejected": -76.45286560058594,
+ "loss": 0.236,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.18211723864078522,
+ "rewards/margins": 1.6697490215301514,
+ "rewards/rejected": -1.8518664836883545,
+ "step": 209
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.9e-07,
+ "logits/chosen": -8.93098258972168,
+ "logits/rejected": -8.775957107543945,
+ "logps/chosen": -42.168487548828125,
+ "logps/rejected": -69.60896301269531,
+ "loss": 0.2531,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.1441860944032669,
+ "rewards/margins": 1.5427669286727905,
+ "rewards/rejected": -1.6869529485702515,
+ "step": 210
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.8899999999999995e-07,
+ "logits/chosen": -9.149178504943848,
+ "logits/rejected": -8.981005668640137,
+ "logps/chosen": -40.37749481201172,
+ "logps/rejected": -66.62581634521484,
+ "loss": 0.276,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.16469226777553558,
+ "rewards/margins": 1.4120306968688965,
+ "rewards/rejected": -1.5767228603363037,
+ "step": 211
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.88e-07,
+ "logits/chosen": -8.969169616699219,
+ "logits/rejected": -8.803339004516602,
+ "logps/chosen": -42.27989959716797,
+ "logps/rejected": -64.9478759765625,
+ "loss": 0.2654,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.25626304745674133,
+ "rewards/margins": 1.4114668369293213,
+ "rewards/rejected": -1.6677299737930298,
+ "step": 212
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.8699999999999996e-07,
+ "logits/chosen": -8.94532585144043,
+ "logits/rejected": -8.8807373046875,
+ "logps/chosen": -44.27782440185547,
+ "logps/rejected": -70.2257308959961,
+ "loss": 0.2728,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.2951999306678772,
+ "rewards/margins": 1.3818528652191162,
+ "rewards/rejected": -1.6770527362823486,
+ "step": 213
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.8599999999999994e-07,
+ "logits/chosen": -9.086102485656738,
+ "logits/rejected": -9.030981063842773,
+ "logps/chosen": -41.30907440185547,
+ "logps/rejected": -68.33830261230469,
+ "loss": 0.2695,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.1512337625026703,
+ "rewards/margins": 1.4189387559890747,
+ "rewards/rejected": -1.570172667503357,
+ "step": 214
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.8499999999999997e-07,
+ "logits/chosen": -8.850401878356934,
+ "logits/rejected": -8.755792617797852,
+ "logps/chosen": -43.39167022705078,
+ "logps/rejected": -70.92607879638672,
+ "loss": 0.2675,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.1908183991909027,
+ "rewards/margins": 1.4917595386505127,
+ "rewards/rejected": -1.6825778484344482,
+ "step": 215
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.8399999999999995e-07,
+ "logits/chosen": -9.259855270385742,
+ "logits/rejected": -9.203694343566895,
+ "logps/chosen": -41.756351470947266,
+ "logps/rejected": -65.8165512084961,
+ "loss": 0.2729,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.2136426419019699,
+ "rewards/margins": 1.3828479051589966,
+ "rewards/rejected": -1.5964906215667725,
+ "step": 216
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.83e-07,
+ "logits/chosen": -8.922069549560547,
+ "logits/rejected": -8.816206932067871,
+ "logps/chosen": -43.09095001220703,
+ "logps/rejected": -70.11869049072266,
+ "loss": 0.2481,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.40523457527160645,
+ "rewards/margins": 1.536644697189331,
+ "rewards/rejected": -1.9418792724609375,
+ "step": 217
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.8199999999999996e-07,
+ "logits/chosen": -9.18004322052002,
+ "logits/rejected": -9.122257232666016,
+ "logps/chosen": -40.0262451171875,
+ "logps/rejected": -73.48004150390625,
+ "loss": 0.2586,
+ "rewards/accuracies": 0.8999999761581421,
+ "rewards/chosen": -0.18847300112247467,
+ "rewards/margins": 1.6103322505950928,
+ "rewards/rejected": -1.7988052368164062,
+ "step": 218
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.8100000000000004e-07,
+ "logits/chosen": -8.809640884399414,
+ "logits/rejected": -8.642433166503906,
+ "logps/chosen": -46.24394989013672,
+ "logps/rejected": -66.08168029785156,
+ "loss": 0.3238,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.5799109935760498,
+ "rewards/margins": 1.1493396759033203,
+ "rewards/rejected": -1.7292506694793701,
+ "step": 219
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.8e-07,
+ "logits/chosen": -9.087625503540039,
+ "logits/rejected": -8.999017715454102,
+ "logps/chosen": -44.835086822509766,
+ "logps/rejected": -69.23603057861328,
+ "loss": 0.3145,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.4856896996498108,
+ "rewards/margins": 1.271304726600647,
+ "rewards/rejected": -1.7569944858551025,
+ "step": 220
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.79e-07,
+ "logits/chosen": -8.82785415649414,
+ "logits/rejected": -8.75294017791748,
+ "logps/chosen": -44.45280838012695,
+ "logps/rejected": -73.3116226196289,
+ "loss": 0.237,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.446541965007782,
+ "rewards/margins": 1.5875802040100098,
+ "rewards/rejected": -2.0341219902038574,
+ "step": 221
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.7800000000000003e-07,
+ "logits/chosen": -8.659734725952148,
+ "logits/rejected": -8.634805679321289,
+ "logps/chosen": -42.231929779052734,
+ "logps/rejected": -70.79386901855469,
+ "loss": 0.2399,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.25388625264167786,
+ "rewards/margins": 1.5821778774261475,
+ "rewards/rejected": -1.836064100265503,
+ "step": 222
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.77e-07,
+ "logits/chosen": -9.120695114135742,
+ "logits/rejected": -9.018156051635742,
+ "logps/chosen": -41.204193115234375,
+ "logps/rejected": -70.23939514160156,
+ "loss": 0.2549,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.20924563705921173,
+ "rewards/margins": 1.5008659362792969,
+ "rewards/rejected": -1.7101116180419922,
+ "step": 223
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.7600000000000004e-07,
+ "logits/chosen": -8.943544387817383,
+ "logits/rejected": -8.886691093444824,
+ "logps/chosen": -44.56146240234375,
+ "logps/rejected": -69.162841796875,
+ "loss": 0.2689,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.40339189767837524,
+ "rewards/margins": 1.4689480066299438,
+ "rewards/rejected": -1.8723399639129639,
+ "step": 224
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.75e-07,
+ "logits/chosen": -8.663228988647461,
+ "logits/rejected": -8.65444278717041,
+ "logps/chosen": -38.287235260009766,
+ "logps/rejected": -70.95658874511719,
+ "loss": 0.2141,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": 0.003157383296638727,
+ "rewards/margins": 1.7416331768035889,
+ "rewards/rejected": -1.7384757995605469,
+ "step": 225
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.74e-07,
+ "logits/chosen": -9.011740684509277,
+ "logits/rejected": -8.81816577911377,
+ "logps/chosen": -41.13055419921875,
+ "logps/rejected": -70.431884765625,
+ "loss": 0.253,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.17377158999443054,
+ "rewards/margins": 1.6204302310943604,
+ "rewards/rejected": -1.7942014932632446,
+ "step": 226
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.73e-07,
+ "logits/chosen": -8.906305313110352,
+ "logits/rejected": -8.782539367675781,
+ "logps/chosen": -41.5731086730957,
+ "logps/rejected": -68.40547180175781,
+ "loss": 0.3045,
+ "rewards/accuracies": 0.9199999570846558,
+ "rewards/chosen": -0.27805295586586,
+ "rewards/margins": 1.4335918426513672,
+ "rewards/rejected": -1.7116448879241943,
+ "step": 227
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.72e-07,
+ "logits/chosen": -8.857791900634766,
+ "logits/rejected": -8.7904052734375,
+ "logps/chosen": -44.0427131652832,
+ "logps/rejected": -67.93760681152344,
+ "loss": 0.3059,
+ "rewards/accuracies": 0.9399999380111694,
+ "rewards/chosen": -0.4859923720359802,
+ "rewards/margins": 1.3006880283355713,
+ "rewards/rejected": -1.7866804599761963,
+ "step": 228
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.7100000000000003e-07,
+ "logits/chosen": -8.68824577331543,
+ "logits/rejected": -8.578463554382324,
+ "logps/chosen": -46.805423736572266,
+ "logps/rejected": -71.96556854248047,
+ "loss": 0.2496,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.6673830151557922,
+ "rewards/margins": 1.489000678062439,
+ "rewards/rejected": -2.156383752822876,
+ "step": 229
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.7e-07,
+ "logits/chosen": -8.906216621398926,
+ "logits/rejected": -8.847137451171875,
+ "logps/chosen": -45.05828857421875,
+ "logps/rejected": -71.52420043945312,
+ "loss": 0.2385,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.435865581035614,
+ "rewards/margins": 1.5502560138702393,
+ "rewards/rejected": -1.9861215353012085,
+ "step": 230
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.69e-07,
+ "logits/chosen": -8.947155952453613,
+ "logits/rejected": -8.829044342041016,
+ "logps/chosen": -41.90214920043945,
+ "logps/rejected": -73.6261978149414,
+ "loss": 0.2404,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.3273487687110901,
+ "rewards/margins": 1.621421456336975,
+ "rewards/rejected": -1.9487701654434204,
+ "step": 231
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.68e-07,
+ "logits/chosen": -8.829843521118164,
+ "logits/rejected": -8.652304649353027,
+ "logps/chosen": -41.917579650878906,
+ "logps/rejected": -71.42601776123047,
+ "loss": 0.2304,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.40560126304626465,
+ "rewards/margins": 1.6656455993652344,
+ "rewards/rejected": -2.07124662399292,
+ "step": 232
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.67e-07,
+ "logits/chosen": -9.078448295593262,
+ "logits/rejected": -9.015963554382324,
+ "logps/chosen": -48.53789138793945,
+ "logps/rejected": -72.73210144042969,
+ "loss": 0.2697,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.6256522536277771,
+ "rewards/margins": 1.4912573099136353,
+ "rewards/rejected": -2.1169095039367676,
+ "step": 233
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.66e-07,
+ "logits/chosen": -9.04401683807373,
+ "logits/rejected": -8.977219581604004,
+ "logps/chosen": -44.62416458129883,
+ "logps/rejected": -71.57743835449219,
+ "loss": 0.2474,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.5261238813400269,
+ "rewards/margins": 1.5465754270553589,
+ "rewards/rejected": -2.072699546813965,
+ "step": 234
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.65e-07,
+ "logits/chosen": -8.7246732711792,
+ "logits/rejected": -8.663068771362305,
+ "logps/chosen": -42.98565673828125,
+ "logps/rejected": -69.6664047241211,
+ "loss": 0.2359,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.28909361362457275,
+ "rewards/margins": 1.6054264307022095,
+ "rewards/rejected": -1.8945200443267822,
+ "step": 235
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.64e-07,
+ "logits/chosen": -9.035811424255371,
+ "logits/rejected": -8.968504905700684,
+ "logps/chosen": -42.082191467285156,
+ "logps/rejected": -69.3706283569336,
+ "loss": 0.2806,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.3632660508155823,
+ "rewards/margins": 1.3896889686584473,
+ "rewards/rejected": -1.7529550790786743,
+ "step": 236
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.63e-07,
+ "logits/chosen": -8.899124145507812,
+ "logits/rejected": -8.845288276672363,
+ "logps/chosen": -42.73535919189453,
+ "logps/rejected": -73.52890014648438,
+ "loss": 0.2707,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.3918378949165344,
+ "rewards/margins": 1.5124340057373047,
+ "rewards/rejected": -1.9042720794677734,
+ "step": 237
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.62e-07,
+ "logits/chosen": -9.026718139648438,
+ "logits/rejected": -8.953622817993164,
+ "logps/chosen": -40.321632385253906,
+ "logps/rejected": -68.15009307861328,
+ "loss": 0.2917,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.21047565340995789,
+ "rewards/margins": 1.3489338159561157,
+ "rewards/rejected": -1.5594093799591064,
+ "step": 238
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.61e-07,
+ "logits/chosen": -9.139519691467285,
+ "logits/rejected": -9.046548843383789,
+ "logps/chosen": -39.14946365356445,
+ "logps/rejected": -66.32569122314453,
+ "loss": 0.2374,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.24171173572540283,
+ "rewards/margins": 1.5724042654037476,
+ "rewards/rejected": -1.8141158819198608,
+ "step": 239
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.6e-07,
+ "logits/chosen": -8.669023513793945,
+ "logits/rejected": -8.563835144042969,
+ "logps/chosen": -40.61223602294922,
+ "logps/rejected": -69.82514190673828,
+ "loss": 0.2413,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.3103567063808441,
+ "rewards/margins": 1.6561682224273682,
+ "rewards/rejected": -1.9665250778198242,
+ "step": 240
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.59e-07,
+ "logits/chosen": -8.794702529907227,
+ "logits/rejected": -8.615911483764648,
+ "logps/chosen": -43.99329376220703,
+ "logps/rejected": -66.2555923461914,
+ "loss": 0.2858,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.3557160496711731,
+ "rewards/margins": 1.3395440578460693,
+ "rewards/rejected": -1.6952602863311768,
+ "step": 241
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.58e-07,
+ "logits/chosen": -8.988102912902832,
+ "logits/rejected": -8.940208435058594,
+ "logps/chosen": -42.496700286865234,
+ "logps/rejected": -69.17347717285156,
+ "loss": 0.272,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.460458904504776,
+ "rewards/margins": 1.4812017679214478,
+ "rewards/rejected": -1.9416606426239014,
+ "step": 242
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.57e-07,
+ "logits/chosen": -9.062856674194336,
+ "logits/rejected": -8.94471263885498,
+ "logps/chosen": -42.636497497558594,
+ "logps/rejected": -71.41357421875,
+ "loss": 0.221,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.4040011465549469,
+ "rewards/margins": 1.7163587808609009,
+ "rewards/rejected": -2.1203601360321045,
+ "step": 243
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.56e-07,
+ "logits/chosen": -9.013895034790039,
+ "logits/rejected": -9.061233520507812,
+ "logps/chosen": -46.01321792602539,
+ "logps/rejected": -71.0923843383789,
+ "loss": 0.3042,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.4178338646888733,
+ "rewards/margins": 1.3820786476135254,
+ "rewards/rejected": -1.799912452697754,
+ "step": 244
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.55e-07,
+ "logits/chosen": -9.007227897644043,
+ "logits/rejected": -8.854522705078125,
+ "logps/chosen": -42.58584213256836,
+ "logps/rejected": -68.90918731689453,
+ "loss": 0.2832,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.3119141161441803,
+ "rewards/margins": 1.5015567541122437,
+ "rewards/rejected": -1.8134710788726807,
+ "step": 245
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.5399999999999997e-07,
+ "logits/chosen": -9.030736923217773,
+ "logits/rejected": -8.89038372039795,
+ "logps/chosen": -37.5427131652832,
+ "logps/rejected": -67.01028442382812,
+ "loss": 0.2396,
+ "rewards/accuracies": 0.940000057220459,
+ "rewards/chosen": -0.014831873588263988,
+ "rewards/margins": 1.6802629232406616,
+ "rewards/rejected": -1.6950948238372803,
+ "step": 246
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.53e-07,
+ "logits/chosen": -8.911919593811035,
+ "logits/rejected": -8.769577026367188,
+ "logps/chosen": -43.36934280395508,
+ "logps/rejected": -71.1383056640625,
+ "loss": 0.2516,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.3899439573287964,
+ "rewards/margins": 1.618249535560608,
+ "rewards/rejected": -2.0081934928894043,
+ "step": 247
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.52e-07,
+ "logits/chosen": -8.795068740844727,
+ "logits/rejected": -8.727156639099121,
+ "logps/chosen": -43.60725402832031,
+ "logps/rejected": -73.10511016845703,
+ "loss": 0.2449,
+ "rewards/accuracies": 0.9800000190734863,
+ "rewards/chosen": -0.5213699340820312,
+ "rewards/margins": 1.5963902473449707,
+ "rewards/rejected": -2.117760181427002,
+ "step": 248
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.51e-07,
+ "logits/chosen": -8.8865385055542,
+ "logits/rejected": -8.710765838623047,
+ "logps/chosen": -44.19552230834961,
+ "logps/rejected": -74.49358367919922,
+ "loss": 0.224,
+ "rewards/accuracies": 0.9600000381469727,
+ "rewards/chosen": -0.5294827222824097,
+ "rewards/margins": 1.6507676839828491,
+ "rewards/rejected": -2.180250406265259,
+ "step": 249
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 2.5e-07,
+ "logits/chosen": -8.938467979431152,
+ "logits/rejected": -8.918638229370117,
+ "logps/chosen": -42.69424819946289,
+ "logps/rejected": -77.01347351074219,
+ "loss": 0.1974,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": -0.47195687890052795,
+ "rewards/margins": 1.8267240524291992,
+ "rewards/rejected": -2.2986807823181152,
+ "step": 250
+ }
+ ],
+ "logging_steps": 1,
+ "max_steps": 500,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 1,
+ "save_steps": 50,
+ "total_flos": 0.0,
+ "train_batch_size": 5,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/training_args.bin b/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e5a1100885f985b4e33e446e892233df03ac1001
--- /dev/null
+++ b/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9299d7ea4fb442144a1ab68d137cae8b85e61eaf3c86b5bdbffc30c723e505cf
+size 4664