{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 604, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "logits/chosen": -4.7122321128845215, "logits/rejected": -3.4910330772399902, "logps/chosen": -480.7637939453125, "logps/rejected": -307.30804443359375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 0.0, "logits/chosen": -4.456130027770996, "logits/rejected": -3.5022683143615723, "logps/chosen": -441.0960693359375, "logps/rejected": -254.25729370117188, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 2 }, { "epoch": 0.01, "learning_rate": 0.0, "logits/chosen": -4.721767425537109, "logits/rejected": -3.4279396533966064, "logps/chosen": -440.868896484375, "logps/rejected": -285.726806640625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 3 }, { "epoch": 0.01, "learning_rate": 5.494505494505495e-08, "logits/chosen": -4.769167423248291, "logits/rejected": -4.213985443115234, "logps/chosen": -339.8769226074219, "logps/rejected": -288.4583435058594, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 4 }, { "epoch": 0.02, "learning_rate": 1.098901098901099e-07, "logits/chosen": -4.547554016113281, "logits/rejected": -3.9193525314331055, "logps/chosen": -351.4005126953125, "logps/rejected": -348.05291748046875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 5 }, { "epoch": 0.02, "learning_rate": 1.6483516483516484e-07, "logits/chosen": -4.506200790405273, "logits/rejected": -4.258366107940674, "logps/chosen": -367.66943359375, "logps/rejected": -365.24383544921875, "loss": 0.701, "rewards/accuracies": 0.0, "rewards/chosen": -0.00408935546875, "rewards/margins": -0.01570434495806694, "rewards/rejected": 0.011614990420639515, "step": 6 }, { "epoch": 0.02, "learning_rate": 2.197802197802198e-07, "logits/chosen": -4.512852191925049, "logits/rejected": -3.491598129272461, "logps/chosen": -418.4132995605469, "logps/rejected": -340.18426513671875, "loss": 0.6811, "rewards/accuracies": 1.0, "rewards/chosen": 0.0052337646484375, "rewards/margins": 0.0241851806640625, "rewards/rejected": -0.018951416015625, "step": 7 }, { "epoch": 0.03, "learning_rate": 2.7472527472527475e-07, "logits/chosen": -4.398182392120361, "logits/rejected": -3.73840069770813, "logps/chosen": -383.665771484375, "logps/rejected": -369.041748046875, "loss": 0.6499, "rewards/accuracies": 1.0, "rewards/chosen": 0.03215942531824112, "rewards/margins": 0.088531494140625, "rewards/rejected": -0.05637207254767418, "step": 8 }, { "epoch": 0.03, "learning_rate": 3.296703296703297e-07, "logits/chosen": -4.551479339599609, "logits/rejected": -3.8382506370544434, "logps/chosen": -348.11627197265625, "logps/rejected": -271.0714111328125, "loss": 0.6261, "rewards/accuracies": 1.0, "rewards/chosen": 0.06197509914636612, "rewards/margins": 0.13901062309741974, "rewards/rejected": -0.07703552395105362, "step": 9 }, { "epoch": 0.03, "learning_rate": 3.846153846153847e-07, "logits/chosen": -4.554244518280029, "logits/rejected": -3.5874130725860596, "logps/chosen": -477.6059875488281, "logps/rejected": -331.3657531738281, "loss": 0.5562, "rewards/accuracies": 1.0, "rewards/chosen": 0.08335571736097336, "rewards/margins": 0.2956085205078125, "rewards/rejected": -0.21225281059741974, "step": 10 }, { "epoch": 0.04, "learning_rate": 4.395604395604396e-07, "logits/chosen": -4.534104347229004, "logits/rejected": -3.622457265853882, "logps/chosen": -452.00927734375, "logps/rejected": -308.62396240234375, "loss": 0.5024, "rewards/accuracies": 1.0, "rewards/chosen": 0.11546631157398224, "rewards/margins": 0.42655640840530396, "rewards/rejected": -0.3110900819301605, "step": 11 }, { "epoch": 0.04, "learning_rate": 4.945054945054946e-07, "logits/chosen": -4.5819878578186035, "logits/rejected": -4.043532848358154, "logps/chosen": -489.98583984375, "logps/rejected": -355.9280700683594, "loss": 0.3914, "rewards/accuracies": 1.0, "rewards/chosen": 0.254067987203598, "rewards/margins": 0.7358551025390625, "rewards/rejected": -0.4817871153354645, "step": 12 }, { "epoch": 0.04, "learning_rate": 5.494505494505495e-07, "logits/chosen": -4.548617839813232, "logits/rejected": -3.5121090412139893, "logps/chosen": -453.76708984375, "logps/rejected": -340.49310302734375, "loss": 0.3846, "rewards/accuracies": 1.0, "rewards/chosen": 0.16141052544116974, "rewards/margins": 0.7571258544921875, "rewards/rejected": -0.595715343952179, "step": 13 }, { "epoch": 0.05, "learning_rate": 6.043956043956044e-07, "logits/chosen": -4.559290885925293, "logits/rejected": -3.851238250732422, "logps/chosen": -359.08013916015625, "logps/rejected": -325.69927978515625, "loss": 0.2665, "rewards/accuracies": 1.0, "rewards/chosen": 0.43179628252983093, "rewards/margins": 1.1861114501953125, "rewards/rejected": -0.754315197467804, "step": 14 }, { "epoch": 0.05, "learning_rate": 6.593406593406594e-07, "logits/chosen": -4.530972003936768, "logits/rejected": -3.6676721572875977, "logps/chosen": -283.534423828125, "logps/rejected": -344.08294677734375, "loss": 0.2007, "rewards/accuracies": 1.0, "rewards/chosen": 0.46563720703125, "rewards/margins": 1.5038880109786987, "rewards/rejected": -1.0382508039474487, "step": 15 }, { "epoch": 0.05, "learning_rate": 7.142857142857143e-07, "logits/chosen": -4.578598976135254, "logits/rejected": -3.969371795654297, "logps/chosen": -329.626708984375, "logps/rejected": -358.00067138671875, "loss": 0.15, "rewards/accuracies": 1.0, "rewards/chosen": 0.37066346406936646, "rewards/margins": 1.8208892345428467, "rewards/rejected": -1.450225830078125, "step": 16 }, { "epoch": 0.06, "learning_rate": 7.692307692307694e-07, "logits/chosen": -4.456603050231934, "logits/rejected": -3.9640321731567383, "logps/chosen": -325.3027648925781, "logps/rejected": -371.80010986328125, "loss": 0.2361, "rewards/accuracies": 1.0, "rewards/chosen": 0.45557862520217896, "rewards/margins": 1.3231537342071533, "rewards/rejected": -0.8675751090049744, "step": 17 }, { "epoch": 0.06, "learning_rate": 8.241758241758242e-07, "logits/chosen": -4.553553581237793, "logits/rejected": -4.103464603424072, "logps/chosen": -392.54351806640625, "logps/rejected": -390.0396728515625, "loss": 0.0861, "rewards/accuracies": 1.0, "rewards/chosen": 0.4598144590854645, "rewards/margins": 2.409454345703125, "rewards/rejected": -1.949639916419983, "step": 18 }, { "epoch": 0.06, "learning_rate": 8.791208791208792e-07, "logits/chosen": -4.560757637023926, "logits/rejected": -3.3375790119171143, "logps/chosen": -458.788818359375, "logps/rejected": -237.71102905273438, "loss": 0.1327, "rewards/accuracies": 1.0, "rewards/chosen": 0.553814709186554, "rewards/margins": 1.9525666236877441, "rewards/rejected": -1.3987518548965454, "step": 19 }, { "epoch": 0.07, "learning_rate": 9.340659340659342e-07, "logits/chosen": -4.524593830108643, "logits/rejected": -3.837027072906494, "logps/chosen": -485.5209045410156, "logps/rejected": -361.74188232421875, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": 0.9675140380859375, "rewards/margins": 3.29044508934021, "rewards/rejected": -2.3229310512542725, "step": 20 }, { "epoch": 0.07, "learning_rate": 9.890109890109891e-07, "logits/chosen": -4.552459239959717, "logits/rejected": -3.4426755905151367, "logps/chosen": -637.16259765625, "logps/rejected": -307.08282470703125, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": 1.276123046875, "rewards/margins": 3.7193756103515625, "rewards/rejected": -2.4432525634765625, "step": 21 }, { "epoch": 0.07, "learning_rate": 1.0439560439560442e-06, "logits/chosen": -4.615756034851074, "logits/rejected": -3.5999937057495117, "logps/chosen": -299.9642333984375, "logps/rejected": -382.33734130859375, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 1.375402808189392, "rewards/margins": 4.191192626953125, "rewards/rejected": -2.8157899379730225, "step": 22 }, { "epoch": 0.08, "learning_rate": 1.098901098901099e-06, "logits/chosen": -4.56257963180542, "logits/rejected": -3.9953930377960205, "logps/chosen": -304.43365478515625, "logps/rejected": -345.88836669921875, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 0.693988025188446, "rewards/margins": 4.401156425476074, "rewards/rejected": -3.7071685791015625, "step": 23 }, { "epoch": 0.08, "learning_rate": 1.153846153846154e-06, "logits/chosen": -4.4777092933654785, "logits/rejected": -3.514190435409546, "logps/chosen": -350.09747314453125, "logps/rejected": -357.66943359375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 1.7525604963302612, "rewards/margins": 5.601266384124756, "rewards/rejected": -3.848706007003784, "step": 24 }, { "epoch": 0.08, "learning_rate": 1.2087912087912089e-06, "logits/chosen": -4.659963607788086, "logits/rejected": -3.676948070526123, "logps/chosen": -426.1668701171875, "logps/rejected": -333.86212158203125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 2.552722215652466, "rewards/margins": 6.66888427734375, "rewards/rejected": -4.116162300109863, "step": 25 }, { "epoch": 0.09, "learning_rate": 1.2637362637362637e-06, "logits/chosen": -4.635252952575684, "logits/rejected": -4.635253429412842, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 26 }, { "epoch": 0.09, "learning_rate": 1.3186813186813187e-06, "logits/chosen": -4.54569149017334, "logits/rejected": -4.585753440856934, "logps/chosen": -268.20098876953125, "logps/rejected": -388.1652526855469, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": 2.103280782699585, "rewards/margins": 3.65966796875, "rewards/rejected": -1.5563873052597046, "step": 27 }, { "epoch": 0.09, "learning_rate": 1.3736263736263738e-06, "logits/chosen": -4.528458595275879, "logits/rejected": -3.756037473678589, "logps/chosen": -320.74591064453125, "logps/rejected": -383.32171630859375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 2.2982544898986816, "rewards/margins": 8.030874252319336, "rewards/rejected": -5.7326202392578125, "step": 28 }, { "epoch": 0.1, "learning_rate": 1.4285714285714286e-06, "logits/chosen": -4.45478630065918, "logits/rejected": -3.7227022647857666, "logps/chosen": -421.645263671875, "logps/rejected": -337.57232666015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.300579786300659, "rewards/margins": 8.780420303344727, "rewards/rejected": -6.479840278625488, "step": 29 }, { "epoch": 0.1, "learning_rate": 1.4835164835164837e-06, "logits/chosen": -4.644472122192383, "logits/rejected": -3.8435280323028564, "logps/chosen": -344.5352783203125, "logps/rejected": -387.8581848144531, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 2.017681837081909, "rewards/margins": 7.160736083984375, "rewards/rejected": -5.143054485321045, "step": 30 }, { "epoch": 0.1, "learning_rate": 1.5384615384615387e-06, "logits/chosen": -4.358231544494629, "logits/rejected": -3.987455129623413, "logps/chosen": -300.4159851074219, "logps/rejected": -426.6040344238281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 2.130636692047119, "rewards/margins": 8.095977783203125, "rewards/rejected": -5.965341091156006, "step": 31 }, { "epoch": 0.11, "learning_rate": 1.5934065934065933e-06, "logits/chosen": -4.457190990447998, "logits/rejected": -3.8622899055480957, "logps/chosen": -428.80010986328125, "logps/rejected": -282.08941650390625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 1.919866919517517, "rewards/margins": 5.6237030029296875, "rewards/rejected": -3.70383620262146, "step": 32 }, { "epoch": 0.11, "learning_rate": 1.6483516483516484e-06, "logits/chosen": -4.373894214630127, "logits/rejected": -3.93933367729187, "logps/chosen": -282.36163330078125, "logps/rejected": -356.90789794921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 2.413839817047119, "rewards/margins": 7.732351779937744, "rewards/rejected": -5.318511962890625, "step": 33 }, { "epoch": 0.11, "learning_rate": 1.7032967032967034e-06, "logits/chosen": -4.522324085235596, "logits/rejected": -3.5009684562683105, "logps/chosen": -299.6717834472656, "logps/rejected": -359.8671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.1251494884490967, "rewards/margins": 8.901113510131836, "rewards/rejected": -6.775964260101318, "step": 34 }, { "epoch": 0.12, "learning_rate": 1.7582417582417585e-06, "logits/chosen": -4.407108783721924, "logits/rejected": -3.9882192611694336, "logps/chosen": -279.01580810546875, "logps/rejected": -309.4888916015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 2.941366672515869, "rewards/margins": 8.681912422180176, "rewards/rejected": -5.740545749664307, "step": 35 }, { "epoch": 0.12, "learning_rate": 1.8131868131868133e-06, "logits/chosen": -4.558385848999023, "logits/rejected": -4.195313453674316, "logps/chosen": -283.02545166015625, "logps/rejected": -402.60540771484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.7620728015899658, "rewards/margins": 9.408578872680664, "rewards/rejected": -7.646505832672119, "step": 36 }, { "epoch": 0.12, "learning_rate": 1.8681318681318684e-06, "logits/chosen": -4.52475118637085, "logits/rejected": -4.018232345581055, "logps/chosen": -304.9980773925781, "logps/rejected": -374.7655029296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.8536040782928467, "rewards/margins": 9.533895492553711, "rewards/rejected": -6.680291652679443, "step": 37 }, { "epoch": 0.13, "learning_rate": 1.9230769230769234e-06, "logits/chosen": -4.477925777435303, "logits/rejected": -3.7866082191467285, "logps/chosen": -269.1673278808594, "logps/rejected": -379.8326416015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.563244581222534, "rewards/margins": 9.162829399108887, "rewards/rejected": -6.599585056304932, "step": 38 }, { "epoch": 0.13, "learning_rate": 1.9780219780219782e-06, "logits/chosen": -4.392153263092041, "logits/rejected": -4.061153411865234, "logps/chosen": -323.526611328125, "logps/rejected": -459.18817138671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 2.4470367431640625, "rewards/margins": 9.656204223632812, "rewards/rejected": -7.20916748046875, "step": 39 }, { "epoch": 0.13, "learning_rate": 2.032967032967033e-06, "logits/chosen": -4.585270881652832, "logits/rejected": -3.468134641647339, "logps/chosen": -314.48748779296875, "logps/rejected": -404.564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.6204469203948975, "rewards/margins": 10.225598335266113, "rewards/rejected": -7.605151653289795, "step": 40 }, { "epoch": 0.14, "learning_rate": 2.0879120879120883e-06, "logits/chosen": -4.52483606338501, "logits/rejected": -3.6769773960113525, "logps/chosen": -283.4615173339844, "logps/rejected": -368.316650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.3265349864959717, "rewards/margins": 9.931375503540039, "rewards/rejected": -7.604840278625488, "step": 41 }, { "epoch": 0.14, "learning_rate": 2.1428571428571427e-06, "logits/chosen": -4.958460330963135, "logits/rejected": -3.5075302124023438, "logps/chosen": -1548.0390625, "logps/rejected": -430.23260498046875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 1.080407738685608, "rewards/margins": 5.01118803024292, "rewards/rejected": -3.9307801723480225, "step": 42 }, { "epoch": 0.14, "learning_rate": 2.197802197802198e-06, "logits/chosen": -4.469013690948486, "logits/rejected": -3.7112741470336914, "logps/chosen": -329.34527587890625, "logps/rejected": -353.8309326171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 3.11572265625, "rewards/margins": 8.09234619140625, "rewards/rejected": -4.97662353515625, "step": 43 }, { "epoch": 0.15, "learning_rate": 2.252747252747253e-06, "logits/chosen": -4.559028625488281, "logits/rejected": -3.685147523880005, "logps/chosen": -350.5050048828125, "logps/rejected": -432.88720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.776409864425659, "rewards/margins": 11.043191909790039, "rewards/rejected": -8.2667818069458, "step": 44 }, { "epoch": 0.15, "learning_rate": 2.307692307692308e-06, "logits/chosen": -4.4963531494140625, "logits/rejected": -3.1627519130706787, "logps/chosen": -553.4663696289062, "logps/rejected": -415.58251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.3123841285705566, "rewards/margins": 13.567768096923828, "rewards/rejected": -10.255383491516113, "step": 45 }, { "epoch": 0.15, "learning_rate": 2.362637362637363e-06, "logits/chosen": -4.479090690612793, "logits/rejected": -3.9051225185394287, "logps/chosen": -281.47723388671875, "logps/rejected": -345.96337890625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 2.1858582496643066, "rewards/margins": 7.392383098602295, "rewards/rejected": -5.206524848937988, "step": 46 }, { "epoch": 0.16, "learning_rate": 2.4175824175824177e-06, "logits/chosen": -4.356232166290283, "logits/rejected": -3.9175236225128174, "logps/chosen": -362.4033203125, "logps/rejected": -447.981201171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.4192932844161987, "rewards/margins": 9.018689155578613, "rewards/rejected": -7.599395751953125, "step": 47 }, { "epoch": 0.16, "learning_rate": 2.472527472527473e-06, "logits/chosen": -4.230052471160889, "logits/rejected": -3.6365554332733154, "logps/chosen": -336.6274108886719, "logps/rejected": -369.9189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.9184234142303467, "rewards/margins": 10.501449584960938, "rewards/rejected": -7.58302640914917, "step": 48 }, { "epoch": 0.16, "learning_rate": 2.5274725274725274e-06, "logits/chosen": -4.472602367401123, "logits/rejected": -3.738085985183716, "logps/chosen": -372.7972717285156, "logps/rejected": -331.42022705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.7339019775390625, "rewards/margins": 10.597005844116211, "rewards/rejected": -7.863104343414307, "step": 49 }, { "epoch": 0.17, "learning_rate": 2.582417582417583e-06, "logits/chosen": -4.685428619384766, "logits/rejected": -3.7145485877990723, "logps/chosen": -326.884521484375, "logps/rejected": -369.75830078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.3666748106479645, "rewards/margins": 8.955459594726562, "rewards/rejected": -8.588785171508789, "step": 50 }, { "epoch": 0.17, "learning_rate": 2.6373626373626375e-06, "logits/chosen": -4.467536926269531, "logits/rejected": -3.8350751399993896, "logps/chosen": -312.74237060546875, "logps/rejected": -340.1723937988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.017587423324585, "rewards/margins": 10.204119682312012, "rewards/rejected": -7.186532497406006, "step": 51 }, { "epoch": 0.17, "learning_rate": 2.6923076923076923e-06, "logits/chosen": -4.286996364593506, "logits/rejected": -3.920314073562622, "logps/chosen": -331.720703125, "logps/rejected": -453.9488830566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.2590577602386475, "rewards/margins": 13.077414512634277, "rewards/rejected": -9.81835651397705, "step": 52 }, { "epoch": 0.18, "learning_rate": 2.7472527472527476e-06, "logits/chosen": -4.573817729949951, "logits/rejected": -4.573817729949951, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 53 }, { "epoch": 0.18, "learning_rate": 2.8021978021978024e-06, "logits/chosen": -4.560126781463623, "logits/rejected": -3.358957529067993, "logps/chosen": -505.0814208984375, "logps/rejected": -392.2349853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.12652587890625, "rewards/margins": 11.479181289672852, "rewards/rejected": -9.352655410766602, "step": 54 }, { "epoch": 0.18, "learning_rate": 2.8571428571428573e-06, "logits/chosen": -4.519628047943115, "logits/rejected": -3.72153902053833, "logps/chosen": -297.31982421875, "logps/rejected": -359.1614990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.254974365234375, "rewards/margins": 9.919842720031738, "rewards/rejected": -7.664868354797363, "step": 55 }, { "epoch": 0.19, "learning_rate": 2.9120879120879125e-06, "logits/chosen": -4.356388568878174, "logits/rejected": -3.934237241744995, "logps/chosen": -305.0617370605469, "logps/rejected": -370.7964172363281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.9260528087615967, "rewards/margins": 10.116022109985352, "rewards/rejected": -7.189969062805176, "step": 56 }, { "epoch": 0.19, "learning_rate": 2.9670329670329673e-06, "logits/chosen": -4.416037082672119, "logits/rejected": -4.145164489746094, "logps/chosen": -205.00186157226562, "logps/rejected": -364.52728271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.6028244495391846, "rewards/margins": 10.653569221496582, "rewards/rejected": -8.050745010375977, "step": 57 }, { "epoch": 0.19, "learning_rate": 3.021978021978022e-06, "logits/chosen": -4.462647438049316, "logits/rejected": -3.762413501739502, "logps/chosen": -300.01239013671875, "logps/rejected": -449.92999267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.1943695545196533, "rewards/margins": 13.079874038696289, "rewards/rejected": -9.885504722595215, "step": 58 }, { "epoch": 0.2, "learning_rate": 3.0769230769230774e-06, "logits/chosen": -4.499337196350098, "logits/rejected": -3.3283798694610596, "logps/chosen": -415.0854797363281, "logps/rejected": -453.6877136230469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.19559645652771, "rewards/margins": 13.638141632080078, "rewards/rejected": -10.442544937133789, "step": 59 }, { "epoch": 0.2, "learning_rate": 3.1318681318681323e-06, "logits/chosen": -4.473818778991699, "logits/rejected": -3.3775250911712646, "logps/chosen": -632.7835083007812, "logps/rejected": -332.44622802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.4099366664886475, "rewards/margins": 10.282444953918457, "rewards/rejected": -7.872508525848389, "step": 60 }, { "epoch": 0.2, "learning_rate": 3.1868131868131867e-06, "logits/chosen": -4.390290260314941, "logits/rejected": -3.326557159423828, "logps/chosen": -386.25701904296875, "logps/rejected": -294.870361328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 2.3045289516448975, "rewards/margins": 8.1837739944458, "rewards/rejected": -5.879245281219482, "step": 61 }, { "epoch": 0.21, "learning_rate": 3.2417582417582424e-06, "logits/chosen": -4.46834135055542, "logits/rejected": -4.094131946563721, "logps/chosen": -334.70654296875, "logps/rejected": -451.2972412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.4573609828948975, "rewards/margins": 12.338126182556152, "rewards/rejected": -9.880764961242676, "step": 62 }, { "epoch": 0.21, "learning_rate": 3.2967032967032968e-06, "logits/chosen": -4.594233989715576, "logits/rejected": -3.5783090591430664, "logps/chosen": -278.1133117675781, "logps/rejected": -505.69195556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.8014190196990967, "rewards/margins": 13.821588516235352, "rewards/rejected": -11.020169258117676, "step": 63 }, { "epoch": 0.21, "learning_rate": 3.3516483516483516e-06, "logits/chosen": -4.607097625732422, "logits/rejected": -3.978281021118164, "logps/chosen": -312.921142578125, "logps/rejected": -342.41351318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.903662085533142, "rewards/margins": 10.762219429016113, "rewards/rejected": -8.85855770111084, "step": 64 }, { "epoch": 0.22, "learning_rate": 3.406593406593407e-06, "logits/chosen": -4.4642014503479, "logits/rejected": -3.5378057956695557, "logps/chosen": -488.7149658203125, "logps/rejected": -365.5252380371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.4717254638671875, "rewards/margins": 9.707379341125488, "rewards/rejected": -8.2356538772583, "step": 65 }, { "epoch": 0.22, "learning_rate": 3.4615384615384617e-06, "logits/chosen": -4.361936569213867, "logits/rejected": -4.100497245788574, "logps/chosen": -287.7290344238281, "logps/rejected": -451.6803283691406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.551348924636841, "rewards/margins": 13.295758247375488, "rewards/rejected": -10.744409561157227, "step": 66 }, { "epoch": 0.22, "learning_rate": 3.516483516483517e-06, "logits/chosen": -4.3527302742004395, "logits/rejected": -3.811634063720703, "logps/chosen": -360.6308898925781, "logps/rejected": -448.3175354003906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.832226514816284, "rewards/margins": 12.640280723571777, "rewards/rejected": -8.808053970336914, "step": 67 }, { "epoch": 0.23, "learning_rate": 3.5714285714285718e-06, "logits/chosen": -4.550887584686279, "logits/rejected": -4.550887584686279, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 68 }, { "epoch": 0.23, "learning_rate": 3.6263736263736266e-06, "logits/chosen": -4.38459587097168, "logits/rejected": -3.529341697692871, "logps/chosen": -330.7218017578125, "logps/rejected": -371.4253234863281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.72430419921875, "rewards/margins": 10.868155479431152, "rewards/rejected": -9.143851280212402, "step": 69 }, { "epoch": 0.23, "learning_rate": 3.681318681318682e-06, "logits/chosen": -4.437158584594727, "logits/rejected": -4.42078971862793, "logps/chosen": -243.7357177734375, "logps/rejected": -423.0533447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.9866089820861816, "rewards/margins": 12.251775741577148, "rewards/rejected": -9.265167236328125, "step": 70 }, { "epoch": 0.24, "learning_rate": 3.7362637362637367e-06, "logits/chosen": -4.38008451461792, "logits/rejected": -3.6854677200317383, "logps/chosen": -255.04901123046875, "logps/rejected": -424.14111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.8715972900390625, "rewards/margins": 14.87622356414795, "rewards/rejected": -11.004626274108887, "step": 71 }, { "epoch": 0.24, "learning_rate": 3.7912087912087915e-06, "logits/chosen": -4.497178077697754, "logits/rejected": -3.6329329013824463, "logps/chosen": -349.28289794921875, "logps/rejected": -381.4493408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.124981641769409, "rewards/margins": 10.515064239501953, "rewards/rejected": -8.390082359313965, "step": 72 }, { "epoch": 0.24, "learning_rate": 3.846153846153847e-06, "logits/chosen": -4.524789333343506, "logits/rejected": -3.4265801906585693, "logps/chosen": -365.79644775390625, "logps/rejected": -475.97869873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.8129639625549316, "rewards/margins": 14.072290420532227, "rewards/rejected": -11.259325981140137, "step": 73 }, { "epoch": 0.25, "learning_rate": 3.901098901098901e-06, "logits/chosen": -4.497722625732422, "logits/rejected": -3.3662238121032715, "logps/chosen": -472.6344909667969, "logps/rejected": -421.3455505371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.5328400135040283, "rewards/margins": 12.811511039733887, "rewards/rejected": -9.278671264648438, "step": 74 }, { "epoch": 0.25, "learning_rate": 3.9560439560439565e-06, "logits/chosen": -4.341458797454834, "logits/rejected": -3.5533199310302734, "logps/chosen": -340.4435119628906, "logps/rejected": -364.519775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.8503997325897217, "rewards/margins": 11.756946563720703, "rewards/rejected": -8.906546592712402, "step": 75 }, { "epoch": 0.25, "learning_rate": 4.010989010989012e-06, "logits/chosen": -4.497629165649414, "logits/rejected": -4.373428821563721, "logps/chosen": -253.01666259765625, "logps/rejected": -411.61370849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.3009033203125, "rewards/margins": 12.612372398376465, "rewards/rejected": -9.311469078063965, "step": 76 }, { "epoch": 0.25, "learning_rate": 4.065934065934066e-06, "logits/chosen": -4.67010498046875, "logits/rejected": -4.221648216247559, "logps/chosen": -267.68695068359375, "logps/rejected": -406.31768798828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 3.4193084239959717, "rewards/margins": 8.557241439819336, "rewards/rejected": -5.137933254241943, "step": 77 }, { "epoch": 0.26, "learning_rate": 4.120879120879121e-06, "logits/chosen": -4.276376724243164, "logits/rejected": -3.5325844287872314, "logps/chosen": -359.96893310546875, "logps/rejected": -416.0820617675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.9439544677734375, "rewards/margins": 13.037528038024902, "rewards/rejected": -10.093573570251465, "step": 78 }, { "epoch": 0.26, "learning_rate": 4.175824175824177e-06, "logits/chosen": -4.5093512535095215, "logits/rejected": -4.4456963539123535, "logps/chosen": -192.96502685546875, "logps/rejected": -362.227783203125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.3808960020542145, "rewards/margins": 7.196664810180664, "rewards/rejected": -6.815768718719482, "step": 79 }, { "epoch": 0.26, "learning_rate": 4.230769230769231e-06, "logits/chosen": -4.7614617347717285, "logits/rejected": -4.761462211608887, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 80 }, { "epoch": 0.27, "learning_rate": 4.2857142857142855e-06, "logits/chosen": -4.45823860168457, "logits/rejected": -3.9341721534729004, "logps/chosen": -418.939697265625, "logps/rejected": -429.19378662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.361285448074341, "rewards/margins": 14.282876968383789, "rewards/rejected": -11.921591758728027, "step": 81 }, { "epoch": 0.27, "learning_rate": 4.340659340659341e-06, "logits/chosen": -4.4410505294799805, "logits/rejected": -4.185164928436279, "logps/chosen": -232.5185546875, "logps/rejected": -447.5303649902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.2920806407928467, "rewards/margins": 13.843243598937988, "rewards/rejected": -11.551162719726562, "step": 82 }, { "epoch": 0.27, "learning_rate": 4.395604395604396e-06, "logits/chosen": -4.416217803955078, "logits/rejected": -3.6590464115142822, "logps/chosen": -433.36480712890625, "logps/rejected": -375.2690734863281, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.0846405029296875, "rewards/margins": 7.43996000289917, "rewards/rejected": -6.355319499969482, "step": 83 }, { "epoch": 0.28, "learning_rate": 4.45054945054945e-06, "logits/chosen": -4.3372907638549805, "logits/rejected": -4.462950229644775, "logps/chosen": -397.9360656738281, "logps/rejected": -463.2792663574219, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9072632193565369, "rewards/margins": 6.424984931945801, "rewards/rejected": -7.332248210906982, "step": 84 }, { "epoch": 0.28, "learning_rate": 4.505494505494506e-06, "logits/chosen": -4.51932954788208, "logits/rejected": -3.1768529415130615, "logps/chosen": -661.5889282226562, "logps/rejected": -380.08734130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.3720276355743408, "rewards/margins": 14.595202445983887, "rewards/rejected": -13.223175048828125, "step": 85 }, { "epoch": 0.28, "learning_rate": 4.560439560439561e-06, "logits/chosen": -4.376767158508301, "logits/rejected": -3.4610347747802734, "logps/chosen": -433.4891357421875, "logps/rejected": -476.48748779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.76708984375, "rewards/margins": 15.365961074829102, "rewards/rejected": -13.598871231079102, "step": 86 }, { "epoch": 0.29, "learning_rate": 4.615384615384616e-06, "logits/chosen": -4.398746490478516, "logits/rejected": -3.6896204948425293, "logps/chosen": -230.8444061279297, "logps/rejected": -394.7328796386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.5491745471954346, "rewards/margins": 15.824689865112305, "rewards/rejected": -13.27551555633545, "step": 87 }, { "epoch": 0.29, "learning_rate": 4.6703296703296706e-06, "logits/chosen": -4.4249114990234375, "logits/rejected": -3.499051570892334, "logps/chosen": -363.8670349121094, "logps/rejected": -453.15826416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.3732330799102783, "rewards/margins": 15.843539237976074, "rewards/rejected": -13.470306396484375, "step": 88 }, { "epoch": 0.29, "learning_rate": 4.725274725274726e-06, "logits/chosen": -4.579809665679932, "logits/rejected": -3.963841199874878, "logps/chosen": -376.39263916015625, "logps/rejected": -475.49981689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.1896270513534546, "rewards/margins": 17.53704261779785, "rewards/rejected": -16.347415924072266, "step": 89 }, { "epoch": 0.3, "learning_rate": 4.780219780219781e-06, "logits/chosen": -4.374417304992676, "logits/rejected": -4.248178005218506, "logps/chosen": -194.196533203125, "logps/rejected": -417.26776123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.190634250640869, "rewards/margins": 15.594205856323242, "rewards/rejected": -13.403571128845215, "step": 90 }, { "epoch": 0.3, "learning_rate": 4.8351648351648355e-06, "logits/chosen": -4.6437482833862305, "logits/rejected": -3.73307728767395, "logps/chosen": -413.7351379394531, "logps/rejected": -395.78302001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.820913791656494, "rewards/margins": 16.67730712890625, "rewards/rejected": -12.856393814086914, "step": 91 }, { "epoch": 0.3, "learning_rate": 4.890109890109891e-06, "logits/chosen": -4.21191930770874, "logits/rejected": -3.5639426708221436, "logps/chosen": -447.6401062011719, "logps/rejected": -457.892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.4168548583984375, "rewards/margins": 14.87121868133545, "rewards/rejected": -13.454363822937012, "step": 92 }, { "epoch": 0.31, "learning_rate": 4.945054945054946e-06, "logits/chosen": -4.551489353179932, "logits/rejected": -4.471848964691162, "logps/chosen": -240.43063354492188, "logps/rejected": -433.2778625488281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.8902206420898438, "rewards/margins": 14.926170349121094, "rewards/rejected": -14.03594970703125, "step": 93 }, { "epoch": 0.31, "learning_rate": 5e-06, "logits/chosen": -4.397589206695557, "logits/rejected": -4.0388078689575195, "logps/chosen": -348.8073425292969, "logps/rejected": -452.66693115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.624746799468994, "rewards/margins": 15.00168228149414, "rewards/rejected": -11.376935005187988, "step": 94 }, { "epoch": 0.31, "learning_rate": 4.999981426489179e-06, "logits/chosen": -4.5778608322143555, "logits/rejected": -3.8527991771698, "logps/chosen": -359.4041748046875, "logps/rejected": -409.2432861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.038177490234375, "rewards/margins": 16.7247314453125, "rewards/rejected": -13.686553955078125, "step": 95 }, { "epoch": 0.32, "learning_rate": 4.999925706232695e-06, "logits/chosen": -4.530111312866211, "logits/rejected": -4.155897617340088, "logps/chosen": -234.99557495117188, "logps/rejected": -426.90765380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.2188384532928467, "rewards/margins": 14.525558471679688, "rewards/rejected": -11.306719779968262, "step": 96 }, { "epoch": 0.32, "learning_rate": 4.9998328400584864e-06, "logits/chosen": -4.477783679962158, "logits/rejected": -4.133589267730713, "logps/chosen": -248.23715209960938, "logps/rejected": -445.22174072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.41404128074646, "rewards/margins": 16.36020851135254, "rewards/rejected": -12.9461669921875, "step": 97 }, { "epoch": 0.32, "learning_rate": 4.999702829346432e-06, "logits/chosen": -4.469297885894775, "logits/rejected": -4.458068370819092, "logps/chosen": -243.61090087890625, "logps/rejected": -386.2374267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7676605582237244, "rewards/margins": 11.3004789352417, "rewards/rejected": -10.532818794250488, "step": 98 }, { "epoch": 0.33, "learning_rate": 4.999535676028338e-06, "logits/chosen": -4.551413536071777, "logits/rejected": -3.6586270332336426, "logps/chosen": -347.52716064453125, "logps/rejected": -452.22930908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.1838836669921875, "rewards/margins": 17.132701873779297, "rewards/rejected": -14.948819160461426, "step": 99 }, { "epoch": 0.33, "learning_rate": 4.999331382587901e-06, "logits/chosen": -4.324283599853516, "logits/rejected": -3.1241581439971924, "logps/chosen": -483.8208923339844, "logps/rejected": -419.9326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.6294128894805908, "rewards/margins": 13.845037460327148, "rewards/rejected": -12.215624809265137, "step": 100 }, { "epoch": 0.33, "learning_rate": 4.999089952060681e-06, "logits/chosen": -4.464879035949707, "logits/rejected": -3.448148488998413, "logps/chosen": -426.8560485839844, "logps/rejected": -436.4939270019531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.1339569091796875, "rewards/margins": 15.621515274047852, "rewards/rejected": -13.487558364868164, "step": 101 }, { "epoch": 0.34, "learning_rate": 4.998811388034046e-06, "logits/chosen": -4.5417256355285645, "logits/rejected": -4.166463851928711, "logps/chosen": -389.265625, "logps/rejected": -494.1796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.934008836746216, "rewards/margins": 20.474946975708008, "rewards/rejected": -17.540937423706055, "step": 102 }, { "epoch": 0.34, "learning_rate": 4.998495694647127e-06, "logits/chosen": -4.398112773895264, "logits/rejected": -4.488321781158447, "logps/chosen": -314.2387390136719, "logps/rejected": -540.993408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.8422698974609375, "rewards/margins": 17.285541534423828, "rewards/rejected": -15.443270683288574, "step": 103 }, { "epoch": 0.34, "learning_rate": 4.998142876590749e-06, "logits/chosen": -4.43848180770874, "logits/rejected": -3.8401870727539062, "logps/chosen": -357.3565673828125, "logps/rejected": -495.7008972167969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.558154284954071, "rewards/margins": 18.115083694458008, "rewards/rejected": -17.556928634643555, "step": 104 }, { "epoch": 0.35, "learning_rate": 4.997752939107372e-06, "logits/chosen": -4.379143714904785, "logits/rejected": -2.8701581954956055, "logps/chosen": -384.65484619140625, "logps/rejected": -369.762939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.203588843345642, "rewards/margins": 10.755364418029785, "rewards/rejected": -9.551775932312012, "step": 105 }, { "epoch": 0.35, "learning_rate": 4.997325887990999e-06, "logits/chosen": -4.355114936828613, "logits/rejected": -3.516786575317383, "logps/chosen": -395.44866943359375, "logps/rejected": -483.1910095214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.061553955078125, "rewards/margins": 16.13475799560547, "rewards/rejected": -14.073203086853027, "step": 106 }, { "epoch": 0.35, "learning_rate": 4.996861729587103e-06, "logits/chosen": -4.370433330535889, "logits/rejected": -3.369537353515625, "logps/chosen": -388.32037353515625, "logps/rejected": -497.56427001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.486157178878784, "rewards/margins": 18.97348403930664, "rewards/rejected": -15.487326622009277, "step": 107 }, { "epoch": 0.36, "learning_rate": 4.996360470792524e-06, "logits/chosen": -4.528186321258545, "logits/rejected": -3.8962700366973877, "logps/chosen": -284.3951110839844, "logps/rejected": -496.885009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.5079925060272217, "rewards/margins": 17.01309585571289, "rewards/rejected": -13.50510311126709, "step": 108 }, { "epoch": 0.36, "learning_rate": 4.9958221190553705e-06, "logits/chosen": -4.446348190307617, "logits/rejected": -4.065671443939209, "logps/chosen": -374.7364501953125, "logps/rejected": -565.4706420898438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.9700074195861816, "rewards/margins": 15.873197555541992, "rewards/rejected": -11.903189659118652, "step": 109 }, { "epoch": 0.36, "learning_rate": 4.9952466823749076e-06, "logits/chosen": -4.366666316986084, "logits/rejected": -3.5727248191833496, "logps/chosen": -303.04241943359375, "logps/rejected": -456.4177551269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.866119384765625, "rewards/margins": 15.641772270202637, "rewards/rejected": -13.775652885437012, "step": 110 }, { "epoch": 0.37, "learning_rate": 4.994634169301439e-06, "logits/chosen": -4.417354583740234, "logits/rejected": -4.194676399230957, "logps/chosen": -246.4547576904297, "logps/rejected": -482.65960693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.9938981533050537, "rewards/margins": 16.903947830200195, "rewards/rejected": -13.910049438476562, "step": 111 }, { "epoch": 0.37, "learning_rate": 4.99398458893618e-06, "logits/chosen": -4.699535846710205, "logits/rejected": -4.523982524871826, "logps/chosen": -260.97991943359375, "logps/rejected": -504.26416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6771606802940369, "rewards/margins": 14.61356258392334, "rewards/rejected": -15.290722846984863, "step": 112 }, { "epoch": 0.37, "learning_rate": 4.993297950931121e-06, "logits/chosen": -4.534379005432129, "logits/rejected": -3.2884504795074463, "logps/chosen": -392.5225830078125, "logps/rejected": -483.8066101074219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.4821900129318237, "rewards/margins": 17.772390365600586, "rewards/rejected": -16.29020118713379, "step": 113 }, { "epoch": 0.38, "learning_rate": 4.992574265488883e-06, "logits/chosen": -4.4783616065979, "logits/rejected": -3.9488227367401123, "logps/chosen": -268.69671630859375, "logps/rejected": -445.88934326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.969137668609619, "rewards/margins": 18.754684448242188, "rewards/rejected": -14.785547256469727, "step": 114 }, { "epoch": 0.38, "learning_rate": 4.991813543362572e-06, "logits/chosen": -4.584895610809326, "logits/rejected": -3.91094970703125, "logps/chosen": -306.7088623046875, "logps/rejected": -446.6007995605469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.993066430091858, "rewards/margins": 13.759903907775879, "rewards/rejected": -11.766837120056152, "step": 115 }, { "epoch": 0.38, "learning_rate": 4.991015795855611e-06, "logits/chosen": -4.340481281280518, "logits/rejected": -3.854095935821533, "logps/chosen": -297.0914001464844, "logps/rejected": -418.73492431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.8690094351768494, "rewards/margins": 13.999166488647461, "rewards/rejected": -13.130157470703125, "step": 116 }, { "epoch": 0.39, "learning_rate": 4.990181034821578e-06, "logits/chosen": -4.376932144165039, "logits/rejected": -3.738924264907837, "logps/chosen": -426.25537109375, "logps/rejected": -413.3848876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.9265228509902954, "rewards/margins": 14.493753433227539, "rewards/rejected": -12.567230224609375, "step": 117 }, { "epoch": 0.39, "learning_rate": 4.989309272664026e-06, "logits/chosen": -4.558651924133301, "logits/rejected": -3.5922670364379883, "logps/chosen": -469.27423095703125, "logps/rejected": -514.3047485351562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.9566650390625, "rewards/margins": 17.330196380615234, "rewards/rejected": -14.37353229522705, "step": 118 }, { "epoch": 0.39, "learning_rate": 4.988400522336304e-06, "logits/chosen": -4.404550075531006, "logits/rejected": -3.672477960586548, "logps/chosen": -275.354248046875, "logps/rejected": -364.86968994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.3228882551193237, "rewards/margins": 13.743850708007812, "rewards/rejected": -12.4209623336792, "step": 119 }, { "epoch": 0.4, "learning_rate": 4.987454797341358e-06, "logits/chosen": -4.511158466339111, "logits/rejected": -3.3894896507263184, "logps/chosen": -320.03155517578125, "logps/rejected": -423.61590576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.1824281215667725, "rewards/margins": 14.258829116821289, "rewards/rejected": -12.076400756835938, "step": 120 }, { "epoch": 0.4, "learning_rate": 4.986472111731536e-06, "logits/chosen": -4.6026153564453125, "logits/rejected": -4.254034996032715, "logps/chosen": -234.12818908691406, "logps/rejected": -384.76104736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.7228440046310425, "rewards/margins": 12.307738304138184, "rewards/rejected": -10.584894180297852, "step": 121 }, { "epoch": 0.4, "learning_rate": 4.985452480108376e-06, "logits/chosen": -4.389405727386475, "logits/rejected": -4.311841011047363, "logps/chosen": -345.70697021484375, "logps/rejected": -424.91094970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.3869384825229645, "rewards/margins": 11.403149604797363, "rewards/rejected": -11.01621150970459, "step": 122 }, { "epoch": 0.41, "learning_rate": 4.984395917622387e-06, "logits/chosen": -4.487508296966553, "logits/rejected": -3.7850193977355957, "logps/chosen": -354.9103698730469, "logps/rejected": -464.49505615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.3494720458984375, "rewards/margins": 15.737408638000488, "rewards/rejected": -15.38793659210205, "step": 123 }, { "epoch": 0.41, "learning_rate": 4.9833024399728295e-06, "logits/chosen": -4.406660079956055, "logits/rejected": -3.810431718826294, "logps/chosen": -335.26019287109375, "logps/rejected": -485.70513916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.31449586153030396, "rewards/margins": 15.32269287109375, "rewards/rejected": -15.008196830749512, "step": 124 }, { "epoch": 0.41, "learning_rate": 4.982172063407479e-06, "logits/chosen": -4.340198993682861, "logits/rejected": -3.715421199798584, "logps/chosen": -288.08349609375, "logps/rejected": -427.89544677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.296283006668091, "rewards/margins": 17.811279296875, "rewards/rejected": -15.514996528625488, "step": 125 }, { "epoch": 0.42, "learning_rate": 4.981004804722384e-06, "logits/chosen": -4.355514049530029, "logits/rejected": -4.320943355560303, "logps/chosen": -344.5791320800781, "logps/rejected": -434.83441162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.7778748273849487, "rewards/margins": 15.279544830322266, "rewards/rejected": -13.501669883728027, "step": 126 }, { "epoch": 0.42, "learning_rate": 4.979800681261619e-06, "logits/chosen": -4.434340476989746, "logits/rejected": -3.5260894298553467, "logps/chosen": -304.6591491699219, "logps/rejected": -368.31622314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.424444556236267, "rewards/margins": 12.992469787597656, "rewards/rejected": -11.568025588989258, "step": 127 }, { "epoch": 0.42, "learning_rate": 4.978559710917024e-06, "logits/chosen": -4.621590614318848, "logits/rejected": -4.356657028198242, "logps/chosen": -253.44842529296875, "logps/rejected": -314.619140625, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 0.195851132273674, "rewards/margins": 5.029829502105713, "rewards/rejected": -4.833978176116943, "step": 128 }, { "epoch": 0.43, "learning_rate": 4.9772819121279395e-06, "logits/chosen": -4.356565475463867, "logits/rejected": -4.380559921264648, "logps/chosen": -248.25204467773438, "logps/rejected": -454.09014892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.265228271484375, "rewards/margins": 15.274236679077148, "rewards/rejected": -16.539464950561523, "step": 129 }, { "epoch": 0.43, "learning_rate": 4.975967303880933e-06, "logits/chosen": -4.506335258483887, "logits/rejected": -3.854314088821411, "logps/chosen": -328.2158508300781, "logps/rejected": -472.5008239746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1369415521621704, "rewards/margins": 17.064838409423828, "rewards/rejected": -18.201780319213867, "step": 130 }, { "epoch": 0.43, "learning_rate": 4.974615905709518e-06, "logits/chosen": -4.472858905792236, "logits/rejected": -3.3297882080078125, "logps/chosen": -406.04315185546875, "logps/rejected": -437.1851501464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.828503429889679, "rewards/margins": 17.225427627563477, "rewards/rejected": -16.39692497253418, "step": 131 }, { "epoch": 0.44, "learning_rate": 4.973227737693858e-06, "logits/chosen": -4.501963138580322, "logits/rejected": -4.339019298553467, "logps/chosen": -290.47528076171875, "logps/rejected": -569.775146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.762200951576233, "rewards/margins": 17.014074325561523, "rewards/rejected": -18.776275634765625, "step": 132 }, { "epoch": 0.44, "learning_rate": 4.971802820460481e-06, "logits/chosen": -4.366243839263916, "logits/rejected": -3.672136068344116, "logps/chosen": -432.7600402832031, "logps/rejected": -490.27880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.8038910031318665, "rewards/margins": 19.19203758239746, "rewards/rejected": -18.388147354125977, "step": 133 }, { "epoch": 0.44, "learning_rate": 4.970341175181957e-06, "logits/chosen": -4.175065040588379, "logits/rejected": -3.344090461730957, "logps/chosen": -656.5657958984375, "logps/rejected": -555.3231201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.54205322265625, "rewards/margins": 20.33514404296875, "rewards/rejected": -20.877197265625, "step": 134 }, { "epoch": 0.45, "learning_rate": 4.968842823576592e-06, "logits/chosen": -4.238167762756348, "logits/rejected": -3.387505054473877, "logps/chosen": -430.23291015625, "logps/rejected": -489.0005798339844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.47160646319389343, "rewards/margins": 17.979446411132812, "rewards/rejected": -18.451053619384766, "step": 135 }, { "epoch": 0.45, "learning_rate": 4.967307787908108e-06, "logits/chosen": -4.278386116027832, "logits/rejected": -3.415719509124756, "logps/chosen": -294.67205810546875, "logps/rejected": -477.9254150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.18892823159694672, "rewards/margins": 17.483989715576172, "rewards/rejected": -17.67291831970215, "step": 136 }, { "epoch": 0.45, "learning_rate": 4.965736090985305e-06, "logits/chosen": -4.284654140472412, "logits/rejected": -3.7332231998443604, "logps/chosen": -395.2005615234375, "logps/rejected": -559.507568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7245513796806335, "rewards/margins": 21.60517692565918, "rewards/rejected": -20.880624771118164, "step": 137 }, { "epoch": 0.46, "learning_rate": 4.964127756161727e-06, "logits/chosen": -4.413480758666992, "logits/rejected": -3.4410207271575928, "logps/chosen": -453.9344177246094, "logps/rejected": -478.32720947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7942901849746704, "rewards/margins": 19.258975982666016, "rewards/rejected": -18.464685440063477, "step": 138 }, { "epoch": 0.46, "learning_rate": 4.9624828073353144e-06, "logits/chosen": -4.612053871154785, "logits/rejected": -3.6660573482513428, "logps/chosen": -454.968994140625, "logps/rejected": -491.28668212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.370809942483902, "rewards/margins": 17.405982971191406, "rewards/rejected": -17.776792526245117, "step": 139 }, { "epoch": 0.46, "learning_rate": 4.960801268948047e-06, "logits/chosen": -4.630027770996094, "logits/rejected": -3.9804086685180664, "logps/chosen": -334.5860290527344, "logps/rejected": -470.1821594238281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.2251189947128296, "rewards/margins": 15.139511108398438, "rewards/rejected": -13.914392471313477, "step": 140 }, { "epoch": 0.47, "learning_rate": 4.959083165985581e-06, "logits/chosen": -4.38828706741333, "logits/rejected": -3.9800713062286377, "logps/chosen": -251.57284545898438, "logps/rejected": -336.22308349609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5879532098770142, "rewards/margins": 8.483784675598145, "rewards/rejected": -9.071738243103027, "step": 141 }, { "epoch": 0.47, "learning_rate": 4.957328523976879e-06, "logits/chosen": -4.331254482269287, "logits/rejected": -3.744624376296997, "logps/chosen": -555.8796997070312, "logps/rejected": -509.46636962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.13983154296875, "rewards/margins": 23.415903091430664, "rewards/rejected": -20.276071548461914, "step": 142 }, { "epoch": 0.47, "learning_rate": 4.9555373689938325e-06, "logits/chosen": -4.3973493576049805, "logits/rejected": -3.3343820571899414, "logps/chosen": -488.8701171875, "logps/rejected": -539.1553344726562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.8417510986328125, "rewards/margins": 21.32379722595215, "rewards/rejected": -20.482046127319336, "step": 143 }, { "epoch": 0.48, "learning_rate": 4.95370972765087e-06, "logits/chosen": -3.7870357036590576, "logits/rejected": -4.47447395324707, "logps/chosen": -113.8028793334961, "logps/rejected": -345.9452819824219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.3848892152309418, "rewards/margins": 9.808587074279785, "rewards/rejected": -9.423697471618652, "step": 144 }, { "epoch": 0.48, "learning_rate": 4.951845627104565e-06, "logits/chosen": -4.488361835479736, "logits/rejected": -4.143915176391602, "logps/chosen": -271.135498046875, "logps/rejected": -533.8534545898438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.818115234375, "rewards/margins": 22.837081909179688, "rewards/rejected": -21.018966674804688, "step": 145 }, { "epoch": 0.48, "learning_rate": 4.9499450950532305e-06, "logits/chosen": -4.404188632965088, "logits/rejected": -3.7939138412475586, "logps/chosen": -502.817626953125, "logps/rejected": -596.62646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.257251024246216, "rewards/margins": 10.3175048828125, "rewards/rejected": -13.574755668640137, "step": 146 }, { "epoch": 0.49, "learning_rate": 4.948008159736507e-06, "logits/chosen": -4.459652423858643, "logits/rejected": -3.455385208129883, "logps/chosen": -459.57611083984375, "logps/rejected": -548.26806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.31995850801467896, "rewards/margins": 21.58222198486328, "rewards/rejected": -21.902179718017578, "step": 147 }, { "epoch": 0.49, "learning_rate": 4.9460348499349485e-06, "logits/chosen": -4.625619888305664, "logits/rejected": -3.69688081741333, "logps/chosen": -304.876953125, "logps/rejected": -458.91082763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4879150390625, "rewards/margins": 19.09657859802246, "rewards/rejected": -19.58449363708496, "step": 148 }, { "epoch": 0.49, "learning_rate": 4.944025194969586e-06, "logits/chosen": -4.478323459625244, "logits/rejected": -3.7782227993011475, "logps/chosen": -332.0870361328125, "logps/rejected": -599.0357666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.6869843006134033, "rewards/margins": 25.880760192871094, "rewards/rejected": -23.193775177001953, "step": 149 }, { "epoch": 0.5, "learning_rate": 4.941979224701499e-06, "logits/chosen": -4.613561153411865, "logits/rejected": -4.613561153411865, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 150 }, { "epoch": 0.5, "learning_rate": 4.939896969531367e-06, "logits/chosen": -4.662393093109131, "logits/rejected": -4.662393093109131, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 151 }, { "epoch": 0.5, "learning_rate": 4.937778460399022e-06, "logits/chosen": -4.4899797439575195, "logits/rejected": -3.9571731090545654, "logps/chosen": -311.52081298828125, "logps/rejected": -528.713134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.377178907394409, "rewards/margins": 24.917617797851562, "rewards/rejected": -21.54043960571289, "step": 152 }, { "epoch": 0.51, "learning_rate": 4.935623728782986e-06, "logits/chosen": -4.431125164031982, "logits/rejected": -3.994685173034668, "logps/chosen": -318.579833984375, "logps/rejected": -469.0091552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.33842775225639343, "rewards/margins": 20.076812744140625, "rewards/rejected": -20.415241241455078, "step": 153 }, { "epoch": 0.51, "learning_rate": 4.933432806700004e-06, "logits/chosen": -4.436576843261719, "logits/rejected": -3.4724605083465576, "logps/chosen": -499.44195556640625, "logps/rejected": -503.2141418457031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.4404419660568237, "rewards/margins": 17.919048309326172, "rewards/rejected": -16.478607177734375, "step": 154 }, { "epoch": 0.51, "learning_rate": 4.931205726704566e-06, "logits/chosen": -4.575839519500732, "logits/rejected": -3.600397825241089, "logps/chosen": -335.92559814453125, "logps/rejected": -512.950439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.44965821504592896, "rewards/margins": 20.810823440551758, "rewards/rejected": -20.36116600036621, "step": 155 }, { "epoch": 0.52, "learning_rate": 4.928942521888431e-06, "logits/chosen": -4.4273834228515625, "logits/rejected": -3.1403005123138428, "logps/chosen": -641.9811401367188, "logps/rejected": -565.1820678710938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.003668213030323386, "rewards/margins": 20.497615814208984, "rewards/rejected": -20.493947982788086, "step": 156 }, { "epoch": 0.52, "learning_rate": 4.926643225880123e-06, "logits/chosen": -4.4851250648498535, "logits/rejected": -3.762442111968994, "logps/chosen": -542.037353515625, "logps/rejected": -583.6822509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.6874328851699829, "rewards/margins": 22.295642852783203, "rewards/rejected": -21.60820960998535, "step": 157 }, { "epoch": 0.52, "learning_rate": 4.924307872844444e-06, "logits/chosen": -4.5215559005737305, "logits/rejected": -3.9681222438812256, "logps/chosen": -452.0367736816406, "logps/rejected": -570.5191650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5621551871299744, "rewards/margins": 24.197097778320312, "rewards/rejected": -23.63494300842285, "step": 158 }, { "epoch": 0.53, "learning_rate": 4.921936497481956e-06, "logits/chosen": -4.452681541442871, "logits/rejected": -4.026414394378662, "logps/chosen": -238.1045379638672, "logps/rejected": -457.61431884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.171229600906372, "rewards/margins": 20.798799514770508, "rewards/rejected": -18.6275691986084, "step": 159 }, { "epoch": 0.53, "learning_rate": 4.919529135028473e-06, "logits/chosen": -4.6069817543029785, "logits/rejected": -4.021325588226318, "logps/chosen": -443.1712951660156, "logps/rejected": -596.0953979492188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.43758851289749146, "rewards/margins": 20.0040283203125, "rewards/rejected": -19.56644058227539, "step": 160 }, { "epoch": 0.53, "learning_rate": 4.917085821254532e-06, "logits/chosen": -4.430685520172119, "logits/rejected": -3.408867835998535, "logps/chosen": -333.0233459472656, "logps/rejected": -514.176025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.3141602277755737, "rewards/margins": 21.7270450592041, "rewards/rejected": -20.412885665893555, "step": 161 }, { "epoch": 0.54, "learning_rate": 4.914606592464865e-06, "logits/chosen": -4.614257335662842, "logits/rejected": -4.173254013061523, "logps/chosen": -392.7569274902344, "logps/rejected": -600.0775146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.3662140369415283, "rewards/margins": 26.564104080200195, "rewards/rejected": -25.19788932800293, "step": 162 }, { "epoch": 0.54, "learning_rate": 4.91209148549786e-06, "logits/chosen": -4.470552444458008, "logits/rejected": -3.6645894050598145, "logps/chosen": -278.90374755859375, "logps/rejected": -585.414306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.7279815673828125, "rewards/margins": 22.604557037353516, "rewards/rejected": -18.876575469970703, "step": 163 }, { "epoch": 0.54, "learning_rate": 4.909540537725007e-06, "logits/chosen": -4.464867115020752, "logits/rejected": -3.679206609725952, "logps/chosen": -399.67230224609375, "logps/rejected": -459.1192932128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.3072266578674316, "rewards/margins": 20.884450912475586, "rewards/rejected": -17.577224731445312, "step": 164 }, { "epoch": 0.55, "learning_rate": 4.906953787050354e-06, "logits/chosen": -4.506504535675049, "logits/rejected": -4.483677864074707, "logps/chosen": -212.6044921875, "logps/rejected": -577.6050415039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.012289524078369, "rewards/margins": 21.269466400146484, "rewards/rejected": -19.257177352905273, "step": 165 }, { "epoch": 0.55, "learning_rate": 4.904331271909932e-06, "logits/chosen": -4.631925106048584, "logits/rejected": -3.576526403427124, "logps/chosen": -348.4132995605469, "logps/rejected": -490.9519348144531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7574676871299744, "rewards/margins": 21.938953399658203, "rewards/rejected": -21.181486129760742, "step": 166 }, { "epoch": 0.55, "learning_rate": 4.901673031271194e-06, "logits/chosen": -4.463425159454346, "logits/rejected": -3.718756914138794, "logps/chosen": -464.51141357421875, "logps/rejected": -558.5987548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.619091808795929, "rewards/margins": 22.998170852661133, "rewards/rejected": -22.379079818725586, "step": 167 }, { "epoch": 0.56, "learning_rate": 4.898979104632427e-06, "logits/chosen": -4.307651996612549, "logits/rejected": -3.490633010864258, "logps/chosen": -389.7247314453125, "logps/rejected": -523.1450805664062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.3679230213165283, "rewards/margins": 20.981260299682617, "rewards/rejected": -19.61333656311035, "step": 168 }, { "epoch": 0.56, "learning_rate": 4.8962495320221714e-06, "logits/chosen": -4.304553031921387, "logits/rejected": -3.776304006576538, "logps/chosen": -323.0478515625, "logps/rejected": -520.6531982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.1722412109375, "rewards/margins": 20.817508697509766, "rewards/rejected": -18.645267486572266, "step": 169 }, { "epoch": 0.56, "learning_rate": 4.8934843539986266e-06, "logits/chosen": -4.417074680328369, "logits/rejected": -3.540210008621216, "logps/chosen": -322.0855407714844, "logps/rejected": -536.3630981445312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.3448638916015625, "rewards/margins": 22.750934600830078, "rewards/rejected": -20.406070709228516, "step": 170 }, { "epoch": 0.57, "learning_rate": 4.890683611649041e-06, "logits/chosen": -4.454306125640869, "logits/rejected": -3.4941771030426025, "logps/chosen": -449.34722900390625, "logps/rejected": -584.3948974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.965466320514679, "rewards/margins": 24.43212890625, "rewards/rejected": -23.466663360595703, "step": 171 }, { "epoch": 0.57, "learning_rate": 4.887847346589111e-06, "logits/chosen": -4.528407096862793, "logits/rejected": -4.528407096862793, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 172 }, { "epoch": 0.57, "learning_rate": 4.884975600962355e-06, "logits/chosen": -4.461303234100342, "logits/rejected": -3.4503564834594727, "logps/chosen": -350.125, "logps/rejected": -544.8951416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.8698486685752869, "rewards/margins": 22.143985748291016, "rewards/rejected": -21.274137496948242, "step": 173 }, { "epoch": 0.58, "learning_rate": 4.8820684174394935e-06, "logits/chosen": -4.2601318359375, "logits/rejected": -3.9379310607910156, "logps/chosen": -276.892822265625, "logps/rejected": -565.9779052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 4.50067138671875, "rewards/margins": 27.015552520751953, "rewards/rejected": -22.514881134033203, "step": 174 }, { "epoch": 0.58, "learning_rate": 4.879125839217808e-06, "logits/chosen": -4.442978382110596, "logits/rejected": -4.165872097015381, "logps/chosen": -276.66558837890625, "logps/rejected": -485.2083435058594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.557089328765869, "rewards/margins": 19.78055763244629, "rewards/rejected": -17.223468780517578, "step": 175 }, { "epoch": 0.58, "learning_rate": 4.8761479100205085e-06, "logits/chosen": -4.185311794281006, "logits/rejected": -4.506956100463867, "logps/chosen": -302.510009765625, "logps/rejected": -368.14508056640625, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": 0.349456787109375, "rewards/margins": 3.197528123855591, "rewards/rejected": -2.848071336746216, "step": 176 }, { "epoch": 0.59, "learning_rate": 4.873134674096072e-06, "logits/chosen": -4.432115077972412, "logits/rejected": -4.062368392944336, "logps/chosen": -437.7923583984375, "logps/rejected": -577.2886962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.143060207366943, "rewards/margins": 21.335046768188477, "rewards/rejected": -26.478107452392578, "step": 177 }, { "epoch": 0.59, "learning_rate": 4.870086176217597e-06, "logits/chosen": -4.418601989746094, "logits/rejected": -4.349551677703857, "logps/chosen": -278.1993713378906, "logps/rejected": -625.385986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.728192090988159, "rewards/margins": 23.60893440246582, "rewards/rejected": -27.337125778198242, "step": 178 }, { "epoch": 0.59, "learning_rate": 4.867002461682129e-06, "logits/chosen": -4.347306251525879, "logits/rejected": -4.1640944480896, "logps/chosen": -397.3504638671875, "logps/rejected": -576.0902099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7298157215118408, "rewards/margins": 20.283533096313477, "rewards/rejected": -22.013349533081055, "step": 179 }, { "epoch": 0.6, "learning_rate": 4.863883576309991e-06, "logits/chosen": -4.444037437438965, "logits/rejected": -3.735938310623169, "logps/chosen": -439.25177001953125, "logps/rejected": -506.2371826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.559460401535034, "rewards/margins": 16.74394416809082, "rewards/rejected": -20.303403854370117, "step": 180 }, { "epoch": 0.6, "learning_rate": 4.860729566444106e-06, "logits/chosen": -4.453701972961426, "logits/rejected": -3.483694076538086, "logps/chosen": -435.145751953125, "logps/rejected": -411.5998840332031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5900208950042725, "rewards/margins": 14.451112747192383, "rewards/rejected": -18.041133880615234, "step": 181 }, { "epoch": 0.6, "learning_rate": 4.857540478949302e-06, "logits/chosen": -4.345010757446289, "logits/rejected": -3.3772659301757812, "logps/chosen": -424.6817626953125, "logps/rejected": -536.5455322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.265347480773926, "rewards/margins": 19.152793884277344, "rewards/rejected": -23.418142318725586, "step": 182 }, { "epoch": 0.61, "learning_rate": 4.854316361211619e-06, "logits/chosen": -4.456699848175049, "logits/rejected": -4.032180309295654, "logps/chosen": -479.0955505371094, "logps/rejected": -602.84130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.73911452293396, "rewards/margins": 17.996774673461914, "rewards/rejected": -21.735889434814453, "step": 183 }, { "epoch": 0.61, "learning_rate": 4.851057261137608e-06, "logits/chosen": -4.340909481048584, "logits/rejected": -3.292670726776123, "logps/chosen": -462.56268310546875, "logps/rejected": -523.4163208007812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.709277391433716, "rewards/margins": 19.967500686645508, "rewards/rejected": -22.67677879333496, "step": 184 }, { "epoch": 0.61, "learning_rate": 4.847763227153612e-06, "logits/chosen": -4.428055286407471, "logits/rejected": -3.5385830402374268, "logps/chosen": -422.267822265625, "logps/rejected": -485.8017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4887635707855225, "rewards/margins": 19.343963623046875, "rewards/rejected": -21.832727432250977, "step": 185 }, { "epoch": 0.62, "learning_rate": 4.844434308205052e-06, "logits/chosen": -4.273122310638428, "logits/rejected": -3.7151196002960205, "logps/chosen": -546.6180419921875, "logps/rejected": -715.68896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.0272216796875, "rewards/margins": 25.675113677978516, "rewards/rejected": -30.702335357666016, "step": 186 }, { "epoch": 0.62, "learning_rate": 4.841070553755697e-06, "logits/chosen": -4.607006072998047, "logits/rejected": -4.249687671661377, "logps/chosen": -261.2702941894531, "logps/rejected": -548.1873779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2804367542266846, "rewards/margins": 21.665143966674805, "rewards/rejected": -22.945581436157227, "step": 187 }, { "epoch": 0.62, "learning_rate": 4.837672013786931e-06, "logits/chosen": -4.519053936004639, "logits/rejected": -4.209323883056641, "logps/chosen": -303.30255126953125, "logps/rejected": -551.4930419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.127401828765869, "rewards/margins": 21.955106735229492, "rewards/rejected": -25.082508087158203, "step": 188 }, { "epoch": 0.63, "learning_rate": 4.8342387387970105e-06, "logits/chosen": -4.35404634475708, "logits/rejected": -3.9764857292175293, "logps/chosen": -331.4461669921875, "logps/rejected": -582.4991455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6374847888946533, "rewards/margins": 22.73183250427246, "rewards/rejected": -25.36931800842285, "step": 189 }, { "epoch": 0.63, "learning_rate": 4.830770779800309e-06, "logits/chosen": -4.2908101081848145, "logits/rejected": -3.6881422996520996, "logps/chosen": -402.2869567871094, "logps/rejected": -572.225830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.96708083152771, "rewards/margins": 22.76718521118164, "rewards/rejected": -25.73426628112793, "step": 190 }, { "epoch": 0.63, "learning_rate": 4.827268188326567e-06, "logits/chosen": -4.4686408042907715, "logits/rejected": -3.9434425830841064, "logps/chosen": -478.5679626464844, "logps/rejected": -673.1861572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.687936305999756, "rewards/margins": 21.72261619567871, "rewards/rejected": -26.410552978515625, "step": 191 }, { "epoch": 0.64, "learning_rate": 4.823731016420122e-06, "logits/chosen": -4.402027606964111, "logits/rejected": -3.672158718109131, "logps/chosen": -521.04248046875, "logps/rejected": -567.2761840820312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.248925685882568, "rewards/margins": 16.707509994506836, "rewards/rejected": -21.956436157226562, "step": 192 }, { "epoch": 0.64, "learning_rate": 4.820159316639133e-06, "logits/chosen": -4.356218338012695, "logits/rejected": -4.386761665344238, "logps/chosen": -312.3507080078125, "logps/rejected": -663.8659057617188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.5384186506271362, "rewards/margins": 29.62138557434082, "rewards/rejected": -28.08296775817871, "step": 193 }, { "epoch": 0.64, "learning_rate": 4.816553142054806e-06, "logits/chosen": -4.566652774810791, "logits/rejected": -4.032877445220947, "logps/chosen": -390.8731689453125, "logps/rejected": -503.9954833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.9249114990234375, "rewards/margins": 17.718976974487305, "rewards/rejected": -22.643888473510742, "step": 194 }, { "epoch": 0.65, "learning_rate": 4.812912546250595e-06, "logits/chosen": -4.595418453216553, "logits/rejected": -3.4654226303100586, "logps/chosen": -437.74615478515625, "logps/rejected": -564.276611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.270282030105591, "rewards/margins": 22.324874877929688, "rewards/rejected": -24.595157623291016, "step": 195 }, { "epoch": 0.65, "learning_rate": 4.809237583321421e-06, "logits/chosen": -4.55130672454834, "logits/rejected": -3.809107542037964, "logps/chosen": -288.273681640625, "logps/rejected": -529.8118286132812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.319549560546875, "rewards/margins": 21.799253463745117, "rewards/rejected": -25.118803024291992, "step": 196 }, { "epoch": 0.65, "learning_rate": 4.8055283078728525e-06, "logits/chosen": -4.273970127105713, "logits/rejected": -3.532085657119751, "logps/chosen": -394.9105224609375, "logps/rejected": -580.9087524414062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0409668684005737, "rewards/margins": 24.131925582885742, "rewards/rejected": -25.17289161682129, "step": 197 }, { "epoch": 0.66, "learning_rate": 4.801784775020303e-06, "logits/chosen": -4.558983325958252, "logits/rejected": -3.609147071838379, "logps/chosen": -425.38165283203125, "logps/rejected": -579.5765991210938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.166571140289307, "rewards/margins": 21.48810577392578, "rewards/rejected": -25.65467643737793, "step": 198 }, { "epoch": 0.66, "learning_rate": 4.798007040388212e-06, "logits/chosen": -4.523662567138672, "logits/rejected": -3.308692693710327, "logps/chosen": -537.5259399414062, "logps/rejected": -611.4498901367188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3234070539474487, "rewards/margins": 22.504859924316406, "rewards/rejected": -23.828266143798828, "step": 199 }, { "epoch": 0.66, "learning_rate": 4.794195160109215e-06, "logits/chosen": -4.554165840148926, "logits/rejected": -4.412696361541748, "logps/chosen": -283.873046875, "logps/rejected": -653.0494384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.464404344558716, "rewards/margins": 24.511882781982422, "rewards/rejected": -26.976287841796875, "step": 200 }, { "epoch": 0.67, "learning_rate": 4.790349190823313e-06, "logits/chosen": -4.461995601654053, "logits/rejected": -4.220631122589111, "logps/chosen": -378.62567138671875, "logps/rejected": -555.7577514648438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.84375, "rewards/margins": 15.135431289672852, "rewards/rejected": -22.97918128967285, "step": 201 }, { "epoch": 0.67, "learning_rate": 4.786469189677026e-06, "logits/chosen": -4.626312732696533, "logits/rejected": -3.6043701171875, "logps/chosen": -404.515625, "logps/rejected": -554.8936767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.731201171875, "rewards/margins": 19.18717384338379, "rewards/rejected": -22.91837501525879, "step": 202 }, { "epoch": 0.67, "learning_rate": 4.78255521432255e-06, "logits/chosen": -4.456379413604736, "logits/rejected": -4.084070205688477, "logps/chosen": -233.46734619140625, "logps/rejected": -546.2238159179688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8788391351699829, "rewards/margins": 24.65411376953125, "rewards/rejected": -25.5329532623291, "step": 203 }, { "epoch": 0.68, "learning_rate": 4.778607322916896e-06, "logits/chosen": -4.25200891494751, "logits/rejected": -4.352073669433594, "logps/chosen": -287.51422119140625, "logps/rejected": -514.3617553710938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.367222785949707, "rewards/margins": 19.036060333251953, "rewards/rejected": -24.403284072875977, "step": 204 }, { "epoch": 0.68, "learning_rate": 4.7746255741210256e-06, "logits/chosen": -4.257689952850342, "logits/rejected": -3.9616525173187256, "logps/chosen": -436.54052734375, "logps/rejected": -558.0086669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.211642742156982, "rewards/margins": 16.815370559692383, "rewards/rejected": -23.027013778686523, "step": 205 }, { "epoch": 0.68, "learning_rate": 4.770610027098983e-06, "logits/chosen": -4.314263820648193, "logits/rejected": -3.706144332885742, "logps/chosen": -385.21514892578125, "logps/rejected": -575.6632080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.6782593131065369, "rewards/margins": 22.845712661743164, "rewards/rejected": -22.16745376586914, "step": 206 }, { "epoch": 0.69, "learning_rate": 4.766560741517014e-06, "logits/chosen": -4.261018753051758, "logits/rejected": -3.49680757522583, "logps/chosen": -443.4586181640625, "logps/rejected": -528.631103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9242799282073975, "rewards/margins": 18.376794815063477, "rewards/rejected": -22.301074981689453, "step": 207 }, { "epoch": 0.69, "learning_rate": 4.762477777542676e-06, "logits/chosen": -4.405641078948975, "logits/rejected": -3.945005178451538, "logps/chosen": -405.1590576171875, "logps/rejected": -605.96142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.02716064453125, "rewards/margins": 23.706918716430664, "rewards/rejected": -27.734079360961914, "step": 208 }, { "epoch": 0.69, "learning_rate": 4.7583611958439514e-06, "logits/chosen": -4.536804676055908, "logits/rejected": -3.4177980422973633, "logps/chosen": -509.9901123046875, "logps/rejected": -620.6497802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6092407703399658, "rewards/margins": 23.246379852294922, "rewards/rejected": -24.855621337890625, "step": 209 }, { "epoch": 0.7, "learning_rate": 4.75421105758834e-06, "logits/chosen": -4.660959243774414, "logits/rejected": -4.660958766937256, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 210 }, { "epoch": 0.7, "learning_rate": 4.750027424441949e-06, "logits/chosen": -4.213469982147217, "logits/rejected": -4.410857200622559, "logps/chosen": -280.2644348144531, "logps/rejected": -588.2990112304688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.470999151468277, "rewards/margins": 24.910537719726562, "rewards/rejected": -24.439538955688477, "step": 211 }, { "epoch": 0.7, "learning_rate": 4.745810358568588e-06, "logits/chosen": -4.407349586486816, "logits/rejected": -3.4045121669769287, "logps/chosen": -651.0145263671875, "logps/rejected": -581.0377807617188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.35540771484375, "rewards/margins": 13.205488204956055, "rewards/rejected": -22.560895919799805, "step": 212 }, { "epoch": 0.71, "learning_rate": 4.741559922628828e-06, "logits/chosen": -4.383808612823486, "logits/rejected": -3.694657325744629, "logps/chosen": -627.8776245117188, "logps/rejected": -550.0055541992188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.867218017578125, "rewards/margins": 19.274717330932617, "rewards/rejected": -24.141935348510742, "step": 213 }, { "epoch": 0.71, "learning_rate": 4.7372761797790836e-06, "logits/chosen": -4.419149875640869, "logits/rejected": -3.9326839447021484, "logps/chosen": -456.3083801269531, "logps/rejected": -572.00732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.1375579833984375, "rewards/margins": 19.8563232421875, "rewards/rejected": -23.993881225585938, "step": 214 }, { "epoch": 0.71, "learning_rate": 4.732959193670672e-06, "logits/chosen": -4.390028476715088, "logits/rejected": -3.866453170776367, "logps/chosen": -332.904052734375, "logps/rejected": -565.65673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6805603504180908, "rewards/margins": 21.7492733001709, "rewards/rejected": -23.429834365844727, "step": 215 }, { "epoch": 0.72, "learning_rate": 4.728609028448862e-06, "logits/chosen": -4.234015941619873, "logits/rejected": -3.2613775730133057, "logps/chosen": -456.681640625, "logps/rejected": -508.46441650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.307135105133057, "rewards/margins": 17.87537384033203, "rewards/rejected": -22.18250846862793, "step": 216 }, { "epoch": 0.72, "learning_rate": 4.7242257487519275e-06, "logits/chosen": -4.365071773529053, "logits/rejected": -3.898486852645874, "logps/chosen": -285.94317626953125, "logps/rejected": -444.3804931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7177032828330994, "rewards/margins": 18.269119262695312, "rewards/rejected": -18.9868221282959, "step": 217 }, { "epoch": 0.72, "learning_rate": 4.7198094197101826e-06, "logits/chosen": -4.433129787445068, "logits/rejected": -3.400374412536621, "logps/chosen": -395.3111267089844, "logps/rejected": -566.3218383789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.34246826171875, "rewards/margins": 23.32318687438965, "rewards/rejected": -25.6656551361084, "step": 218 }, { "epoch": 0.73, "learning_rate": 4.715360106945015e-06, "logits/chosen": -4.460170269012451, "logits/rejected": -4.486166477203369, "logps/chosen": -322.58245849609375, "logps/rejected": -571.2058715820312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.377227783203125, "rewards/margins": 21.1688232421875, "rewards/rejected": -25.546051025390625, "step": 219 }, { "epoch": 0.73, "learning_rate": 4.710877876567912e-06, "logits/chosen": -4.677746772766113, "logits/rejected": -3.628592014312744, "logps/chosen": -282.5626525878906, "logps/rejected": -498.66815185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.01987624168396, "rewards/margins": 17.119985580444336, "rewards/rejected": -19.139862060546875, "step": 220 }, { "epoch": 0.73, "learning_rate": 4.706362795179476e-06, "logits/chosen": -4.649206638336182, "logits/rejected": -3.9550185203552246, "logps/chosen": -317.6220703125, "logps/rejected": -532.2894897460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.211251974105835, "rewards/margins": 20.70464515686035, "rewards/rejected": -22.915897369384766, "step": 221 }, { "epoch": 0.74, "learning_rate": 4.70181492986844e-06, "logits/chosen": -4.574020862579346, "logits/rejected": -3.7530534267425537, "logps/chosen": -390.477783203125, "logps/rejected": -538.0514526367188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6195312738418579, "rewards/margins": 23.567447662353516, "rewards/rejected": -24.186979293823242, "step": 222 }, { "epoch": 0.74, "learning_rate": 4.6972343482106615e-06, "logits/chosen": -4.381142616271973, "logits/rejected": -3.9664149284362793, "logps/chosen": -368.2343444824219, "logps/rejected": -584.2080688476562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.74643874168396, "rewards/margins": 23.20481300354004, "rewards/rejected": -26.951251983642578, "step": 223 }, { "epoch": 0.74, "learning_rate": 4.6926211182681295e-06, "logits/chosen": -4.515586853027344, "logits/rejected": -4.021639823913574, "logps/chosen": -422.8552551269531, "logps/rejected": -605.24853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3351776599884033, "rewards/margins": 23.986923217773438, "rewards/rejected": -27.322101593017578, "step": 224 }, { "epoch": 0.75, "learning_rate": 4.687975308587944e-06, "logits/chosen": -4.370363235473633, "logits/rejected": -3.52022385597229, "logps/chosen": -421.40106201171875, "logps/rejected": -555.5052490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.351022243499756, "rewards/margins": 20.446269989013672, "rewards/rejected": -25.797292709350586, "step": 225 }, { "epoch": 0.75, "learning_rate": 4.683296988201301e-06, "logits/chosen": -4.45542049407959, "logits/rejected": -3.843148946762085, "logps/chosen": -250.9272003173828, "logps/rejected": -461.41656494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5148300528526306, "rewards/margins": 22.641338348388672, "rewards/rejected": -22.126508712768555, "step": 226 }, { "epoch": 0.75, "learning_rate": 4.6785862266224695e-06, "logits/chosen": -4.4527587890625, "logits/rejected": -3.8313660621643066, "logps/chosen": -435.7333068847656, "logps/rejected": -420.346435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.598269939422607, "rewards/margins": 12.53331184387207, "rewards/rejected": -18.131582260131836, "step": 227 }, { "epoch": 0.75, "learning_rate": 4.673843093847753e-06, "logits/chosen": -4.46143913269043, "logits/rejected": -4.0022873878479, "logps/chosen": -546.3322143554688, "logps/rejected": -678.267822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.684521675109863, "rewards/margins": 22.149757385253906, "rewards/rejected": -28.834280014038086, "step": 228 }, { "epoch": 0.76, "learning_rate": 4.669067660354456e-06, "logits/chosen": -4.453671932220459, "logits/rejected": -3.783543586730957, "logps/chosen": -489.1321105957031, "logps/rejected": -653.801513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.456167697906494, "rewards/margins": 27.995878219604492, "rewards/rejected": -30.452045440673828, "step": 229 }, { "epoch": 0.76, "learning_rate": 4.664259997099829e-06, "logits/chosen": -4.382017135620117, "logits/rejected": -4.235163688659668, "logps/chosen": -401.96905517578125, "logps/rejected": -611.9321899414062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1131255626678467, "rewards/margins": 23.950090408325195, "rewards/rejected": -26.063215255737305, "step": 230 }, { "epoch": 0.76, "learning_rate": 4.6594201755200205e-06, "logits/chosen": -4.401975154876709, "logits/rejected": -4.047399520874023, "logps/chosen": -347.8199768066406, "logps/rejected": -534.321533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.148288249969482, "rewards/margins": 15.352182388305664, "rewards/rejected": -20.500471115112305, "step": 231 }, { "epoch": 0.77, "learning_rate": 4.654548267529015e-06, "logits/chosen": -4.742393970489502, "logits/rejected": -3.528773546218872, "logps/chosen": -461.3231201171875, "logps/rejected": -502.95330810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.834014892578125, "rewards/margins": 19.609724044799805, "rewards/rejected": -22.44373893737793, "step": 232 }, { "epoch": 0.77, "learning_rate": 4.64964434551756e-06, "logits/chosen": -4.29525899887085, "logits/rejected": -3.517086982727051, "logps/chosen": -655.9037475585938, "logps/rejected": -467.7865295410156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.464410305023193, "rewards/margins": 16.863277435302734, "rewards/rejected": -21.327688217163086, "step": 233 }, { "epoch": 0.77, "learning_rate": 4.644708482352093e-06, "logits/chosen": -4.491517066955566, "logits/rejected": -3.6783647537231445, "logps/chosen": -402.5279846191406, "logps/rejected": -560.4140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.5269012451171875, "rewards/margins": 17.657962799072266, "rewards/rejected": -22.184864044189453, "step": 234 }, { "epoch": 0.78, "learning_rate": 4.639740751373663e-06, "logits/chosen": -4.2944111824035645, "logits/rejected": -3.600904703140259, "logps/chosen": -518.21533203125, "logps/rejected": -547.026611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.848297119140625, "rewards/margins": 20.74945068359375, "rewards/rejected": -24.597747802734375, "step": 235 }, { "epoch": 0.78, "learning_rate": 4.634741226396832e-06, "logits/chosen": -4.682287693023682, "logits/rejected": -3.6956443786621094, "logps/chosen": -314.2425537109375, "logps/rejected": -630.8653564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6687896251678467, "rewards/margins": 24.917322158813477, "rewards/rejected": -28.586111068725586, "step": 236 }, { "epoch": 0.78, "learning_rate": 4.629709981708586e-06, "logits/chosen": -4.4025044441223145, "logits/rejected": -3.328565835952759, "logps/chosen": -596.0982666015625, "logps/rejected": -469.07025146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.942303657531738, "rewards/margins": 13.943493843078613, "rewards/rejected": -21.88579750061035, "step": 237 }, { "epoch": 0.79, "learning_rate": 4.624647092067226e-06, "logits/chosen": -4.347430229187012, "logits/rejected": -4.287944316864014, "logps/chosen": -307.51995849609375, "logps/rejected": -544.3563232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6383118629455566, "rewards/margins": 22.841541290283203, "rewards/rejected": -25.4798526763916, "step": 238 }, { "epoch": 0.79, "learning_rate": 4.619552632701263e-06, "logits/chosen": -4.473455905914307, "logits/rejected": -4.314274787902832, "logps/chosen": -277.60906982421875, "logps/rejected": -504.164306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.467631816864014, "rewards/margins": 18.195838928222656, "rewards/rejected": -23.663471221923828, "step": 239 }, { "epoch": 0.79, "learning_rate": 4.614426679308291e-06, "logits/chosen": -4.148372650146484, "logits/rejected": -3.744719982147217, "logps/chosen": -355.4638671875, "logps/rejected": -454.26422119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.292315721511841, "rewards/margins": 16.010753631591797, "rewards/rejected": -18.303070068359375, "step": 240 }, { "epoch": 0.8, "learning_rate": 4.609269308053872e-06, "logits/chosen": -4.353799343109131, "logits/rejected": -3.8214452266693115, "logps/chosen": -393.32989501953125, "logps/rejected": -582.2720336914062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2819581031799316, "rewards/margins": 22.352136611938477, "rewards/rejected": -24.63409423828125, "step": 241 }, { "epoch": 0.8, "learning_rate": 4.604080595570399e-06, "logits/chosen": -4.46610689163208, "logits/rejected": -3.906358242034912, "logps/chosen": -322.1328125, "logps/rejected": -599.1141357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1895081996917725, "rewards/margins": 23.97051429748535, "rewards/rejected": -26.160022735595703, "step": 242 }, { "epoch": 0.8, "learning_rate": 4.598860618955957e-06, "logits/chosen": -4.385786056518555, "logits/rejected": -4.310035228729248, "logps/chosen": -400.26947021484375, "logps/rejected": -559.5322875976562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.774789571762085, "rewards/margins": 18.254159927368164, "rewards/rejected": -21.028949737548828, "step": 243 }, { "epoch": 0.81, "learning_rate": 4.5936094557731815e-06, "logits/chosen": -4.240229606628418, "logits/rejected": -4.443663597106934, "logps/chosen": -345.6215515136719, "logps/rejected": -692.10107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.39714357256889343, "rewards/margins": 27.15097427368164, "rewards/rejected": -26.753829956054688, "step": 244 }, { "epoch": 0.81, "learning_rate": 4.588327184048099e-06, "logits/chosen": -4.332690238952637, "logits/rejected": -3.449024200439453, "logps/chosen": -496.02508544921875, "logps/rejected": -615.9970092773438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7168214321136475, "rewards/margins": 22.373443603515625, "rewards/rejected": -26.09026527404785, "step": 245 }, { "epoch": 0.81, "learning_rate": 4.5830138822689755e-06, "logits/chosen": -4.444944381713867, "logits/rejected": -3.7115731239318848, "logps/chosen": -449.7723083496094, "logps/rejected": -626.166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5469634532928467, "rewards/margins": 23.168373107910156, "rewards/rejected": -26.715335845947266, "step": 246 }, { "epoch": 0.82, "learning_rate": 4.577669629385145e-06, "logits/chosen": -4.408154487609863, "logits/rejected": -3.651484489440918, "logps/chosen": -441.13018798828125, "logps/rejected": -643.3833618164062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.660876512527466, "rewards/margins": 22.676677703857422, "rewards/rejected": -25.337554931640625, "step": 247 }, { "epoch": 0.82, "learning_rate": 4.572294504805841e-06, "logits/chosen": -4.424633502960205, "logits/rejected": -3.707329034805298, "logps/chosen": -487.3986511230469, "logps/rejected": -554.1790771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.541256904602051, "rewards/margins": 15.3828706741333, "rewards/rejected": -20.92412757873535, "step": 248 }, { "epoch": 0.82, "learning_rate": 4.566888588399007e-06, "logits/chosen": -4.403136730194092, "logits/rejected": -3.7492878437042236, "logps/chosen": -355.76708984375, "logps/rejected": -554.3052978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.743695020675659, "rewards/margins": 20.683734893798828, "rewards/rejected": -24.42742919921875, "step": 249 }, { "epoch": 0.83, "learning_rate": 4.561451960490123e-06, "logits/chosen": -4.675843715667725, "logits/rejected": -4.675843715667725, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 250 }, { "epoch": 0.83, "learning_rate": 4.5559847018610034e-06, "logits/chosen": -4.456602096557617, "logits/rejected": -3.8642396926879883, "logps/chosen": -359.24835205078125, "logps/rejected": -608.4000244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.042169213294983, "rewards/margins": 25.666967391967773, "rewards/rejected": -26.709136962890625, "step": 251 }, { "epoch": 0.83, "learning_rate": 4.550486893748596e-06, "logits/chosen": -4.495731830596924, "logits/rejected": -4.421840190887451, "logps/chosen": -300.7923889160156, "logps/rejected": -369.44671630859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9146941900253296, "rewards/margins": 9.700987815856934, "rewards/rejected": -11.615681648254395, "step": 252 }, { "epoch": 0.84, "learning_rate": 4.544958617843782e-06, "logits/chosen": -4.233188629150391, "logits/rejected": -3.7297332286834717, "logps/chosen": -473.4989013671875, "logps/rejected": -492.3461608886719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.732303142547607, "rewards/margins": 15.883966445922852, "rewards/rejected": -22.616270065307617, "step": 253 }, { "epoch": 0.84, "learning_rate": 4.539399956290152e-06, "logits/chosen": -4.557020664215088, "logits/rejected": -3.960388660430908, "logps/chosen": -353.52593994140625, "logps/rejected": -604.6260986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.712493896484375, "rewards/margins": 22.881561279296875, "rewards/rejected": -27.59405517578125, "step": 254 }, { "epoch": 0.84, "learning_rate": 4.533810991682799e-06, "logits/chosen": -4.580037593841553, "logits/rejected": -4.580037593841553, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 255 }, { "epoch": 0.85, "learning_rate": 4.528191807067074e-06, "logits/chosen": -4.481337547302246, "logits/rejected": -4.369873046875, "logps/chosen": -302.75152587890625, "logps/rejected": -500.8403625488281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4375977516174316, "rewards/margins": 18.154129028320312, "rewards/rejected": -21.591726303100586, "step": 256 }, { "epoch": 0.85, "learning_rate": 4.522542485937369e-06, "logits/chosen": -4.541010856628418, "logits/rejected": -4.120445728302002, "logps/chosen": -266.5628662109375, "logps/rejected": -520.7110595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6100586652755737, "rewards/margins": 22.605920791625977, "rewards/rejected": -24.215978622436523, "step": 257 }, { "epoch": 0.85, "learning_rate": 4.516863112235864e-06, "logits/chosen": -4.432363033294678, "logits/rejected": -4.100122928619385, "logps/chosen": -329.5076599121094, "logps/rejected": -615.78515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.16323547065258026, "rewards/margins": 28.81184196472168, "rewards/rejected": -28.97507667541504, "step": 258 }, { "epoch": 0.86, "learning_rate": 4.511153770351288e-06, "logits/chosen": -4.500782012939453, "logits/rejected": -3.487731695175171, "logps/chosen": -464.27020263671875, "logps/rejected": -605.0491943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.322766304016113, "rewards/margins": 23.01436996459961, "rewards/rejected": -28.33713722229004, "step": 259 }, { "epoch": 0.86, "learning_rate": 4.505414545117658e-06, "logits/chosen": -4.300138473510742, "logits/rejected": -3.477233648300171, "logps/chosen": -533.3462524414062, "logps/rejected": -552.9212646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.31990385055542, "rewards/margins": 18.3931884765625, "rewards/rejected": -23.713092803955078, "step": 260 }, { "epoch": 0.86, "learning_rate": 4.499645521813024e-06, "logits/chosen": -4.700889587402344, "logits/rejected": -4.700889587402344, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 261 }, { "epoch": 0.87, "learning_rate": 4.4938467861582e-06, "logits/chosen": -4.556081771850586, "logits/rejected": -4.198794841766357, "logps/chosen": -371.759033203125, "logps/rejected": -660.7146606445312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.564685106277466, "rewards/margins": 23.74681282043457, "rewards/rejected": -26.311498641967773, "step": 262 }, { "epoch": 0.87, "learning_rate": 4.488018424315488e-06, "logits/chosen": -4.297285079956055, "logits/rejected": -4.350274562835693, "logps/chosen": -330.6549072265625, "logps/rejected": -572.1771240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0300233364105225, "rewards/margins": 23.603017807006836, "rewards/rejected": -26.633041381835938, "step": 263 }, { "epoch": 0.87, "learning_rate": 4.482160522887404e-06, "logits/chosen": -4.338238716125488, "logits/rejected": -3.436751127243042, "logps/chosen": -326.945556640625, "logps/rejected": -512.1168823242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8089507818222046, "rewards/margins": 21.56464385986328, "rewards/rejected": -23.373594284057617, "step": 264 }, { "epoch": 0.88, "learning_rate": 4.476273168915382e-06, "logits/chosen": -5.034451007843018, "logits/rejected": -3.47391414642334, "logps/chosen": -1412.2579345703125, "logps/rejected": -403.28057861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.26282960176467896, "rewards/margins": 13.486255645751953, "rewards/rejected": -13.22342586517334, "step": 265 }, { "epoch": 0.88, "learning_rate": 4.470356449878489e-06, "logits/chosen": -4.398158073425293, "logits/rejected": -3.418801784515381, "logps/chosen": -422.3399658203125, "logps/rejected": -592.7428588867188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6789307594299316, "rewards/margins": 21.556623458862305, "rewards/rejected": -25.235553741455078, "step": 266 }, { "epoch": 0.88, "learning_rate": 4.464410453692122e-06, "logits/chosen": -4.643823146820068, "logits/rejected": -3.279494285583496, "logps/chosen": -560.9666748046875, "logps/rejected": -519.3795776367188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4105286598205566, "rewards/margins": 20.03549575805664, "rewards/rejected": -22.44602394104004, "step": 267 }, { "epoch": 0.89, "learning_rate": 4.458435268706699e-06, "logits/chosen": -4.174355506896973, "logits/rejected": -3.4881181716918945, "logps/chosen": -425.60882568359375, "logps/rejected": -419.9944763183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6146514415740967, "rewards/margins": 16.718265533447266, "rewards/rejected": -19.332916259765625, "step": 268 }, { "epoch": 0.89, "learning_rate": 4.452430983706351e-06, "logits/chosen": -4.392972469329834, "logits/rejected": -3.702141523361206, "logps/chosen": -366.05242919921875, "logps/rejected": -583.062255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.865545630455017, "rewards/margins": 23.291095733642578, "rewards/rejected": -25.156641006469727, "step": 269 }, { "epoch": 0.89, "learning_rate": 4.446397687907601e-06, "logits/chosen": -4.396094799041748, "logits/rejected": -4.039773464202881, "logps/chosen": -341.2452392578125, "logps/rejected": -622.6715087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.08438720554113388, "rewards/margins": 28.248329162597656, "rewards/rejected": -28.33271598815918, "step": 270 }, { "epoch": 0.9, "learning_rate": 4.440335470958035e-06, "logits/chosen": -4.646708011627197, "logits/rejected": -3.6073200702667236, "logps/chosen": -276.8403625488281, "logps/rejected": -427.32745361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.796478271484375, "rewards/margins": 17.417638778686523, "rewards/rejected": -18.2141170501709, "step": 271 }, { "epoch": 0.9, "learning_rate": 4.434244422934976e-06, "logits/chosen": -4.129358768463135, "logits/rejected": -4.367154598236084, "logps/chosen": -171.61856079101562, "logps/rejected": -549.8220825195312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7979736328125, "rewards/margins": 24.695449829101562, "rewards/rejected": -25.493423461914062, "step": 272 }, { "epoch": 0.9, "learning_rate": 4.428124634344141e-06, "logits/chosen": -4.327607154846191, "logits/rejected": -3.922853946685791, "logps/chosen": -338.17413330078125, "logps/rejected": -567.5420532226562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.001150608062744, "rewards/margins": 21.21744155883789, "rewards/rejected": -25.218591690063477, "step": 273 }, { "epoch": 0.91, "learning_rate": 4.421976196118297e-06, "logits/chosen": -4.414794445037842, "logits/rejected": -3.8170502185821533, "logps/chosen": -354.2364501953125, "logps/rejected": -535.7091064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5273651480674744, "rewards/margins": 22.748092651367188, "rewards/rejected": -23.27545738220215, "step": 274 }, { "epoch": 0.91, "learning_rate": 4.415799199615912e-06, "logits/chosen": -4.355689525604248, "logits/rejected": -3.721245050430298, "logps/chosen": -403.55718994140625, "logps/rejected": -614.30224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.988238573074341, "rewards/margins": 26.3142032623291, "rewards/rejected": -29.30244255065918, "step": 275 }, { "epoch": 0.91, "learning_rate": 4.409593736619795e-06, "logits/chosen": -4.30208158493042, "logits/rejected": -4.249013900756836, "logps/chosen": -330.4805603027344, "logps/rejected": -520.9468383789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.18290114402771, "rewards/margins": 19.691268920898438, "rewards/rejected": -21.874170303344727, "step": 276 }, { "epoch": 0.92, "learning_rate": 4.403359899335732e-06, "logits/chosen": -4.452451705932617, "logits/rejected": -3.5297634601593018, "logps/chosen": -407.70440673828125, "logps/rejected": -531.66455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.366229295730591, "rewards/margins": 19.401885986328125, "rewards/rejected": -21.768115997314453, "step": 277 }, { "epoch": 0.92, "learning_rate": 4.39709778039112e-06, "logits/chosen": -4.624291896820068, "logits/rejected": -3.607923984527588, "logps/chosen": -432.9666748046875, "logps/rejected": -430.0704650878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.567608833312988, "rewards/margins": 15.570273399353027, "rewards/rejected": -20.137882232666016, "step": 278 }, { "epoch": 0.92, "learning_rate": 4.390807472833585e-06, "logits/chosen": -4.4363179206848145, "logits/rejected": -4.285643100738525, "logps/chosen": -346.4957275390625, "logps/rejected": -557.8546752929688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2618744373321533, "rewards/margins": 22.152734756469727, "rewards/rejected": -24.414609909057617, "step": 279 }, { "epoch": 0.93, "learning_rate": 4.384489070129604e-06, "logits/chosen": -4.686382293701172, "logits/rejected": -4.411667823791504, "logps/chosen": -297.61651611328125, "logps/rejected": -598.6287841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.362146019935608, "rewards/margins": 23.59848976135254, "rewards/rejected": -24.960636138916016, "step": 280 }, { "epoch": 0.93, "learning_rate": 4.378142666163114e-06, "logits/chosen": -4.398681163787842, "logits/rejected": -3.3622610569000244, "logps/chosen": -381.4431457519531, "logps/rejected": -480.4046630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9724457263946533, "rewards/margins": 19.119897842407227, "rewards/rejected": -22.092344284057617, "step": 281 }, { "epoch": 0.93, "learning_rate": 4.371768355234116e-06, "logits/chosen": -4.680546283721924, "logits/rejected": -4.680546283721924, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 282 }, { "epoch": 0.94, "learning_rate": 4.365366232057279e-06, "logits/chosen": -4.3958964347839355, "logits/rejected": -3.414220094680786, "logps/chosen": -470.550537109375, "logps/rejected": -456.4300231933594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.259875535964966, "rewards/margins": 16.188610076904297, "rewards/rejected": -19.448486328125, "step": 283 }, { "epoch": 0.94, "learning_rate": 4.358936391760524e-06, "logits/chosen": -4.416255950927734, "logits/rejected": -3.4653921127319336, "logps/chosen": -314.48199462890625, "logps/rejected": -503.40478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4892303943634033, "rewards/margins": 21.383264541625977, "rewards/rejected": -23.872495651245117, "step": 284 }, { "epoch": 0.94, "learning_rate": 4.3524789298836175e-06, "logits/chosen": -4.381935119628906, "logits/rejected": -3.548987627029419, "logps/chosen": -456.87030029296875, "logps/rejected": -514.978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.680187940597534, "rewards/margins": 18.483985900878906, "rewards/rejected": -22.164173126220703, "step": 285 }, { "epoch": 0.95, "learning_rate": 4.345993942376752e-06, "logits/chosen": -4.319192409515381, "logits/rejected": -3.6070122718811035, "logps/chosen": -535.0125122070312, "logps/rejected": -593.177490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.068231105804443, "rewards/margins": 22.915740966796875, "rewards/rejected": -26.983972549438477, "step": 286 }, { "epoch": 0.95, "learning_rate": 4.3394815255991135e-06, "logits/chosen": -4.557577610015869, "logits/rejected": -4.233284950256348, "logps/chosen": -340.40478515625, "logps/rejected": -559.3313598632812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.232684373855591, "rewards/margins": 15.566218376159668, "rewards/rejected": -18.79890251159668, "step": 287 }, { "epoch": 0.95, "learning_rate": 4.332941776317458e-06, "logits/chosen": -4.449789524078369, "logits/rejected": -3.7085957527160645, "logps/chosen": -460.09100341796875, "logps/rejected": -565.1859130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4739625453948975, "rewards/margins": 21.726104736328125, "rewards/rejected": -25.2000675201416, "step": 288 }, { "epoch": 0.96, "learning_rate": 4.32637479170467e-06, "logits/chosen": -4.309577941894531, "logits/rejected": -3.4090349674224854, "logps/chosen": -488.4316711425781, "logps/rejected": -556.6356201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.466006755828857, "rewards/margins": 21.675212860107422, "rewards/rejected": -26.141220092773438, "step": 289 }, { "epoch": 0.96, "learning_rate": 4.319780669338316e-06, "logits/chosen": -4.309078216552734, "logits/rejected": -3.30332612991333, "logps/chosen": -507.0978088378906, "logps/rejected": -519.831787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4428741931915283, "rewards/margins": 14.903473854064941, "rewards/rejected": -18.34634780883789, "step": 290 }, { "epoch": 0.96, "learning_rate": 4.313159507199197e-06, "logits/chosen": -4.414469242095947, "logits/rejected": -3.480733871459961, "logps/chosen": -471.8912048339844, "logps/rejected": -557.726318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.649487257003784, "rewards/margins": 18.816673278808594, "rewards/rejected": -22.46615982055664, "step": 291 }, { "epoch": 0.97, "learning_rate": 4.306511403669897e-06, "logits/chosen": -4.4712090492248535, "logits/rejected": -4.122826099395752, "logps/chosen": -487.50177001953125, "logps/rejected": -632.9717407226562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.310089111328125, "rewards/margins": 18.895023345947266, "rewards/rejected": -24.20511245727539, "step": 292 }, { "epoch": 0.97, "learning_rate": 4.299836457533313e-06, "logits/chosen": -4.493587970733643, "logits/rejected": -4.127647399902344, "logps/chosen": -348.9983215332031, "logps/rejected": -613.3372802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7200958728790283, "rewards/margins": 21.614328384399414, "rewards/rejected": -23.33442497253418, "step": 293 }, { "epoch": 0.97, "learning_rate": 4.293134767971193e-06, "logits/chosen": -4.367143154144287, "logits/rejected": -3.770662784576416, "logps/chosen": -465.8094482421875, "logps/rejected": -645.4409790039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2273621559143066, "rewards/margins": 24.89282989501953, "rewards/rejected": -28.12019157409668, "step": 294 }, { "epoch": 0.98, "learning_rate": 4.286406434562659e-06, "logits/chosen": -4.329665660858154, "logits/rejected": -4.187549591064453, "logps/chosen": -327.8598937988281, "logps/rejected": -573.4243774414062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.743408203125, "rewards/margins": 22.067590713500977, "rewards/rejected": -22.810998916625977, "step": 295 }, { "epoch": 0.98, "learning_rate": 4.2796515572827305e-06, "logits/chosen": -4.589941501617432, "logits/rejected": -3.3189613819122314, "logps/chosen": -437.3631591796875, "logps/rejected": -500.00341796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.332589864730835, "rewards/margins": 17.248987197875977, "rewards/rejected": -20.58157730102539, "step": 296 }, { "epoch": 0.98, "learning_rate": 4.2728702365008356e-06, "logits/chosen": -4.512362957000732, "logits/rejected": -3.4759039878845215, "logps/chosen": -452.6970520019531, "logps/rejected": -566.7788696289062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2685546875, "rewards/margins": 21.10858726501465, "rewards/rejected": -23.37714195251465, "step": 297 }, { "epoch": 0.99, "learning_rate": 4.266062572979323e-06, "logits/chosen": -4.406839370727539, "logits/rejected": -3.6835734844207764, "logps/chosen": -479.810791015625, "logps/rejected": -554.9146728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.451641798019409, "rewards/margins": 18.89285659790039, "rewards/rejected": -21.344497680664062, "step": 298 }, { "epoch": 0.99, "learning_rate": 4.259228667871963e-06, "logits/chosen": -4.3280510902404785, "logits/rejected": -3.3908562660217285, "logps/chosen": -535.2221069335938, "logps/rejected": -520.6017456054688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.345489501953125, "rewards/margins": 21.740737915039062, "rewards/rejected": -24.086227416992188, "step": 299 }, { "epoch": 0.99, "learning_rate": 4.252368622722443e-06, "logits/chosen": -4.508797645568848, "logits/rejected": -4.2981276512146, "logps/chosen": -387.08135986328125, "logps/rejected": -548.83203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.486175537109375, "rewards/margins": 17.451730728149414, "rewards/rejected": -22.93790626525879, "step": 300 }, { "epoch": 1.0, "learning_rate": 4.245482539462861e-06, "logits/chosen": -4.583652973175049, "logits/rejected": -4.022202491760254, "logps/chosen": -302.711669921875, "logps/rejected": -516.1565551757812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4421050548553467, "rewards/margins": 20.776533126831055, "rewards/rejected": -24.218637466430664, "step": 301 }, { "epoch": 1.0, "learning_rate": 4.2385705204122104e-06, "logits/chosen": -4.387404441833496, "logits/rejected": -3.931056261062622, "logps/chosen": -387.2518310546875, "logps/rejected": -686.424560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.06422758102417, "rewards/margins": 27.97595977783203, "rewards/rejected": -32.04018783569336, "step": 302 }, { "epoch": 1.0, "learning_rate": 4.231632668274861e-06, "logits/chosen": -4.505836009979248, "logits/rejected": -3.867441177368164, "logps/chosen": -370.88580322265625, "logps/rejected": -608.3780517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9485291242599487, "rewards/margins": 24.083986282348633, "rewards/rejected": -26.032514572143555, "step": 303 }, { "epoch": 1.01, "learning_rate": 4.22466908613903e-06, "logits/chosen": -4.520392417907715, "logits/rejected": -4.209183692932129, "logps/chosen": -301.65997314453125, "logps/rejected": -557.6475830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.963144063949585, "rewards/margins": 22.734817504882812, "rewards/rejected": -25.697961807250977, "step": 304 }, { "epoch": 1.01, "learning_rate": 4.217679877475251e-06, "logits/chosen": -4.450392246246338, "logits/rejected": -3.9287269115448, "logps/chosen": -309.934326171875, "logps/rejected": -550.7947387695312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.15462341904640198, "rewards/margins": 25.121463775634766, "rewards/rejected": -25.276086807250977, "step": 305 }, { "epoch": 1.01, "learning_rate": 4.210665146134838e-06, "logits/chosen": -4.404209613800049, "logits/rejected": -3.750500440597534, "logps/chosen": -354.6093444824219, "logps/rejected": -557.5514526367188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.627920627593994, "rewards/margins": 21.12412452697754, "rewards/rejected": -24.752044677734375, "step": 306 }, { "epoch": 1.02, "learning_rate": 4.203624996348343e-06, "logits/chosen": -4.643948554992676, "logits/rejected": -3.279536008834839, "logps/chosen": -560.704345703125, "logps/rejected": -519.2259521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.384295701980591, "rewards/margins": 20.04636573791504, "rewards/rejected": -22.430662155151367, "step": 307 }, { "epoch": 1.02, "learning_rate": 4.196559532724004e-06, "logits/chosen": -4.450984001159668, "logits/rejected": -3.895106792449951, "logps/chosen": -327.5532531738281, "logps/rejected": -578.366943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6179717779159546, "rewards/margins": 25.337055206298828, "rewards/rejected": -26.955026626586914, "step": 308 }, { "epoch": 1.02, "learning_rate": 4.189468860246192e-06, "logits/chosen": -4.460489273071289, "logits/rejected": -4.485321998596191, "logps/chosen": -321.6944885253906, "logps/rejected": -576.0396728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.288430690765381, "rewards/margins": 21.74100112915039, "rewards/rejected": -26.02943229675293, "step": 309 }, { "epoch": 1.03, "learning_rate": 4.182353084273855e-06, "logits/chosen": -4.646491527557373, "logits/rejected": -3.6069564819335938, "logps/chosen": -276.98394775390625, "logps/rejected": -427.6263427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8108367919921875, "rewards/margins": 17.433168411254883, "rewards/rejected": -18.24400520324707, "step": 310 }, { "epoch": 1.03, "learning_rate": 4.1752123105389444e-06, "logits/chosen": -4.558380126953125, "logits/rejected": -4.233245372772217, "logps/chosen": -340.41546630859375, "logps/rejected": -559.578369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.233752489089966, "rewards/margins": 15.589850425720215, "rewards/rejected": -18.8236026763916, "step": 311 }, { "epoch": 1.03, "learning_rate": 4.168046645144851e-06, "logits/chosen": -4.401527404785156, "logits/rejected": -4.047603130340576, "logps/chosen": -346.8031005859375, "logps/rejected": -536.336181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.046600341796875, "rewards/margins": 15.65533447265625, "rewards/rejected": -20.701934814453125, "step": 312 }, { "epoch": 1.04, "learning_rate": 4.160856194564828e-06, "logits/chosen": -4.449392795562744, "logits/rejected": -3.370621681213379, "logps/chosen": -714.1744995117188, "logps/rejected": -499.0378723144531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.729162693023682, "rewards/margins": 18.80251121520996, "rewards/rejected": -24.531673431396484, "step": 313 }, { "epoch": 1.04, "learning_rate": 4.153641065640402e-06, "logits/chosen": -4.675591468811035, "logits/rejected": -4.675591945648193, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 314 }, { "epoch": 1.04, "learning_rate": 4.146401365579795e-06, "logits/chosen": -4.659801959991455, "logits/rejected": -4.659801959991455, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 315 }, { "epoch": 1.05, "learning_rate": 4.139137201956324e-06, "logits/chosen": -4.526122570037842, "logits/rejected": -3.9803736209869385, "logps/chosen": -494.4631652832031, "logps/rejected": -624.318603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6804840564727783, "rewards/margins": 25.334402084350586, "rewards/rejected": -29.0148868560791, "step": 316 }, { "epoch": 1.05, "learning_rate": 4.131848682706807e-06, "logits/chosen": -4.45796012878418, "logits/rejected": -3.5760793685913086, "logps/chosen": -304.2862854003906, "logps/rejected": -613.4364013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6095489263534546, "rewards/margins": 26.36404800415039, "rewards/rejected": -27.973596572875977, "step": 317 }, { "epoch": 1.05, "learning_rate": 4.1245359161299555e-06, "logits/chosen": -4.385075092315674, "logits/rejected": -3.695948600769043, "logps/chosen": -626.4497680664062, "logps/rejected": -554.8067016601562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.724432468414307, "rewards/margins": 19.89761734008789, "rewards/rejected": -24.62204933166504, "step": 318 }, { "epoch": 1.06, "learning_rate": 4.1171990108847705e-06, "logits/chosen": -4.380350112915039, "logits/rejected": -3.776153326034546, "logps/chosen": -533.7496948242188, "logps/rejected": -667.3619995117188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.350457668304443, "rewards/margins": 14.2978515625, "rewards/rejected": -20.6483097076416, "step": 319 }, { "epoch": 1.06, "learning_rate": 4.109838075988922e-06, "logits/chosen": -4.454567909240723, "logits/rejected": -4.401432991027832, "logps/chosen": -284.8964538574219, "logps/rejected": -612.6644897460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1294647455215454, "rewards/margins": 27.096817016601562, "rewards/rejected": -28.226282119750977, "step": 320 }, { "epoch": 1.06, "learning_rate": 4.102453220817134e-06, "logits/chosen": -4.539063453674316, "logits/rejected": -4.120962142944336, "logps/chosen": -340.3846130371094, "logps/rejected": -592.820068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9738433361053467, "rewards/margins": 22.694129943847656, "rewards/rejected": -26.667972564697266, "step": 321 }, { "epoch": 1.07, "learning_rate": 4.0950445550995566e-06, "logits/chosen": -4.302433013916016, "logits/rejected": -3.098027229309082, "logps/chosen": -548.219970703125, "logps/rejected": -516.339599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.810494899749756, "rewards/margins": 17.045827865600586, "rewards/rejected": -21.8563232421875, "step": 322 }, { "epoch": 1.07, "learning_rate": 4.087612188920135e-06, "logits/chosen": -4.680222511291504, "logits/rejected": -4.680222511291504, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 323 }, { "epoch": 1.07, "learning_rate": 4.080156232714976e-06, "logits/chosen": -4.441516399383545, "logits/rejected": -3.8761839866638184, "logps/chosen": -315.43927001953125, "logps/rejected": -490.2115173339844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2103455066680908, "rewards/margins": 18.420991897583008, "rewards/rejected": -19.631338119506836, "step": 324 }, { "epoch": 1.08, "learning_rate": 4.072676797270708e-06, "logits/chosen": -4.385481834411621, "logits/rejected": -3.9012012481689453, "logps/chosen": -477.62548828125, "logps/rejected": -559.3220825195312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.507293701171875, "rewards/margins": 21.427127838134766, "rewards/rejected": -24.93442153930664, "step": 325 }, { "epoch": 1.08, "learning_rate": 4.06517399372283e-06, "logits/chosen": -4.476378917694092, "logits/rejected": -3.7612292766571045, "logps/chosen": -581.444091796875, "logps/rejected": -637.1437377929688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2532410621643066, "rewards/margins": 23.70111846923828, "rewards/rejected": -26.95435905456543, "step": 326 }, { "epoch": 1.08, "learning_rate": 4.057647933554063e-06, "logits/chosen": -4.519208908081055, "logits/rejected": -3.9047725200653076, "logps/chosen": -342.0140075683594, "logps/rejected": -641.6871337890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8680664300918579, "rewards/margins": 28.9508056640625, "rewards/rejected": -29.818872451782227, "step": 327 }, { "epoch": 1.09, "learning_rate": 4.050098728592698e-06, "logits/chosen": -4.34064245223999, "logits/rejected": -3.292074203491211, "logps/chosen": -461.8697814941406, "logps/rejected": -528.8592529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6399872303009033, "rewards/margins": 20.581085205078125, "rewards/rejected": -23.221073150634766, "step": 328 }, { "epoch": 1.09, "learning_rate": 4.0425264910109245e-06, "logits/chosen": -4.454086780548096, "logits/rejected": -3.8325753211975098, "logps/chosen": -433.9237365722656, "logps/rejected": -424.3806457519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4173126220703125, "rewards/margins": 13.11768913269043, "rewards/rejected": -18.535001754760742, "step": 329 }, { "epoch": 1.09, "learning_rate": 4.034931333323173e-06, "logits/chosen": -4.427676200866699, "logits/rejected": -4.156816482543945, "logps/chosen": -310.61053466796875, "logps/rejected": -528.1558837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8374053835868835, "rewards/margins": 20.680816650390625, "rewards/rejected": -21.51822280883789, "step": 330 }, { "epoch": 1.1, "learning_rate": 4.0273133683844375e-06, "logits/chosen": -4.304573059082031, "logits/rejected": -3.848345994949341, "logps/chosen": -349.03472900390625, "logps/rejected": -499.785400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.325323581695557, "rewards/margins": 16.909881591796875, "rewards/rejected": -21.235204696655273, "step": 331 }, { "epoch": 1.1, "learning_rate": 4.0196727093886024e-06, "logits/chosen": -4.527863502502441, "logits/rejected": -3.6625583171844482, "logps/chosen": -404.2148742675781, "logps/rejected": -546.36572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4848878383636475, "rewards/margins": 20.877573013305664, "rewards/rejected": -24.36246109008789, "step": 332 }, { "epoch": 1.1, "learning_rate": 4.012009469866756e-06, "logits/chosen": -4.395691394805908, "logits/rejected": -3.414201498031616, "logps/chosen": -470.52752685546875, "logps/rejected": -457.0702209472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2575745582580566, "rewards/margins": 16.254932403564453, "rewards/rejected": -19.51250648498535, "step": 333 }, { "epoch": 1.11, "learning_rate": 4.004323763685511e-06, "logits/chosen": -4.453549385070801, "logits/rejected": -3.3401639461517334, "logps/chosen": -534.1157836914062, "logps/rejected": -584.5160522460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6152894496917725, "rewards/margins": 22.980432510375977, "rewards/rejected": -25.595722198486328, "step": 334 }, { "epoch": 1.11, "learning_rate": 3.996615705045302e-06, "logits/chosen": -4.428602695465088, "logits/rejected": -4.351799011230469, "logps/chosen": -283.1662292480469, "logps/rejected": -552.4287719726562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.28594666719436646, "rewards/margins": 23.67892074584961, "rewards/rejected": -23.392974853515625, "step": 335 }, { "epoch": 1.11, "learning_rate": 3.9888854084786995e-06, "logits/chosen": -4.3763909339904785, "logits/rejected": -3.646036148071289, "logps/chosen": -312.95526123046875, "logps/rejected": -450.7712707519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.437213182449341, "rewards/margins": 18.57390785217285, "rewards/rejected": -21.01112174987793, "step": 336 }, { "epoch": 1.12, "learning_rate": 3.9811329888487004e-06, "logits/chosen": -4.468436241149902, "logits/rejected": -3.846614360809326, "logps/chosen": -347.97125244140625, "logps/rejected": -509.1820983886719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1124818325042725, "rewards/margins": 18.75742530822754, "rewards/rejected": -21.86990737915039, "step": 337 }, { "epoch": 1.12, "learning_rate": 3.973358561347024e-06, "logits/chosen": -4.590075969696045, "logits/rejected": -3.3168811798095703, "logps/chosen": -437.10064697265625, "logps/rejected": -500.73876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3063385486602783, "rewards/margins": 17.348772048950195, "rewards/rejected": -20.65511131286621, "step": 338 }, { "epoch": 1.12, "learning_rate": 3.965562241492401e-06, "logits/chosen": -4.620311260223389, "logits/rejected": -3.9685401916503906, "logps/chosen": -362.0390319824219, "logps/rejected": -528.6869506835938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5201812982559204, "rewards/margins": 18.24468994140625, "rewards/rejected": -19.76487159729004, "step": 339 }, { "epoch": 1.13, "learning_rate": 3.957744145128858e-06, "logits/chosen": -4.478018760681152, "logits/rejected": -3.5600032806396484, "logps/chosen": -532.4029541015625, "logps/rejected": -598.7540283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.396340847015381, "rewards/margins": 21.554738998413086, "rewards/rejected": -26.951080322265625, "step": 340 }, { "epoch": 1.13, "learning_rate": 3.9499043884239894e-06, "logits/chosen": -4.449504375457764, "logits/rejected": -3.656759738922119, "logps/chosen": -309.1235046386719, "logps/rejected": -631.4158325195312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.706005871295929, "rewards/margins": 24.1827335357666, "rewards/rejected": -23.476728439331055, "step": 341 }, { "epoch": 1.13, "learning_rate": 3.942043087867244e-06, "logits/chosen": -4.459195137023926, "logits/rejected": -4.034330368041992, "logps/chosen": -454.82110595703125, "logps/rejected": -693.478271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7679443359375, "rewards/margins": 26.525554656982422, "rewards/rejected": -32.29349899291992, "step": 342 }, { "epoch": 1.14, "learning_rate": 3.9341603602681805e-06, "logits/chosen": -4.481115341186523, "logits/rejected": -4.369643688201904, "logps/chosen": -302.33941650390625, "logps/rejected": -501.6031799316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3963868618011475, "rewards/margins": 18.271621704101562, "rewards/rejected": -21.66800880432129, "step": 343 }, { "epoch": 1.14, "learning_rate": 3.92625632275474e-06, "logits/chosen": -4.441457271575928, "logits/rejected": -3.1386094093322754, "logps/chosen": -624.3786010742188, "logps/rejected": -605.319091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.778839111328125, "rewards/margins": 25.4502010345459, "rewards/rejected": -29.229040145874023, "step": 344 }, { "epoch": 1.14, "learning_rate": 3.918331092771505e-06, "logits/chosen": -4.457245826721191, "logits/rejected": -4.084907531738281, "logps/chosen": -231.635498046875, "logps/rejected": -552.2274780273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6956543326377869, "rewards/margins": 25.437665939331055, "rewards/rejected": -26.133319854736328, "step": 345 }, { "epoch": 1.15, "learning_rate": 3.910384788077949e-06, "logits/chosen": -4.531241416931152, "logits/rejected": -3.4365487098693848, "logps/chosen": -372.13153076171875, "logps/rejected": -595.1321411132812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1439576148986816, "rewards/margins": 23.517963409423828, "rewards/rejected": -26.66192054748535, "step": 346 }, { "epoch": 1.15, "learning_rate": 3.902417526746694e-06, "logits/chosen": -4.46983528137207, "logits/rejected": -3.303872585296631, "logps/chosen": -476.74951171875, "logps/rejected": -629.5556030273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.970806837081909, "rewards/margins": 25.05852699279785, "rewards/rejected": -28.029333114624023, "step": 347 }, { "epoch": 1.15, "learning_rate": 3.8944294271617524e-06, "logits/chosen": -4.311168670654297, "logits/rejected": -3.7667226791381836, "logps/chosen": -355.90771484375, "logps/rejected": -570.6123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.113745093345642, "rewards/margins": 22.527433395385742, "rewards/rejected": -23.641178131103516, "step": 348 }, { "epoch": 1.16, "learning_rate": 3.886420608016767e-06, "logits/chosen": -4.611914157867432, "logits/rejected": -3.7337758541107178, "logps/chosen": -450.34246826171875, "logps/rejected": -485.8800048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.16018067300319672, "rewards/margins": 22.026273727416992, "rewards/rejected": -21.866092681884766, "step": 349 }, { "epoch": 1.16, "learning_rate": 3.878391188313249e-06, "logits/chosen": -4.441217422485352, "logits/rejected": -4.413001537322998, "logps/chosen": -290.1426696777344, "logps/rejected": -474.15716552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.885516405105591, "rewards/margins": 15.439276695251465, "rewards/rejected": -19.324792861938477, "step": 350 }, { "epoch": 1.16, "learning_rate": 3.870341287358809e-06, "logits/chosen": -4.424674987792969, "logits/rejected": -3.499305248260498, "logps/chosen": -571.8875122070312, "logps/rejected": -499.6201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.845529079437256, "rewards/margins": 14.799612045288086, "rewards/rejected": -21.6451416015625, "step": 351 }, { "epoch": 1.17, "learning_rate": 3.862271024765385e-06, "logits/chosen": -4.41467809677124, "logits/rejected": -3.788273334503174, "logps/chosen": -504.5979309082031, "logps/rejected": -588.0304565429688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9401885867118835, "rewards/margins": 24.011598587036133, "rewards/rejected": -24.9517879486084, "step": 352 }, { "epoch": 1.17, "learning_rate": 3.854180520447465e-06, "logits/chosen": -4.399990558624268, "logits/rejected": -3.702749490737915, "logps/chosen": -359.1282958984375, "logps/rejected": -613.07958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7172210216522217, "rewards/margins": 23.483243942260742, "rewards/rejected": -26.200464248657227, "step": 353 }, { "epoch": 1.17, "learning_rate": 3.846069894620306e-06, "logits/chosen": -4.455563068389893, "logits/rejected": -3.8418784141540527, "logps/chosen": -249.07275390625, "logps/rejected": -466.025146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.700274646282196, "rewards/margins": 23.287641525268555, "rewards/rejected": -22.587366104125977, "step": 354 }, { "epoch": 1.18, "learning_rate": 3.8379392677981434e-06, "logits/chosen": -4.436300754547119, "logits/rejected": -4.024048805236816, "logps/chosen": -267.5246887207031, "logps/rejected": -486.9378356933594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7707855105400085, "rewards/margins": 20.789134979248047, "rewards/rejected": -21.559921264648438, "step": 355 }, { "epoch": 1.18, "learning_rate": 3.8297887607924044e-06, "logits/chosen": -4.544910430908203, "logits/rejected": -4.544910430908203, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 356 }, { "epoch": 1.18, "learning_rate": 3.821618494709916e-06, "logits/chosen": -4.401289463043213, "logits/rejected": -3.6716558933258057, "logps/chosen": -519.7855224609375, "logps/rejected": -570.1717529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.12322998046875, "rewards/margins": 17.12276268005371, "rewards/rejected": -22.24599266052246, "step": 357 }, { "epoch": 1.19, "learning_rate": 3.8134285909510972e-06, "logits/chosen": -4.755959987640381, "logits/rejected": -4.755960464477539, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 358 }, { "epoch": 1.19, "learning_rate": 3.80521917120816e-06, "logits/chosen": -4.258815765380859, "logits/rejected": -3.9361300468444824, "logps/chosen": -304.29443359375, "logps/rejected": -626.0820922851562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.7605103254318237, "rewards/margins": 30.285808563232422, "rewards/rejected": -28.525299072265625, "step": 359 }, { "epoch": 1.19, "learning_rate": 3.7969903574633028e-06, "logits/chosen": -4.397978782653809, "logits/rejected": -3.4190871715545654, "logps/chosen": -422.203857421875, "logps/rejected": -593.5458374023438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6653199195861816, "rewards/margins": 21.650531768798828, "rewards/rejected": -25.31585121154785, "step": 360 }, { "epoch": 1.2, "learning_rate": 3.7887422719868937e-06, "logits/chosen": -4.5388994216918945, "logits/rejected": -4.143129825592041, "logps/chosen": -268.7479248046875, "logps/rejected": -503.136962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.15639649331569672, "rewards/margins": 18.77325439453125, "rewards/rejected": -18.929651260375977, "step": 361 }, { "epoch": 1.2, "learning_rate": 3.7804750373356576e-06, "logits/chosen": -4.233099460601807, "logits/rejected": -3.729583263397217, "logps/chosen": -473.1009826660156, "logps/rejected": -493.331787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.692511081695557, "rewards/margins": 16.022321701049805, "rewards/rejected": -22.714832305908203, "step": 362 }, { "epoch": 1.2, "learning_rate": 3.7721887763508512e-06, "logits/chosen": -4.477231025695801, "logits/rejected": -3.61136794090271, "logps/chosen": -328.746826171875, "logps/rejected": -559.17529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.201995849609375, "rewards/margins": 24.48870849609375, "rewards/rejected": -26.690704345703125, "step": 363 }, { "epoch": 1.21, "learning_rate": 3.7638836121564414e-06, "logits/chosen": -4.40407657623291, "logits/rejected": -3.6937649250030518, "logps/chosen": -480.8106689453125, "logps/rejected": -529.0651245117188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6159608364105225, "rewards/margins": 22.013158798217773, "rewards/rejected": -25.629119873046875, "step": 364 }, { "epoch": 1.21, "learning_rate": 3.7555596681572736e-06, "logits/chosen": -4.173845291137695, "logits/rejected": -4.481690883636475, "logps/chosen": -230.34567260742188, "logps/rejected": -510.9122314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 7.565890789031982, "rewards/margins": 24.690677642822266, "rewards/rejected": -17.124786376953125, "step": 365 }, { "epoch": 1.21, "learning_rate": 3.7472170680372398e-06, "logits/chosen": -4.645546913146973, "logits/rejected": -4.204290866851807, "logps/chosen": -291.627685546875, "logps/rejected": -565.508056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.0252350568771362, "rewards/margins": 22.082204818725586, "rewards/rejected": -21.056970596313477, "step": 366 }, { "epoch": 1.22, "learning_rate": 3.738855935757438e-06, "logits/chosen": -4.355076313018799, "logits/rejected": -4.114178657531738, "logps/chosen": -243.2429962158203, "logps/rejected": -501.1885681152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.221289038658142, "rewards/margins": 20.49558448791504, "rewards/rejected": -21.716873168945312, "step": 367 }, { "epoch": 1.22, "learning_rate": 3.7304763955543332e-06, "logits/chosen": -4.433329105377197, "logits/rejected": -3.3993592262268066, "logps/chosen": -394.0951843261719, "logps/rejected": -572.6485595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.220874071121216, "rewards/margins": 24.07745361328125, "rewards/rejected": -26.298328399658203, "step": 368 }, { "epoch": 1.22, "learning_rate": 3.72207857193791e-06, "logits/chosen": -4.431034564971924, "logits/rejected": -3.396305799484253, "logps/chosen": -369.4999084472656, "logps/rejected": -557.2697143554688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.33349609375, "rewards/margins": 22.388757705688477, "rewards/rejected": -24.722253799438477, "step": 369 }, { "epoch": 1.23, "learning_rate": 3.7136625896898226e-06, "logits/chosen": -4.555417060852051, "logits/rejected": -4.198338031768799, "logps/chosen": -371.4862060546875, "logps/rejected": -662.2922973632812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.537402391433716, "rewards/margins": 23.931859970092773, "rewards/rejected": -26.469263076782227, "step": 370 }, { "epoch": 1.23, "learning_rate": 3.7052285738615412e-06, "logits/chosen": -4.398463249206543, "logits/rejected": -3.3618404865264893, "logps/chosen": -381.2416076660156, "logps/rejected": -481.7105407714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.952291965484619, "rewards/margins": 19.270639419555664, "rewards/rejected": -22.222930908203125, "step": 371 }, { "epoch": 1.23, "learning_rate": 3.696776649772492e-06, "logits/chosen": -4.381402492523193, "logits/rejected": -3.549434185028076, "logps/chosen": -456.45806884765625, "logps/rejected": -515.9154663085938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.638964891433716, "rewards/margins": 18.6189022064209, "rewards/rejected": -22.25786781311035, "step": 372 }, { "epoch": 1.24, "learning_rate": 3.6883069430081986e-06, "logits/chosen": -4.49332332611084, "logits/rejected": -4.4724907875061035, "logps/chosen": -243.32974243164062, "logps/rejected": -635.329833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.060235619544983, "rewards/margins": 23.96942138671875, "rewards/rejected": -25.0296573638916, "step": 373 }, { "epoch": 1.24, "learning_rate": 3.679819579418414e-06, "logits/chosen": -4.253709316253662, "logits/rejected": -3.5135128498077393, "logps/chosen": -407.85272216796875, "logps/rejected": -533.027099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8444244861602783, "rewards/margins": 19.94365119934082, "rewards/rejected": -21.788076400756836, "step": 374 }, { "epoch": 1.24, "learning_rate": 3.6713146851152487e-06, "logits/chosen": -4.382305145263672, "logits/rejected": -3.966198682785034, "logps/chosen": -366.61993408203125, "logps/rejected": -589.8076782226562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5849976539611816, "rewards/margins": 23.92621612548828, "rewards/rejected": -27.511213302612305, "step": 375 }, { "epoch": 1.25, "learning_rate": 3.6627923864713e-06, "logits/chosen": -4.128899574279785, "logits/rejected": -4.366008758544922, "logps/chosen": -171.67576599121094, "logps/rejected": -551.530517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8036941885948181, "rewards/margins": 24.860572814941406, "rewards/rejected": -25.66426658630371, "step": 376 }, { "epoch": 1.25, "learning_rate": 3.654252810117773e-06, "logits/chosen": -4.742278099060059, "logits/rejected": -3.5298373699188232, "logps/chosen": -460.48419189453125, "logps/rejected": -507.640869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7501220703125, "rewards/margins": 20.162372589111328, "rewards/rejected": -22.912494659423828, "step": 377 }, { "epoch": 1.25, "learning_rate": 3.6456960829425987e-06, "logits/chosen": -4.415173053741455, "logits/rejected": -3.8172292709350586, "logps/chosen": -353.8888854980469, "logps/rejected": -536.80712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4926086366176605, "rewards/margins": 22.892650604248047, "rewards/rejected": -23.3852596282959, "step": 378 }, { "epoch": 1.25, "learning_rate": 3.6371223320885492e-06, "logits/chosen": -4.940047264099121, "logits/rejected": -3.484922409057617, "logps/chosen": -1516.173095703125, "logps/rejected": -594.2432861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 4.267004489898682, "rewards/margins": 24.598852157592773, "rewards/rejected": -20.33184814453125, "step": 379 }, { "epoch": 1.26, "learning_rate": 3.628531684951347e-06, "logits/chosen": -4.382914066314697, "logits/rejected": -4.234165668487549, "logps/chosen": -401.0128479003906, "logps/rejected": -617.7637329101562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.017504930496216, "rewards/margins": 24.628862380981445, "rewards/rejected": -26.6463680267334, "step": 380 }, { "epoch": 1.26, "learning_rate": 3.6199242691777745e-06, "logits/chosen": -4.537250518798828, "logits/rejected": -3.417498826980591, "logps/chosen": -508.52447509765625, "logps/rejected": -626.3768310546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.462677001953125, "rewards/margins": 23.96565055847168, "rewards/rejected": -25.428327560424805, "step": 381 }, { "epoch": 1.26, "learning_rate": 3.6113002126637765e-06, "logits/chosen": -4.2400126457214355, "logits/rejected": -4.443871974945068, "logps/chosen": -344.1387939453125, "logps/rejected": -697.9903564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5454193353652954, "rewards/margins": 27.8881778717041, "rewards/rejected": -27.342758178710938, "step": 382 }, { "epoch": 1.27, "learning_rate": 3.6026596435525578e-06, "logits/chosen": -4.263064384460449, "logits/rejected": -3.7142536640167236, "logps/chosen": -429.40966796875, "logps/rejected": -633.1529541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.696359395980835, "rewards/margins": 25.548803329467773, "rewards/rejected": -28.245162963867188, "step": 383 }, { "epoch": 1.27, "learning_rate": 3.5940026902326825e-06, "logits/chosen": -4.683154582977295, "logits/rejected": -3.695619821548462, "logps/chosen": -313.42706298828125, "logps/rejected": -636.9192504882812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.587240695953369, "rewards/margins": 25.60426139831543, "rewards/rejected": -29.19150161743164, "step": 384 }, { "epoch": 1.27, "learning_rate": 3.5853294813361614e-06, "logits/chosen": -4.441425800323486, "logits/rejected": -3.4744443893432617, "logps/chosen": -492.7984924316406, "logps/rejected": -609.9415283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.741729736328125, "rewards/margins": 23.798828125, "rewards/rejected": -27.540557861328125, "step": 385 }, { "epoch": 1.28, "learning_rate": 3.5766401457365485e-06, "logits/chosen": -4.289592742919922, "logits/rejected": -3.687018871307373, "logps/chosen": -400.8878173828125, "logps/rejected": -576.2542724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.827166795730591, "rewards/margins": 23.3099422454834, "rewards/rejected": -26.137109756469727, "step": 386 }, { "epoch": 1.28, "learning_rate": 3.5679348125470175e-06, "logits/chosen": -4.466724872589111, "logits/rejected": -3.9063446521759033, "logps/chosen": -321.11163330078125, "logps/rejected": -604.6334838867188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.087390184402466, "rewards/margins": 24.62456703186035, "rewards/rejected": -26.711957931518555, "step": 387 }, { "epoch": 1.28, "learning_rate": 3.5592136111184483e-06, "logits/chosen": -4.287783145904541, "logits/rejected": -3.479191541671753, "logps/chosen": -428.4388427734375, "logps/rejected": -567.2464599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.503488302230835, "rewards/margins": 21.51998519897461, "rewards/rejected": -24.023473739624023, "step": 388 }, { "epoch": 1.29, "learning_rate": 3.550476671037505e-06, "logits/chosen": -4.486946105957031, "logits/rejected": -4.348780155181885, "logps/chosen": -314.7575988769531, "logps/rejected": -655.424560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.190433025360107, "rewards/margins": 23.15078353881836, "rewards/rejected": -27.341217041015625, "step": 389 }, { "epoch": 1.29, "learning_rate": 3.5417241221247078e-06, "logits/chosen": -4.402674674987793, "logits/rejected": -3.328934907913208, "logps/chosen": -594.84765625, "logps/rejected": -472.50946044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.817242622375488, "rewards/margins": 14.412476539611816, "rewards/rejected": -22.229719161987305, "step": 390 }, { "epoch": 1.29, "learning_rate": 3.5329560944325065e-06, "logits/chosen": -4.393891334533691, "logits/rejected": -3.416555166244507, "logps/chosen": -486.5477600097656, "logps/rejected": -546.95654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4670441150665283, "rewards/margins": 22.86057472229004, "rewards/rejected": -25.327619552612305, "step": 391 }, { "epoch": 1.3, "learning_rate": 3.524172718243347e-06, "logits/chosen": -4.566934585571289, "logits/rejected": -4.566934585571289, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 392 }, { "epoch": 1.3, "learning_rate": 3.515374124067736e-06, "logits/chosen": -4.335728168487549, "logits/rejected": -3.503232717514038, "logps/chosen": -392.0767822265625, "logps/rejected": -528.6370849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.41119384765625, "rewards/margins": 20.453832626342773, "rewards/rejected": -24.865026473999023, "step": 393 }, { "epoch": 1.3, "learning_rate": 3.5065604426422995e-06, "logits/chosen": -4.612951755523682, "logits/rejected": -4.612951755523682, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 394 }, { "epoch": 1.31, "learning_rate": 3.4977318049278443e-06, "logits/chosen": -4.628118515014648, "logits/rejected": -3.413256883621216, "logps/chosen": -560.8916015625, "logps/rejected": -539.226318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.012781143188477, "rewards/margins": 15.179046630859375, "rewards/rejected": -23.19182777404785, "step": 395 }, { "epoch": 1.31, "learning_rate": 3.4888883421074076e-06, "logits/chosen": -4.4706711769104, "logits/rejected": -3.608553171157837, "logps/chosen": -421.37646484375, "logps/rejected": -551.02294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.084374904632568, "rewards/margins": 20.2630672454834, "rewards/rejected": -25.347442626953125, "step": 396 }, { "epoch": 1.31, "learning_rate": 3.4800301855843137e-06, "logits/chosen": -4.328768730163574, "logits/rejected": -3.748556137084961, "logps/chosen": -589.5968017578125, "logps/rejected": -558.4112548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.23187866806983948, "rewards/margins": 24.938682556152344, "rewards/rejected": -25.170560836791992, "step": 397 }, { "epoch": 1.32, "learning_rate": 3.471157466980214e-06, "logits/chosen": -4.3414387702941895, "logits/rejected": -3.2893929481506348, "logps/chosen": -441.0865478515625, "logps/rejected": -441.55029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.178424119949341, "rewards/margins": 17.36881446838379, "rewards/rejected": -20.547239303588867, "step": 398 }, { "epoch": 1.32, "learning_rate": 3.462270318133136e-06, "logits/chosen": -4.279347896575928, "logits/rejected": -3.7708323001861572, "logps/chosen": -425.3995361328125, "logps/rejected": -606.691162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6446380615234375, "rewards/margins": 22.000778198242188, "rewards/rejected": -24.645416259765625, "step": 399 }, { "epoch": 1.32, "learning_rate": 3.4533688710955255e-06, "logits/chosen": -4.445547580718994, "logits/rejected": -3.736433744430542, "logps/chosen": -438.2364501953125, "logps/rejected": -511.470458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.457928419113159, "rewards/margins": 17.368803024291992, "rewards/rejected": -20.826730728149414, "step": 400 }, { "epoch": 1.33, "learning_rate": 3.4444532581322793e-06, "logits/chosen": -4.259881496429443, "logits/rejected": -3.4961695671081543, "logps/chosen": -442.30908203125, "logps/rejected": -534.1782836914062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.809326171875, "rewards/margins": 19.046466827392578, "rewards/rejected": -22.855792999267578, "step": 401 }, { "epoch": 1.33, "learning_rate": 3.435523611718785e-06, "logits/chosen": -4.428008079528809, "logits/rejected": -3.818208932876587, "logps/chosen": -404.3310546875, "logps/rejected": -599.6188354492188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.139294624328613, "rewards/margins": 23.809425354003906, "rewards/rejected": -27.948720932006836, "step": 402 }, { "epoch": 1.33, "learning_rate": 3.42658006453895e-06, "logits/chosen": -4.678605556488037, "logits/rejected": -3.402674436569214, "logps/chosen": -461.89404296875, "logps/rejected": -475.98297119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1025147438049316, "rewards/margins": 16.92310333251953, "rewards/rejected": -19.025617599487305, "step": 403 }, { "epoch": 1.34, "learning_rate": 3.4176227494832305e-06, "logits/chosen": -4.36278772354126, "logits/rejected": -4.285384178161621, "logps/chosen": -419.598388671875, "logps/rejected": -522.1483154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.002203464508057, "rewards/margins": 13.73774528503418, "rewards/rejected": -20.739948272705078, "step": 404 }, { "epoch": 1.34, "learning_rate": 3.4086517996466574e-06, "logits/chosen": -4.3044610023498535, "logits/rejected": -4.057070255279541, "logps/chosen": -347.9353332519531, "logps/rejected": -613.7538452148438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.469280958175659, "rewards/margins": 23.482481002807617, "rewards/rejected": -26.95176124572754, "step": 405 }, { "epoch": 1.34, "learning_rate": 3.3996673483268573e-06, "logits/chosen": -4.251558303833008, "logits/rejected": -4.351174354553223, "logps/chosen": -286.38739013671875, "logps/rejected": -520.8343505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.254539489746094, "rewards/margins": 19.796003341674805, "rewards/rejected": -25.0505428314209, "step": 406 }, { "epoch": 1.35, "learning_rate": 3.3906695290220736e-06, "logits/chosen": -4.284546375274658, "logits/rejected": -3.900489330291748, "logps/chosen": -427.06927490234375, "logps/rejected": -596.43212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.04730224609375, "rewards/margins": 17.397186279296875, "rewards/rejected": -22.444488525390625, "step": 407 }, { "epoch": 1.35, "learning_rate": 3.3816584754291814e-06, "logits/chosen": -4.460649490356445, "logits/rejected": -4.002116680145264, "logps/chosen": -545.2120361328125, "logps/rejected": -682.9246826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.572503566741943, "rewards/margins": 22.727462768554688, "rewards/rejected": -29.29996681213379, "step": 408 }, { "epoch": 1.35, "learning_rate": 3.3726343214417023e-06, "logits/chosen": -3.805220365524292, "logits/rejected": -4.449551582336426, "logps/chosen": -104.36502075195312, "logps/rejected": -373.097900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.3286751508712769, "rewards/margins": 13.467634201049805, "rewards/rejected": -12.138958930969238, "step": 409 }, { "epoch": 1.36, "learning_rate": 3.3635972011478134e-06, "logits/chosen": -4.453293323516846, "logits/rejected": -3.6812851428985596, "logps/chosen": -344.2782287597656, "logps/rejected": -602.1340942382812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.0549774169921875, "rewards/margins": 27.558881759643555, "rewards/rejected": -27.613859176635742, "step": 410 }, { "epoch": 1.36, "learning_rate": 3.354547248828356e-06, "logits/chosen": -4.348405361175537, "logits/rejected": -3.354741334915161, "logps/chosen": -443.5060729980469, "logps/rejected": -599.057373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0324127674102783, "rewards/margins": 23.604223251342773, "rewards/rejected": -25.63663673400879, "step": 411 }, { "epoch": 1.36, "learning_rate": 3.3454845989548385e-06, "logits/chosen": -4.515380382537842, "logits/rejected": -4.0116400718688965, "logps/chosen": -468.1466369628906, "logps/rejected": -611.712158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.43798828125, "rewards/margins": 28.498184204101562, "rewards/rejected": -26.060195922851562, "step": 412 }, { "epoch": 1.37, "learning_rate": 3.336409386187444e-06, "logits/chosen": -4.36453914642334, "logits/rejected": -3.8994998931884766, "logps/chosen": -284.07257080078125, "logps/rejected": -449.8879089355469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5306426882743835, "rewards/margins": 19.006921768188477, "rewards/rejected": -19.537565231323242, "step": 413 }, { "epoch": 1.37, "learning_rate": 3.327321745373021e-06, "logits/chosen": -4.660157680511475, "logits/rejected": -4.660157680511475, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 414 }, { "epoch": 1.37, "learning_rate": 3.318221811543086e-06, "logits/chosen": -4.238649845123291, "logits/rejected": -3.397836208343506, "logps/chosen": -325.6258850097656, "logps/rejected": -538.45703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.284311056137085, "rewards/margins": 20.441768646240234, "rewards/rejected": -23.7260799407959, "step": 415 }, { "epoch": 1.38, "learning_rate": 3.309109719911814e-06, "logits/chosen": -5.034450531005859, "logits/rejected": -3.474153757095337, "logps/chosen": -1411.798583984375, "logps/rejected": -404.42431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.30876466631889343, "rewards/margins": 13.646563529968262, "rewards/rejected": -13.337799072265625, "step": 416 }, { "epoch": 1.38, "learning_rate": 3.299985605874031e-06, "logits/chosen": -4.445805549621582, "logits/rejected": -3.711859703063965, "logps/chosen": -448.7423400878906, "logps/rejected": -633.969482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4439666271209717, "rewards/margins": 24.051715850830078, "rewards/rejected": -27.495681762695312, "step": 417 }, { "epoch": 1.38, "learning_rate": 3.2908496050032024e-06, "logits/chosen": -4.347026348114014, "logits/rejected": -4.162792682647705, "logps/chosen": -395.5592956542969, "logps/rejected": -583.1677856445312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5506988763809204, "rewards/margins": 21.170406341552734, "rewards/rejected": -22.721105575561523, "step": 418 }, { "epoch": 1.39, "learning_rate": 3.2817018530494164e-06, "logits/chosen": -4.310215473175049, "logits/rejected": -3.728440523147583, "logps/chosen": -338.7269287109375, "logps/rejected": -533.636962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7680604457855225, "rewards/margins": 23.321086883544922, "rewards/rejected": -26.089147567749023, "step": 419 }, { "epoch": 1.39, "learning_rate": 3.272542485937369e-06, "logits/chosen": -4.449070930480957, "logits/rejected": -3.3976151943206787, "logps/chosen": -657.876708984375, "logps/rejected": -491.18988037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7952880859375, "rewards/margins": 20.058670043945312, "rewards/rejected": -20.853958129882812, "step": 420 }, { "epoch": 1.39, "learning_rate": 3.263371639764343e-06, "logits/chosen": -4.493232727050781, "logits/rejected": -4.127214431762695, "logps/chosen": -348.39501953125, "logps/rejected": -614.731201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.659765601158142, "rewards/margins": 21.814050674438477, "rewards/rejected": -23.47381591796875, "step": 421 }, { "epoch": 1.4, "learning_rate": 3.254189450798189e-06, "logits/chosen": -4.208687782287598, "logits/rejected": -3.545189380645752, "logps/chosen": -520.898193359375, "logps/rejected": -564.629150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.90895414352417, "rewards/margins": 18.219066619873047, "rewards/rejected": -24.128021240234375, "step": 422 }, { "epoch": 1.4, "learning_rate": 3.2449960554752935e-06, "logits/chosen": -4.3931684494018555, "logits/rejected": -4.1837005615234375, "logps/chosen": -276.5966796875, "logps/rejected": -600.9006958007812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.020294189453125, "rewards/margins": 25.713865280151367, "rewards/rejected": -25.734159469604492, "step": 423 }, { "epoch": 1.4, "learning_rate": 3.2357915903985605e-06, "logits/chosen": -4.418226718902588, "logits/rejected": -3.992652654647827, "logps/chosen": -351.876708984375, "logps/rejected": -505.398193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6681153774261475, "rewards/margins": 20.386028289794922, "rewards/rejected": -24.05414390563965, "step": 424 }, { "epoch": 1.41, "learning_rate": 3.226576192335373e-06, "logits/chosen": -4.429302215576172, "logits/rejected": -3.5385775566101074, "logps/chosen": -420.6581726074219, "logps/rejected": -491.62158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.32779860496521, "rewards/margins": 20.086910247802734, "rewards/rejected": -22.414709091186523, "step": 425 }, { "epoch": 1.41, "learning_rate": 3.21734999821557e-06, "logits/chosen": -4.415342330932617, "logits/rejected": -4.032643795013428, "logps/chosen": -435.6635437011719, "logps/rejected": -701.41845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.122702121734619, "rewards/margins": 23.375268936157227, "rewards/rejected": -25.497970581054688, "step": 426 }, { "epoch": 1.41, "learning_rate": 3.2081131451294025e-06, "logits/chosen": -4.572709560394287, "logits/rejected": -3.7515692710876465, "logps/chosen": -388.33758544921875, "logps/rejected": -545.783447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4055114686489105, "rewards/margins": 24.55466651916504, "rewards/rejected": -24.96017837524414, "step": 427 }, { "epoch": 1.42, "learning_rate": 3.1988657703255043e-06, "logits/chosen": -4.463183879852295, "logits/rejected": -3.774198293685913, "logps/chosen": -366.28350830078125, "logps/rejected": -657.7838134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7326629757881165, "rewards/margins": 28.33591651916504, "rewards/rejected": -29.068578720092773, "step": 428 }, { "epoch": 1.42, "learning_rate": 3.1896080112088477e-06, "logits/chosen": -4.525426387786865, "logits/rejected": -3.691302537918091, "logps/chosen": -363.2294921875, "logps/rejected": -529.0943603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.335992336273193, "rewards/margins": 20.322162628173828, "rewards/rejected": -24.65815544128418, "step": 429 }, { "epoch": 1.42, "learning_rate": 3.1803400053387044e-06, "logits/chosen": -4.471107006072998, "logits/rejected": -4.122209072113037, "logps/chosen": -487.1780700683594, "logps/rejected": -635.154052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.277719020843506, "rewards/margins": 19.1456241607666, "rewards/rejected": -24.423343658447266, "step": 430 }, { "epoch": 1.43, "learning_rate": 3.1710618904266006e-06, "logits/chosen": -4.434894561767578, "logits/rejected": -3.4150948524475098, "logps/chosen": -449.016357421875, "logps/rejected": -627.8999633789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.055072069168091, "rewards/margins": 25.735448837280273, "rewards/rejected": -28.7905216217041, "step": 431 }, { "epoch": 1.43, "learning_rate": 3.1617738043342695e-06, "logits/chosen": -4.355262279510498, "logits/rejected": -3.7205264568328857, "logps/chosen": -403.456298828125, "logps/rejected": -616.9301147460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9781494140625, "rewards/margins": 26.587080001831055, "rewards/rejected": -29.565229415893555, "step": 432 }, { "epoch": 1.43, "learning_rate": 3.152475885071606e-06, "logits/chosen": -4.493656158447266, "logits/rejected": -3.3645427227020264, "logps/chosen": -352.35736083984375, "logps/rejected": -511.148193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0501526594161987, "rewards/margins": 19.779478073120117, "rewards/rejected": -20.82962989807129, "step": 433 }, { "epoch": 1.44, "learning_rate": 3.143168270794612e-06, "logits/chosen": -4.326676845550537, "logits/rejected": -3.9220542907714844, "logps/chosen": -337.70269775390625, "logps/rejected": -569.7907104492188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9540069103240967, "rewards/margins": 21.489450454711914, "rewards/rejected": -25.443456649780273, "step": 434 }, { "epoch": 1.44, "learning_rate": 3.1338510998033484e-06, "logits/chosen": -4.615167617797852, "logits/rejected": -3.6789557933807373, "logps/chosen": -340.3958740234375, "logps/rejected": -509.6401062011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.039807319641113, "rewards/margins": 20.61761474609375, "rewards/rejected": -24.65742301940918, "step": 435 }, { "epoch": 1.44, "learning_rate": 3.124524510539875e-06, "logits/chosen": -4.4688191413879395, "logits/rejected": -3.9443581104278564, "logps/chosen": -477.71588134765625, "logps/rejected": -680.073974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.602728366851807, "rewards/margins": 22.496606826782227, "rewards/rejected": -27.099334716796875, "step": 436 }, { "epoch": 1.45, "learning_rate": 3.1151886415861993e-06, "logits/chosen": -4.272546291351318, "logits/rejected": -3.7141354084014893, "logps/chosen": -545.0047607421875, "logps/rejected": -723.4859619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.865893840789795, "rewards/margins": 26.616140365600586, "rewards/rejected": -31.48203468322754, "step": 437 }, { "epoch": 1.45, "learning_rate": 3.1058436316622103e-06, "logits/chosen": -4.314119815826416, "logits/rejected": -3.5312554836273193, "logps/chosen": -392.02398681640625, "logps/rejected": -493.50408935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.307647705078125, "rewards/margins": 19.497329711914062, "rewards/rejected": -21.804977416992188, "step": 438 }, { "epoch": 1.45, "learning_rate": 3.0964896196236217e-06, "logits/chosen": -4.147656440734863, "logits/rejected": -3.743605375289917, "logps/chosen": -353.80889892578125, "logps/rejected": -459.1600341796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.126818895339966, "rewards/margins": 16.66583251953125, "rewards/rejected": -18.792652130126953, "step": 439 }, { "epoch": 1.46, "learning_rate": 3.0871267444599098e-06, "logits/chosen": -4.173769474029541, "logits/rejected": -3.487497091293335, "logps/chosen": -424.98516845703125, "logps/rejected": -421.6287841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.55228590965271, "rewards/margins": 16.944061279296875, "rewards/rejected": -19.496347427368164, "step": 440 }, { "epoch": 1.46, "learning_rate": 3.077755145292243e-06, "logits/chosen": -4.606653690338135, "logits/rejected": -4.249349594116211, "logps/chosen": -259.38092041015625, "logps/rejected": -554.31640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0914993286132812, "rewards/margins": 22.46698570251465, "rewards/rejected": -23.55848503112793, "step": 441 }, { "epoch": 1.46, "learning_rate": 3.0683749613714238e-06, "logits/chosen": -4.4153876304626465, "logits/rejected": -3.4645602703094482, "logps/chosen": -314.1990661621094, "logps/rejected": -505.63043212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4609375, "rewards/margins": 21.634122848510742, "rewards/rejected": -24.095060348510742, "step": 442 }, { "epoch": 1.47, "learning_rate": 3.0589863320758063e-06, "logits/chosen": -4.313124179840088, "logits/rejected": -3.704991102218628, "logps/chosen": -382.89312744140625, "logps/rejected": -581.22412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.91046142578125, "rewards/margins": 23.63400650024414, "rewards/rejected": -22.72354507446289, "step": 443 }, { "epoch": 1.47, "learning_rate": 3.0495893969092395e-06, "logits/chosen": -4.387284755706787, "logits/rejected": -3.930586338043213, "logps/chosen": -387.11590576171875, "logps/rejected": -688.8898315429688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.050634860992432, "rewards/margins": 28.2360782623291, "rewards/rejected": -32.286712646484375, "step": 444 }, { "epoch": 1.47, "learning_rate": 3.040184295498984e-06, "logits/chosen": -4.339963436126709, "logits/rejected": -3.655027151107788, "logps/chosen": -284.65704345703125, "logps/rejected": -575.107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.9107940793037415, "rewards/margins": 27.01205062866211, "rewards/rejected": -26.10125732421875, "step": 445 }, { "epoch": 1.48, "learning_rate": 3.0307711675936426e-06, "logits/chosen": -4.384993553161621, "logits/rejected": -4.153196334838867, "logps/chosen": -274.9207763671875, "logps/rejected": -577.053955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9481414556503296, "rewards/margins": 22.555381774902344, "rewards/rejected": -24.503522872924805, "step": 446 }, { "epoch": 1.48, "learning_rate": 3.0213501530610807e-06, "logits/chosen": -4.68607234954834, "logits/rejected": -4.4093337059021, "logps/chosen": -297.26617431640625, "logps/rejected": -601.4628295898438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.327111840248108, "rewards/margins": 23.916927337646484, "rewards/rejected": -25.24403953552246, "step": 447 }, { "epoch": 1.48, "learning_rate": 3.0119213918863515e-06, "logits/chosen": -4.277585029602051, "logits/rejected": -3.8975319862365723, "logps/chosen": -345.8713684082031, "logps/rejected": -497.20233154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1549103260040283, "rewards/margins": 18.675649642944336, "rewards/rejected": -19.8305606842041, "step": 448 }, { "epoch": 1.49, "learning_rate": 3.0024850241696128e-06, "logits/chosen": -4.33684778213501, "logits/rejected": -3.8972766399383545, "logps/chosen": -358.2585144042969, "logps/rejected": -644.777099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.839996337890625, "rewards/margins": 25.32527732849121, "rewards/rejected": -28.165273666381836, "step": 449 }, { "epoch": 1.49, "learning_rate": 2.993041190124047e-06, "logits/chosen": -4.357908248901367, "logits/rejected": -3.4419775009155273, "logps/chosen": -493.86541748046875, "logps/rejected": -579.1130981445312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.270538330078125, "rewards/margins": 19.59089469909668, "rewards/rejected": -23.861433029174805, "step": 450 }, { "epoch": 1.49, "learning_rate": 2.9835900300737763e-06, "logits/chosen": -4.34766960144043, "logits/rejected": -4.226493835449219, "logps/chosen": -234.78842163085938, "logps/rejected": -520.0653686523438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.868554711341858, "rewards/margins": 21.814777374267578, "rewards/rejected": -23.683332443237305, "step": 451 }, { "epoch": 1.5, "learning_rate": 2.974131684451781e-06, "logits/chosen": -4.53407621383667, "logits/rejected": -3.506723642349243, "logps/chosen": -326.0643615722656, "logps/rejected": -636.6197509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2346099615097046, "rewards/margins": 27.009422302246094, "rewards/rejected": -28.24403190612793, "step": 452 }, { "epoch": 1.5, "learning_rate": 2.9646662937978082e-06, "logits/chosen": -4.328660011291504, "logits/rejected": -3.551304817199707, "logps/chosen": -357.0900573730469, "logps/rejected": -560.1730346679688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.538644552230835, "rewards/margins": 20.612537384033203, "rewards/rejected": -24.151182174682617, "step": 453 }, { "epoch": 1.5, "learning_rate": 2.9551939987562866e-06, "logits/chosen": -4.679781913757324, "logits/rejected": -4.490259170532227, "logps/chosen": -305.5557861328125, "logps/rejected": -620.52490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.134747505187988, "rewards/margins": 21.78205108642578, "rewards/rejected": -26.916797637939453, "step": 454 }, { "epoch": 1.51, "learning_rate": 2.9457149400742357e-06, "logits/chosen": -4.545608997344971, "logits/rejected": -3.8866727352142334, "logps/chosen": -370.07122802734375, "logps/rejected": -544.5244750976562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.343170166015625, "rewards/margins": 17.216033935546875, "rewards/rejected": -21.5592041015625, "step": 455 }, { "epoch": 1.51, "learning_rate": 2.936229258599174e-06, "logits/chosen": -4.396432876586914, "logits/rejected": -3.7948856353759766, "logps/chosen": -373.44183349609375, "logps/rejected": -572.761474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0043731927871704, "rewards/margins": 24.456161499023438, "rewards/rejected": -25.460535049438477, "step": 456 }, { "epoch": 1.51, "learning_rate": 2.926737095277029e-06, "logits/chosen": -4.500033378601074, "logits/rejected": -3.4864351749420166, "logps/chosen": -464.05364990234375, "logps/rejected": -607.920166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.301110744476318, "rewards/margins": 23.323123931884766, "rewards/rejected": -28.624235153198242, "step": 457 }, { "epoch": 1.52, "learning_rate": 2.9172385911500385e-06, "logits/chosen": -4.522511959075928, "logits/rejected": -4.429636001586914, "logps/chosen": -192.4569854736328, "logps/rejected": -501.73016357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.4317001402378082, "rewards/margins": 21.19770622253418, "rewards/rejected": -20.766006469726562, "step": 458 }, { "epoch": 1.52, "learning_rate": 2.907733887354657e-06, "logits/chosen": -4.347232818603516, "logits/rejected": -4.287606239318848, "logps/chosen": -306.1401672363281, "logps/rejected": -550.2047119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5003325939178467, "rewards/margins": 23.564359664916992, "rewards/rejected": -26.0646915435791, "step": 459 }, { "epoch": 1.52, "learning_rate": 2.898223125119461e-06, "logits/chosen": -4.511804103851318, "logits/rejected": -3.4755098819732666, "logps/chosen": -452.4083251953125, "logps/rejected": -568.3182983398438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2396819591522217, "rewards/margins": 21.291404724121094, "rewards/rejected": -23.531085968017578, "step": 460 }, { "epoch": 1.53, "learning_rate": 2.8887064457630453e-06, "logits/chosen": -4.725271701812744, "logits/rejected": -4.104916095733643, "logps/chosen": -371.2562255859375, "logps/rejected": -514.2117919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.137930393218994, "rewards/margins": 19.437416076660156, "rewards/rejected": -22.575345993041992, "step": 461 }, { "epoch": 1.53, "learning_rate": 2.879183990691929e-06, "logits/chosen": -4.620101451873779, "logits/rejected": -3.5656161308288574, "logps/chosen": -386.53131103515625, "logps/rejected": -545.5679931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.054333448410034, "rewards/margins": 23.58875846862793, "rewards/rejected": -26.643091201782227, "step": 462 }, { "epoch": 1.53, "learning_rate": 2.8696559013984488e-06, "logits/chosen": -4.516706466674805, "logits/rejected": -4.020683288574219, "logps/chosen": -421.23834228515625, "logps/rejected": -613.3597412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1734864711761475, "rewards/margins": 24.959735870361328, "rewards/rejected": -28.133222579956055, "step": 463 }, { "epoch": 1.54, "learning_rate": 2.8601223194586613e-06, "logits/chosen": -4.591239929199219, "logits/rejected": -3.6537089347839355, "logps/chosen": -472.05859375, "logps/rejected": -554.2310791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0797698497772217, "rewards/margins": 21.99146270751953, "rewards/rejected": -24.071231842041016, "step": 464 }, { "epoch": 1.54, "learning_rate": 2.850583386530235e-06, "logits/chosen": -4.355994701385498, "logits/rejected": -4.38576078414917, "logps/chosen": -310.3178405761719, "logps/rejected": -672.557861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.741705298423767, "rewards/margins": 30.69386863708496, "rewards/rejected": -28.952163696289062, "step": 465 }, { "epoch": 1.54, "learning_rate": 2.841039244350351e-06, "logits/chosen": -4.551270484924316, "logits/rejected": -3.8082916736602783, "logps/chosen": -286.8211669921875, "logps/rejected": -535.1260986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.174298048019409, "rewards/margins": 22.47593116760254, "rewards/rejected": -25.65022850036621, "step": 466 }, { "epoch": 1.55, "learning_rate": 2.83149003473359e-06, "logits/chosen": -4.422173976898193, "logits/rejected": -3.4829633235931396, "logps/chosen": -370.59716796875, "logps/rejected": -434.92620849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.1693572998046875, "rewards/margins": 13.059667587280273, "rewards/rejected": -18.22902488708496, "step": 467 }, { "epoch": 1.55, "learning_rate": 2.8219358995698307e-06, "logits/chosen": -4.556554317474365, "logits/rejected": -3.9597673416137695, "logps/chosen": -353.119140625, "logps/rejected": -607.6500854492188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.67181396484375, "rewards/margins": 23.224639892578125, "rewards/rejected": -27.896453857421875, "step": 468 }, { "epoch": 1.55, "learning_rate": 2.8123769808221407e-06, "logits/chosen": -4.386852741241455, "logits/rejected": -3.493263006210327, "logps/chosen": -418.1610107421875, "logps/rejected": -563.915771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.056164503097534, "rewards/margins": 21.489892959594727, "rewards/rejected": -24.546056747436523, "step": 469 }, { "epoch": 1.56, "learning_rate": 2.8028134205246633e-06, "logits/chosen": -4.266320705413818, "logits/rejected": -3.9667537212371826, "logps/chosen": -340.1468811035156, "logps/rejected": -618.0774536132812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8424530029296875, "rewards/margins": 23.270231246948242, "rewards/rejected": -25.11268424987793, "step": 470 }, { "epoch": 1.56, "learning_rate": 2.793245360780512e-06, "logits/chosen": -4.307549953460693, "logits/rejected": -3.302494525909424, "logps/chosen": -506.43548583984375, "logps/rejected": -521.8065185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3766419887542725, "rewards/margins": 15.167179107666016, "rewards/rejected": -18.543821334838867, "step": 471 }, { "epoch": 1.56, "learning_rate": 2.783672943759655e-06, "logits/chosen": -4.328502655029297, "logits/rejected": -4.18637752532959, "logps/chosen": -327.0488586425781, "logps/rejected": -575.7240600585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.662304699420929, "rewards/margins": 22.378664016723633, "rewards/rejected": -23.04096794128418, "step": 472 }, { "epoch": 1.57, "learning_rate": 2.7740963116968063e-06, "logits/chosen": -4.624969959259033, "logits/rejected": -4.624969959259033, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 473 }, { "epoch": 1.57, "learning_rate": 2.7645156068893075e-06, "logits/chosen": -4.566493034362793, "logits/rejected": -4.030740737915039, "logps/chosen": -389.44818115234375, "logps/rejected": -509.12384033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.782413005828857, "rewards/margins": 18.374309539794922, "rewards/rejected": -23.156723022460938, "step": 474 }, { "epoch": 1.57, "learning_rate": 2.754930971695019e-06, "logits/chosen": -4.385883808135986, "logits/rejected": -4.309135913848877, "logps/chosen": -399.21356201171875, "logps/rejected": -565.6505737304688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.669198751449585, "rewards/margins": 18.97157859802246, "rewards/rejected": -21.640777587890625, "step": 475 }, { "epoch": 1.58, "learning_rate": 2.745342548530202e-06, "logits/chosen": -4.433050632476807, "logits/rejected": -3.125581979751587, "logps/chosen": -689.978515625, "logps/rejected": -626.1990966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.796069622039795, "rewards/margins": 21.79958152770996, "rewards/rejected": -26.595651626586914, "step": 476 }, { "epoch": 1.58, "learning_rate": 2.7357504798674004e-06, "logits/chosen": -4.230395317077637, "logits/rejected": -3.8781867027282715, "logps/chosen": -393.67724609375, "logps/rejected": -633.24609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.936596632003784, "rewards/margins": 24.811481475830078, "rewards/rejected": -27.748077392578125, "step": 477 }, { "epoch": 1.58, "learning_rate": 2.726154908233328e-06, "logits/chosen": -4.332608222961426, "logits/rejected": -3.4488308429718018, "logps/chosen": -495.01068115234375, "logps/rejected": -621.5181884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6153810024261475, "rewards/margins": 23.027002334594727, "rewards/rejected": -26.642383575439453, "step": 478 }, { "epoch": 1.59, "learning_rate": 2.716555976206748e-06, "logits/chosen": -4.295093536376953, "logits/rejected": -4.473363876342773, "logps/chosen": -395.58465576171875, "logps/rejected": -657.3076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6721221804618835, "rewards/margins": 26.062959671020508, "rewards/rejected": -26.735082626342773, "step": 479 }, { "epoch": 1.59, "learning_rate": 2.706953826416353e-06, "logits/chosen": -4.405943393707275, "logits/rejected": -3.682810068130493, "logps/chosen": -479.26678466796875, "logps/rejected": -557.24560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3972413539886475, "rewards/margins": 19.180349349975586, "rewards/rejected": -21.577590942382812, "step": 480 }, { "epoch": 1.59, "learning_rate": 2.6973486015386507e-06, "logits/chosen": -4.419567584991455, "logits/rejected": -3.9329118728637695, "logps/chosen": -454.8463439941406, "logps/rejected": -577.69677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.991354465484619, "rewards/margins": 20.57147216796875, "rewards/rejected": -24.56282615661621, "step": 481 }, { "epoch": 1.6, "learning_rate": 2.6877404442958393e-06, "logits/chosen": -4.608742713928223, "logits/rejected": -3.9749083518981934, "logps/chosen": -374.61627197265625, "logps/rejected": -506.05657958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.265851020812988, "rewards/margins": 20.957012176513672, "rewards/rejected": -25.222864151000977, "step": 482 }, { "epoch": 1.6, "learning_rate": 2.6781294974536886e-06, "logits/chosen": -4.444053649902344, "logits/rejected": -4.5116753578186035, "logps/chosen": -300.3226318359375, "logps/rejected": -692.375244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1088837385177612, "rewards/margins": 30.86850357055664, "rewards/rejected": -31.977386474609375, "step": 483 }, { "epoch": 1.6, "learning_rate": 2.6685159038194202e-06, "logits/chosen": -4.410458087921143, "logits/rejected": -4.057353496551514, "logps/chosen": -410.6009521484375, "logps/rejected": -629.4668579101562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.132080078125, "rewards/margins": 22.56564712524414, "rewards/rejected": -27.69772720336914, "step": 484 }, { "epoch": 1.61, "learning_rate": 2.6588998062395803e-06, "logits/chosen": -4.451384544372559, "logits/rejected": -3.5285940170288086, "logps/chosen": -407.1529846191406, "logps/rejected": -534.109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.311087131500244, "rewards/margins": 19.70151138305664, "rewards/rejected": -22.012598037719727, "step": 485 }, { "epoch": 1.61, "learning_rate": 2.6492813475979243e-06, "logits/chosen": -4.520169734954834, "logits/rejected": -3.3711836338043213, "logps/chosen": -596.6748046875, "logps/rejected": -568.712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.032812595367432, "rewards/margins": 19.967634201049805, "rewards/rejected": -27.000446319580078, "step": 486 }, { "epoch": 1.61, "learning_rate": 2.639660670813288e-06, "logits/chosen": -4.383321762084961, "logits/rejected": -3.8218154907226562, "logps/chosen": -384.46527099609375, "logps/rejected": -569.0106201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.606011867523193, "rewards/margins": 18.73273277282715, "rewards/rejected": -23.3387451171875, "step": 487 }, { "epoch": 1.62, "learning_rate": 2.630037918837468e-06, "logits/chosen": -4.515131950378418, "logits/rejected": -3.692455291748047, "logps/chosen": -413.51153564453125, "logps/rejected": -612.6315307617188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.524243116378784, "rewards/margins": 22.71697235107422, "rewards/rejected": -26.241214752197266, "step": 488 }, { "epoch": 1.62, "learning_rate": 2.6204132346530936e-06, "logits/chosen": -4.435654640197754, "logits/rejected": -4.284449577331543, "logps/chosen": -346.2232971191406, "logps/rejected": -559.8519287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.234631299972534, "rewards/margins": 22.379703521728516, "rewards/rejected": -24.614334106445312, "step": 489 }, { "epoch": 1.62, "learning_rate": 2.6107867612715043e-06, "logits/chosen": -4.561266899108887, "logits/rejected": -4.219658851623535, "logps/chosen": -273.3294677734375, "logps/rejected": -482.2241516113281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.197283983230591, "rewards/margins": 18.133920669555664, "rewards/rejected": -20.331205368041992, "step": 490 }, { "epoch": 1.63, "learning_rate": 2.601158641730629e-06, "logits/chosen": -4.344208717346191, "logits/rejected": -3.376279592514038, "logps/chosen": -423.7567443847656, "logps/rejected": -544.533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.172845363616943, "rewards/margins": 20.044063568115234, "rewards/rejected": -24.216909408569336, "step": 491 }, { "epoch": 1.63, "learning_rate": 2.5915290190928518e-06, "logits/chosen": -4.436819553375244, "logits/rejected": -3.7870032787323, "logps/chosen": -344.77886962890625, "logps/rejected": -517.6800537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.3957153260707855, "rewards/margins": 25.133615493774414, "rewards/rejected": -24.737899780273438, "step": 492 }, { "epoch": 1.63, "learning_rate": 2.5818980364428935e-06, "logits/chosen": -4.595250129699707, "logits/rejected": -3.464118719100952, "logps/chosen": -435.914794921875, "logps/rejected": -570.711669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.087146043777466, "rewards/margins": 23.15151596069336, "rewards/rejected": -25.238662719726562, "step": 493 }, { "epoch": 1.64, "learning_rate": 2.572265836885682e-06, "logits/chosen": -4.294907093048096, "logits/rejected": -3.5174379348754883, "logps/chosen": -654.4555053710938, "logps/rejected": -472.199462890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.319586277008057, "rewards/margins": 17.44939613342285, "rewards/rejected": -21.76898193359375, "step": 494 }, { "epoch": 1.64, "learning_rate": 2.5626325635442283e-06, "logits/chosen": -4.347671031951904, "logits/rejected": -3.7099201679229736, "logps/chosen": -501.5091552734375, "logps/rejected": -515.044677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.598855495452881, "rewards/margins": 17.134353637695312, "rewards/rejected": -22.73320960998535, "step": 495 }, { "epoch": 1.64, "learning_rate": 2.5529983595574964e-06, "logits/chosen": -4.487810134887695, "logits/rejected": -3.4707536697387695, "logps/chosen": -352.0235595703125, "logps/rejected": -498.4788513183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.110028028488159, "rewards/margins": 17.527103424072266, "rewards/rejected": -20.637130737304688, "step": 496 }, { "epoch": 1.65, "learning_rate": 2.5433633680782817e-06, "logits/chosen": -4.514901638031006, "logits/rejected": -3.2945892810821533, "logps/chosen": -482.579345703125, "logps/rejected": -441.725341796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8252381086349487, "rewards/margins": 19.974946975708008, "rewards/rejected": -21.80018424987793, "step": 497 }, { "epoch": 1.65, "learning_rate": 2.533727732271077e-06, "logits/chosen": -4.41818904876709, "logits/rejected": -4.347615718841553, "logps/chosen": -276.743408203125, "logps/rejected": -635.6170654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5825958251953125, "rewards/margins": 24.777637481689453, "rewards/rejected": -28.360233306884766, "step": 498 }, { "epoch": 1.65, "learning_rate": 2.524091595309952e-06, "logits/chosen": -4.423609256744385, "logits/rejected": -3.495140552520752, "logps/chosen": -538.4277954101562, "logps/rejected": -546.6807861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.458142042160034, "rewards/margins": 18.367130279541016, "rewards/rejected": -20.825271606445312, "step": 499 }, { "epoch": 1.66, "learning_rate": 2.5144551003764227e-06, "logits/chosen": -4.233229160308838, "logits/rejected": -3.2597172260284424, "logps/chosen": -455.0411376953125, "logps/rejected": -515.404052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.14308500289917, "rewards/margins": 18.73338508605957, "rewards/rejected": -22.8764705657959, "step": 500 }, { "epoch": 1.66, "learning_rate": 2.5048183906573227e-06, "logits/chosen": -4.481802940368652, "logits/rejected": -3.8720529079437256, "logps/chosen": -339.2329406738281, "logps/rejected": -601.2000122070312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9757903814315796, "rewards/margins": 21.960813522338867, "rewards/rejected": -23.936603546142578, "step": 501 }, { "epoch": 1.66, "learning_rate": 2.495181609342678e-06, "logits/chosen": -4.353747367858887, "logits/rejected": -4.015441417694092, "logps/chosen": -317.51611328125, "logps/rejected": -484.4516296386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9086639285087585, "rewards/margins": 22.328155517578125, "rewards/rejected": -23.236820220947266, "step": 502 }, { "epoch": 1.67, "learning_rate": 2.4855448996235777e-06, "logits/chosen": -4.451262950897217, "logits/rejected": -3.4453048706054688, "logps/chosen": -504.0457458496094, "logps/rejected": -600.30517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.7669219970703125, "rewards/margins": 22.33896827697754, "rewards/rejected": -27.10589027404785, "step": 503 }, { "epoch": 1.67, "learning_rate": 2.475908404690049e-06, "logits/chosen": -4.581128120422363, "logits/rejected": -4.031887054443359, "logps/chosen": -493.5894775390625, "logps/rejected": -650.125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.604229927062988, "rewards/margins": 20.365169525146484, "rewards/rejected": -24.96940040588379, "step": 504 }, { "epoch": 1.67, "learning_rate": 2.466272267728924e-06, "logits/chosen": -4.434467792510986, "logits/rejected": -3.737818717956543, "logps/chosen": -434.6280212402344, "logps/rejected": -483.1436767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4491729736328125, "rewards/margins": 19.58627700805664, "rewards/rejected": -23.035449981689453, "step": 505 }, { "epoch": 1.68, "learning_rate": 2.4566366319217196e-06, "logits/chosen": -4.582334995269775, "logits/rejected": -4.020341396331787, "logps/chosen": -302.0543212890625, "logps/rejected": -519.0069580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3763701915740967, "rewards/margins": 21.127307891845703, "rewards/rejected": -24.503677368164062, "step": 506 }, { "epoch": 1.68, "learning_rate": 2.4470016404425045e-06, "logits/chosen": -4.539458751678467, "logits/rejected": -4.119174957275391, "logps/chosen": -265.7320556640625, "logps/rejected": -523.607177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5269775390625, "rewards/margins": 22.978612899780273, "rewards/rejected": -24.505590438842773, "step": 507 }, { "epoch": 1.68, "learning_rate": 2.437367436455773e-06, "logits/chosen": -4.174989700317383, "logits/rejected": -3.3251116275787354, "logps/chosen": -699.0255126953125, "logps/rejected": -636.7362060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.78802490234375, "rewards/margins": 24.23048210144043, "rewards/rejected": -29.01850700378418, "step": 508 }, { "epoch": 1.69, "learning_rate": 2.427734163114319e-06, "logits/chosen": -4.3215813636779785, "logits/rejected": -3.4943883419036865, "logps/chosen": -449.01904296875, "logps/rejected": -583.8412475585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.295483350753784, "rewards/margins": 20.842744827270508, "rewards/rejected": -24.138227462768555, "step": 509 }, { "epoch": 1.69, "learning_rate": 2.418101963557107e-06, "logits/chosen": -4.3660993576049805, "logits/rejected": -3.6623497009277344, "logps/chosen": -264.6542053222656, "logps/rejected": -493.95806884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.831805408000946, "rewards/margins": 22.366228103637695, "rewards/rejected": -23.198034286499023, "step": 510 }, { "epoch": 1.69, "learning_rate": 2.4084709809071487e-06, "logits/chosen": -4.372247219085693, "logits/rejected": -3.317601203918457, "logps/chosen": -530.7295532226562, "logps/rejected": -586.0404052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3441925048828125, "rewards/margins": 21.82636070251465, "rewards/rejected": -25.17055320739746, "step": 511 }, { "epoch": 1.7, "learning_rate": 2.398841358269371e-06, "logits/chosen": -4.4392924308776855, "logits/rejected": -3.483755350112915, "logps/chosen": -486.6715087890625, "logps/rejected": -640.510498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7669618129730225, "rewards/margins": 26.311260223388672, "rewards/rejected": -29.078222274780273, "step": 512 }, { "epoch": 1.7, "learning_rate": 2.3892132387284956e-06, "logits/chosen": -4.3920817375183105, "logits/rejected": -3.70149827003479, "logps/chosen": -365.09234619140625, "logps/rejected": -584.9925537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.769537329673767, "rewards/margins": 23.58013343811035, "rewards/rejected": -25.34967041015625, "step": 513 }, { "epoch": 1.7, "learning_rate": 2.3795867653469072e-06, "logits/chosen": -4.450637340545654, "logits/rejected": -3.307157278060913, "logps/chosen": -444.735595703125, "logps/rejected": -511.06451416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.040740966796875, "rewards/margins": 20.74411964416504, "rewards/rejected": -23.784860610961914, "step": 514 }, { "epoch": 1.71, "learning_rate": 2.3699620811625327e-06, "logits/chosen": -4.6257524490356445, "logits/rejected": -3.602998971939087, "logps/chosen": -402.9666748046875, "logps/rejected": -561.99560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.576306104660034, "rewards/margins": 20.052263259887695, "rewards/rejected": -23.628568649291992, "step": 515 }, { "epoch": 1.71, "learning_rate": 2.3603393291867122e-06, "logits/chosen": -4.232629299163818, "logits/rejected": -3.3740334510803223, "logps/chosen": -471.32843017578125, "logps/rejected": -565.0958251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.581158638000488, "rewards/margins": 21.479419708251953, "rewards/rejected": -26.060577392578125, "step": 516 }, { "epoch": 1.71, "learning_rate": 2.350718652402076e-06, "logits/chosen": -4.651673793792725, "logits/rejected": -3.6871848106384277, "logps/chosen": -412.3416748046875, "logps/rejected": -571.3272705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.179040908813477, "rewards/margins": 20.566640853881836, "rewards/rejected": -28.745681762695312, "step": 517 }, { "epoch": 1.72, "learning_rate": 2.34110019376042e-06, "logits/chosen": -4.293639183044434, "logits/rejected": -3.6013848781585693, "logps/chosen": -517.2723388671875, "logps/rejected": -554.429931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.753997802734375, "rewards/margins": 21.584081649780273, "rewards/rejected": -25.33807945251465, "step": 518 }, { "epoch": 1.72, "learning_rate": 2.3314840961805806e-06, "logits/chosen": -4.395750999450684, "logits/rejected": -4.038932800292969, "logps/chosen": -340.3520812988281, "logps/rejected": -625.3683471679688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.0049285888671875, "rewards/margins": 28.607328414916992, "rewards/rejected": -28.602399826049805, "step": 519 }, { "epoch": 1.72, "learning_rate": 2.3218705025463118e-06, "logits/chosen": -4.397328853607178, "logits/rejected": -3.6527514457702637, "logps/chosen": -357.65704345703125, "logps/rejected": -506.7778015136719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.2845458984375, "rewards/margins": 20.555856704711914, "rewards/rejected": -20.271310806274414, "step": 520 }, { "epoch": 1.73, "learning_rate": 2.312259555704161e-06, "logits/chosen": -4.330678462982178, "logits/rejected": -4.049455642700195, "logps/chosen": -366.6837463378906, "logps/rejected": -647.5443115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.868676781654358, "rewards/margins": 24.176105499267578, "rewards/rejected": -26.044782638549805, "step": 521 }, { "epoch": 1.73, "learning_rate": 2.3026513984613506e-06, "logits/chosen": -4.300678253173828, "logits/rejected": -4.247992515563965, "logps/chosen": -329.80303955078125, "logps/rejected": -523.3076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1151490211486816, "rewards/margins": 19.995100021362305, "rewards/rejected": -22.110248565673828, "step": 522 }, { "epoch": 1.73, "learning_rate": 2.293046173583648e-06, "logits/chosen": -4.347271919250488, "logits/rejected": -4.29665470123291, "logps/chosen": -397.5057373046875, "logps/rejected": -547.013427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5147857666015625, "rewards/margins": 21.20478630065918, "rewards/rejected": -24.719572067260742, "step": 523 }, { "epoch": 1.74, "learning_rate": 2.2834440237932537e-06, "logits/chosen": -4.5628180503845215, "logits/rejected": -3.5940840244293213, "logps/chosen": -371.0335388183594, "logps/rejected": -569.8633422851562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.06113600730896, "rewards/margins": 22.991321563720703, "rewards/rejected": -26.052457809448242, "step": 524 }, { "epoch": 1.74, "learning_rate": 2.2738450917666727e-06, "logits/chosen": -4.492840766906738, "logits/rejected": -3.1524245738983154, "logps/chosen": -715.3472900390625, "logps/rejected": -474.80560302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.003808498382568, "rewards/margins": 18.691192626953125, "rewards/rejected": -22.69500160217285, "step": 525 }, { "epoch": 1.74, "learning_rate": 2.2642495201325995e-06, "logits/chosen": -4.554260730743408, "logits/rejected": -4.411481857299805, "logps/chosen": -282.2828369140625, "logps/rejected": -661.9052124023438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3053834438323975, "rewards/margins": 25.556482315063477, "rewards/rejected": -27.861865997314453, "step": 526 }, { "epoch": 1.75, "learning_rate": 2.2546574514697985e-06, "logits/chosen": -4.597961902618408, "logits/rejected": -4.161449909210205, "logps/chosen": -436.95843505859375, "logps/rejected": -650.0560302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.053936719894409, "rewards/margins": 27.141803741455078, "rewards/rejected": -30.19573974609375, "step": 527 }, { "epoch": 1.75, "learning_rate": 2.245069028304981e-06, "logits/chosen": -4.431636333465576, "logits/rejected": -4.060880184173584, "logps/chosen": -435.4438171386719, "logps/rejected": -582.9575805664062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.908206462860107, "rewards/margins": 22.136789321899414, "rewards/rejected": -27.04499626159668, "step": 528 }, { "epoch": 1.75, "learning_rate": 2.2354843931106933e-06, "logits/chosen": -4.363324165344238, "logits/rejected": -3.485128402709961, "logps/chosen": -454.4947204589844, "logps/rejected": -500.46124267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3398650884628296, "rewards/margins": 23.28053092956543, "rewards/rejected": -24.62039566040039, "step": 529 }, { "epoch": 1.75, "learning_rate": 2.225903688303195e-06, "logits/chosen": -4.49049711227417, "logits/rejected": -4.155828952789307, "logps/chosen": -311.2807922363281, "logps/rejected": -586.794189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1964142322540283, "rewards/margins": 24.11662483215332, "rewards/rejected": -26.313039779663086, "step": 530 }, { "epoch": 1.76, "learning_rate": 2.2163270562403453e-06, "logits/chosen": -4.60865592956543, "logits/rejected": -3.8004865646362305, "logps/chosen": -386.2127990722656, "logps/rejected": -569.5377197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1500701904296875, "rewards/margins": 21.160938262939453, "rewards/rejected": -23.31100845336914, "step": 531 }, { "epoch": 1.76, "learning_rate": 2.2067546392194888e-06, "logits/chosen": -4.559963703155518, "logits/rejected": -3.6089634895324707, "logps/chosen": -423.64300537109375, "logps/rejected": -586.7628173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.992706298828125, "rewards/margins": 22.380590438842773, "rewards/rejected": -26.3732967376709, "step": 532 }, { "epoch": 1.76, "learning_rate": 2.197186579475337e-06, "logits/chosen": -4.424755573272705, "logits/rejected": -3.7066447734832764, "logps/chosen": -486.338134765625, "logps/rejected": -558.112548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.435204982757568, "rewards/margins": 15.882268905639648, "rewards/rejected": -21.317474365234375, "step": 533 }, { "epoch": 1.77, "learning_rate": 2.1876230191778598e-06, "logits/chosen": -4.272974491119385, "logits/rejected": -3.5304250717163086, "logps/chosen": -393.22076416015625, "logps/rejected": -589.1480712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.871990978717804, "rewards/margins": 25.124834060668945, "rewards/rejected": -25.996824264526367, "step": 534 }, { "epoch": 1.77, "learning_rate": 2.17806410043017e-06, "logits/chosen": -4.524386405944824, "logits/rejected": -4.524386405944824, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 535 }, { "epoch": 1.77, "learning_rate": 2.168509965266411e-06, "logits/chosen": -4.649780750274658, "logits/rejected": -3.954561948776245, "logps/chosen": -316.16741943359375, "logps/rejected": -537.8360595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.065786838531494, "rewards/margins": 21.404767990112305, "rewards/rejected": -23.47055435180664, "step": 536 }, { "epoch": 1.78, "learning_rate": 2.15896075564965e-06, "logits/chosen": -4.431206703186035, "logits/rejected": -4.099005699157715, "logps/chosen": -328.8981628417969, "logps/rejected": -619.6755981445312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.10228576511144638, "rewards/margins": 29.2618350982666, "rewards/rejected": -29.364120483398438, "step": 537 }, { "epoch": 1.78, "learning_rate": 2.1494166134697655e-06, "logits/chosen": -4.413819313049316, "logits/rejected": -3.4801533222198486, "logps/chosen": -471.62554931640625, "logps/rejected": -560.5090942382812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6229217052459717, "rewards/margins": 19.12151527404785, "rewards/rejected": -22.744436264038086, "step": 538 }, { "epoch": 1.78, "learning_rate": 2.13987768054134e-06, "logits/chosen": -4.456581115722656, "logits/rejected": -3.863801956176758, "logps/chosen": -357.640380859375, "logps/rejected": -612.9239501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8813720941543579, "rewards/margins": 26.2801570892334, "rewards/rejected": -27.161529541015625, "step": 539 }, { "epoch": 1.79, "learning_rate": 2.1303440986015525e-06, "logits/chosen": -4.296332836151123, "logits/rejected": -4.348906993865967, "logps/chosen": -330.3258056640625, "logps/rejected": -575.8681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.997112989425659, "rewards/margins": 24.005033493041992, "rewards/rejected": -27.002145767211914, "step": 540 }, { "epoch": 1.79, "learning_rate": 2.120816009308071e-06, "logits/chosen": -4.327535629272461, "logits/rejected": -3.389578104019165, "logps/chosen": -534.9880981445312, "logps/rejected": -523.1803588867188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3220887184143066, "rewards/margins": 22.022001266479492, "rewards/rejected": -24.34408950805664, "step": 541 }, { "epoch": 1.79, "learning_rate": 2.1112935542369546e-06, "logits/chosen": -4.527272701263428, "logits/rejected": -3.402604579925537, "logps/chosen": -429.7662658691406, "logps/rejected": -636.8261108398438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5840179920196533, "rewards/margins": 23.76004981994629, "rewards/rejected": -27.34406852722168, "step": 542 }, { "epoch": 1.8, "learning_rate": 2.1017768748805396e-06, "logits/chosen": -4.562657833099365, "logits/rejected": -3.9488637447357178, "logps/chosen": -421.45025634765625, "logps/rejected": -567.07080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3161346912384033, "rewards/margins": 22.188379287719727, "rewards/rejected": -25.504514694213867, "step": 543 }, { "epoch": 1.8, "learning_rate": 2.0922661126453436e-06, "logits/chosen": -4.454812526702881, "logits/rejected": -3.6670260429382324, "logps/chosen": -431.3782653808594, "logps/rejected": -498.7132568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.13663025200366974, "rewards/margins": 21.67325210571289, "rewards/rejected": -21.53662109375, "step": 544 }, { "epoch": 1.8, "learning_rate": 2.0827614088499624e-06, "logits/chosen": -4.409323215484619, "logits/rejected": -3.654407024383545, "logps/chosen": -439.560546875, "logps/rejected": -650.0850830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5039124488830566, "rewards/margins": 23.503814697265625, "rewards/rejected": -26.007726669311523, "step": 545 }, { "epoch": 1.81, "learning_rate": 2.0732629047229712e-06, "logits/chosen": -4.346444606781006, "logits/rejected": -3.9621469974517822, "logps/chosen": -263.6195373535156, "logps/rejected": -397.19451904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.792622447013855, "rewards/margins": 13.376258850097656, "rewards/rejected": -15.1688814163208, "step": 546 }, { "epoch": 1.81, "learning_rate": 2.0637707414008267e-06, "logits/chosen": -4.511711597442627, "logits/rejected": -3.5610032081604004, "logps/chosen": -532.9492797851562, "logps/rejected": -624.8191528320312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.410839796066284, "rewards/margins": 22.01413345336914, "rewards/rejected": -25.424972534179688, "step": 547 }, { "epoch": 1.81, "learning_rate": 2.0542850599257647e-06, "logits/chosen": -4.451583385467529, "logits/rejected": -4.111062049865723, "logps/chosen": -278.7889099121094, "logps/rejected": -552.5603637695312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.358865350484848, "rewards/margins": 24.038894653320312, "rewards/rejected": -23.680028915405273, "step": 548 }, { "epoch": 1.82, "learning_rate": 2.044806001243714e-06, "logits/chosen": -4.337294578552246, "logits/rejected": -3.4350380897521973, "logps/chosen": -326.2384033203125, "logps/rejected": -515.554931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7382354736328125, "rewards/margins": 21.979162216186523, "rewards/rejected": -23.717397689819336, "step": 549 }, { "epoch": 1.82, "learning_rate": 2.035333706202192e-06, "logits/chosen": -4.380681991577148, "logits/rejected": -3.620570421218872, "logps/chosen": -479.2039794921875, "logps/rejected": -575.4329833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.499276876449585, "rewards/margins": 22.872432708740234, "rewards/rejected": -26.3717098236084, "step": 550 }, { "epoch": 1.82, "learning_rate": 2.02586831554822e-06, "logits/chosen": -4.407192230224609, "logits/rejected": -3.4030561447143555, "logps/chosen": -649.1802978515625, "logps/rejected": -588.2078857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.171984672546387, "rewards/margins": 14.105921745300293, "rewards/rejected": -23.27790641784668, "step": 551 }, { "epoch": 1.83, "learning_rate": 2.016409969926224e-06, "logits/chosen": -4.430113315582275, "logits/rejected": -3.4198334217071533, "logps/chosen": -469.12835693359375, "logps/rejected": -531.3338623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.093273878097534, "rewards/margins": 20.878278732299805, "rewards/rejected": -22.9715518951416, "step": 552 }, { "epoch": 1.83, "learning_rate": 2.0069588098759545e-06, "logits/chosen": -4.452852249145508, "logits/rejected": -3.483274221420288, "logps/chosen": -433.011962890625, "logps/rejected": -416.55499267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3766419887542725, "rewards/margins": 15.160001754760742, "rewards/rejected": -18.536643981933594, "step": 553 }, { "epoch": 1.83, "learning_rate": 1.9975149758303885e-06, "logits/chosen": -4.393960475921631, "logits/rejected": -4.177896976470947, "logps/chosen": -401.71173095703125, "logps/rejected": -572.4862670898438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4083192348480225, "rewards/margins": 17.304309844970703, "rewards/rejected": -20.712629318237305, "step": 554 }, { "epoch": 1.84, "learning_rate": 1.9880786081136498e-06, "logits/chosen": -4.309216022491455, "logits/rejected": -3.407667636871338, "logps/chosen": -487.53997802734375, "logps/rejected": -560.0919189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.376837253570557, "rewards/margins": 22.110013961791992, "rewards/rejected": -26.48685073852539, "step": 555 }, { "epoch": 1.84, "learning_rate": 1.97864984693892e-06, "logits/chosen": -4.4554338455200195, "logits/rejected": -3.7435030937194824, "logps/chosen": -295.1136779785156, "logps/rejected": -552.1599731445312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.03139038011431694, "rewards/margins": 23.800928115844727, "rewards/rejected": -23.832319259643555, "step": 556 }, { "epoch": 1.84, "learning_rate": 1.969228832406358e-06, "logits/chosen": -4.349367141723633, "logits/rejected": -3.675476312637329, "logps/chosen": -473.7110290527344, "logps/rejected": -561.7530517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.291208028793335, "rewards/margins": 22.24436378479004, "rewards/rejected": -25.535572052001953, "step": 557 }, { "epoch": 1.85, "learning_rate": 1.9598157045010162e-06, "logits/chosen": -4.389923572540283, "logits/rejected": -3.8660364151000977, "logps/chosen": -331.4177551269531, "logps/rejected": -573.893798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5319305658340454, "rewards/margins": 22.721609115600586, "rewards/rejected": -24.2535400390625, "step": 558 }, { "epoch": 1.85, "learning_rate": 1.9504106030907605e-06, "logits/chosen": -4.329573631286621, "logits/rejected": -3.885268449783325, "logps/chosen": -326.060546875, "logps/rejected": -568.00048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9560517072677612, "rewards/margins": 24.47171974182129, "rewards/rejected": -26.427770614624023, "step": 559 }, { "epoch": 1.85, "learning_rate": 1.941013667924194e-06, "logits/chosen": -4.212650299072266, "logits/rejected": -4.40893030166626, "logps/chosen": -276.91845703125, "logps/rejected": -596.6785888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.8055969476699829, "rewards/margins": 26.083093643188477, "rewards/rejected": -25.277496337890625, "step": 560 }, { "epoch": 1.86, "learning_rate": 1.931625038628577e-06, "logits/chosen": -4.581705093383789, "logits/rejected": -4.361379623413086, "logps/chosen": -182.0465087890625, "logps/rejected": -486.5976867675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 7.336042881011963, "rewards/margins": 29.367876052856445, "rewards/rejected": -22.03183364868164, "step": 561 }, { "epoch": 1.86, "learning_rate": 1.9222448547077573e-06, "logits/chosen": -4.375710964202881, "logits/rejected": -3.460885524749756, "logps/chosen": -353.3544616699219, "logps/rejected": -569.689697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 1.4268616437911987, "rewards/margins": 26.47759437561035, "rewards/rejected": -25.05073356628418, "step": 562 }, { "epoch": 1.86, "learning_rate": 1.9128732555400915e-06, "logits/chosen": -4.408329486846924, "logits/rejected": -3.5342228412628174, "logps/chosen": -372.318359375, "logps/rejected": -599.8499755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.678417921066284, "rewards/margins": 24.07634162902832, "rewards/rejected": -26.754758834838867, "step": 563 }, { "epoch": 1.87, "learning_rate": 1.9035103803763793e-06, "logits/chosen": -4.354351043701172, "logits/rejected": -3.8232436180114746, "logps/chosen": -391.6895751953125, "logps/rejected": -589.3079833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1179261207580566, "rewards/margins": 23.219764709472656, "rewards/rejected": -25.337690353393555, "step": 564 }, { "epoch": 1.87, "learning_rate": 1.8941563683377905e-06, "logits/chosen": -4.406360149383545, "logits/rejected": -3.944713830947876, "logps/chosen": -403.63153076171875, "logps/rejected": -615.5775146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.874408006668091, "rewards/margins": 24.821279525756836, "rewards/rejected": -28.695688247680664, "step": 565 }, { "epoch": 1.87, "learning_rate": 1.884811358413801e-06, "logits/chosen": -4.464740753173828, "logits/rejected": -3.442706346511841, "logps/chosen": -399.2990417480469, "logps/rejected": -605.0084228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.047555446624756, "rewards/margins": 23.2379093170166, "rewards/rejected": -27.285465240478516, "step": 566 }, { "epoch": 1.88, "learning_rate": 1.8754754894601252e-06, "logits/chosen": -4.522468566894531, "logits/rejected": -3.3076298236846924, "logps/chosen": -535.175048828125, "logps/rejected": -617.84130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.08831787109375, "rewards/margins": 23.37908935546875, "rewards/rejected": -24.4674072265625, "step": 567 }, { "epoch": 1.88, "learning_rate": 1.8661489001966526e-06, "logits/chosen": -4.174889087677002, "logits/rejected": -3.616316795349121, "logps/chosen": -378.4776306152344, "logps/rejected": -532.67724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2665985822677612, "rewards/margins": 22.59225845336914, "rewards/rejected": -23.858856201171875, "step": 568 }, { "epoch": 1.88, "learning_rate": 1.8568317292053894e-06, "logits/chosen": -4.318294525146484, "logits/rejected": -3.6056463718414307, "logps/chosen": -534.0852661132812, "logps/rejected": -597.2578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.975506544113159, "rewards/margins": 23.4164981842041, "rewards/rejected": -27.392004013061523, "step": 569 }, { "epoch": 1.89, "learning_rate": 1.8475241149283957e-06, "logits/chosen": -4.532352447509766, "logits/rejected": -4.441859245300293, "logps/chosen": -293.224853515625, "logps/rejected": -544.9093017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.389201641082764, "rewards/margins": 20.809892654418945, "rewards/rejected": -25.199094772338867, "step": 570 }, { "epoch": 1.89, "learning_rate": 1.8382261956657318e-06, "logits/chosen": -4.497768878936768, "logits/rejected": -4.419166088104248, "logps/chosen": -292.6662292480469, "logps/rejected": -379.49395751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1020783185958862, "rewards/margins": 11.518327713012695, "rewards/rejected": -12.620406150817871, "step": 571 }, { "epoch": 1.89, "learning_rate": 1.8289381095734005e-06, "logits/chosen": -4.367152214050293, "logits/rejected": -2.9046382904052734, "logps/chosen": -440.2015686035156, "logps/rejected": -454.2406311035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.351083278656006, "rewards/margins": 13.648462295532227, "rewards/rejected": -17.99954605102539, "step": 572 }, { "epoch": 1.9, "learning_rate": 1.8196599946612956e-06, "logits/chosen": -4.535996913909912, "logits/rejected": -3.8284554481506348, "logps/chosen": -424.919677734375, "logps/rejected": -526.9390258789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5133728981018066, "rewards/margins": 21.94275665283203, "rewards/rejected": -25.45612907409668, "step": 573 }, { "epoch": 1.9, "learning_rate": 1.8103919887911525e-06, "logits/chosen": -4.466793060302734, "logits/rejected": -4.0005412101745605, "logps/chosen": -361.57464599609375, "logps/rejected": -585.3212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8040528297424316, "rewards/margins": 24.93181800842285, "rewards/rejected": -27.735870361328125, "step": 574 }, { "epoch": 1.9, "learning_rate": 1.8011342296744961e-06, "logits/chosen": -4.356902599334717, "logits/rejected": -3.8104918003082275, "logps/chosen": -461.15618896484375, "logps/rejected": -494.272216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.315740942955017, "rewards/margins": 23.60637664794922, "rewards/rejected": -24.922117233276367, "step": 575 }, { "epoch": 1.91, "learning_rate": 1.7918868548705982e-06, "logits/chosen": -4.411547660827637, "logits/rejected": -3.802309989929199, "logps/chosen": -356.2088623046875, "logps/rejected": -525.9064331054688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3290618658065796, "rewards/margins": 24.430875778198242, "rewards/rejected": -25.759937286376953, "step": 576 }, { "epoch": 1.91, "learning_rate": 1.782650001784431e-06, "logits/chosen": -4.463867664337158, "logits/rejected": -4.220371723175049, "logps/chosen": -376.61712646484375, "logps/rejected": -563.845947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.642895698547363, "rewards/margins": 16.145103454589844, "rewards/rejected": -23.788000106811523, "step": 577 }, { "epoch": 1.91, "learning_rate": 1.7734238076646277e-06, "logits/chosen": -4.491535663604736, "logits/rejected": -3.6769204139709473, "logps/chosen": -401.6808166503906, "logps/rejected": -568.62890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.4421844482421875, "rewards/margins": 18.564163208007812, "rewards/rejected": -23.00634765625, "step": 578 }, { "epoch": 1.92, "learning_rate": 1.7642084096014405e-06, "logits/chosen": -4.624630928039551, "logits/rejected": -3.606212615966797, "logps/chosen": -432.09954833984375, "logps/rejected": -433.9404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.48089599609375, "rewards/margins": 16.043981552124023, "rewards/rejected": -20.524877548217773, "step": 579 }, { "epoch": 1.92, "learning_rate": 1.7550039445247069e-06, "logits/chosen": -4.3712663650512695, "logits/rejected": -4.4638824462890625, "logps/chosen": -369.18035888671875, "logps/rejected": -690.4774169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6518921852111816, "rewards/margins": 26.73978042602539, "rewards/rejected": -30.391672134399414, "step": 580 }, { "epoch": 1.92, "learning_rate": 1.7458105492018114e-06, "logits/chosen": -4.677394866943359, "logits/rejected": -3.6269001960754395, "logps/chosen": -279.61865234375, "logps/rejected": -505.4795837402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7254761457443237, "rewards/margins": 18.095529556274414, "rewards/rejected": -19.82100486755371, "step": 581 }, { "epoch": 1.93, "learning_rate": 1.736628360235657e-06, "logits/chosen": -4.3663225173950195, "logits/rejected": -3.769561290740967, "logps/chosen": -465.2927551269531, "logps/rejected": -652.935302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1756927967071533, "rewards/margins": 25.69392967224121, "rewards/rejected": -28.8696231842041, "step": 582 }, { "epoch": 1.93, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -4.502114772796631, "logits/rejected": -3.25954008102417, "logps/chosen": -452.51556396484375, "logps/rejected": -602.263916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.51710844039917, "rewards/margins": 23.61882209777832, "rewards/rejected": -28.13593101501465, "step": 583 }, { "epoch": 1.93, "learning_rate": 1.718298146950585e-06, "logits/chosen": -4.25767183303833, "logits/rejected": -3.9623312950134277, "logps/chosen": -435.0552978515625, "logps/rejected": -568.56396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.063119411468506, "rewards/margins": 18.019424438476562, "rewards/rejected": -24.082544326782227, "step": 584 }, { "epoch": 1.94, "learning_rate": 1.7091503949967987e-06, "logits/chosen": -4.462721824645996, "logits/rejected": -3.704984664916992, "logps/chosen": -492.3757019042969, "logps/rejected": -617.8662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.16733717918396, "rewards/margins": 26.13848876953125, "rewards/rejected": -28.30582618713379, "step": 585 }, { "epoch": 1.94, "learning_rate": 1.70001439412597e-06, "logits/chosen": -4.391927242279053, "logits/rejected": -3.5645809173583984, "logps/chosen": -467.19659423828125, "logps/rejected": -538.8868408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.403265357017517, "rewards/margins": 21.934112548828125, "rewards/rejected": -23.337377548217773, "step": 586 }, { "epoch": 1.94, "learning_rate": 1.690890280088187e-06, "logits/chosen": -4.577780246734619, "logits/rejected": -3.5604734420776367, "logps/chosen": -326.17095947265625, "logps/rejected": -708.198486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.004345655441284, "rewards/margins": 29.266477584838867, "rewards/rejected": -31.270822525024414, "step": 587 }, { "epoch": 1.95, "learning_rate": 1.6817781884569146e-06, "logits/chosen": -4.457272529602051, "logits/rejected": -4.030734062194824, "logps/chosen": -476.1500244140625, "logps/rejected": -608.76904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.444561719894409, "rewards/margins": 18.88410186767578, "rewards/rejected": -22.328662872314453, "step": 588 }, { "epoch": 1.95, "learning_rate": 1.6726782546269793e-06, "logits/chosen": -4.448894500732422, "logits/rejected": -3.7090609073638916, "logps/chosen": -459.2810363769531, "logps/rejected": -570.8338012695312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.392965793609619, "rewards/margins": 22.371891021728516, "rewards/rejected": -25.764856338500977, "step": 589 }, { "epoch": 1.95, "learning_rate": 1.663590613812556e-06, "logits/chosen": -4.298842430114746, "logits/rejected": -3.4766552448272705, "logps/chosen": -532.4111328125, "logps/rejected": -557.8978881835938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.226391792297363, "rewards/margins": 18.984363555908203, "rewards/rejected": -24.21075439453125, "step": 590 }, { "epoch": 1.96, "learning_rate": 1.6545154010451613e-06, "logits/chosen": -4.334470748901367, "logits/rejected": -4.3563313484191895, "logps/chosen": -270.686767578125, "logps/rejected": -525.21630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5087006092071533, "rewards/margins": 20.143381118774414, "rewards/rejected": -23.652082443237305, "step": 591 }, { "epoch": 1.96, "learning_rate": 1.645452751171645e-06, "logits/chosen": -4.368016242980957, "logits/rejected": -4.006409645080566, "logps/chosen": -393.4631652832031, "logps/rejected": -556.775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8408355712890625, "rewards/margins": 20.946945190429688, "rewards/rejected": -21.78778076171875, "step": 592 }, { "epoch": 1.96, "learning_rate": 1.6364027988521875e-06, "logits/chosen": -4.507727146148682, "logits/rejected": -4.296899795532227, "logps/chosen": -386.19500732421875, "logps/rejected": -553.1477661132812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.39754056930542, "rewards/margins": 17.971939086914062, "rewards/rejected": -23.36948013305664, "step": 593 }, { "epoch": 1.97, "learning_rate": 1.6273656785582986e-06, "logits/chosen": -4.520374298095703, "logits/rejected": -4.154501914978027, "logps/chosen": -455.46044921875, "logps/rejected": -627.4146118164062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.685473680496216, "rewards/margins": 27.178955078125, "rewards/rejected": -30.864429473876953, "step": 594 }, { "epoch": 1.97, "learning_rate": 1.618341524570819e-06, "logits/chosen": -4.454592227935791, "logits/rejected": -3.7837131023406982, "logps/chosen": -486.25390625, "logps/rejected": -666.9676513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.168347120285034, "rewards/margins": 29.600313186645508, "rewards/rejected": -31.768659591674805, "step": 595 }, { "epoch": 1.97, "learning_rate": 1.6093304709779273e-06, "logits/chosen": -4.353521823883057, "logits/rejected": -3.976317882537842, "logps/chosen": -329.077880859375, "logps/rejected": -591.4760131835938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.400656223297119, "rewards/margins": 23.866348266601562, "rewards/rejected": -26.267004013061523, "step": 596 }, { "epoch": 1.98, "learning_rate": 1.6003326516731431e-06, "logits/chosen": -4.472027778625488, "logits/rejected": -4.315833568572998, "logps/chosen": -276.4881896972656, "logps/rejected": -512.5158081054688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.355543613433838, "rewards/margins": 19.143077850341797, "rewards/rejected": -24.498620986938477, "step": 597 }, { "epoch": 1.98, "learning_rate": 1.5913482003533437e-06, "logits/chosen": -4.33258581161499, "logits/rejected": -3.6555116176605225, "logps/chosen": -420.076416015625, "logps/rejected": -683.6670532226562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.608905076980591, "rewards/margins": 27.909997940063477, "rewards/rejected": -31.518903732299805, "step": 598 }, { "epoch": 1.98, "learning_rate": 1.58237725051677e-06, "logits/chosen": -4.476813793182373, "logits/rejected": -3.9552001953125, "logps/chosen": -341.75732421875, "logps/rejected": -577.337158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.35352784395217896, "rewards/margins": 26.75636863708496, "rewards/rejected": -26.402841567993164, "step": 599 }, { "epoch": 1.99, "learning_rate": 1.5734199354610513e-06, "logits/chosen": -4.467745780944824, "logits/rejected": -3.7632243633270264, "logps/chosen": -418.8152770996094, "logps/rejected": -568.2605590820312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.041018962860107, "rewards/margins": 19.723466873168945, "rewards/rejected": -25.76448631286621, "step": 600 }, { "epoch": 1.99, "learning_rate": 1.564476388281216e-06, "logits/chosen": -4.7007622718811035, "logits/rejected": -4.7007622718811035, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 601 }, { "epoch": 1.99, "learning_rate": 1.555546741867722e-06, "logits/chosen": -4.600505828857422, "logits/rejected": -3.6562798023223877, "logps/chosen": -442.61224365234375, "logps/rejected": -563.57666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.908184826374054, "rewards/margins": 27.995800018310547, "rewards/rejected": -27.087615966796875, "step": 602 }, { "epoch": 2.0, "learning_rate": 1.5466311289044755e-06, "logits/chosen": -4.369237899780273, "logits/rejected": -3.5185129642486572, "logps/chosen": -419.2476501464844, "logps/rejected": -565.74951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.13568115234375, "rewards/margins": 21.686038970947266, "rewards/rejected": -26.821720123291016, "step": 603 }, { "epoch": 2.0, "learning_rate": 1.5377296818668638e-06, "logits/chosen": -4.579688549041748, "logits/rejected": -4.579688549041748, "logps/chosen": 0.0, "logps/rejected": 0.0, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 604 } ], "logging_steps": 1, "max_steps": 906, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }