{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0848712471176167, "eval_steps": 1000, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0012339299898971982, "grad_norm": 0.2333984375, "learning_rate": 5.128205128205128e-09, "loss": 0.0731, "step": 10 }, { "epoch": 0.0024678599797943964, "grad_norm": 0.279296875, "learning_rate": 1.0256410256410256e-08, "loss": 0.0783, "step": 20 }, { "epoch": 0.0037017899696915944, "grad_norm": 0.1533203125, "learning_rate": 1.5384615384615385e-08, "loss": 0.0672, "step": 30 }, { "epoch": 0.004935719959588793, "grad_norm": 0.1416015625, "learning_rate": 2.0512820512820512e-08, "loss": 0.0843, "step": 40 }, { "epoch": 0.006169649949485991, "grad_norm": 0.279296875, "learning_rate": 2.564102564102564e-08, "loss": 0.0695, "step": 50 }, { "epoch": 0.007403579939383189, "grad_norm": 0.283203125, "learning_rate": 3.076923076923077e-08, "loss": 0.083, "step": 60 }, { "epoch": 0.008637509929280387, "grad_norm": 0.359375, "learning_rate": 3.589743589743589e-08, "loss": 0.0871, "step": 70 }, { "epoch": 0.009871439919177586, "grad_norm": 0.208984375, "learning_rate": 4.1025641025641025e-08, "loss": 0.0714, "step": 80 }, { "epoch": 0.011105369909074785, "grad_norm": 0.30078125, "learning_rate": 4.615384615384615e-08, "loss": 0.082, "step": 90 }, { "epoch": 0.012339299898971982, "grad_norm": 0.259765625, "learning_rate": 5.128205128205128e-08, "loss": 0.0782, "step": 100 }, { "epoch": 0.01357322988886918, "grad_norm": 0.263671875, "learning_rate": 5.641025641025641e-08, "loss": 0.0722, "step": 110 }, { "epoch": 0.014807159878766378, "grad_norm": 0.30859375, "learning_rate": 6.153846153846154e-08, "loss": 0.0745, "step": 120 }, { "epoch": 0.016041089868663577, "grad_norm": 0.265625, "learning_rate": 6.666666666666667e-08, "loss": 0.0717, "step": 130 }, { "epoch": 0.017275019858560774, "grad_norm": 0.251953125, "learning_rate": 7.179487179487178e-08, "loss": 0.0718, "step": 140 }, { "epoch": 0.018508949848457974, "grad_norm": 0.2333984375, "learning_rate": 7.692307692307692e-08, "loss": 0.0797, "step": 150 }, { "epoch": 0.01974287983835517, "grad_norm": 0.275390625, "learning_rate": 8.205128205128205e-08, "loss": 0.0735, "step": 160 }, { "epoch": 0.02097680982825237, "grad_norm": 0.26953125, "learning_rate": 8.717948717948718e-08, "loss": 0.0869, "step": 170 }, { "epoch": 0.02221073981814957, "grad_norm": 0.1748046875, "learning_rate": 9.23076923076923e-08, "loss": 0.0798, "step": 180 }, { "epoch": 0.023444669808046766, "grad_norm": 0.171875, "learning_rate": 9.743589743589743e-08, "loss": 0.0821, "step": 190 }, { "epoch": 0.024678599797943963, "grad_norm": 0.46875, "learning_rate": 1.0256410256410256e-07, "loss": 0.0767, "step": 200 }, { "epoch": 0.025912529787841164, "grad_norm": 0.30859375, "learning_rate": 1.076923076923077e-07, "loss": 0.0756, "step": 210 }, { "epoch": 0.02714645977773836, "grad_norm": 0.140625, "learning_rate": 1.1282051282051281e-07, "loss": 0.0763, "step": 220 }, { "epoch": 0.028380389767635558, "grad_norm": 0.77734375, "learning_rate": 1.1794871794871794e-07, "loss": 0.0888, "step": 230 }, { "epoch": 0.029614319757532755, "grad_norm": 0.28515625, "learning_rate": 1.2307692307692308e-07, "loss": 0.0757, "step": 240 }, { "epoch": 0.030848249747429956, "grad_norm": 0.16796875, "learning_rate": 1.2820512820512818e-07, "loss": 0.0683, "step": 250 }, { "epoch": 0.03208217973732715, "grad_norm": 0.224609375, "learning_rate": 1.3333333333333334e-07, "loss": 0.086, "step": 260 }, { "epoch": 0.033316109727224354, "grad_norm": 0.322265625, "learning_rate": 1.3846153846153846e-07, "loss": 0.0741, "step": 270 }, { "epoch": 0.03455003971712155, "grad_norm": 0.201171875, "learning_rate": 1.4358974358974356e-07, "loss": 0.0685, "step": 280 }, { "epoch": 0.03578396970701875, "grad_norm": 0.177734375, "learning_rate": 1.4871794871794872e-07, "loss": 0.0785, "step": 290 }, { "epoch": 0.03701789969691595, "grad_norm": 0.17578125, "learning_rate": 1.5384615384615385e-07, "loss": 0.0696, "step": 300 }, { "epoch": 0.03825182968681314, "grad_norm": 0.1484375, "learning_rate": 1.5897435897435895e-07, "loss": 0.0873, "step": 310 }, { "epoch": 0.03948575967671034, "grad_norm": 0.1376953125, "learning_rate": 1.641025641025641e-07, "loss": 0.0768, "step": 320 }, { "epoch": 0.04071968966660754, "grad_norm": 0.166015625, "learning_rate": 1.6923076923076923e-07, "loss": 0.0744, "step": 330 }, { "epoch": 0.04195361965650474, "grad_norm": 0.1494140625, "learning_rate": 1.7435897435897435e-07, "loss": 0.0783, "step": 340 }, { "epoch": 0.04318754964640194, "grad_norm": 0.310546875, "learning_rate": 1.7948717948717948e-07, "loss": 0.0763, "step": 350 }, { "epoch": 0.04442147963629914, "grad_norm": 0.2060546875, "learning_rate": 1.846153846153846e-07, "loss": 0.0735, "step": 360 }, { "epoch": 0.04565540962619633, "grad_norm": 0.1630859375, "learning_rate": 1.8974358974358974e-07, "loss": 0.0723, "step": 370 }, { "epoch": 0.04688933961609353, "grad_norm": 0.1591796875, "learning_rate": 1.9487179487179486e-07, "loss": 0.0823, "step": 380 }, { "epoch": 0.04812326960599073, "grad_norm": 0.1728515625, "learning_rate": 2e-07, "loss": 0.0664, "step": 390 }, { "epoch": 0.04935719959588793, "grad_norm": 0.30078125, "learning_rate": 2.0512820512820512e-07, "loss": 0.0756, "step": 400 }, { "epoch": 0.05059112958578513, "grad_norm": 0.205078125, "learning_rate": 2.1025641025641025e-07, "loss": 0.0753, "step": 410 }, { "epoch": 0.05182505957568233, "grad_norm": 0.212890625, "learning_rate": 2.153846153846154e-07, "loss": 0.0764, "step": 420 }, { "epoch": 0.05305898956557952, "grad_norm": 0.2021484375, "learning_rate": 2.205128205128205e-07, "loss": 0.077, "step": 430 }, { "epoch": 0.05429291955547672, "grad_norm": 0.2578125, "learning_rate": 2.2564102564102563e-07, "loss": 0.0655, "step": 440 }, { "epoch": 0.05552684954537392, "grad_norm": 0.1484375, "learning_rate": 2.3076923076923078e-07, "loss": 0.0664, "step": 450 }, { "epoch": 0.056760779535271116, "grad_norm": 0.185546875, "learning_rate": 2.3589743589743588e-07, "loss": 0.0709, "step": 460 }, { "epoch": 0.05799470952516832, "grad_norm": 0.126953125, "learning_rate": 2.41025641025641e-07, "loss": 0.076, "step": 470 }, { "epoch": 0.05922863951506551, "grad_norm": 0.2470703125, "learning_rate": 2.4615384615384616e-07, "loss": 0.0752, "step": 480 }, { "epoch": 0.06046256950496271, "grad_norm": 0.1630859375, "learning_rate": 2.5128205128205126e-07, "loss": 0.0771, "step": 490 }, { "epoch": 0.06169649949485991, "grad_norm": 0.2314453125, "learning_rate": 2.5641025641025636e-07, "loss": 0.0687, "step": 500 }, { "epoch": 0.0629304294847571, "grad_norm": 0.201171875, "learning_rate": 2.615384615384615e-07, "loss": 0.0685, "step": 510 }, { "epoch": 0.0641643594746543, "grad_norm": 0.2255859375, "learning_rate": 2.6666666666666667e-07, "loss": 0.0836, "step": 520 }, { "epoch": 0.0653982894645515, "grad_norm": 0.173828125, "learning_rate": 2.7179487179487177e-07, "loss": 0.0854, "step": 530 }, { "epoch": 0.06663221945444871, "grad_norm": 0.1904296875, "learning_rate": 2.7692307692307693e-07, "loss": 0.0628, "step": 540 }, { "epoch": 0.06786614944434591, "grad_norm": 0.193359375, "learning_rate": 2.8205128205128203e-07, "loss": 0.0683, "step": 550 }, { "epoch": 0.0691000794342431, "grad_norm": 0.2197265625, "learning_rate": 2.8717948717948713e-07, "loss": 0.0703, "step": 560 }, { "epoch": 0.0703340094241403, "grad_norm": 0.212890625, "learning_rate": 2.9230769230769234e-07, "loss": 0.0729, "step": 570 }, { "epoch": 0.0715679394140375, "grad_norm": 0.181640625, "learning_rate": 2.9743589743589744e-07, "loss": 0.0698, "step": 580 }, { "epoch": 0.0728018694039347, "grad_norm": 0.166015625, "learning_rate": 3.0256410256410254e-07, "loss": 0.0621, "step": 590 }, { "epoch": 0.0740357993938319, "grad_norm": 0.2119140625, "learning_rate": 3.076923076923077e-07, "loss": 0.0772, "step": 600 }, { "epoch": 0.0752697293837291, "grad_norm": 0.21484375, "learning_rate": 3.128205128205128e-07, "loss": 0.0908, "step": 610 }, { "epoch": 0.07650365937362628, "grad_norm": 0.3046875, "learning_rate": 3.179487179487179e-07, "loss": 0.0827, "step": 620 }, { "epoch": 0.07773758936352348, "grad_norm": 0.2060546875, "learning_rate": 3.230769230769231e-07, "loss": 0.0703, "step": 630 }, { "epoch": 0.07897151935342069, "grad_norm": 0.26953125, "learning_rate": 3.282051282051282e-07, "loss": 0.0781, "step": 640 }, { "epoch": 0.08020544934331789, "grad_norm": 0.173828125, "learning_rate": 3.333333333333333e-07, "loss": 0.0656, "step": 650 }, { "epoch": 0.08143937933321509, "grad_norm": 0.23828125, "learning_rate": 3.3846153846153845e-07, "loss": 0.0785, "step": 660 }, { "epoch": 0.08267330932311227, "grad_norm": 0.314453125, "learning_rate": 3.4358974358974356e-07, "loss": 0.0713, "step": 670 }, { "epoch": 0.08390723931300947, "grad_norm": 0.1484375, "learning_rate": 3.487179487179487e-07, "loss": 0.077, "step": 680 }, { "epoch": 0.08514116930290667, "grad_norm": 0.1640625, "learning_rate": 3.5384615384615386e-07, "loss": 0.0795, "step": 690 }, { "epoch": 0.08637509929280388, "grad_norm": 0.1806640625, "learning_rate": 3.5897435897435896e-07, "loss": 0.0613, "step": 700 }, { "epoch": 0.08760902928270108, "grad_norm": 0.2177734375, "learning_rate": 3.6410256410256406e-07, "loss": 0.0716, "step": 710 }, { "epoch": 0.08884295927259828, "grad_norm": 0.1728515625, "learning_rate": 3.692307692307692e-07, "loss": 0.0721, "step": 720 }, { "epoch": 0.09007688926249546, "grad_norm": 0.1767578125, "learning_rate": 3.743589743589743e-07, "loss": 0.0739, "step": 730 }, { "epoch": 0.09131081925239266, "grad_norm": 0.267578125, "learning_rate": 3.7948717948717947e-07, "loss": 0.0706, "step": 740 }, { "epoch": 0.09254474924228986, "grad_norm": 0.255859375, "learning_rate": 3.8461538461538463e-07, "loss": 0.0546, "step": 750 }, { "epoch": 0.09377867923218706, "grad_norm": 0.169921875, "learning_rate": 3.8974358974358973e-07, "loss": 0.0596, "step": 760 }, { "epoch": 0.09501260922208427, "grad_norm": 0.205078125, "learning_rate": 3.9487179487179483e-07, "loss": 0.066, "step": 770 }, { "epoch": 0.09624653921198147, "grad_norm": 0.2578125, "learning_rate": 4e-07, "loss": 0.076, "step": 780 }, { "epoch": 0.09748046920187865, "grad_norm": 0.287109375, "learning_rate": 4.0512820512820514e-07, "loss": 0.0731, "step": 790 }, { "epoch": 0.09871439919177585, "grad_norm": 0.1494140625, "learning_rate": 4.1025641025641024e-07, "loss": 0.0568, "step": 800 }, { "epoch": 0.09994832918167305, "grad_norm": 0.2392578125, "learning_rate": 4.153846153846154e-07, "loss": 0.0897, "step": 810 }, { "epoch": 0.10118225917157025, "grad_norm": 0.11865234375, "learning_rate": 4.205128205128205e-07, "loss": 0.0765, "step": 820 }, { "epoch": 0.10241618916146746, "grad_norm": 0.1396484375, "learning_rate": 4.256410256410256e-07, "loss": 0.0782, "step": 830 }, { "epoch": 0.10365011915136466, "grad_norm": 0.177734375, "learning_rate": 4.307692307692308e-07, "loss": 0.0633, "step": 840 }, { "epoch": 0.10488404914126184, "grad_norm": 0.2119140625, "learning_rate": 4.358974358974359e-07, "loss": 0.0625, "step": 850 }, { "epoch": 0.10611797913115904, "grad_norm": 0.16015625, "learning_rate": 4.41025641025641e-07, "loss": 0.0624, "step": 860 }, { "epoch": 0.10735190912105624, "grad_norm": 0.251953125, "learning_rate": 4.4615384615384615e-07, "loss": 0.0842, "step": 870 }, { "epoch": 0.10858583911095344, "grad_norm": 0.34375, "learning_rate": 4.5128205128205125e-07, "loss": 0.0597, "step": 880 }, { "epoch": 0.10981976910085064, "grad_norm": 0.201171875, "learning_rate": 4.5641025641025636e-07, "loss": 0.0625, "step": 890 }, { "epoch": 0.11105369909074785, "grad_norm": 0.1318359375, "learning_rate": 4.6153846153846156e-07, "loss": 0.0641, "step": 900 }, { "epoch": 0.11228762908064503, "grad_norm": 0.25, "learning_rate": 4.6666666666666666e-07, "loss": 0.0818, "step": 910 }, { "epoch": 0.11352155907054223, "grad_norm": 0.1943359375, "learning_rate": 4.7179487179487176e-07, "loss": 0.0704, "step": 920 }, { "epoch": 0.11475548906043943, "grad_norm": 0.263671875, "learning_rate": 4.769230769230769e-07, "loss": 0.0863, "step": 930 }, { "epoch": 0.11598941905033663, "grad_norm": 0.185546875, "learning_rate": 4.82051282051282e-07, "loss": 0.0628, "step": 940 }, { "epoch": 0.11722334904023383, "grad_norm": 0.205078125, "learning_rate": 4.871794871794871e-07, "loss": 0.0553, "step": 950 }, { "epoch": 0.11845727903013102, "grad_norm": 0.27734375, "learning_rate": 4.923076923076923e-07, "loss": 0.0703, "step": 960 }, { "epoch": 0.11969120902002822, "grad_norm": 0.2421875, "learning_rate": 4.974358974358974e-07, "loss": 0.0767, "step": 970 }, { "epoch": 0.12092513900992542, "grad_norm": 0.169921875, "learning_rate": 5.025641025641025e-07, "loss": 0.052, "step": 980 }, { "epoch": 0.12215906899982262, "grad_norm": 0.1845703125, "learning_rate": 5.076923076923076e-07, "loss": 0.0715, "step": 990 }, { "epoch": 0.12339299898971982, "grad_norm": 0.2001953125, "learning_rate": 5.128205128205127e-07, "loss": 0.0788, "step": 1000 }, { "epoch": 0.12339299898971982, "eval_exact_match": 0.6365030674846626, "eval_has_answer_correct": 0.5742793791574279, "eval_no_answer_correct": 0.7761194029850746, "step": 1000 }, { "epoch": 0.12462692897961702, "grad_norm": 0.1962890625, "learning_rate": 5.179487179487179e-07, "loss": 0.0772, "step": 1010 }, { "epoch": 0.1258608589695142, "grad_norm": 0.201171875, "learning_rate": 5.23076923076923e-07, "loss": 0.0561, "step": 1020 }, { "epoch": 0.12709478895941143, "grad_norm": 0.1826171875, "learning_rate": 5.282051282051282e-07, "loss": 0.0895, "step": 1030 }, { "epoch": 0.1283287189493086, "grad_norm": 0.255859375, "learning_rate": 5.333333333333333e-07, "loss": 0.0847, "step": 1040 }, { "epoch": 0.1295626489392058, "grad_norm": 0.1796875, "learning_rate": 5.384615384615384e-07, "loss": 0.0527, "step": 1050 }, { "epoch": 0.130796578929103, "grad_norm": 0.2373046875, "learning_rate": 5.435897435897435e-07, "loss": 0.0696, "step": 1060 }, { "epoch": 0.1320305089190002, "grad_norm": 0.1943359375, "learning_rate": 5.487179487179488e-07, "loss": 0.0714, "step": 1070 }, { "epoch": 0.13326443890889741, "grad_norm": 0.16015625, "learning_rate": 5.538461538461539e-07, "loss": 0.0774, "step": 1080 }, { "epoch": 0.1344983688987946, "grad_norm": 0.267578125, "learning_rate": 5.58974358974359e-07, "loss": 0.082, "step": 1090 }, { "epoch": 0.13573229888869182, "grad_norm": 0.2314453125, "learning_rate": 5.641025641025641e-07, "loss": 0.0714, "step": 1100 }, { "epoch": 0.136966228878589, "grad_norm": 0.349609375, "learning_rate": 5.692307692307692e-07, "loss": 0.0823, "step": 1110 }, { "epoch": 0.1382001588684862, "grad_norm": 0.1513671875, "learning_rate": 5.743589743589743e-07, "loss": 0.0742, "step": 1120 }, { "epoch": 0.1394340888583834, "grad_norm": 0.2236328125, "learning_rate": 5.794871794871795e-07, "loss": 0.084, "step": 1130 }, { "epoch": 0.1406680188482806, "grad_norm": 0.3203125, "learning_rate": 5.846153846153847e-07, "loss": 0.0736, "step": 1140 }, { "epoch": 0.1419019488381778, "grad_norm": 0.21484375, "learning_rate": 5.897435897435898e-07, "loss": 0.0581, "step": 1150 }, { "epoch": 0.143135878828075, "grad_norm": 0.1904296875, "learning_rate": 5.948717948717949e-07, "loss": 0.0707, "step": 1160 }, { "epoch": 0.14436980881797218, "grad_norm": 0.25390625, "learning_rate": 6e-07, "loss": 0.0771, "step": 1170 }, { "epoch": 0.1456037388078694, "grad_norm": 0.296875, "learning_rate": 6.051282051282051e-07, "loss": 0.0727, "step": 1180 }, { "epoch": 0.14683766879776658, "grad_norm": 0.169921875, "learning_rate": 6.102564102564103e-07, "loss": 0.0753, "step": 1190 }, { "epoch": 0.1480715987876638, "grad_norm": 0.31640625, "learning_rate": 6.153846153846154e-07, "loss": 0.0632, "step": 1200 }, { "epoch": 0.14930552877756098, "grad_norm": 0.248046875, "learning_rate": 6.205128205128205e-07, "loss": 0.0638, "step": 1210 }, { "epoch": 0.1505394587674582, "grad_norm": 0.251953125, "learning_rate": 6.256410256410256e-07, "loss": 0.083, "step": 1220 }, { "epoch": 0.15177338875735538, "grad_norm": 0.193359375, "learning_rate": 6.307692307692307e-07, "loss": 0.0842, "step": 1230 }, { "epoch": 0.15300731874725257, "grad_norm": 0.25390625, "learning_rate": 6.358974358974358e-07, "loss": 0.0753, "step": 1240 }, { "epoch": 0.15424124873714978, "grad_norm": 0.296875, "learning_rate": 6.410256410256411e-07, "loss": 0.0704, "step": 1250 }, { "epoch": 0.15547517872704697, "grad_norm": 0.318359375, "learning_rate": 6.461538461538462e-07, "loss": 0.0703, "step": 1260 }, { "epoch": 0.15670910871694418, "grad_norm": 0.146484375, "learning_rate": 6.512820512820513e-07, "loss": 0.0695, "step": 1270 }, { "epoch": 0.15794303870684137, "grad_norm": 0.248046875, "learning_rate": 6.564102564102564e-07, "loss": 0.0618, "step": 1280 }, { "epoch": 0.15917696869673856, "grad_norm": 0.1865234375, "learning_rate": 6.615384615384615e-07, "loss": 0.0827, "step": 1290 }, { "epoch": 0.16041089868663577, "grad_norm": 0.1494140625, "learning_rate": 6.666666666666666e-07, "loss": 0.0722, "step": 1300 }, { "epoch": 0.16164482867653296, "grad_norm": 0.19921875, "learning_rate": 6.717948717948717e-07, "loss": 0.0623, "step": 1310 }, { "epoch": 0.16287875866643017, "grad_norm": 0.115234375, "learning_rate": 6.769230769230769e-07, "loss": 0.0605, "step": 1320 }, { "epoch": 0.16411268865632736, "grad_norm": 0.30078125, "learning_rate": 6.82051282051282e-07, "loss": 0.0705, "step": 1330 }, { "epoch": 0.16534661864622455, "grad_norm": 0.1962890625, "learning_rate": 6.871794871794871e-07, "loss": 0.0664, "step": 1340 }, { "epoch": 0.16658054863612176, "grad_norm": 0.19140625, "learning_rate": 6.923076923076922e-07, "loss": 0.0574, "step": 1350 }, { "epoch": 0.16781447862601895, "grad_norm": 0.1982421875, "learning_rate": 6.974358974358974e-07, "loss": 0.0724, "step": 1360 }, { "epoch": 0.16904840861591616, "grad_norm": 0.326171875, "learning_rate": 7.025641025641025e-07, "loss": 0.0826, "step": 1370 }, { "epoch": 0.17028233860581335, "grad_norm": 0.1630859375, "learning_rate": 7.076923076923077e-07, "loss": 0.0774, "step": 1380 }, { "epoch": 0.17151626859571056, "grad_norm": 0.171875, "learning_rate": 7.128205128205128e-07, "loss": 0.0547, "step": 1390 }, { "epoch": 0.17275019858560775, "grad_norm": 0.1611328125, "learning_rate": 7.179487179487179e-07, "loss": 0.0583, "step": 1400 }, { "epoch": 0.17398412857550494, "grad_norm": 0.185546875, "learning_rate": 7.23076923076923e-07, "loss": 0.0656, "step": 1410 }, { "epoch": 0.17521805856540215, "grad_norm": 0.208984375, "learning_rate": 7.282051282051281e-07, "loss": 0.0756, "step": 1420 }, { "epoch": 0.17645198855529934, "grad_norm": 0.34765625, "learning_rate": 7.333333333333332e-07, "loss": 0.0669, "step": 1430 }, { "epoch": 0.17768591854519655, "grad_norm": 0.2470703125, "learning_rate": 7.384615384615384e-07, "loss": 0.0766, "step": 1440 }, { "epoch": 0.17891984853509374, "grad_norm": 0.1865234375, "learning_rate": 7.435897435897435e-07, "loss": 0.0844, "step": 1450 }, { "epoch": 0.18015377852499093, "grad_norm": 0.2333984375, "learning_rate": 7.487179487179486e-07, "loss": 0.0809, "step": 1460 }, { "epoch": 0.18138770851488814, "grad_norm": 0.2314453125, "learning_rate": 7.538461538461538e-07, "loss": 0.0754, "step": 1470 }, { "epoch": 0.18262163850478533, "grad_norm": 0.1875, "learning_rate": 7.589743589743589e-07, "loss": 0.0751, "step": 1480 }, { "epoch": 0.18385556849468254, "grad_norm": 0.1689453125, "learning_rate": 7.64102564102564e-07, "loss": 0.0718, "step": 1490 }, { "epoch": 0.18508949848457973, "grad_norm": 0.2138671875, "learning_rate": 7.692307692307693e-07, "loss": 0.0639, "step": 1500 }, { "epoch": 0.18632342847447694, "grad_norm": 0.322265625, "learning_rate": 7.743589743589744e-07, "loss": 0.0738, "step": 1510 }, { "epoch": 0.18755735846437413, "grad_norm": 0.369140625, "learning_rate": 7.794871794871795e-07, "loss": 0.0654, "step": 1520 }, { "epoch": 0.18879128845427132, "grad_norm": 0.1962890625, "learning_rate": 7.846153846153846e-07, "loss": 0.0889, "step": 1530 }, { "epoch": 0.19002521844416853, "grad_norm": 0.212890625, "learning_rate": 7.897435897435897e-07, "loss": 0.0724, "step": 1540 }, { "epoch": 0.19125914843406572, "grad_norm": 0.16015625, "learning_rate": 7.948717948717948e-07, "loss": 0.0704, "step": 1550 }, { "epoch": 0.19249307842396293, "grad_norm": 0.205078125, "learning_rate": 8e-07, "loss": 0.0676, "step": 1560 }, { "epoch": 0.19372700841386012, "grad_norm": 0.25, "learning_rate": 8.051282051282052e-07, "loss": 0.0799, "step": 1570 }, { "epoch": 0.1949609384037573, "grad_norm": 0.1865234375, "learning_rate": 8.102564102564103e-07, "loss": 0.0704, "step": 1580 }, { "epoch": 0.19619486839365452, "grad_norm": 0.1865234375, "learning_rate": 8.153846153846154e-07, "loss": 0.0603, "step": 1590 }, { "epoch": 0.1974287983835517, "grad_norm": 0.23046875, "learning_rate": 8.205128205128205e-07, "loss": 0.0658, "step": 1600 }, { "epoch": 0.19866272837344892, "grad_norm": 0.2421875, "learning_rate": 8.256410256410256e-07, "loss": 0.0719, "step": 1610 }, { "epoch": 0.1998966583633461, "grad_norm": 0.232421875, "learning_rate": 8.307692307692308e-07, "loss": 0.0797, "step": 1620 }, { "epoch": 0.2011305883532433, "grad_norm": 0.357421875, "learning_rate": 8.358974358974359e-07, "loss": 0.0675, "step": 1630 }, { "epoch": 0.2023645183431405, "grad_norm": 0.2255859375, "learning_rate": 8.41025641025641e-07, "loss": 0.0747, "step": 1640 }, { "epoch": 0.2035984483330377, "grad_norm": 0.333984375, "learning_rate": 8.461538461538461e-07, "loss": 0.0685, "step": 1650 }, { "epoch": 0.2048323783229349, "grad_norm": 0.146484375, "learning_rate": 8.512820512820512e-07, "loss": 0.0631, "step": 1660 }, { "epoch": 0.2060663083128321, "grad_norm": 0.1865234375, "learning_rate": 8.564102564102563e-07, "loss": 0.0517, "step": 1670 }, { "epoch": 0.2073002383027293, "grad_norm": 0.25, "learning_rate": 8.615384615384616e-07, "loss": 0.0696, "step": 1680 }, { "epoch": 0.2085341682926265, "grad_norm": 0.2021484375, "learning_rate": 8.666666666666667e-07, "loss": 0.0811, "step": 1690 }, { "epoch": 0.20976809828252369, "grad_norm": 0.1220703125, "learning_rate": 8.717948717948718e-07, "loss": 0.0643, "step": 1700 }, { "epoch": 0.2110020282724209, "grad_norm": 0.23828125, "learning_rate": 8.769230769230769e-07, "loss": 0.0659, "step": 1710 }, { "epoch": 0.2122359582623181, "grad_norm": 0.4140625, "learning_rate": 8.82051282051282e-07, "loss": 0.0702, "step": 1720 }, { "epoch": 0.2134698882522153, "grad_norm": 0.302734375, "learning_rate": 8.871794871794871e-07, "loss": 0.0665, "step": 1730 }, { "epoch": 0.2147038182421125, "grad_norm": 0.2177734375, "learning_rate": 8.923076923076923e-07, "loss": 0.0821, "step": 1740 }, { "epoch": 0.21593774823200967, "grad_norm": 0.189453125, "learning_rate": 8.974358974358974e-07, "loss": 0.0777, "step": 1750 }, { "epoch": 0.2171716782219069, "grad_norm": 0.2109375, "learning_rate": 9.025641025641025e-07, "loss": 0.0796, "step": 1760 }, { "epoch": 0.21840560821180408, "grad_norm": 0.216796875, "learning_rate": 9.076923076923076e-07, "loss": 0.0808, "step": 1770 }, { "epoch": 0.2196395382017013, "grad_norm": 0.1796875, "learning_rate": 9.128205128205127e-07, "loss": 0.0742, "step": 1780 }, { "epoch": 0.22087346819159848, "grad_norm": 0.59765625, "learning_rate": 9.179487179487179e-07, "loss": 0.0639, "step": 1790 }, { "epoch": 0.2221073981814957, "grad_norm": 0.193359375, "learning_rate": 9.230769230769231e-07, "loss": 0.0568, "step": 1800 }, { "epoch": 0.22334132817139288, "grad_norm": 0.173828125, "learning_rate": 9.282051282051282e-07, "loss": 0.073, "step": 1810 }, { "epoch": 0.22457525816129006, "grad_norm": 0.1591796875, "learning_rate": 9.333333333333333e-07, "loss": 0.068, "step": 1820 }, { "epoch": 0.22580918815118728, "grad_norm": 0.2314453125, "learning_rate": 9.384615384615384e-07, "loss": 0.0663, "step": 1830 }, { "epoch": 0.22704311814108447, "grad_norm": 0.10791015625, "learning_rate": 9.435897435897435e-07, "loss": 0.0736, "step": 1840 }, { "epoch": 0.22827704813098168, "grad_norm": 0.31640625, "learning_rate": 9.487179487179486e-07, "loss": 0.0557, "step": 1850 }, { "epoch": 0.22951097812087887, "grad_norm": 0.2099609375, "learning_rate": 9.538461538461538e-07, "loss": 0.0894, "step": 1860 }, { "epoch": 0.23074490811077605, "grad_norm": 0.13671875, "learning_rate": 9.58974358974359e-07, "loss": 0.066, "step": 1870 }, { "epoch": 0.23197883810067327, "grad_norm": 0.224609375, "learning_rate": 9.64102564102564e-07, "loss": 0.075, "step": 1880 }, { "epoch": 0.23321276809057045, "grad_norm": 0.13671875, "learning_rate": 9.692307692307691e-07, "loss": 0.0849, "step": 1890 }, { "epoch": 0.23444669808046767, "grad_norm": 0.2158203125, "learning_rate": 9.743589743589742e-07, "loss": 0.075, "step": 1900 }, { "epoch": 0.23568062807036486, "grad_norm": 0.337890625, "learning_rate": 9.794871794871793e-07, "loss": 0.0825, "step": 1910 }, { "epoch": 0.23691455806026204, "grad_norm": 0.1728515625, "learning_rate": 9.846153846153847e-07, "loss": 0.0605, "step": 1920 }, { "epoch": 0.23814848805015926, "grad_norm": 0.298828125, "learning_rate": 9.897435897435898e-07, "loss": 0.0784, "step": 1930 }, { "epoch": 0.23938241804005644, "grad_norm": 0.32421875, "learning_rate": 9.948717948717949e-07, "loss": 0.0732, "step": 1940 }, { "epoch": 0.24061634802995366, "grad_norm": 0.19140625, "learning_rate": 1e-06, "loss": 0.0521, "step": 1950 }, { "epoch": 0.24185027801985085, "grad_norm": 0.173828125, "learning_rate": 9.99999937931741e-07, "loss": 0.0653, "step": 1960 }, { "epoch": 0.24308420800974806, "grad_norm": 0.142578125, "learning_rate": 9.999997517269796e-07, "loss": 0.0712, "step": 1970 }, { "epoch": 0.24431813799964525, "grad_norm": 0.1943359375, "learning_rate": 9.99999441385762e-07, "loss": 0.0715, "step": 1980 }, { "epoch": 0.24555206798954243, "grad_norm": 0.2099609375, "learning_rate": 9.999990069081652e-07, "loss": 0.0678, "step": 1990 }, { "epoch": 0.24678599797943965, "grad_norm": 0.2373046875, "learning_rate": 9.99998448294297e-07, "loss": 0.0604, "step": 2000 }, { "epoch": 0.24678599797943965, "eval_exact_match": 0.6441717791411042, "eval_has_answer_correct": 0.5787139689578714, "eval_no_answer_correct": 0.7910447761194029, "step": 2000 }, { "epoch": 0.24801992796933683, "grad_norm": 0.1796875, "learning_rate": 9.999977655442962e-07, "loss": 0.0596, "step": 2010 }, { "epoch": 0.24925385795923405, "grad_norm": 0.25, "learning_rate": 9.999969586583324e-07, "loss": 0.0894, "step": 2020 }, { "epoch": 0.25048778794913124, "grad_norm": 0.189453125, "learning_rate": 9.999960276366057e-07, "loss": 0.0679, "step": 2030 }, { "epoch": 0.2517217179390284, "grad_norm": 0.1904296875, "learning_rate": 9.999949724793473e-07, "loss": 0.0654, "step": 2040 }, { "epoch": 0.2529556479289256, "grad_norm": 0.310546875, "learning_rate": 9.999937931868195e-07, "loss": 0.0764, "step": 2050 }, { "epoch": 0.25418957791882285, "grad_norm": 0.158203125, "learning_rate": 9.999924897593146e-07, "loss": 0.0789, "step": 2060 }, { "epoch": 0.25542350790872004, "grad_norm": 0.2255859375, "learning_rate": 9.999910621971564e-07, "loss": 0.0753, "step": 2070 }, { "epoch": 0.2566574378986172, "grad_norm": 0.2470703125, "learning_rate": 9.999895105006994e-07, "loss": 0.0768, "step": 2080 }, { "epoch": 0.2578913678885144, "grad_norm": 0.357421875, "learning_rate": 9.999878346703288e-07, "loss": 0.0735, "step": 2090 }, { "epoch": 0.2591252978784116, "grad_norm": 0.2158203125, "learning_rate": 9.999860347064605e-07, "loss": 0.0669, "step": 2100 }, { "epoch": 0.26035922786830884, "grad_norm": 0.2890625, "learning_rate": 9.999841106095418e-07, "loss": 0.0769, "step": 2110 }, { "epoch": 0.261593157858206, "grad_norm": 0.1025390625, "learning_rate": 9.999820623800498e-07, "loss": 0.0618, "step": 2120 }, { "epoch": 0.2628270878481032, "grad_norm": 0.283203125, "learning_rate": 9.999798900184935e-07, "loss": 0.0746, "step": 2130 }, { "epoch": 0.2640610178380004, "grad_norm": 0.16796875, "learning_rate": 9.999775935254123e-07, "loss": 0.0642, "step": 2140 }, { "epoch": 0.2652949478278976, "grad_norm": 0.291015625, "learning_rate": 9.99975172901376e-07, "loss": 0.0672, "step": 2150 }, { "epoch": 0.26652887781779483, "grad_norm": 0.26953125, "learning_rate": 9.999726281469856e-07, "loss": 0.0703, "step": 2160 }, { "epoch": 0.267762807807692, "grad_norm": 0.1611328125, "learning_rate": 9.99969959262873e-07, "loss": 0.0804, "step": 2170 }, { "epoch": 0.2689967377975892, "grad_norm": 0.2080078125, "learning_rate": 9.999671662497011e-07, "loss": 0.0563, "step": 2180 }, { "epoch": 0.2702306677874864, "grad_norm": 0.2236328125, "learning_rate": 9.999642491081627e-07, "loss": 0.0779, "step": 2190 }, { "epoch": 0.27146459777738363, "grad_norm": 0.119140625, "learning_rate": 9.999612078389827e-07, "loss": 0.0757, "step": 2200 }, { "epoch": 0.2726985277672808, "grad_norm": 0.166015625, "learning_rate": 9.999580424429159e-07, "loss": 0.0577, "step": 2210 }, { "epoch": 0.273932457757178, "grad_norm": 0.2314453125, "learning_rate": 9.99954752920748e-07, "loss": 0.0835, "step": 2220 }, { "epoch": 0.2751663877470752, "grad_norm": 0.33984375, "learning_rate": 9.99951339273296e-07, "loss": 0.0874, "step": 2230 }, { "epoch": 0.2764003177369724, "grad_norm": 0.1484375, "learning_rate": 9.99947801501407e-07, "loss": 0.0635, "step": 2240 }, { "epoch": 0.2776342477268696, "grad_norm": 0.1123046875, "learning_rate": 9.999441396059598e-07, "loss": 0.0665, "step": 2250 }, { "epoch": 0.2788681777167668, "grad_norm": 0.1708984375, "learning_rate": 9.999403535878634e-07, "loss": 0.0542, "step": 2260 }, { "epoch": 0.280102107706664, "grad_norm": 0.1826171875, "learning_rate": 9.999364434480577e-07, "loss": 0.0549, "step": 2270 }, { "epoch": 0.2813360376965612, "grad_norm": 0.1689453125, "learning_rate": 9.999324091875135e-07, "loss": 0.0605, "step": 2280 }, { "epoch": 0.28256996768645837, "grad_norm": 0.1904296875, "learning_rate": 9.999282508072325e-07, "loss": 0.0672, "step": 2290 }, { "epoch": 0.2838038976763556, "grad_norm": 0.26171875, "learning_rate": 9.999239683082468e-07, "loss": 0.0703, "step": 2300 }, { "epoch": 0.2850378276662528, "grad_norm": 0.1728515625, "learning_rate": 9.999195616916198e-07, "loss": 0.0606, "step": 2310 }, { "epoch": 0.28627175765615, "grad_norm": 0.21484375, "learning_rate": 9.999150309584458e-07, "loss": 0.0736, "step": 2320 }, { "epoch": 0.28750568764604717, "grad_norm": 0.251953125, "learning_rate": 9.999103761098493e-07, "loss": 0.0738, "step": 2330 }, { "epoch": 0.28873961763594436, "grad_norm": 0.341796875, "learning_rate": 9.999055971469863e-07, "loss": 0.0823, "step": 2340 }, { "epoch": 0.2899735476258416, "grad_norm": 0.1806640625, "learning_rate": 9.99900694071043e-07, "loss": 0.0712, "step": 2350 }, { "epoch": 0.2912074776157388, "grad_norm": 0.27734375, "learning_rate": 9.998956668832367e-07, "loss": 0.0799, "step": 2360 }, { "epoch": 0.292441407605636, "grad_norm": 0.1708984375, "learning_rate": 9.998905155848159e-07, "loss": 0.0644, "step": 2370 }, { "epoch": 0.29367533759553316, "grad_norm": 0.2578125, "learning_rate": 9.99885240177059e-07, "loss": 0.0738, "step": 2380 }, { "epoch": 0.29490926758543035, "grad_norm": 0.361328125, "learning_rate": 9.998798406612762e-07, "loss": 0.0755, "step": 2390 }, { "epoch": 0.2961431975753276, "grad_norm": 0.25390625, "learning_rate": 9.998743170388077e-07, "loss": 0.0653, "step": 2400 }, { "epoch": 0.2973771275652248, "grad_norm": 0.2265625, "learning_rate": 9.99868669311025e-07, "loss": 0.0586, "step": 2410 }, { "epoch": 0.29861105755512196, "grad_norm": 0.310546875, "learning_rate": 9.998628974793304e-07, "loss": 0.0722, "step": 2420 }, { "epoch": 0.29984498754501915, "grad_norm": 0.125, "learning_rate": 9.998570015451567e-07, "loss": 0.0614, "step": 2430 }, { "epoch": 0.3010789175349164, "grad_norm": 0.228515625, "learning_rate": 9.998509815099677e-07, "loss": 0.0709, "step": 2440 }, { "epoch": 0.3023128475248136, "grad_norm": 0.35546875, "learning_rate": 9.998448373752583e-07, "loss": 0.0806, "step": 2450 }, { "epoch": 0.30354677751471076, "grad_norm": 0.2392578125, "learning_rate": 9.998385691425537e-07, "loss": 0.0827, "step": 2460 }, { "epoch": 0.30478070750460795, "grad_norm": 0.390625, "learning_rate": 9.9983217681341e-07, "loss": 0.0628, "step": 2470 }, { "epoch": 0.30601463749450514, "grad_norm": 0.2314453125, "learning_rate": 9.998256603894146e-07, "loss": 0.0737, "step": 2480 }, { "epoch": 0.3072485674844024, "grad_norm": 0.15625, "learning_rate": 9.99819019872185e-07, "loss": 0.0765, "step": 2490 }, { "epoch": 0.30848249747429957, "grad_norm": 0.1806640625, "learning_rate": 9.9981225526337e-07, "loss": 0.0676, "step": 2500 }, { "epoch": 0.30971642746419675, "grad_norm": 0.2734375, "learning_rate": 9.998053665646492e-07, "loss": 0.0794, "step": 2510 }, { "epoch": 0.31095035745409394, "grad_norm": 0.1962890625, "learning_rate": 9.997983537777327e-07, "loss": 0.0673, "step": 2520 }, { "epoch": 0.3121842874439911, "grad_norm": 0.3125, "learning_rate": 9.997912169043617e-07, "loss": 0.0784, "step": 2530 }, { "epoch": 0.31341821743388837, "grad_norm": 0.1943359375, "learning_rate": 9.99783955946308e-07, "loss": 0.0726, "step": 2540 }, { "epoch": 0.31465214742378556, "grad_norm": 0.1943359375, "learning_rate": 9.997765709053743e-07, "loss": 0.0541, "step": 2550 }, { "epoch": 0.31588607741368274, "grad_norm": 0.3046875, "learning_rate": 9.997690617833941e-07, "loss": 0.0691, "step": 2560 }, { "epoch": 0.31712000740357993, "grad_norm": 0.1357421875, "learning_rate": 9.997614285822318e-07, "loss": 0.0933, "step": 2570 }, { "epoch": 0.3183539373934771, "grad_norm": 0.1572265625, "learning_rate": 9.997536713037826e-07, "loss": 0.0539, "step": 2580 }, { "epoch": 0.31958786738337436, "grad_norm": 0.333984375, "learning_rate": 9.997457899499721e-07, "loss": 0.0834, "step": 2590 }, { "epoch": 0.32082179737327154, "grad_norm": 0.1953125, "learning_rate": 9.997377845227574e-07, "loss": 0.0566, "step": 2600 }, { "epoch": 0.32205572736316873, "grad_norm": 0.1552734375, "learning_rate": 9.997296550241259e-07, "loss": 0.0829, "step": 2610 }, { "epoch": 0.3232896573530659, "grad_norm": 0.2490234375, "learning_rate": 9.997214014560956e-07, "loss": 0.0838, "step": 2620 }, { "epoch": 0.3245235873429631, "grad_norm": 0.150390625, "learning_rate": 9.997130238207161e-07, "loss": 0.0666, "step": 2630 }, { "epoch": 0.32575751733286035, "grad_norm": 0.275390625, "learning_rate": 9.997045221200674e-07, "loss": 0.0674, "step": 2640 }, { "epoch": 0.32699144732275753, "grad_norm": 0.2080078125, "learning_rate": 9.996958963562598e-07, "loss": 0.0634, "step": 2650 }, { "epoch": 0.3282253773126547, "grad_norm": 0.29296875, "learning_rate": 9.99687146531435e-07, "loss": 0.0881, "step": 2660 }, { "epoch": 0.3294593073025519, "grad_norm": 0.1923828125, "learning_rate": 9.996782726477656e-07, "loss": 0.069, "step": 2670 }, { "epoch": 0.3306932372924491, "grad_norm": 0.1025390625, "learning_rate": 9.996692747074543e-07, "loss": 0.0723, "step": 2680 }, { "epoch": 0.33192716728234634, "grad_norm": 0.283203125, "learning_rate": 9.996601527127355e-07, "loss": 0.0618, "step": 2690 }, { "epoch": 0.3331610972722435, "grad_norm": 0.24609375, "learning_rate": 9.996509066658735e-07, "loss": 0.0704, "step": 2700 }, { "epoch": 0.3343950272621407, "grad_norm": 0.236328125, "learning_rate": 9.996415365691645e-07, "loss": 0.0651, "step": 2710 }, { "epoch": 0.3356289572520379, "grad_norm": 0.2451171875, "learning_rate": 9.99632042424934e-07, "loss": 0.0682, "step": 2720 }, { "epoch": 0.33686288724193514, "grad_norm": 0.2412109375, "learning_rate": 9.996224242355397e-07, "loss": 0.0571, "step": 2730 }, { "epoch": 0.3380968172318323, "grad_norm": 0.248046875, "learning_rate": 9.996126820033695e-07, "loss": 0.0729, "step": 2740 }, { "epoch": 0.3393307472217295, "grad_norm": 0.1962890625, "learning_rate": 9.996028157308421e-07, "loss": 0.054, "step": 2750 }, { "epoch": 0.3405646772116267, "grad_norm": 0.30078125, "learning_rate": 9.995928254204069e-07, "loss": 0.0709, "step": 2760 }, { "epoch": 0.3417986072015239, "grad_norm": 0.185546875, "learning_rate": 9.995827110745442e-07, "loss": 0.0901, "step": 2770 }, { "epoch": 0.3430325371914211, "grad_norm": 0.189453125, "learning_rate": 9.995724726957654e-07, "loss": 0.0471, "step": 2780 }, { "epoch": 0.3442664671813183, "grad_norm": 0.185546875, "learning_rate": 9.995621102866121e-07, "loss": 0.058, "step": 2790 }, { "epoch": 0.3455003971712155, "grad_norm": 0.279296875, "learning_rate": 9.995516238496571e-07, "loss": 0.0562, "step": 2800 }, { "epoch": 0.3467343271611127, "grad_norm": 0.18359375, "learning_rate": 9.99541013387504e-07, "loss": 0.0726, "step": 2810 }, { "epoch": 0.3479682571510099, "grad_norm": 0.35546875, "learning_rate": 9.99530278902787e-07, "loss": 0.0781, "step": 2820 }, { "epoch": 0.3492021871409071, "grad_norm": 0.2421875, "learning_rate": 9.995194203981712e-07, "loss": 0.0654, "step": 2830 }, { "epoch": 0.3504361171308043, "grad_norm": 0.166015625, "learning_rate": 9.995084378763525e-07, "loss": 0.0717, "step": 2840 }, { "epoch": 0.3516700471207015, "grad_norm": 0.26953125, "learning_rate": 9.994973313400576e-07, "loss": 0.0606, "step": 2850 }, { "epoch": 0.3529039771105987, "grad_norm": 0.1982421875, "learning_rate": 9.994861007920439e-07, "loss": 0.0749, "step": 2860 }, { "epoch": 0.35413790710049586, "grad_norm": 0.1875, "learning_rate": 9.994747462350995e-07, "loss": 0.0849, "step": 2870 }, { "epoch": 0.3553718370903931, "grad_norm": 0.2490234375, "learning_rate": 9.994632676720437e-07, "loss": 0.0644, "step": 2880 }, { "epoch": 0.3566057670802903, "grad_norm": 0.51171875, "learning_rate": 9.99451665105726e-07, "loss": 0.0703, "step": 2890 }, { "epoch": 0.3578396970701875, "grad_norm": 0.265625, "learning_rate": 9.994399385390276e-07, "loss": 0.0698, "step": 2900 }, { "epoch": 0.35907362706008467, "grad_norm": 0.142578125, "learning_rate": 9.994280879748592e-07, "loss": 0.0679, "step": 2910 }, { "epoch": 0.36030755704998185, "grad_norm": 0.265625, "learning_rate": 9.994161134161632e-07, "loss": 0.0736, "step": 2920 }, { "epoch": 0.3615414870398791, "grad_norm": 0.2275390625, "learning_rate": 9.994040148659127e-07, "loss": 0.0666, "step": 2930 }, { "epoch": 0.3627754170297763, "grad_norm": 0.263671875, "learning_rate": 9.993917923271114e-07, "loss": 0.0673, "step": 2940 }, { "epoch": 0.36400934701967347, "grad_norm": 0.1943359375, "learning_rate": 9.993794458027937e-07, "loss": 0.0627, "step": 2950 }, { "epoch": 0.36524327700957065, "grad_norm": 0.26171875, "learning_rate": 9.993669752960252e-07, "loss": 0.0563, "step": 2960 }, { "epoch": 0.36647720699946784, "grad_norm": 0.392578125, "learning_rate": 9.993543808099016e-07, "loss": 0.0774, "step": 2970 }, { "epoch": 0.3677111369893651, "grad_norm": 0.259765625, "learning_rate": 9.993416623475502e-07, "loss": 0.0892, "step": 2980 }, { "epoch": 0.36894506697926227, "grad_norm": 0.1806640625, "learning_rate": 9.993288199121282e-07, "loss": 0.0725, "step": 2990 }, { "epoch": 0.37017899696915946, "grad_norm": 0.1611328125, "learning_rate": 9.993158535068242e-07, "loss": 0.0617, "step": 3000 }, { "epoch": 0.37017899696915946, "eval_exact_match": 0.6503067484662577, "eval_has_answer_correct": 0.5720620842572062, "eval_no_answer_correct": 0.8258706467661692, "step": 3000 }, { "epoch": 0.37141292695905664, "grad_norm": 0.2392578125, "learning_rate": 9.993027631348577e-07, "loss": 0.0645, "step": 3010 }, { "epoch": 0.3726468569489539, "grad_norm": 0.2255859375, "learning_rate": 9.99289548799478e-07, "loss": 0.0933, "step": 3020 }, { "epoch": 0.3738807869388511, "grad_norm": 0.1796875, "learning_rate": 9.992762105039666e-07, "loss": 0.0634, "step": 3030 }, { "epoch": 0.37511471692874826, "grad_norm": 0.189453125, "learning_rate": 9.99262748251635e-07, "loss": 0.0826, "step": 3040 }, { "epoch": 0.37634864691864545, "grad_norm": 0.25, "learning_rate": 9.992491620458248e-07, "loss": 0.0598, "step": 3050 }, { "epoch": 0.37758257690854263, "grad_norm": 0.12890625, "learning_rate": 9.992354518899098e-07, "loss": 0.0812, "step": 3060 }, { "epoch": 0.3788165068984399, "grad_norm": 0.130859375, "learning_rate": 9.992216177872935e-07, "loss": 0.0666, "step": 3070 }, { "epoch": 0.38005043688833706, "grad_norm": 0.10546875, "learning_rate": 9.992076597414107e-07, "loss": 0.0533, "step": 3080 }, { "epoch": 0.38128436687823425, "grad_norm": 0.2265625, "learning_rate": 9.991935777557268e-07, "loss": 0.0634, "step": 3090 }, { "epoch": 0.38251829686813144, "grad_norm": 0.181640625, "learning_rate": 9.991793718337378e-07, "loss": 0.0629, "step": 3100 }, { "epoch": 0.3837522268580286, "grad_norm": 0.380859375, "learning_rate": 9.991650419789709e-07, "loss": 0.0809, "step": 3110 }, { "epoch": 0.38498615684792586, "grad_norm": 0.236328125, "learning_rate": 9.991505881949836e-07, "loss": 0.0696, "step": 3120 }, { "epoch": 0.38622008683782305, "grad_norm": 0.41015625, "learning_rate": 9.991360104853643e-07, "loss": 0.0703, "step": 3130 }, { "epoch": 0.38745401682772024, "grad_norm": 0.17578125, "learning_rate": 9.991213088537327e-07, "loss": 0.0651, "step": 3140 }, { "epoch": 0.3886879468176174, "grad_norm": 0.267578125, "learning_rate": 9.991064833037384e-07, "loss": 0.0561, "step": 3150 }, { "epoch": 0.3899218768075146, "grad_norm": 0.24609375, "learning_rate": 9.990915338390624e-07, "loss": 0.0944, "step": 3160 }, { "epoch": 0.39115580679741185, "grad_norm": 0.287109375, "learning_rate": 9.990764604634162e-07, "loss": 0.0868, "step": 3170 }, { "epoch": 0.39238973678730904, "grad_norm": 0.2255859375, "learning_rate": 9.99061263180542e-07, "loss": 0.0685, "step": 3180 }, { "epoch": 0.3936236667772062, "grad_norm": 0.119140625, "learning_rate": 9.990459419942132e-07, "loss": 0.0883, "step": 3190 }, { "epoch": 0.3948575967671034, "grad_norm": 0.3125, "learning_rate": 9.99030496908233e-07, "loss": 0.0684, "step": 3200 }, { "epoch": 0.3960915267570006, "grad_norm": 0.1435546875, "learning_rate": 9.990149279264367e-07, "loss": 0.0834, "step": 3210 }, { "epoch": 0.39732545674689784, "grad_norm": 0.251953125, "learning_rate": 9.989992350526892e-07, "loss": 0.0756, "step": 3220 }, { "epoch": 0.39855938673679503, "grad_norm": 0.2216796875, "learning_rate": 9.989834182908869e-07, "loss": 0.0735, "step": 3230 }, { "epoch": 0.3997933167266922, "grad_norm": 0.1591796875, "learning_rate": 9.989674776449567e-07, "loss": 0.0699, "step": 3240 }, { "epoch": 0.4010272467165894, "grad_norm": 0.283203125, "learning_rate": 9.989514131188558e-07, "loss": 0.07, "step": 3250 }, { "epoch": 0.4022611767064866, "grad_norm": 0.21875, "learning_rate": 9.989352247165729e-07, "loss": 0.0662, "step": 3260 }, { "epoch": 0.40349510669638383, "grad_norm": 0.15625, "learning_rate": 9.989189124421273e-07, "loss": 0.0739, "step": 3270 }, { "epoch": 0.404729036686281, "grad_norm": 0.203125, "learning_rate": 9.989024762995686e-07, "loss": 0.0693, "step": 3280 }, { "epoch": 0.4059629666761782, "grad_norm": 0.365234375, "learning_rate": 9.988859162929775e-07, "loss": 0.0713, "step": 3290 }, { "epoch": 0.4071968966660754, "grad_norm": 0.27734375, "learning_rate": 9.988692324264655e-07, "loss": 0.0633, "step": 3300 }, { "epoch": 0.40843082665597263, "grad_norm": 0.279296875, "learning_rate": 9.988524247041748e-07, "loss": 0.087, "step": 3310 }, { "epoch": 0.4096647566458698, "grad_norm": 0.1416015625, "learning_rate": 9.988354931302783e-07, "loss": 0.0765, "step": 3320 }, { "epoch": 0.410898686635767, "grad_norm": 0.27734375, "learning_rate": 9.988184377089794e-07, "loss": 0.0849, "step": 3330 }, { "epoch": 0.4121326166256642, "grad_norm": 0.2275390625, "learning_rate": 9.988012584445127e-07, "loss": 0.0856, "step": 3340 }, { "epoch": 0.4133665466155614, "grad_norm": 0.3046875, "learning_rate": 9.987839553411434e-07, "loss": 0.0558, "step": 3350 }, { "epoch": 0.4146004766054586, "grad_norm": 0.294921875, "learning_rate": 9.987665284031673e-07, "loss": 0.0728, "step": 3360 }, { "epoch": 0.4158344065953558, "grad_norm": 0.130859375, "learning_rate": 9.98748977634911e-07, "loss": 0.0807, "step": 3370 }, { "epoch": 0.417068336585253, "grad_norm": 0.2265625, "learning_rate": 9.987313030407323e-07, "loss": 0.0659, "step": 3380 }, { "epoch": 0.4183022665751502, "grad_norm": 0.1953125, "learning_rate": 9.987135046250186e-07, "loss": 0.0755, "step": 3390 }, { "epoch": 0.41953619656504737, "grad_norm": 0.11767578125, "learning_rate": 9.986955823921893e-07, "loss": 0.0599, "step": 3400 }, { "epoch": 0.4207701265549446, "grad_norm": 0.1494140625, "learning_rate": 9.986775363466937e-07, "loss": 0.053, "step": 3410 }, { "epoch": 0.4220040565448418, "grad_norm": 0.287109375, "learning_rate": 9.986593664930126e-07, "loss": 0.0736, "step": 3420 }, { "epoch": 0.423237986534739, "grad_norm": 0.181640625, "learning_rate": 9.986410728356564e-07, "loss": 0.0755, "step": 3430 }, { "epoch": 0.4244719165246362, "grad_norm": 0.2421875, "learning_rate": 9.986226553791675e-07, "loss": 0.0805, "step": 3440 }, { "epoch": 0.42570584651453336, "grad_norm": 0.296875, "learning_rate": 9.986041141281181e-07, "loss": 0.0664, "step": 3450 }, { "epoch": 0.4269397765044306, "grad_norm": 0.498046875, "learning_rate": 9.98585449087112e-07, "loss": 0.0962, "step": 3460 }, { "epoch": 0.4281737064943278, "grad_norm": 0.259765625, "learning_rate": 9.985666602607824e-07, "loss": 0.0744, "step": 3470 }, { "epoch": 0.429407636484225, "grad_norm": 0.15625, "learning_rate": 9.985477476537948e-07, "loss": 0.0705, "step": 3480 }, { "epoch": 0.43064156647412216, "grad_norm": 0.3515625, "learning_rate": 9.985287112708444e-07, "loss": 0.0624, "step": 3490 }, { "epoch": 0.43187549646401935, "grad_norm": 0.1416015625, "learning_rate": 9.985095511166575e-07, "loss": 0.0508, "step": 3500 }, { "epoch": 0.4331094264539166, "grad_norm": 0.1962890625, "learning_rate": 9.98490267195991e-07, "loss": 0.0585, "step": 3510 }, { "epoch": 0.4343433564438138, "grad_norm": 0.3515625, "learning_rate": 9.984708595136324e-07, "loss": 0.074, "step": 3520 }, { "epoch": 0.43557728643371096, "grad_norm": 0.1923828125, "learning_rate": 9.984513280744005e-07, "loss": 0.0766, "step": 3530 }, { "epoch": 0.43681121642360815, "grad_norm": 0.26953125, "learning_rate": 9.98431672883144e-07, "loss": 0.0652, "step": 3540 }, { "epoch": 0.43804514641350534, "grad_norm": 0.146484375, "learning_rate": 9.984118939447432e-07, "loss": 0.0605, "step": 3550 }, { "epoch": 0.4392790764034026, "grad_norm": 0.2333984375, "learning_rate": 9.983919912641082e-07, "loss": 0.0762, "step": 3560 }, { "epoch": 0.44051300639329977, "grad_norm": 0.25390625, "learning_rate": 9.983719648461806e-07, "loss": 0.0815, "step": 3570 }, { "epoch": 0.44174693638319695, "grad_norm": 0.1318359375, "learning_rate": 9.983518146959324e-07, "loss": 0.0823, "step": 3580 }, { "epoch": 0.44298086637309414, "grad_norm": 0.2392578125, "learning_rate": 9.983315408183663e-07, "loss": 0.0768, "step": 3590 }, { "epoch": 0.4442147963629914, "grad_norm": 0.29296875, "learning_rate": 9.983111432185158e-07, "loss": 0.0597, "step": 3600 }, { "epoch": 0.44544872635288857, "grad_norm": 0.21875, "learning_rate": 9.982906219014449e-07, "loss": 0.0632, "step": 3610 }, { "epoch": 0.44668265634278576, "grad_norm": 0.134765625, "learning_rate": 9.982699768722487e-07, "loss": 0.0713, "step": 3620 }, { "epoch": 0.44791658633268294, "grad_norm": 0.26171875, "learning_rate": 9.982492081360528e-07, "loss": 0.0795, "step": 3630 }, { "epoch": 0.44915051632258013, "grad_norm": 0.1865234375, "learning_rate": 9.98228315698013e-07, "loss": 0.077, "step": 3640 }, { "epoch": 0.45038444631247737, "grad_norm": 0.11962890625, "learning_rate": 9.982072995633173e-07, "loss": 0.066, "step": 3650 }, { "epoch": 0.45161837630237456, "grad_norm": 0.279296875, "learning_rate": 9.981861597371828e-07, "loss": 0.076, "step": 3660 }, { "epoch": 0.45285230629227174, "grad_norm": 0.408203125, "learning_rate": 9.98164896224858e-07, "loss": 0.0854, "step": 3670 }, { "epoch": 0.45408623628216893, "grad_norm": 0.154296875, "learning_rate": 9.981435090316218e-07, "loss": 0.0762, "step": 3680 }, { "epoch": 0.4553201662720661, "grad_norm": 0.193359375, "learning_rate": 9.98121998162785e-07, "loss": 0.0698, "step": 3690 }, { "epoch": 0.45655409626196336, "grad_norm": 0.1748046875, "learning_rate": 9.98100363623687e-07, "loss": 0.0646, "step": 3700 }, { "epoch": 0.45778802625186055, "grad_norm": 0.1708984375, "learning_rate": 9.980786054196998e-07, "loss": 0.0774, "step": 3710 }, { "epoch": 0.45902195624175773, "grad_norm": 0.21875, "learning_rate": 9.980567235562252e-07, "loss": 0.0686, "step": 3720 }, { "epoch": 0.4602558862316549, "grad_norm": 0.177734375, "learning_rate": 9.980347180386958e-07, "loss": 0.0607, "step": 3730 }, { "epoch": 0.4614898162215521, "grad_norm": 0.25390625, "learning_rate": 9.980125888725752e-07, "loss": 0.0726, "step": 3740 }, { "epoch": 0.46272374621144935, "grad_norm": 0.1806640625, "learning_rate": 9.979903360633573e-07, "loss": 0.0676, "step": 3750 }, { "epoch": 0.46395767620134654, "grad_norm": 0.1982421875, "learning_rate": 9.979679596165667e-07, "loss": 0.0777, "step": 3760 }, { "epoch": 0.4651916061912437, "grad_norm": 0.267578125, "learning_rate": 9.979454595377593e-07, "loss": 0.0926, "step": 3770 }, { "epoch": 0.4664255361811409, "grad_norm": 0.2392578125, "learning_rate": 9.979228358325209e-07, "loss": 0.0707, "step": 3780 }, { "epoch": 0.4676594661710381, "grad_norm": 0.37109375, "learning_rate": 9.979000885064684e-07, "loss": 0.0661, "step": 3790 }, { "epoch": 0.46889339616093534, "grad_norm": 0.12060546875, "learning_rate": 9.978772175652495e-07, "loss": 0.0656, "step": 3800 }, { "epoch": 0.4701273261508325, "grad_norm": 0.236328125, "learning_rate": 9.978542230145424e-07, "loss": 0.0772, "step": 3810 }, { "epoch": 0.4713612561407297, "grad_norm": 0.30078125, "learning_rate": 9.97831104860056e-07, "loss": 0.0753, "step": 3820 }, { "epoch": 0.4725951861306269, "grad_norm": 0.1953125, "learning_rate": 9.978078631075298e-07, "loss": 0.0775, "step": 3830 }, { "epoch": 0.4738291161205241, "grad_norm": 0.2138671875, "learning_rate": 9.977844977627343e-07, "loss": 0.0639, "step": 3840 }, { "epoch": 0.4750630461104213, "grad_norm": 0.166015625, "learning_rate": 9.977610088314704e-07, "loss": 0.0617, "step": 3850 }, { "epoch": 0.4762969761003185, "grad_norm": 0.265625, "learning_rate": 9.977373963195698e-07, "loss": 0.0801, "step": 3860 }, { "epoch": 0.4775309060902157, "grad_norm": 0.1669921875, "learning_rate": 9.977136602328947e-07, "loss": 0.075, "step": 3870 }, { "epoch": 0.4787648360801129, "grad_norm": 0.2373046875, "learning_rate": 9.976898005773383e-07, "loss": 0.0591, "step": 3880 }, { "epoch": 0.47999876607001013, "grad_norm": 0.1640625, "learning_rate": 9.976658173588243e-07, "loss": 0.0605, "step": 3890 }, { "epoch": 0.4812326960599073, "grad_norm": 0.1640625, "learning_rate": 9.97641710583307e-07, "loss": 0.0569, "step": 3900 }, { "epoch": 0.4824666260498045, "grad_norm": 0.1806640625, "learning_rate": 9.976174802567713e-07, "loss": 0.0674, "step": 3910 }, { "epoch": 0.4837005560397017, "grad_norm": 0.201171875, "learning_rate": 9.975931263852336e-07, "loss": 0.0885, "step": 3920 }, { "epoch": 0.4849344860295989, "grad_norm": 0.25390625, "learning_rate": 9.975686489747394e-07, "loss": 0.0565, "step": 3930 }, { "epoch": 0.4861684160194961, "grad_norm": 0.20703125, "learning_rate": 9.975440480313664e-07, "loss": 0.0667, "step": 3940 }, { "epoch": 0.4874023460093933, "grad_norm": 0.181640625, "learning_rate": 9.97519323561222e-07, "loss": 0.0613, "step": 3950 }, { "epoch": 0.4886362759992905, "grad_norm": 0.27734375, "learning_rate": 9.97494475570445e-07, "loss": 0.074, "step": 3960 }, { "epoch": 0.4898702059891877, "grad_norm": 0.2275390625, "learning_rate": 9.974695040652043e-07, "loss": 0.0841, "step": 3970 }, { "epoch": 0.49110413597908487, "grad_norm": 0.197265625, "learning_rate": 9.974444090516995e-07, "loss": 0.0654, "step": 3980 }, { "epoch": 0.4923380659689821, "grad_norm": 0.1572265625, "learning_rate": 9.974191905361614e-07, "loss": 0.0697, "step": 3990 }, { "epoch": 0.4935719959588793, "grad_norm": 0.326171875, "learning_rate": 9.973938485248507e-07, "loss": 0.0584, "step": 4000 }, { "epoch": 0.4935719959588793, "eval_exact_match": 0.651840490797546, "eval_has_answer_correct": 0.5698447893569845, "eval_no_answer_correct": 0.835820895522388, "step": 4000 }, { "epoch": 0.4948059259487765, "grad_norm": 0.203125, "learning_rate": 9.97368383024059e-07, "loss": 0.0811, "step": 4010 }, { "epoch": 0.49603985593867367, "grad_norm": 0.2890625, "learning_rate": 9.973427940401093e-07, "loss": 0.0737, "step": 4020 }, { "epoch": 0.49727378592857086, "grad_norm": 0.2041015625, "learning_rate": 9.973170815793542e-07, "loss": 0.0749, "step": 4030 }, { "epoch": 0.4985077159184681, "grad_norm": 0.23828125, "learning_rate": 9.972912456481774e-07, "loss": 0.0749, "step": 4040 }, { "epoch": 0.4997416459083653, "grad_norm": 0.1416015625, "learning_rate": 9.972652862529937e-07, "loss": 0.0741, "step": 4050 }, { "epoch": 0.5009755758982625, "grad_norm": 0.2392578125, "learning_rate": 9.972392034002476e-07, "loss": 0.0863, "step": 4060 }, { "epoch": 0.5022095058881597, "grad_norm": 0.232421875, "learning_rate": 9.97212997096415e-07, "loss": 0.0653, "step": 4070 }, { "epoch": 0.5034434358780568, "grad_norm": 0.19921875, "learning_rate": 9.971866673480022e-07, "loss": 0.0641, "step": 4080 }, { "epoch": 0.5046773658679541, "grad_norm": 0.2119140625, "learning_rate": 9.971602141615462e-07, "loss": 0.07, "step": 4090 }, { "epoch": 0.5059112958578512, "grad_norm": 0.181640625, "learning_rate": 9.971336375436146e-07, "loss": 0.0532, "step": 4100 }, { "epoch": 0.5071452258477485, "grad_norm": 0.2021484375, "learning_rate": 9.971069375008055e-07, "loss": 0.0713, "step": 4110 }, { "epoch": 0.5083791558376457, "grad_norm": 0.2275390625, "learning_rate": 9.97080114039748e-07, "loss": 0.0715, "step": 4120 }, { "epoch": 0.5096130858275428, "grad_norm": 0.13671875, "learning_rate": 9.970531671671016e-07, "loss": 0.0659, "step": 4130 }, { "epoch": 0.5108470158174401, "grad_norm": 0.1650390625, "learning_rate": 9.970260968895565e-07, "loss": 0.0681, "step": 4140 }, { "epoch": 0.5120809458073372, "grad_norm": 0.1455078125, "learning_rate": 9.969989032138333e-07, "loss": 0.0594, "step": 4150 }, { "epoch": 0.5133148757972344, "grad_norm": 0.142578125, "learning_rate": 9.969715861466839e-07, "loss": 0.0776, "step": 4160 }, { "epoch": 0.5145488057871317, "grad_norm": 0.271484375, "learning_rate": 9.9694414569489e-07, "loss": 0.0799, "step": 4170 }, { "epoch": 0.5157827357770288, "grad_norm": 0.1357421875, "learning_rate": 9.969165818652643e-07, "loss": 0.0762, "step": 4180 }, { "epoch": 0.5170166657669261, "grad_norm": 0.19921875, "learning_rate": 9.968888946646505e-07, "loss": 0.0683, "step": 4190 }, { "epoch": 0.5182505957568232, "grad_norm": 0.31640625, "learning_rate": 9.968610840999224e-07, "loss": 0.0654, "step": 4200 }, { "epoch": 0.5194845257467204, "grad_norm": 0.271484375, "learning_rate": 9.968331501779846e-07, "loss": 0.0802, "step": 4210 }, { "epoch": 0.5207184557366177, "grad_norm": 0.173828125, "learning_rate": 9.968050929057723e-07, "loss": 0.0794, "step": 4220 }, { "epoch": 0.5219523857265148, "grad_norm": 0.197265625, "learning_rate": 9.967769122902513e-07, "loss": 0.0621, "step": 4230 }, { "epoch": 0.523186315716412, "grad_norm": 0.171875, "learning_rate": 9.967486083384183e-07, "loss": 0.0625, "step": 4240 }, { "epoch": 0.5244202457063092, "grad_norm": 0.224609375, "learning_rate": 9.967201810573003e-07, "loss": 0.0767, "step": 4250 }, { "epoch": 0.5256541756962064, "grad_norm": 0.28125, "learning_rate": 9.96691630453955e-07, "loss": 0.0794, "step": 4260 }, { "epoch": 0.5268881056861037, "grad_norm": 0.197265625, "learning_rate": 9.96662956535471e-07, "loss": 0.0596, "step": 4270 }, { "epoch": 0.5281220356760008, "grad_norm": 0.6640625, "learning_rate": 9.966341593089668e-07, "loss": 0.0756, "step": 4280 }, { "epoch": 0.529355965665898, "grad_norm": 0.298828125, "learning_rate": 9.96605238781592e-07, "loss": 0.0901, "step": 4290 }, { "epoch": 0.5305898956557952, "grad_norm": 0.251953125, "learning_rate": 9.965761949605275e-07, "loss": 0.0722, "step": 4300 }, { "epoch": 0.5318238256456924, "grad_norm": 0.2265625, "learning_rate": 9.965470278529833e-07, "loss": 0.0714, "step": 4310 }, { "epoch": 0.5330577556355897, "grad_norm": 0.1845703125, "learning_rate": 9.965177374662012e-07, "loss": 0.0721, "step": 4320 }, { "epoch": 0.5342916856254868, "grad_norm": 0.283203125, "learning_rate": 9.96488323807453e-07, "loss": 0.0751, "step": 4330 }, { "epoch": 0.535525615615384, "grad_norm": 0.31640625, "learning_rate": 9.964587868840416e-07, "loss": 0.0625, "step": 4340 }, { "epoch": 0.5367595456052813, "grad_norm": 0.1611328125, "learning_rate": 9.964291267033e-07, "loss": 0.0648, "step": 4350 }, { "epoch": 0.5379934755951784, "grad_norm": 0.23046875, "learning_rate": 9.96399343272592e-07, "loss": 0.0755, "step": 4360 }, { "epoch": 0.5392274055850756, "grad_norm": 0.228515625, "learning_rate": 9.963694365993122e-07, "loss": 0.0719, "step": 4370 }, { "epoch": 0.5404613355749728, "grad_norm": 0.263671875, "learning_rate": 9.963394066908855e-07, "loss": 0.0743, "step": 4380 }, { "epoch": 0.54169526556487, "grad_norm": 0.2001953125, "learning_rate": 9.963092535547675e-07, "loss": 0.0577, "step": 4390 }, { "epoch": 0.5429291955547673, "grad_norm": 0.1767578125, "learning_rate": 9.962789771984445e-07, "loss": 0.0766, "step": 4400 }, { "epoch": 0.5441631255446644, "grad_norm": 0.23046875, "learning_rate": 9.962485776294332e-07, "loss": 0.0745, "step": 4410 }, { "epoch": 0.5453970555345616, "grad_norm": 0.20703125, "learning_rate": 9.96218054855281e-07, "loss": 0.0604, "step": 4420 }, { "epoch": 0.5466309855244588, "grad_norm": 0.25390625, "learning_rate": 9.96187408883566e-07, "loss": 0.0711, "step": 4430 }, { "epoch": 0.547864915514356, "grad_norm": 0.26953125, "learning_rate": 9.961566397218968e-07, "loss": 0.0714, "step": 4440 }, { "epoch": 0.5490988455042533, "grad_norm": 0.263671875, "learning_rate": 9.961257473779124e-07, "loss": 0.07, "step": 4450 }, { "epoch": 0.5503327754941504, "grad_norm": 0.15234375, "learning_rate": 9.960947318592825e-07, "loss": 0.0609, "step": 4460 }, { "epoch": 0.5515667054840476, "grad_norm": 0.326171875, "learning_rate": 9.960635931737074e-07, "loss": 0.0794, "step": 4470 }, { "epoch": 0.5528006354739448, "grad_norm": 0.2236328125, "learning_rate": 9.960323313289183e-07, "loss": 0.0846, "step": 4480 }, { "epoch": 0.554034565463842, "grad_norm": 0.1650390625, "learning_rate": 9.960009463326763e-07, "loss": 0.0811, "step": 4490 }, { "epoch": 0.5552684954537392, "grad_norm": 0.2255859375, "learning_rate": 9.959694381927739e-07, "loss": 0.0694, "step": 4500 }, { "epoch": 0.5565024254436364, "grad_norm": 0.1474609375, "learning_rate": 9.959378069170332e-07, "loss": 0.0828, "step": 4510 }, { "epoch": 0.5577363554335336, "grad_norm": 0.24609375, "learning_rate": 9.959060525133077e-07, "loss": 0.0834, "step": 4520 }, { "epoch": 0.5589702854234307, "grad_norm": 0.17578125, "learning_rate": 9.958741749894812e-07, "loss": 0.072, "step": 4530 }, { "epoch": 0.560204215413328, "grad_norm": 0.267578125, "learning_rate": 9.958421743534676e-07, "loss": 0.0719, "step": 4540 }, { "epoch": 0.5614381454032252, "grad_norm": 0.2138671875, "learning_rate": 9.958100506132126e-07, "loss": 0.0692, "step": 4550 }, { "epoch": 0.5626720753931224, "grad_norm": 0.1044921875, "learning_rate": 9.957778037766908e-07, "loss": 0.075, "step": 4560 }, { "epoch": 0.5639060053830196, "grad_norm": 0.1728515625, "learning_rate": 9.957454338519087e-07, "loss": 0.0832, "step": 4570 }, { "epoch": 0.5651399353729167, "grad_norm": 0.251953125, "learning_rate": 9.95712940846903e-07, "loss": 0.078, "step": 4580 }, { "epoch": 0.566373865362814, "grad_norm": 0.1494140625, "learning_rate": 9.956803247697404e-07, "loss": 0.0741, "step": 4590 }, { "epoch": 0.5676077953527112, "grad_norm": 0.2138671875, "learning_rate": 9.956475856285187e-07, "loss": 0.0624, "step": 4600 }, { "epoch": 0.5688417253426084, "grad_norm": 0.185546875, "learning_rate": 9.956147234313663e-07, "loss": 0.0611, "step": 4610 }, { "epoch": 0.5700756553325056, "grad_norm": 0.171875, "learning_rate": 9.955817381864422e-07, "loss": 0.0747, "step": 4620 }, { "epoch": 0.5713095853224027, "grad_norm": 0.6015625, "learning_rate": 9.955486299019352e-07, "loss": 0.0699, "step": 4630 }, { "epoch": 0.5725435153123, "grad_norm": 0.138671875, "learning_rate": 9.95515398586066e-07, "loss": 0.0652, "step": 4640 }, { "epoch": 0.5737774453021972, "grad_norm": 0.248046875, "learning_rate": 9.954820442470839e-07, "loss": 0.0561, "step": 4650 }, { "epoch": 0.5750113752920943, "grad_norm": 0.353515625, "learning_rate": 9.954485668932708e-07, "loss": 0.077, "step": 4660 }, { "epoch": 0.5762453052819916, "grad_norm": 0.2001953125, "learning_rate": 9.954149665329378e-07, "loss": 0.0831, "step": 4670 }, { "epoch": 0.5774792352718887, "grad_norm": 0.240234375, "learning_rate": 9.953812431744274e-07, "loss": 0.0844, "step": 4680 }, { "epoch": 0.578713165261786, "grad_norm": 0.1923828125, "learning_rate": 9.953473968261117e-07, "loss": 0.0738, "step": 4690 }, { "epoch": 0.5799470952516832, "grad_norm": 0.26953125, "learning_rate": 9.95313427496394e-07, "loss": 0.052, "step": 4700 }, { "epoch": 0.5811810252415803, "grad_norm": 0.2265625, "learning_rate": 9.95279335193708e-07, "loss": 0.0679, "step": 4710 }, { "epoch": 0.5824149552314776, "grad_norm": 0.2333984375, "learning_rate": 9.952451199265182e-07, "loss": 0.056, "step": 4720 }, { "epoch": 0.5836488852213747, "grad_norm": 0.251953125, "learning_rate": 9.952107817033188e-07, "loss": 0.0623, "step": 4730 }, { "epoch": 0.584882815211272, "grad_norm": 0.431640625, "learning_rate": 9.951763205326354e-07, "loss": 0.0665, "step": 4740 }, { "epoch": 0.5861167452011692, "grad_norm": 0.1748046875, "learning_rate": 9.951417364230235e-07, "loss": 0.0643, "step": 4750 }, { "epoch": 0.5873506751910663, "grad_norm": 0.2421875, "learning_rate": 9.951070293830696e-07, "loss": 0.0756, "step": 4760 }, { "epoch": 0.5885846051809636, "grad_norm": 0.1484375, "learning_rate": 9.950721994213907e-07, "loss": 0.0726, "step": 4770 }, { "epoch": 0.5898185351708607, "grad_norm": 0.3125, "learning_rate": 9.950372465466338e-07, "loss": 0.0651, "step": 4780 }, { "epoch": 0.5910524651607579, "grad_norm": 0.267578125, "learning_rate": 9.95002170767477e-07, "loss": 0.0555, "step": 4790 }, { "epoch": 0.5922863951506552, "grad_norm": 0.1875, "learning_rate": 9.949669720926282e-07, "loss": 0.0546, "step": 4800 }, { "epoch": 0.5935203251405523, "grad_norm": 0.37890625, "learning_rate": 9.94931650530827e-07, "loss": 0.082, "step": 4810 }, { "epoch": 0.5947542551304495, "grad_norm": 0.1806640625, "learning_rate": 9.948962060908424e-07, "loss": 0.0692, "step": 4820 }, { "epoch": 0.5959881851203467, "grad_norm": 0.1318359375, "learning_rate": 9.948606387814742e-07, "loss": 0.0797, "step": 4830 }, { "epoch": 0.5972221151102439, "grad_norm": 0.14453125, "learning_rate": 9.94824948611553e-07, "loss": 0.0543, "step": 4840 }, { "epoch": 0.5984560451001412, "grad_norm": 0.44140625, "learning_rate": 9.947891355899397e-07, "loss": 0.0562, "step": 4850 }, { "epoch": 0.5996899750900383, "grad_norm": 0.244140625, "learning_rate": 9.947531997255256e-07, "loss": 0.1003, "step": 4860 }, { "epoch": 0.6009239050799355, "grad_norm": 0.21484375, "learning_rate": 9.947171410272325e-07, "loss": 0.0895, "step": 4870 }, { "epoch": 0.6021578350698328, "grad_norm": 0.185546875, "learning_rate": 9.946809595040132e-07, "loss": 0.0651, "step": 4880 }, { "epoch": 0.6033917650597299, "grad_norm": 0.1953125, "learning_rate": 9.946446551648503e-07, "loss": 0.0779, "step": 4890 }, { "epoch": 0.6046256950496272, "grad_norm": 0.197265625, "learning_rate": 9.94608228018757e-07, "loss": 0.0595, "step": 4900 }, { "epoch": 0.6058596250395243, "grad_norm": 0.291015625, "learning_rate": 9.94571678074778e-07, "loss": 0.0781, "step": 4910 }, { "epoch": 0.6070935550294215, "grad_norm": 0.21484375, "learning_rate": 9.945350053419866e-07, "loss": 0.0866, "step": 4920 }, { "epoch": 0.6083274850193188, "grad_norm": 0.11474609375, "learning_rate": 9.944982098294883e-07, "loss": 0.0676, "step": 4930 }, { "epoch": 0.6095614150092159, "grad_norm": 0.197265625, "learning_rate": 9.94461291546418e-07, "loss": 0.0689, "step": 4940 }, { "epoch": 0.6107953449991131, "grad_norm": 0.12158203125, "learning_rate": 9.944242505019422e-07, "loss": 0.0615, "step": 4950 }, { "epoch": 0.6120292749890103, "grad_norm": 0.361328125, "learning_rate": 9.943870867052567e-07, "loss": 0.073, "step": 4960 }, { "epoch": 0.6132632049789075, "grad_norm": 0.1689453125, "learning_rate": 9.943498001655881e-07, "loss": 0.0889, "step": 4970 }, { "epoch": 0.6144971349688048, "grad_norm": 0.154296875, "learning_rate": 9.943123908921942e-07, "loss": 0.0715, "step": 4980 }, { "epoch": 0.6157310649587019, "grad_norm": 0.12109375, "learning_rate": 9.942748588943622e-07, "loss": 0.0678, "step": 4990 }, { "epoch": 0.6169649949485991, "grad_norm": 0.1982421875, "learning_rate": 9.942372041814108e-07, "loss": 0.0766, "step": 5000 }, { "epoch": 0.6169649949485991, "eval_exact_match": 0.6549079754601227, "eval_has_answer_correct": 0.5787139689578714, "eval_no_answer_correct": 0.8258706467661692, "step": 5000 }, { "epoch": 0.6181989249384963, "grad_norm": 0.2060546875, "learning_rate": 9.94199426762688e-07, "loss": 0.059, "step": 5010 }, { "epoch": 0.6194328549283935, "grad_norm": 0.1435546875, "learning_rate": 9.941615266475733e-07, "loss": 0.074, "step": 5020 }, { "epoch": 0.6206667849182907, "grad_norm": 0.27734375, "learning_rate": 9.941235038454763e-07, "loss": 0.0795, "step": 5030 }, { "epoch": 0.6219007149081879, "grad_norm": 0.2119140625, "learning_rate": 9.940853583658369e-07, "loss": 0.0728, "step": 5040 }, { "epoch": 0.6231346448980851, "grad_norm": 0.333984375, "learning_rate": 9.940470902181257e-07, "loss": 0.0505, "step": 5050 }, { "epoch": 0.6243685748879823, "grad_norm": 0.146484375, "learning_rate": 9.940086994118435e-07, "loss": 0.077, "step": 5060 }, { "epoch": 0.6256025048778795, "grad_norm": 0.271484375, "learning_rate": 9.93970185956522e-07, "loss": 0.0682, "step": 5070 }, { "epoch": 0.6268364348677767, "grad_norm": 0.1572265625, "learning_rate": 9.939315498617227e-07, "loss": 0.0596, "step": 5080 }, { "epoch": 0.6280703648576739, "grad_norm": 0.154296875, "learning_rate": 9.938927911370382e-07, "loss": 0.0763, "step": 5090 }, { "epoch": 0.6293042948475711, "grad_norm": 0.2060546875, "learning_rate": 9.938539097920911e-07, "loss": 0.0532, "step": 5100 }, { "epoch": 0.6305382248374682, "grad_norm": 0.212890625, "learning_rate": 9.938149058365347e-07, "loss": 0.0617, "step": 5110 }, { "epoch": 0.6317721548273655, "grad_norm": 0.19921875, "learning_rate": 9.937757792800522e-07, "loss": 0.0819, "step": 5120 }, { "epoch": 0.6330060848172627, "grad_norm": 0.2734375, "learning_rate": 9.93736530132358e-07, "loss": 0.0655, "step": 5130 }, { "epoch": 0.6342400148071599, "grad_norm": 0.2294921875, "learning_rate": 9.93697158403197e-07, "loss": 0.0699, "step": 5140 }, { "epoch": 0.6354739447970571, "grad_norm": 0.1201171875, "learning_rate": 9.936576641023435e-07, "loss": 0.0648, "step": 5150 }, { "epoch": 0.6367078747869542, "grad_norm": 0.3515625, "learning_rate": 9.936180472396032e-07, "loss": 0.0787, "step": 5160 }, { "epoch": 0.6379418047768515, "grad_norm": 0.2490234375, "learning_rate": 9.935783078248118e-07, "loss": 0.0628, "step": 5170 }, { "epoch": 0.6391757347667487, "grad_norm": 0.265625, "learning_rate": 9.935384458678356e-07, "loss": 0.0748, "step": 5180 }, { "epoch": 0.6404096647566458, "grad_norm": 0.1416015625, "learning_rate": 9.93498461378571e-07, "loss": 0.0616, "step": 5190 }, { "epoch": 0.6416435947465431, "grad_norm": 0.310546875, "learning_rate": 9.934583543669453e-07, "loss": 0.0529, "step": 5200 }, { "epoch": 0.6428775247364402, "grad_norm": 0.27734375, "learning_rate": 9.93418124842916e-07, "loss": 0.0761, "step": 5210 }, { "epoch": 0.6441114547263375, "grad_norm": 0.255859375, "learning_rate": 9.933777728164709e-07, "loss": 0.0606, "step": 5220 }, { "epoch": 0.6453453847162347, "grad_norm": 0.25, "learning_rate": 9.933372982976284e-07, "loss": 0.0734, "step": 5230 }, { "epoch": 0.6465793147061318, "grad_norm": 0.1650390625, "learning_rate": 9.932967012964373e-07, "loss": 0.0518, "step": 5240 }, { "epoch": 0.6478132446960291, "grad_norm": 0.1337890625, "learning_rate": 9.932559818229765e-07, "loss": 0.057, "step": 5250 }, { "epoch": 0.6490471746859262, "grad_norm": 0.2412109375, "learning_rate": 9.932151398873558e-07, "loss": 0.0773, "step": 5260 }, { "epoch": 0.6502811046758235, "grad_norm": 0.14453125, "learning_rate": 9.93174175499715e-07, "loss": 0.0743, "step": 5270 }, { "epoch": 0.6515150346657207, "grad_norm": 0.4140625, "learning_rate": 9.931330886702246e-07, "loss": 0.0827, "step": 5280 }, { "epoch": 0.6527489646556178, "grad_norm": 0.271484375, "learning_rate": 9.93091879409085e-07, "loss": 0.0763, "step": 5290 }, { "epoch": 0.6539828946455151, "grad_norm": 0.263671875, "learning_rate": 9.930505477265278e-07, "loss": 0.0685, "step": 5300 }, { "epoch": 0.6552168246354122, "grad_norm": 0.1650390625, "learning_rate": 9.930090936328142e-07, "loss": 0.0813, "step": 5310 }, { "epoch": 0.6564507546253094, "grad_norm": 0.48828125, "learning_rate": 9.929675171382363e-07, "loss": 0.0852, "step": 5320 }, { "epoch": 0.6576846846152067, "grad_norm": 0.1904296875, "learning_rate": 9.929258182531166e-07, "loss": 0.0792, "step": 5330 }, { "epoch": 0.6589186146051038, "grad_norm": 0.1904296875, "learning_rate": 9.928839969878075e-07, "loss": 0.0674, "step": 5340 }, { "epoch": 0.6601525445950011, "grad_norm": 0.185546875, "learning_rate": 9.92842053352692e-07, "loss": 0.0565, "step": 5350 }, { "epoch": 0.6613864745848982, "grad_norm": 0.365234375, "learning_rate": 9.92799987358184e-07, "loss": 0.0623, "step": 5360 }, { "epoch": 0.6626204045747954, "grad_norm": 0.291015625, "learning_rate": 9.927577990147271e-07, "loss": 0.0742, "step": 5370 }, { "epoch": 0.6638543345646927, "grad_norm": 0.1591796875, "learning_rate": 9.927154883327955e-07, "loss": 0.0646, "step": 5380 }, { "epoch": 0.6650882645545898, "grad_norm": 0.123046875, "learning_rate": 9.926730553228939e-07, "loss": 0.0796, "step": 5390 }, { "epoch": 0.666322194544487, "grad_norm": 0.23828125, "learning_rate": 9.92630499995557e-07, "loss": 0.0571, "step": 5400 }, { "epoch": 0.6675561245343842, "grad_norm": 0.193359375, "learning_rate": 9.925878223613507e-07, "loss": 0.0878, "step": 5410 }, { "epoch": 0.6687900545242814, "grad_norm": 0.2333984375, "learning_rate": 9.9254502243087e-07, "loss": 0.0703, "step": 5420 }, { "epoch": 0.6700239845141787, "grad_norm": 0.255859375, "learning_rate": 9.925021002147415e-07, "loss": 0.089, "step": 5430 }, { "epoch": 0.6712579145040758, "grad_norm": 0.2236328125, "learning_rate": 9.924590557236216e-07, "loss": 0.0598, "step": 5440 }, { "epoch": 0.672491844493973, "grad_norm": 0.1826171875, "learning_rate": 9.924158889681968e-07, "loss": 0.0542, "step": 5450 }, { "epoch": 0.6737257744838703, "grad_norm": 0.30859375, "learning_rate": 9.923725999591846e-07, "loss": 0.0652, "step": 5460 }, { "epoch": 0.6749597044737674, "grad_norm": 0.1474609375, "learning_rate": 9.923291887073322e-07, "loss": 0.0606, "step": 5470 }, { "epoch": 0.6761936344636647, "grad_norm": 0.1611328125, "learning_rate": 9.922856552234174e-07, "loss": 0.0668, "step": 5480 }, { "epoch": 0.6774275644535618, "grad_norm": 0.197265625, "learning_rate": 9.922419995182486e-07, "loss": 0.0648, "step": 5490 }, { "epoch": 0.678661494443459, "grad_norm": 0.494140625, "learning_rate": 9.921982216026644e-07, "loss": 0.0634, "step": 5500 }, { "epoch": 0.6798954244333563, "grad_norm": 0.189453125, "learning_rate": 9.921543214875331e-07, "loss": 0.0876, "step": 5510 }, { "epoch": 0.6811293544232534, "grad_norm": 0.2294921875, "learning_rate": 9.921102991837547e-07, "loss": 0.0946, "step": 5520 }, { "epoch": 0.6823632844131506, "grad_norm": 0.208984375, "learning_rate": 9.920661547022583e-07, "loss": 0.0776, "step": 5530 }, { "epoch": 0.6835972144030478, "grad_norm": 0.271484375, "learning_rate": 9.92021888054004e-07, "loss": 0.0673, "step": 5540 }, { "epoch": 0.684831144392945, "grad_norm": 0.140625, "learning_rate": 9.919774992499819e-07, "loss": 0.0596, "step": 5550 }, { "epoch": 0.6860650743828423, "grad_norm": 0.26171875, "learning_rate": 9.919329883012123e-07, "loss": 0.0781, "step": 5560 }, { "epoch": 0.6872990043727394, "grad_norm": 0.1494140625, "learning_rate": 9.918883552187467e-07, "loss": 0.077, "step": 5570 }, { "epoch": 0.6885329343626366, "grad_norm": 0.291015625, "learning_rate": 9.918436000136656e-07, "loss": 0.0792, "step": 5580 }, { "epoch": 0.6897668643525338, "grad_norm": 0.224609375, "learning_rate": 9.91798722697081e-07, "loss": 0.0707, "step": 5590 }, { "epoch": 0.691000794342431, "grad_norm": 0.2041015625, "learning_rate": 9.917537232801343e-07, "loss": 0.0708, "step": 5600 }, { "epoch": 0.6922347243323282, "grad_norm": 0.349609375, "learning_rate": 9.91708601773998e-07, "loss": 0.0856, "step": 5610 }, { "epoch": 0.6934686543222254, "grad_norm": 0.271484375, "learning_rate": 9.916633581898745e-07, "loss": 0.0682, "step": 5620 }, { "epoch": 0.6947025843121226, "grad_norm": 0.21484375, "learning_rate": 9.916179925389965e-07, "loss": 0.0771, "step": 5630 }, { "epoch": 0.6959365143020197, "grad_norm": 0.19140625, "learning_rate": 9.91572504832627e-07, "loss": 0.0661, "step": 5640 }, { "epoch": 0.697170444291917, "grad_norm": 0.2060546875, "learning_rate": 9.915268950820595e-07, "loss": 0.0496, "step": 5650 }, { "epoch": 0.6984043742818142, "grad_norm": 0.27734375, "learning_rate": 9.914811632986174e-07, "loss": 0.0765, "step": 5660 }, { "epoch": 0.6996383042717114, "grad_norm": 0.193359375, "learning_rate": 9.91435309493655e-07, "loss": 0.0742, "step": 5670 }, { "epoch": 0.7008722342616086, "grad_norm": 0.2236328125, "learning_rate": 9.913893336785565e-07, "loss": 0.0701, "step": 5680 }, { "epoch": 0.7021061642515057, "grad_norm": 0.201171875, "learning_rate": 9.913432358647363e-07, "loss": 0.0612, "step": 5690 }, { "epoch": 0.703340094241403, "grad_norm": 0.1884765625, "learning_rate": 9.912970160636394e-07, "loss": 0.0505, "step": 5700 }, { "epoch": 0.7045740242313002, "grad_norm": 0.1845703125, "learning_rate": 9.91250674286741e-07, "loss": 0.0723, "step": 5710 }, { "epoch": 0.7058079542211974, "grad_norm": 0.265625, "learning_rate": 9.912042105455461e-07, "loss": 0.071, "step": 5720 }, { "epoch": 0.7070418842110946, "grad_norm": 0.19140625, "learning_rate": 9.91157624851591e-07, "loss": 0.0579, "step": 5730 }, { "epoch": 0.7082758142009917, "grad_norm": 0.169921875, "learning_rate": 9.91110917216441e-07, "loss": 0.049, "step": 5740 }, { "epoch": 0.709509744190889, "grad_norm": 0.2578125, "learning_rate": 9.91064087651693e-07, "loss": 0.0689, "step": 5750 }, { "epoch": 0.7107436741807862, "grad_norm": 0.357421875, "learning_rate": 9.91017136168973e-07, "loss": 0.0687, "step": 5760 }, { "epoch": 0.7119776041706833, "grad_norm": 0.3515625, "learning_rate": 9.90970062779938e-07, "loss": 0.0795, "step": 5770 }, { "epoch": 0.7132115341605806, "grad_norm": 0.1494140625, "learning_rate": 9.90922867496275e-07, "loss": 0.0673, "step": 5780 }, { "epoch": 0.7144454641504777, "grad_norm": 0.1494140625, "learning_rate": 9.908755503297018e-07, "loss": 0.0769, "step": 5790 }, { "epoch": 0.715679394140375, "grad_norm": 0.2294921875, "learning_rate": 9.908281112919652e-07, "loss": 0.0734, "step": 5800 }, { "epoch": 0.7169133241302722, "grad_norm": 0.1435546875, "learning_rate": 9.907805503948434e-07, "loss": 0.0668, "step": 5810 }, { "epoch": 0.7181472541201693, "grad_norm": 0.267578125, "learning_rate": 9.907328676501444e-07, "loss": 0.0735, "step": 5820 }, { "epoch": 0.7193811841100666, "grad_norm": 0.34765625, "learning_rate": 9.906850630697066e-07, "loss": 0.079, "step": 5830 }, { "epoch": 0.7206151140999637, "grad_norm": 0.216796875, "learning_rate": 9.906371366653987e-07, "loss": 0.0648, "step": 5840 }, { "epoch": 0.721849044089861, "grad_norm": 0.171875, "learning_rate": 9.905890884491194e-07, "loss": 0.0629, "step": 5850 }, { "epoch": 0.7230829740797582, "grad_norm": 0.1962890625, "learning_rate": 9.90540918432798e-07, "loss": 0.0761, "step": 5860 }, { "epoch": 0.7243169040696553, "grad_norm": 0.1845703125, "learning_rate": 9.904926266283932e-07, "loss": 0.0752, "step": 5870 }, { "epoch": 0.7255508340595526, "grad_norm": 0.1767578125, "learning_rate": 9.904442130478953e-07, "loss": 0.0639, "step": 5880 }, { "epoch": 0.7267847640494497, "grad_norm": 0.2001953125, "learning_rate": 9.903956777033238e-07, "loss": 0.0643, "step": 5890 }, { "epoch": 0.7280186940393469, "grad_norm": 0.150390625, "learning_rate": 9.903470206067286e-07, "loss": 0.0593, "step": 5900 }, { "epoch": 0.7292526240292442, "grad_norm": 0.2578125, "learning_rate": 9.902982417701898e-07, "loss": 0.0793, "step": 5910 }, { "epoch": 0.7304865540191413, "grad_norm": 0.212890625, "learning_rate": 9.902493412058184e-07, "loss": 0.0752, "step": 5920 }, { "epoch": 0.7317204840090386, "grad_norm": 0.3359375, "learning_rate": 9.902003189257547e-07, "loss": 0.0801, "step": 5930 }, { "epoch": 0.7329544139989357, "grad_norm": 0.1884765625, "learning_rate": 9.901511749421695e-07, "loss": 0.0602, "step": 5940 }, { "epoch": 0.7341883439888329, "grad_norm": 0.234375, "learning_rate": 9.901019092672644e-07, "loss": 0.0673, "step": 5950 }, { "epoch": 0.7354222739787302, "grad_norm": 0.2373046875, "learning_rate": 9.900525219132702e-07, "loss": 0.0782, "step": 5960 }, { "epoch": 0.7366562039686273, "grad_norm": 0.2197265625, "learning_rate": 9.900030128924491e-07, "loss": 0.0639, "step": 5970 }, { "epoch": 0.7378901339585245, "grad_norm": 0.208984375, "learning_rate": 9.899533822170921e-07, "loss": 0.0805, "step": 5980 }, { "epoch": 0.7391240639484217, "grad_norm": 0.2041015625, "learning_rate": 9.899036298995216e-07, "loss": 0.0531, "step": 5990 }, { "epoch": 0.7403579939383189, "grad_norm": 0.20703125, "learning_rate": 9.8985375595209e-07, "loss": 0.0484, "step": 6000 }, { "epoch": 0.7403579939383189, "eval_exact_match": 0.6579754601226994, "eval_has_answer_correct": 0.5831485587583148, "eval_no_answer_correct": 0.8258706467661692, "step": 6000 }, { "epoch": 0.7415919239282162, "grad_norm": 0.19140625, "learning_rate": 9.898037603871789e-07, "loss": 0.0537, "step": 6010 }, { "epoch": 0.7428258539181133, "grad_norm": 0.1494140625, "learning_rate": 9.897536432172014e-07, "loss": 0.071, "step": 6020 }, { "epoch": 0.7440597839080105, "grad_norm": 0.12158203125, "learning_rate": 9.897034044546001e-07, "loss": 0.0746, "step": 6030 }, { "epoch": 0.7452937138979078, "grad_norm": 0.134765625, "learning_rate": 9.896530441118481e-07, "loss": 0.077, "step": 6040 }, { "epoch": 0.7465276438878049, "grad_norm": 0.2734375, "learning_rate": 9.896025622014484e-07, "loss": 0.0677, "step": 6050 }, { "epoch": 0.7477615738777021, "grad_norm": 0.2119140625, "learning_rate": 9.89551958735934e-07, "loss": 0.0633, "step": 6060 }, { "epoch": 0.7489955038675993, "grad_norm": 0.2119140625, "learning_rate": 9.89501233727869e-07, "loss": 0.08, "step": 6070 }, { "epoch": 0.7502294338574965, "grad_norm": 0.197265625, "learning_rate": 9.894503871898465e-07, "loss": 0.0652, "step": 6080 }, { "epoch": 0.7514633638473938, "grad_norm": 0.2119140625, "learning_rate": 9.893994191344905e-07, "loss": 0.0711, "step": 6090 }, { "epoch": 0.7526972938372909, "grad_norm": 0.2578125, "learning_rate": 9.89348329574455e-07, "loss": 0.0606, "step": 6100 }, { "epoch": 0.7539312238271881, "grad_norm": 0.216796875, "learning_rate": 9.892971185224244e-07, "loss": 0.0745, "step": 6110 }, { "epoch": 0.7551651538170853, "grad_norm": 0.283203125, "learning_rate": 9.892457859911126e-07, "loss": 0.0675, "step": 6120 }, { "epoch": 0.7563990838069825, "grad_norm": 0.1396484375, "learning_rate": 9.891943319932644e-07, "loss": 0.0843, "step": 6130 }, { "epoch": 0.7576330137968798, "grad_norm": 0.2060546875, "learning_rate": 9.891427565416543e-07, "loss": 0.0683, "step": 6140 }, { "epoch": 0.7588669437867769, "grad_norm": 0.17578125, "learning_rate": 9.89091059649087e-07, "loss": 0.0632, "step": 6150 }, { "epoch": 0.7601008737766741, "grad_norm": 0.1923828125, "learning_rate": 9.890392413283978e-07, "loss": 0.057, "step": 6160 }, { "epoch": 0.7613348037665713, "grad_norm": 0.208984375, "learning_rate": 9.889873015924512e-07, "loss": 0.0688, "step": 6170 }, { "epoch": 0.7625687337564685, "grad_norm": 0.2216796875, "learning_rate": 9.889352404541432e-07, "loss": 0.0494, "step": 6180 }, { "epoch": 0.7638026637463657, "grad_norm": 0.1484375, "learning_rate": 9.888830579263985e-07, "loss": 0.0636, "step": 6190 }, { "epoch": 0.7650365937362629, "grad_norm": 0.36328125, "learning_rate": 9.88830754022173e-07, "loss": 0.0585, "step": 6200 }, { "epoch": 0.7662705237261601, "grad_norm": 0.2890625, "learning_rate": 9.88778328754452e-07, "loss": 0.072, "step": 6210 }, { "epoch": 0.7675044537160572, "grad_norm": 0.34765625, "learning_rate": 9.887257821362517e-07, "loss": 0.0862, "step": 6220 }, { "epoch": 0.7687383837059545, "grad_norm": 0.314453125, "learning_rate": 9.88673114180618e-07, "loss": 0.0878, "step": 6230 }, { "epoch": 0.7699723136958517, "grad_norm": 0.310546875, "learning_rate": 9.886203249006264e-07, "loss": 0.0635, "step": 6240 }, { "epoch": 0.7712062436857489, "grad_norm": 0.19140625, "learning_rate": 9.885674143093836e-07, "loss": 0.0604, "step": 6250 }, { "epoch": 0.7724401736756461, "grad_norm": 0.404296875, "learning_rate": 9.885143824200257e-07, "loss": 0.0634, "step": 6260 }, { "epoch": 0.7736741036655432, "grad_norm": 0.259765625, "learning_rate": 9.88461229245719e-07, "loss": 0.0811, "step": 6270 }, { "epoch": 0.7749080336554405, "grad_norm": 0.318359375, "learning_rate": 9.884079547996603e-07, "loss": 0.075, "step": 6280 }, { "epoch": 0.7761419636453377, "grad_norm": 0.330078125, "learning_rate": 9.883545590950758e-07, "loss": 0.0728, "step": 6290 }, { "epoch": 0.7773758936352348, "grad_norm": 0.23828125, "learning_rate": 9.883010421452222e-07, "loss": 0.0681, "step": 6300 }, { "epoch": 0.7786098236251321, "grad_norm": 0.1650390625, "learning_rate": 9.882474039633867e-07, "loss": 0.0713, "step": 6310 }, { "epoch": 0.7798437536150292, "grad_norm": 0.1611328125, "learning_rate": 9.88193644562886e-07, "loss": 0.0674, "step": 6320 }, { "epoch": 0.7810776836049265, "grad_norm": 0.17578125, "learning_rate": 9.88139763957067e-07, "loss": 0.0802, "step": 6330 }, { "epoch": 0.7823116135948237, "grad_norm": 0.234375, "learning_rate": 9.880857621593071e-07, "loss": 0.0642, "step": 6340 }, { "epoch": 0.7835455435847208, "grad_norm": 0.115234375, "learning_rate": 9.880316391830133e-07, "loss": 0.0577, "step": 6350 }, { "epoch": 0.7847794735746181, "grad_norm": 0.1572265625, "learning_rate": 9.87977395041623e-07, "loss": 0.0605, "step": 6360 }, { "epoch": 0.7860134035645152, "grad_norm": 0.283203125, "learning_rate": 9.879230297486034e-07, "loss": 0.0911, "step": 6370 }, { "epoch": 0.7872473335544125, "grad_norm": 0.203125, "learning_rate": 9.878685433174519e-07, "loss": 0.0534, "step": 6380 }, { "epoch": 0.7884812635443097, "grad_norm": 0.2236328125, "learning_rate": 9.878139357616962e-07, "loss": 0.0648, "step": 6390 }, { "epoch": 0.7897151935342068, "grad_norm": 0.169921875, "learning_rate": 9.877592070948938e-07, "loss": 0.0689, "step": 6400 }, { "epoch": 0.7909491235241041, "grad_norm": 0.1669921875, "learning_rate": 9.87704357330632e-07, "loss": 0.0748, "step": 6410 }, { "epoch": 0.7921830535140012, "grad_norm": 0.298828125, "learning_rate": 9.876493864825293e-07, "loss": 0.0812, "step": 6420 }, { "epoch": 0.7934169835038984, "grad_norm": 0.28125, "learning_rate": 9.87594294564233e-07, "loss": 0.0715, "step": 6430 }, { "epoch": 0.7946509134937957, "grad_norm": 0.29296875, "learning_rate": 9.875390815894207e-07, "loss": 0.0671, "step": 6440 }, { "epoch": 0.7958848434836928, "grad_norm": 0.17578125, "learning_rate": 9.874837475718007e-07, "loss": 0.0642, "step": 6450 }, { "epoch": 0.7971187734735901, "grad_norm": 0.146484375, "learning_rate": 9.874282925251109e-07, "loss": 0.0614, "step": 6460 }, { "epoch": 0.7983527034634872, "grad_norm": 0.3046875, "learning_rate": 9.87372716463119e-07, "loss": 0.0703, "step": 6470 }, { "epoch": 0.7995866334533844, "grad_norm": 0.15234375, "learning_rate": 9.873170193996234e-07, "loss": 0.0568, "step": 6480 }, { "epoch": 0.8008205634432817, "grad_norm": 0.283203125, "learning_rate": 9.87261201348452e-07, "loss": 0.082, "step": 6490 }, { "epoch": 0.8020544934331788, "grad_norm": 0.150390625, "learning_rate": 9.872052623234631e-07, "loss": 0.0701, "step": 6500 }, { "epoch": 0.803288423423076, "grad_norm": 0.13671875, "learning_rate": 9.871492023385445e-07, "loss": 0.0654, "step": 6510 }, { "epoch": 0.8045223534129732, "grad_norm": 0.24609375, "learning_rate": 9.870930214076145e-07, "loss": 0.0749, "step": 6520 }, { "epoch": 0.8057562834028704, "grad_norm": 0.1689453125, "learning_rate": 9.870367195446214e-07, "loss": 0.0728, "step": 6530 }, { "epoch": 0.8069902133927677, "grad_norm": 0.375, "learning_rate": 9.869802967635436e-07, "loss": 0.0776, "step": 6540 }, { "epoch": 0.8082241433826648, "grad_norm": 0.27734375, "learning_rate": 9.869237530783892e-07, "loss": 0.0595, "step": 6550 }, { "epoch": 0.809458073372562, "grad_norm": 0.265625, "learning_rate": 9.868670885031963e-07, "loss": 0.0732, "step": 6560 }, { "epoch": 0.8106920033624592, "grad_norm": 0.298828125, "learning_rate": 9.868103030520333e-07, "loss": 0.081, "step": 6570 }, { "epoch": 0.8119259333523564, "grad_norm": 0.205078125, "learning_rate": 9.867533967389985e-07, "loss": 0.0857, "step": 6580 }, { "epoch": 0.8131598633422537, "grad_norm": 0.18359375, "learning_rate": 9.866963695782203e-07, "loss": 0.0607, "step": 6590 }, { "epoch": 0.8143937933321508, "grad_norm": 0.29296875, "learning_rate": 9.866392215838571e-07, "loss": 0.0577, "step": 6600 }, { "epoch": 0.815627723322048, "grad_norm": 0.1591796875, "learning_rate": 9.86581952770097e-07, "loss": 0.0567, "step": 6610 }, { "epoch": 0.8168616533119453, "grad_norm": 0.1767578125, "learning_rate": 9.86524563151158e-07, "loss": 0.0598, "step": 6620 }, { "epoch": 0.8180955833018424, "grad_norm": 0.1767578125, "learning_rate": 9.86467052741289e-07, "loss": 0.0721, "step": 6630 }, { "epoch": 0.8193295132917396, "grad_norm": 0.30078125, "learning_rate": 9.86409421554768e-07, "loss": 0.0675, "step": 6640 }, { "epoch": 0.8205634432816368, "grad_norm": 0.2431640625, "learning_rate": 9.863516696059033e-07, "loss": 0.0491, "step": 6650 }, { "epoch": 0.821797373271534, "grad_norm": 0.2119140625, "learning_rate": 9.862937969090331e-07, "loss": 0.0723, "step": 6660 }, { "epoch": 0.8230313032614313, "grad_norm": 0.185546875, "learning_rate": 9.86235803478526e-07, "loss": 0.0963, "step": 6670 }, { "epoch": 0.8242652332513284, "grad_norm": 0.220703125, "learning_rate": 9.861776893287795e-07, "loss": 0.0768, "step": 6680 }, { "epoch": 0.8254991632412256, "grad_norm": 0.19140625, "learning_rate": 9.861194544742222e-07, "loss": 0.0778, "step": 6690 }, { "epoch": 0.8267330932311228, "grad_norm": 0.31640625, "learning_rate": 9.860610989293123e-07, "loss": 0.0544, "step": 6700 }, { "epoch": 0.82796702322102, "grad_norm": 0.2265625, "learning_rate": 9.860026227085377e-07, "loss": 0.0658, "step": 6710 }, { "epoch": 0.8292009532109172, "grad_norm": 0.1298828125, "learning_rate": 9.859440258264167e-07, "loss": 0.0497, "step": 6720 }, { "epoch": 0.8304348832008144, "grad_norm": 0.1611328125, "learning_rate": 9.858853082974973e-07, "loss": 0.0893, "step": 6730 }, { "epoch": 0.8316688131907116, "grad_norm": 0.17578125, "learning_rate": 9.85826470136357e-07, "loss": 0.0645, "step": 6740 }, { "epoch": 0.8329027431806088, "grad_norm": 0.1650390625, "learning_rate": 9.857675113576045e-07, "loss": 0.0747, "step": 6750 }, { "epoch": 0.834136673170506, "grad_norm": 0.3671875, "learning_rate": 9.85708431975877e-07, "loss": 0.0708, "step": 6760 }, { "epoch": 0.8353706031604032, "grad_norm": 0.193359375, "learning_rate": 9.856492320058428e-07, "loss": 0.0785, "step": 6770 }, { "epoch": 0.8366045331503004, "grad_norm": 0.1806640625, "learning_rate": 9.855899114621995e-07, "loss": 0.0691, "step": 6780 }, { "epoch": 0.8378384631401976, "grad_norm": 0.44921875, "learning_rate": 9.855304703596747e-07, "loss": 0.0698, "step": 6790 }, { "epoch": 0.8390723931300947, "grad_norm": 0.314453125, "learning_rate": 9.85470908713026e-07, "loss": 0.0636, "step": 6800 }, { "epoch": 0.840306323119992, "grad_norm": 0.30859375, "learning_rate": 9.85411226537041e-07, "loss": 0.0795, "step": 6810 }, { "epoch": 0.8415402531098892, "grad_norm": 0.1884765625, "learning_rate": 9.853514238465372e-07, "loss": 0.0654, "step": 6820 }, { "epoch": 0.8427741830997864, "grad_norm": 0.1396484375, "learning_rate": 9.852915006563622e-07, "loss": 0.0644, "step": 6830 }, { "epoch": 0.8440081130896836, "grad_norm": 0.234375, "learning_rate": 9.852314569813928e-07, "loss": 0.0753, "step": 6840 }, { "epoch": 0.8452420430795807, "grad_norm": 0.2294921875, "learning_rate": 9.851712928365369e-07, "loss": 0.0655, "step": 6850 }, { "epoch": 0.846475973069478, "grad_norm": 0.2021484375, "learning_rate": 9.851110082367311e-07, "loss": 0.0678, "step": 6860 }, { "epoch": 0.8477099030593752, "grad_norm": 0.2265625, "learning_rate": 9.850506031969425e-07, "loss": 0.0738, "step": 6870 }, { "epoch": 0.8489438330492723, "grad_norm": 0.16796875, "learning_rate": 9.849900777321683e-07, "loss": 0.0836, "step": 6880 }, { "epoch": 0.8501777630391696, "grad_norm": 0.22265625, "learning_rate": 9.84929431857435e-07, "loss": 0.0537, "step": 6890 }, { "epoch": 0.8514116930290667, "grad_norm": 0.21875, "learning_rate": 9.848686655877998e-07, "loss": 0.0622, "step": 6900 }, { "epoch": 0.852645623018964, "grad_norm": 0.2216796875, "learning_rate": 9.84807778938349e-07, "loss": 0.0759, "step": 6910 }, { "epoch": 0.8538795530088612, "grad_norm": 0.1806640625, "learning_rate": 9.847467719241991e-07, "loss": 0.063, "step": 6920 }, { "epoch": 0.8551134829987583, "grad_norm": 0.251953125, "learning_rate": 9.846856445604966e-07, "loss": 0.0642, "step": 6930 }, { "epoch": 0.8563474129886556, "grad_norm": 0.1640625, "learning_rate": 9.846243968624174e-07, "loss": 0.0687, "step": 6940 }, { "epoch": 0.8575813429785527, "grad_norm": 0.205078125, "learning_rate": 9.845630288451684e-07, "loss": 0.057, "step": 6950 }, { "epoch": 0.85881527296845, "grad_norm": 0.291015625, "learning_rate": 9.84501540523985e-07, "loss": 0.0763, "step": 6960 }, { "epoch": 0.8600492029583472, "grad_norm": 0.20703125, "learning_rate": 9.844399319141336e-07, "loss": 0.0582, "step": 6970 }, { "epoch": 0.8612831329482443, "grad_norm": 0.130859375, "learning_rate": 9.843782030309094e-07, "loss": 0.0587, "step": 6980 }, { "epoch": 0.8625170629381416, "grad_norm": 0.1357421875, "learning_rate": 9.843163538896385e-07, "loss": 0.0602, "step": 6990 }, { "epoch": 0.8637509929280387, "grad_norm": 0.2734375, "learning_rate": 9.842543845056759e-07, "loss": 0.0552, "step": 7000 }, { "epoch": 0.8637509929280387, "eval_exact_match": 0.6533742331288344, "eval_has_answer_correct": 0.5742793791574279, "eval_no_answer_correct": 0.8308457711442786, "step": 7000 }, { "epoch": 0.8649849229179359, "grad_norm": 0.177734375, "learning_rate": 9.841922948944073e-07, "loss": 0.0768, "step": 7010 }, { "epoch": 0.8662188529078332, "grad_norm": 0.375, "learning_rate": 9.841300850712478e-07, "loss": 0.0707, "step": 7020 }, { "epoch": 0.8674527828977303, "grad_norm": 0.19140625, "learning_rate": 9.840677550516424e-07, "loss": 0.0801, "step": 7030 }, { "epoch": 0.8686867128876276, "grad_norm": 0.337890625, "learning_rate": 9.840053048510659e-07, "loss": 0.0617, "step": 7040 }, { "epoch": 0.8699206428775247, "grad_norm": 0.283203125, "learning_rate": 9.839427344850232e-07, "loss": 0.0646, "step": 7050 }, { "epoch": 0.8711545728674219, "grad_norm": 0.2734375, "learning_rate": 9.838800439690484e-07, "loss": 0.0856, "step": 7060 }, { "epoch": 0.8723885028573192, "grad_norm": 0.3125, "learning_rate": 9.838172333187063e-07, "loss": 0.0801, "step": 7070 }, { "epoch": 0.8736224328472163, "grad_norm": 0.271484375, "learning_rate": 9.837543025495908e-07, "loss": 0.0824, "step": 7080 }, { "epoch": 0.8748563628371135, "grad_norm": 0.140625, "learning_rate": 9.836912516773262e-07, "loss": 0.0728, "step": 7090 }, { "epoch": 0.8760902928270107, "grad_norm": 0.271484375, "learning_rate": 9.836280807175663e-07, "loss": 0.0647, "step": 7100 }, { "epoch": 0.8773242228169079, "grad_norm": 0.205078125, "learning_rate": 9.835647896859945e-07, "loss": 0.0751, "step": 7110 }, { "epoch": 0.8785581528068052, "grad_norm": 0.205078125, "learning_rate": 9.83501378598324e-07, "loss": 0.064, "step": 7120 }, { "epoch": 0.8797920827967023, "grad_norm": 0.283203125, "learning_rate": 9.83437847470299e-07, "loss": 0.0532, "step": 7130 }, { "epoch": 0.8810260127865995, "grad_norm": 0.1611328125, "learning_rate": 9.833741963176917e-07, "loss": 0.0531, "step": 7140 }, { "epoch": 0.8822599427764967, "grad_norm": 0.2890625, "learning_rate": 9.833104251563055e-07, "loss": 0.0695, "step": 7150 }, { "epoch": 0.8834938727663939, "grad_norm": 0.1767578125, "learning_rate": 9.832465340019727e-07, "loss": 0.0607, "step": 7160 }, { "epoch": 0.8847278027562911, "grad_norm": 0.169921875, "learning_rate": 9.83182522870556e-07, "loss": 0.0725, "step": 7170 }, { "epoch": 0.8859617327461883, "grad_norm": 0.2451171875, "learning_rate": 9.831183917779475e-07, "loss": 0.0663, "step": 7180 }, { "epoch": 0.8871956627360855, "grad_norm": 0.193359375, "learning_rate": 9.830541407400692e-07, "loss": 0.0781, "step": 7190 }, { "epoch": 0.8884295927259828, "grad_norm": 0.3203125, "learning_rate": 9.829897697728729e-07, "loss": 0.0617, "step": 7200 }, { "epoch": 0.8896635227158799, "grad_norm": 0.3046875, "learning_rate": 9.829252788923403e-07, "loss": 0.0594, "step": 7210 }, { "epoch": 0.8908974527057771, "grad_norm": 0.50390625, "learning_rate": 9.828606681144826e-07, "loss": 0.0639, "step": 7220 }, { "epoch": 0.8921313826956743, "grad_norm": 0.1552734375, "learning_rate": 9.827959374553411e-07, "loss": 0.0723, "step": 7230 }, { "epoch": 0.8933653126855715, "grad_norm": 0.2060546875, "learning_rate": 9.827310869309864e-07, "loss": 0.0582, "step": 7240 }, { "epoch": 0.8945992426754688, "grad_norm": 0.1640625, "learning_rate": 9.826661165575195e-07, "loss": 0.0632, "step": 7250 }, { "epoch": 0.8958331726653659, "grad_norm": 0.1484375, "learning_rate": 9.826010263510704e-07, "loss": 0.0763, "step": 7260 }, { "epoch": 0.8970671026552631, "grad_norm": 0.2158203125, "learning_rate": 9.825358163277994e-07, "loss": 0.0687, "step": 7270 }, { "epoch": 0.8983010326451603, "grad_norm": 0.11083984375, "learning_rate": 9.824704865038967e-07, "loss": 0.0789, "step": 7280 }, { "epoch": 0.8995349626350575, "grad_norm": 0.19921875, "learning_rate": 9.824050368955814e-07, "loss": 0.0686, "step": 7290 }, { "epoch": 0.9007688926249547, "grad_norm": 0.1845703125, "learning_rate": 9.823394675191033e-07, "loss": 0.0777, "step": 7300 }, { "epoch": 0.9020028226148519, "grad_norm": 0.1328125, "learning_rate": 9.822737783907412e-07, "loss": 0.0603, "step": 7310 }, { "epoch": 0.9032367526047491, "grad_norm": 0.359375, "learning_rate": 9.822079695268042e-07, "loss": 0.0604, "step": 7320 }, { "epoch": 0.9044706825946462, "grad_norm": 0.2197265625, "learning_rate": 9.821420409436306e-07, "loss": 0.0589, "step": 7330 }, { "epoch": 0.9057046125845435, "grad_norm": 0.1298828125, "learning_rate": 9.82075992657589e-07, "loss": 0.0644, "step": 7340 }, { "epoch": 0.9069385425744407, "grad_norm": 0.240234375, "learning_rate": 9.82009824685077e-07, "loss": 0.0558, "step": 7350 }, { "epoch": 0.9081724725643379, "grad_norm": 0.291015625, "learning_rate": 9.819435370425229e-07, "loss": 0.0665, "step": 7360 }, { "epoch": 0.9094064025542351, "grad_norm": 0.2314453125, "learning_rate": 9.818771297463834e-07, "loss": 0.0686, "step": 7370 }, { "epoch": 0.9106403325441322, "grad_norm": 0.279296875, "learning_rate": 9.818106028131463e-07, "loss": 0.0709, "step": 7380 }, { "epoch": 0.9118742625340295, "grad_norm": 0.41796875, "learning_rate": 9.81743956259328e-07, "loss": 0.0634, "step": 7390 }, { "epoch": 0.9131081925239267, "grad_norm": 0.1494140625, "learning_rate": 9.816771901014755e-07, "loss": 0.0581, "step": 7400 }, { "epoch": 0.9143421225138239, "grad_norm": 0.220703125, "learning_rate": 9.816103043561648e-07, "loss": 0.0771, "step": 7410 }, { "epoch": 0.9155760525037211, "grad_norm": 0.166015625, "learning_rate": 9.815432990400016e-07, "loss": 0.0881, "step": 7420 }, { "epoch": 0.9168099824936182, "grad_norm": 0.4296875, "learning_rate": 9.814761741696218e-07, "loss": 0.0686, "step": 7430 }, { "epoch": 0.9180439124835155, "grad_norm": 0.2255859375, "learning_rate": 9.814089297616903e-07, "loss": 0.0651, "step": 7440 }, { "epoch": 0.9192778424734127, "grad_norm": 0.1455078125, "learning_rate": 9.813415658329027e-07, "loss": 0.0674, "step": 7450 }, { "epoch": 0.9205117724633098, "grad_norm": 0.421875, "learning_rate": 9.812740823999832e-07, "loss": 0.0715, "step": 7460 }, { "epoch": 0.9217457024532071, "grad_norm": 0.15625, "learning_rate": 9.812064794796863e-07, "loss": 0.0632, "step": 7470 }, { "epoch": 0.9229796324431042, "grad_norm": 0.318359375, "learning_rate": 9.811387570887958e-07, "loss": 0.0808, "step": 7480 }, { "epoch": 0.9242135624330015, "grad_norm": 0.298828125, "learning_rate": 9.810709152441256e-07, "loss": 0.0946, "step": 7490 }, { "epoch": 0.9254474924228987, "grad_norm": 0.2177734375, "learning_rate": 9.810029539625187e-07, "loss": 0.0674, "step": 7500 }, { "epoch": 0.9266814224127958, "grad_norm": 0.146484375, "learning_rate": 9.809348732608484e-07, "loss": 0.0739, "step": 7510 }, { "epoch": 0.9279153524026931, "grad_norm": 0.345703125, "learning_rate": 9.80866673156017e-07, "loss": 0.0641, "step": 7520 }, { "epoch": 0.9291492823925902, "grad_norm": 0.150390625, "learning_rate": 9.80798353664957e-07, "loss": 0.0764, "step": 7530 }, { "epoch": 0.9303832123824874, "grad_norm": 0.1298828125, "learning_rate": 9.8072991480463e-07, "loss": 0.058, "step": 7540 }, { "epoch": 0.9316171423723847, "grad_norm": 0.1826171875, "learning_rate": 9.806613565920277e-07, "loss": 0.0652, "step": 7550 }, { "epoch": 0.9328510723622818, "grad_norm": 0.291015625, "learning_rate": 9.805926790441713e-07, "loss": 0.078, "step": 7560 }, { "epoch": 0.9340850023521791, "grad_norm": 0.267578125, "learning_rate": 9.805238821781115e-07, "loss": 0.0662, "step": 7570 }, { "epoch": 0.9353189323420762, "grad_norm": 0.310546875, "learning_rate": 9.804549660109285e-07, "loss": 0.0697, "step": 7580 }, { "epoch": 0.9365528623319734, "grad_norm": 0.271484375, "learning_rate": 9.803859305597329e-07, "loss": 0.0702, "step": 7590 }, { "epoch": 0.9377867923218707, "grad_norm": 0.1884765625, "learning_rate": 9.803167758416637e-07, "loss": 0.06, "step": 7600 }, { "epoch": 0.9390207223117678, "grad_norm": 0.2177734375, "learning_rate": 9.802475018738907e-07, "loss": 0.0769, "step": 7610 }, { "epoch": 0.940254652301665, "grad_norm": 0.31640625, "learning_rate": 9.801781086736122e-07, "loss": 0.0744, "step": 7620 }, { "epoch": 0.9414885822915622, "grad_norm": 0.2275390625, "learning_rate": 9.801085962580573e-07, "loss": 0.067, "step": 7630 }, { "epoch": 0.9427225122814594, "grad_norm": 0.25390625, "learning_rate": 9.800389646444835e-07, "loss": 0.0713, "step": 7640 }, { "epoch": 0.9439564422713567, "grad_norm": 0.216796875, "learning_rate": 9.799692138501788e-07, "loss": 0.0761, "step": 7650 }, { "epoch": 0.9451903722612538, "grad_norm": 0.24609375, "learning_rate": 9.798993438924602e-07, "loss": 0.057, "step": 7660 }, { "epoch": 0.946424302251151, "grad_norm": 0.369140625, "learning_rate": 9.798293547886746e-07, "loss": 0.0678, "step": 7670 }, { "epoch": 0.9476582322410482, "grad_norm": 0.203125, "learning_rate": 9.797592465561986e-07, "loss": 0.0679, "step": 7680 }, { "epoch": 0.9488921622309454, "grad_norm": 0.21484375, "learning_rate": 9.79689019212438e-07, "loss": 0.0782, "step": 7690 }, { "epoch": 0.9501260922208427, "grad_norm": 0.2578125, "learning_rate": 9.796186727748283e-07, "loss": 0.0419, "step": 7700 }, { "epoch": 0.9513600222107398, "grad_norm": 0.251953125, "learning_rate": 9.795482072608349e-07, "loss": 0.0617, "step": 7710 }, { "epoch": 0.952593952200637, "grad_norm": 0.181640625, "learning_rate": 9.79477622687952e-07, "loss": 0.0638, "step": 7720 }, { "epoch": 0.9538278821905342, "grad_norm": 0.2470703125, "learning_rate": 9.794069190737043e-07, "loss": 0.0641, "step": 7730 }, { "epoch": 0.9550618121804314, "grad_norm": 0.2138671875, "learning_rate": 9.793360964356453e-07, "loss": 0.0778, "step": 7740 }, { "epoch": 0.9562957421703286, "grad_norm": 0.142578125, "learning_rate": 9.792651547913586e-07, "loss": 0.0613, "step": 7750 }, { "epoch": 0.9575296721602258, "grad_norm": 0.2080078125, "learning_rate": 9.79194094158457e-07, "loss": 0.0748, "step": 7760 }, { "epoch": 0.958763602150123, "grad_norm": 0.1484375, "learning_rate": 9.791229145545832e-07, "loss": 0.0742, "step": 7770 }, { "epoch": 0.9599975321400203, "grad_norm": 0.2216796875, "learning_rate": 9.790516159974084e-07, "loss": 0.0578, "step": 7780 }, { "epoch": 0.9612314621299174, "grad_norm": 0.119140625, "learning_rate": 9.78980198504635e-07, "loss": 0.0662, "step": 7790 }, { "epoch": 0.9624653921198146, "grad_norm": 0.232421875, "learning_rate": 9.789086620939935e-07, "loss": 0.0652, "step": 7800 }, { "epoch": 0.9636993221097118, "grad_norm": 0.2021484375, "learning_rate": 9.788370067832446e-07, "loss": 0.0677, "step": 7810 }, { "epoch": 0.964933252099609, "grad_norm": 0.13671875, "learning_rate": 9.787652325901786e-07, "loss": 0.0678, "step": 7820 }, { "epoch": 0.9661671820895062, "grad_norm": 0.11669921875, "learning_rate": 9.786933395326147e-07, "loss": 0.0779, "step": 7830 }, { "epoch": 0.9674011120794034, "grad_norm": 0.296875, "learning_rate": 9.78621327628402e-07, "loss": 0.0764, "step": 7840 }, { "epoch": 0.9686350420693006, "grad_norm": 0.1982421875, "learning_rate": 9.785491968954197e-07, "loss": 0.0574, "step": 7850 }, { "epoch": 0.9698689720591978, "grad_norm": 0.193359375, "learning_rate": 9.784769473515754e-07, "loss": 0.0547, "step": 7860 }, { "epoch": 0.971102902049095, "grad_norm": 0.2021484375, "learning_rate": 9.784045790148068e-07, "loss": 0.0645, "step": 7870 }, { "epoch": 0.9723368320389922, "grad_norm": 0.265625, "learning_rate": 9.783320919030812e-07, "loss": 0.0641, "step": 7880 }, { "epoch": 0.9735707620288894, "grad_norm": 0.35546875, "learning_rate": 9.782594860343948e-07, "loss": 0.0612, "step": 7890 }, { "epoch": 0.9748046920187866, "grad_norm": 0.2890625, "learning_rate": 9.781867614267741e-07, "loss": 0.0603, "step": 7900 }, { "epoch": 0.9760386220086837, "grad_norm": 0.12060546875, "learning_rate": 9.781139180982745e-07, "loss": 0.0558, "step": 7910 }, { "epoch": 0.977272551998581, "grad_norm": 0.2890625, "learning_rate": 9.78040956066981e-07, "loss": 0.0774, "step": 7920 }, { "epoch": 0.9785064819884782, "grad_norm": 0.291015625, "learning_rate": 9.77967875351008e-07, "loss": 0.0788, "step": 7930 }, { "epoch": 0.9797404119783754, "grad_norm": 0.2119140625, "learning_rate": 9.778946759684996e-07, "loss": 0.066, "step": 7940 }, { "epoch": 0.9809743419682726, "grad_norm": 0.16015625, "learning_rate": 9.778213579376293e-07, "loss": 0.062, "step": 7950 }, { "epoch": 0.9822082719581697, "grad_norm": 0.2236328125, "learning_rate": 9.777479212766e-07, "loss": 0.0703, "step": 7960 }, { "epoch": 0.983442201948067, "grad_norm": 0.1455078125, "learning_rate": 9.776743660036439e-07, "loss": 0.0646, "step": 7970 }, { "epoch": 0.9846761319379642, "grad_norm": 0.220703125, "learning_rate": 9.776006921370228e-07, "loss": 0.0695, "step": 7980 }, { "epoch": 0.9859100619278613, "grad_norm": 0.2109375, "learning_rate": 9.77526899695028e-07, "loss": 0.0741, "step": 7990 }, { "epoch": 0.9871439919177586, "grad_norm": 0.337890625, "learning_rate": 9.774529886959804e-07, "loss": 0.0584, "step": 8000 }, { "epoch": 0.9871439919177586, "eval_exact_match": 0.6549079754601227, "eval_has_answer_correct": 0.5698447893569845, "eval_no_answer_correct": 0.845771144278607, "step": 8000 }, { "epoch": 0.9883779219076557, "grad_norm": 0.2119140625, "learning_rate": 9.773789591582295e-07, "loss": 0.0592, "step": 8010 }, { "epoch": 0.989611851897553, "grad_norm": 0.21484375, "learning_rate": 9.773048111001554e-07, "loss": 0.0616, "step": 8020 }, { "epoch": 0.9908457818874502, "grad_norm": 0.34375, "learning_rate": 9.772305445401669e-07, "loss": 0.0685, "step": 8030 }, { "epoch": 0.9920797118773473, "grad_norm": 0.2001953125, "learning_rate": 9.771561594967023e-07, "loss": 0.0656, "step": 8040 }, { "epoch": 0.9933136418672446, "grad_norm": 0.240234375, "learning_rate": 9.770816559882295e-07, "loss": 0.0594, "step": 8050 }, { "epoch": 0.9945475718571417, "grad_norm": 0.28125, "learning_rate": 9.770070340332456e-07, "loss": 0.0771, "step": 8060 }, { "epoch": 0.995781501847039, "grad_norm": 0.2080078125, "learning_rate": 9.769322936502772e-07, "loss": 0.0757, "step": 8070 }, { "epoch": 0.9970154318369362, "grad_norm": 0.11865234375, "learning_rate": 9.768574348578806e-07, "loss": 0.0721, "step": 8080 }, { "epoch": 0.9982493618268333, "grad_norm": 0.2451171875, "learning_rate": 9.76782457674641e-07, "loss": 0.0714, "step": 8090 }, { "epoch": 0.9994832918167306, "grad_norm": 0.185546875, "learning_rate": 9.767073621191732e-07, "loss": 0.0589, "step": 8100 }, { "epoch": 1.0007172218066278, "grad_norm": 0.330078125, "learning_rate": 9.766321482101214e-07, "loss": 0.0864, "step": 8110 }, { "epoch": 1.001951151796525, "grad_norm": 0.1904296875, "learning_rate": 9.765568159661593e-07, "loss": 0.0751, "step": 8120 }, { "epoch": 1.003185081786422, "grad_norm": 0.38671875, "learning_rate": 9.764813654059898e-07, "loss": 0.0855, "step": 8130 }, { "epoch": 1.0044190117763194, "grad_norm": 0.287109375, "learning_rate": 9.764057965483451e-07, "loss": 0.0695, "step": 8140 }, { "epoch": 1.0056529417662166, "grad_norm": 0.26171875, "learning_rate": 9.763301094119872e-07, "loss": 0.0622, "step": 8150 }, { "epoch": 1.0068868717561137, "grad_norm": 0.13671875, "learning_rate": 9.762543040157069e-07, "loss": 0.0497, "step": 8160 }, { "epoch": 1.008120801746011, "grad_norm": 0.1640625, "learning_rate": 9.76178380378325e-07, "loss": 0.061, "step": 8170 }, { "epoch": 1.0093547317359082, "grad_norm": 0.2421875, "learning_rate": 9.761023385186909e-07, "loss": 0.0797, "step": 8180 }, { "epoch": 1.0105886617258053, "grad_norm": 0.2890625, "learning_rate": 9.760261784556838e-07, "loss": 0.0639, "step": 8190 }, { "epoch": 1.0118225917157024, "grad_norm": 0.23046875, "learning_rate": 9.759499002082124e-07, "loss": 0.0775, "step": 8200 }, { "epoch": 1.0130565217055998, "grad_norm": 0.19140625, "learning_rate": 9.758735037952142e-07, "loss": 0.0584, "step": 8210 }, { "epoch": 1.014290451695497, "grad_norm": 0.296875, "learning_rate": 9.757969892356569e-07, "loss": 0.0653, "step": 8220 }, { "epoch": 1.015524381685394, "grad_norm": 0.2158203125, "learning_rate": 9.757203565485364e-07, "loss": 0.0736, "step": 8230 }, { "epoch": 1.0167583116752914, "grad_norm": 0.171875, "learning_rate": 9.756436057528788e-07, "loss": 0.07, "step": 8240 }, { "epoch": 1.0179922416651885, "grad_norm": 0.18359375, "learning_rate": 9.755667368677394e-07, "loss": 0.0674, "step": 8250 }, { "epoch": 1.0192261716550857, "grad_norm": 0.228515625, "learning_rate": 9.754897499122026e-07, "loss": 0.0722, "step": 8260 }, { "epoch": 1.020460101644983, "grad_norm": 0.310546875, "learning_rate": 9.75412644905382e-07, "loss": 0.0657, "step": 8270 }, { "epoch": 1.0216940316348802, "grad_norm": 0.208984375, "learning_rate": 9.75335421866421e-07, "loss": 0.0541, "step": 8280 }, { "epoch": 1.0229279616247773, "grad_norm": 0.095703125, "learning_rate": 9.752580808144914e-07, "loss": 0.0656, "step": 8290 }, { "epoch": 1.0241618916146744, "grad_norm": 0.3359375, "learning_rate": 9.751806217687956e-07, "loss": 0.0706, "step": 8300 }, { "epoch": 1.0253958216045718, "grad_norm": 0.2294921875, "learning_rate": 9.751030447485643e-07, "loss": 0.0637, "step": 8310 }, { "epoch": 1.026629751594469, "grad_norm": 0.51171875, "learning_rate": 9.750253497730579e-07, "loss": 0.08, "step": 8320 }, { "epoch": 1.027863681584366, "grad_norm": 0.318359375, "learning_rate": 9.749475368615657e-07, "loss": 0.0765, "step": 8330 }, { "epoch": 1.0290976115742634, "grad_norm": 0.2451171875, "learning_rate": 9.748696060334068e-07, "loss": 0.0801, "step": 8340 }, { "epoch": 1.0303315415641605, "grad_norm": 0.22265625, "learning_rate": 9.74791557307929e-07, "loss": 0.0603, "step": 8350 }, { "epoch": 1.0315654715540576, "grad_norm": 0.232421875, "learning_rate": 9.747133907045102e-07, "loss": 0.0695, "step": 8360 }, { "epoch": 1.032799401543955, "grad_norm": 0.326171875, "learning_rate": 9.746351062425566e-07, "loss": 0.0592, "step": 8370 }, { "epoch": 1.0340333315338521, "grad_norm": 0.18359375, "learning_rate": 9.745567039415046e-07, "loss": 0.0854, "step": 8380 }, { "epoch": 1.0352672615237493, "grad_norm": 0.2451171875, "learning_rate": 9.744781838208187e-07, "loss": 0.0678, "step": 8390 }, { "epoch": 1.0365011915136464, "grad_norm": 0.15234375, "learning_rate": 9.743995458999938e-07, "loss": 0.0545, "step": 8400 }, { "epoch": 1.0377351215035437, "grad_norm": 0.1591796875, "learning_rate": 9.743207901985536e-07, "loss": 0.0662, "step": 8410 }, { "epoch": 1.0389690514934409, "grad_norm": 0.29296875, "learning_rate": 9.742419167360508e-07, "loss": 0.068, "step": 8420 }, { "epoch": 1.040202981483338, "grad_norm": 0.1767578125, "learning_rate": 9.741629255320678e-07, "loss": 0.0762, "step": 8430 }, { "epoch": 1.0414369114732354, "grad_norm": 0.2236328125, "learning_rate": 9.740838166062156e-07, "loss": 0.0675, "step": 8440 }, { "epoch": 1.0426708414631325, "grad_norm": 0.103515625, "learning_rate": 9.740045899781352e-07, "loss": 0.0571, "step": 8450 }, { "epoch": 1.0439047714530296, "grad_norm": 0.33984375, "learning_rate": 9.739252456674962e-07, "loss": 0.0631, "step": 8460 }, { "epoch": 1.045138701442927, "grad_norm": 0.185546875, "learning_rate": 9.738457836939979e-07, "loss": 0.072, "step": 8470 }, { "epoch": 1.046372631432824, "grad_norm": 0.2138671875, "learning_rate": 9.737662040773681e-07, "loss": 0.0615, "step": 8480 }, { "epoch": 1.0476065614227212, "grad_norm": 0.2392578125, "learning_rate": 9.73686506837365e-07, "loss": 0.0694, "step": 8490 }, { "epoch": 1.0488404914126184, "grad_norm": 0.140625, "learning_rate": 9.736066919937747e-07, "loss": 0.076, "step": 8500 }, { "epoch": 1.0500744214025157, "grad_norm": 0.24609375, "learning_rate": 9.735267595664131e-07, "loss": 0.0637, "step": 8510 }, { "epoch": 1.0513083513924129, "grad_norm": 0.2177734375, "learning_rate": 9.734467095751254e-07, "loss": 0.0758, "step": 8520 }, { "epoch": 1.05254228138231, "grad_norm": 0.166015625, "learning_rate": 9.73366542039786e-07, "loss": 0.0733, "step": 8530 }, { "epoch": 1.0537762113722073, "grad_norm": 0.25390625, "learning_rate": 9.73286256980298e-07, "loss": 0.0878, "step": 8540 }, { "epoch": 1.0550101413621045, "grad_norm": 0.341796875, "learning_rate": 9.732058544165944e-07, "loss": 0.0593, "step": 8550 }, { "epoch": 1.0562440713520016, "grad_norm": 0.1533203125, "learning_rate": 9.731253343686368e-07, "loss": 0.0511, "step": 8560 }, { "epoch": 1.057478001341899, "grad_norm": 0.283203125, "learning_rate": 9.73044696856416e-07, "loss": 0.0699, "step": 8570 }, { "epoch": 1.058711931331796, "grad_norm": 0.33203125, "learning_rate": 9.729639418999522e-07, "loss": 0.0712, "step": 8580 }, { "epoch": 1.0599458613216932, "grad_norm": 0.205078125, "learning_rate": 9.728830695192949e-07, "loss": 0.0764, "step": 8590 }, { "epoch": 1.0611797913115906, "grad_norm": 0.21484375, "learning_rate": 9.728020797345222e-07, "loss": 0.0512, "step": 8600 }, { "epoch": 1.0624137213014877, "grad_norm": 0.2197265625, "learning_rate": 9.72720972565742e-07, "loss": 0.0587, "step": 8610 }, { "epoch": 1.0636476512913848, "grad_norm": 0.14453125, "learning_rate": 9.72639748033091e-07, "loss": 0.0632, "step": 8620 }, { "epoch": 1.064881581281282, "grad_norm": 0.3203125, "learning_rate": 9.725584061567349e-07, "loss": 0.0621, "step": 8630 }, { "epoch": 1.0661155112711793, "grad_norm": 0.142578125, "learning_rate": 9.724769469568686e-07, "loss": 0.0694, "step": 8640 }, { "epoch": 1.0673494412610764, "grad_norm": 0.240234375, "learning_rate": 9.723953704537163e-07, "loss": 0.064, "step": 8650 }, { "epoch": 1.0685833712509736, "grad_norm": 0.2109375, "learning_rate": 9.723136766675315e-07, "loss": 0.0617, "step": 8660 }, { "epoch": 1.069817301240871, "grad_norm": 0.291015625, "learning_rate": 9.72231865618596e-07, "loss": 0.0558, "step": 8670 }, { "epoch": 1.071051231230768, "grad_norm": 0.20703125, "learning_rate": 9.721499373272221e-07, "loss": 0.0931, "step": 8680 }, { "epoch": 1.0722851612206652, "grad_norm": 0.1572265625, "learning_rate": 9.7206789181375e-07, "loss": 0.0722, "step": 8690 }, { "epoch": 1.0735190912105623, "grad_norm": 0.154296875, "learning_rate": 9.719857290985489e-07, "loss": 0.0577, "step": 8700 }, { "epoch": 1.0747530212004597, "grad_norm": 0.25, "learning_rate": 9.71903449202018e-07, "loss": 0.0506, "step": 8710 }, { "epoch": 1.0759869511903568, "grad_norm": 0.1884765625, "learning_rate": 9.718210521445854e-07, "loss": 0.0687, "step": 8720 }, { "epoch": 1.077220881180254, "grad_norm": 0.2197265625, "learning_rate": 9.71738537946708e-07, "loss": 0.0736, "step": 8730 }, { "epoch": 1.0784548111701513, "grad_norm": 0.369140625, "learning_rate": 9.716559066288714e-07, "loss": 0.0538, "step": 8740 }, { "epoch": 1.0796887411600484, "grad_norm": 0.37109375, "learning_rate": 9.715731582115913e-07, "loss": 0.0657, "step": 8750 }, { "epoch": 1.0809226711499456, "grad_norm": 0.185546875, "learning_rate": 9.714902927154113e-07, "loss": 0.064, "step": 8760 }, { "epoch": 1.082156601139843, "grad_norm": 0.34375, "learning_rate": 9.714073101609053e-07, "loss": 0.0779, "step": 8770 }, { "epoch": 1.08339053112974, "grad_norm": 0.275390625, "learning_rate": 9.713242105686753e-07, "loss": 0.0647, "step": 8780 }, { "epoch": 1.0846244611196372, "grad_norm": 0.197265625, "learning_rate": 9.712409939593528e-07, "loss": 0.0621, "step": 8790 }, { "epoch": 1.0858583911095345, "grad_norm": 0.1865234375, "learning_rate": 9.711576603535978e-07, "loss": 0.0677, "step": 8800 }, { "epoch": 1.0870923210994317, "grad_norm": 0.16796875, "learning_rate": 9.710742097721004e-07, "loss": 0.0725, "step": 8810 }, { "epoch": 1.0883262510893288, "grad_norm": 0.314453125, "learning_rate": 9.70990642235579e-07, "loss": 0.0642, "step": 8820 }, { "epoch": 1.089560181079226, "grad_norm": 0.208984375, "learning_rate": 9.709069577647808e-07, "loss": 0.072, "step": 8830 }, { "epoch": 1.0907941110691233, "grad_norm": 0.20703125, "learning_rate": 9.708231563804828e-07, "loss": 0.0699, "step": 8840 }, { "epoch": 1.0920280410590204, "grad_norm": 0.1669921875, "learning_rate": 9.707392381034902e-07, "loss": 0.0693, "step": 8850 }, { "epoch": 1.0932619710489175, "grad_norm": 0.1572265625, "learning_rate": 9.70655202954638e-07, "loss": 0.0644, "step": 8860 }, { "epoch": 1.094495901038815, "grad_norm": 0.20703125, "learning_rate": 9.7057105095479e-07, "loss": 0.0716, "step": 8870 }, { "epoch": 1.095729831028712, "grad_norm": 0.1845703125, "learning_rate": 9.704867821248387e-07, "loss": 0.0605, "step": 8880 }, { "epoch": 1.0969637610186092, "grad_norm": 0.26171875, "learning_rate": 9.704023964857053e-07, "loss": 0.0713, "step": 8890 }, { "epoch": 1.0981976910085065, "grad_norm": 0.162109375, "learning_rate": 9.703178940583413e-07, "loss": 0.0651, "step": 8900 }, { "epoch": 1.0994316209984036, "grad_norm": 0.171875, "learning_rate": 9.702332748637257e-07, "loss": 0.0719, "step": 8910 }, { "epoch": 1.1006655509883008, "grad_norm": 0.19921875, "learning_rate": 9.701485389228677e-07, "loss": 0.0664, "step": 8920 }, { "epoch": 1.101899480978198, "grad_norm": 0.2216796875, "learning_rate": 9.700636862568045e-07, "loss": 0.076, "step": 8930 }, { "epoch": 1.1031334109680953, "grad_norm": 0.171875, "learning_rate": 9.699787168866028e-07, "loss": 0.0688, "step": 8940 }, { "epoch": 1.1043673409579924, "grad_norm": 0.306640625, "learning_rate": 9.698936308333587e-07, "loss": 0.0787, "step": 8950 }, { "epoch": 1.1056012709478895, "grad_norm": 0.205078125, "learning_rate": 9.698084281181961e-07, "loss": 0.0621, "step": 8960 }, { "epoch": 1.1068352009377869, "grad_norm": 0.2333984375, "learning_rate": 9.697231087622689e-07, "loss": 0.0556, "step": 8970 }, { "epoch": 1.108069130927684, "grad_norm": 0.67578125, "learning_rate": 9.696376727867596e-07, "loss": 0.0703, "step": 8980 }, { "epoch": 1.1093030609175811, "grad_norm": 0.10009765625, "learning_rate": 9.695521202128795e-07, "loss": 0.0656, "step": 8990 }, { "epoch": 1.1105369909074785, "grad_norm": 0.220703125, "learning_rate": 9.694664510618691e-07, "loss": 0.0564, "step": 9000 }, { "epoch": 1.1105369909074785, "eval_exact_match": 0.6549079754601227, "eval_has_answer_correct": 0.5787139689578714, "eval_no_answer_correct": 0.8258706467661692, "step": 9000 }, { "epoch": 1.1117709208973756, "grad_norm": 0.291015625, "learning_rate": 9.693806653549977e-07, "loss": 0.0562, "step": 9010 }, { "epoch": 1.1130048508872727, "grad_norm": 0.25390625, "learning_rate": 9.692947631135635e-07, "loss": 0.0942, "step": 9020 }, { "epoch": 1.1142387808771699, "grad_norm": 0.20703125, "learning_rate": 9.69208744358894e-07, "loss": 0.0639, "step": 9030 }, { "epoch": 1.1154727108670672, "grad_norm": 0.228515625, "learning_rate": 9.69122609112345e-07, "loss": 0.0574, "step": 9040 }, { "epoch": 1.1167066408569644, "grad_norm": 0.1943359375, "learning_rate": 9.690363573953018e-07, "loss": 0.0623, "step": 9050 }, { "epoch": 1.1179405708468615, "grad_norm": 0.1552734375, "learning_rate": 9.689499892291782e-07, "loss": 0.0684, "step": 9060 }, { "epoch": 1.1191745008367588, "grad_norm": 0.1884765625, "learning_rate": 9.688635046354171e-07, "loss": 0.0625, "step": 9070 }, { "epoch": 1.120408430826656, "grad_norm": 0.1123046875, "learning_rate": 9.687769036354904e-07, "loss": 0.0613, "step": 9080 }, { "epoch": 1.121642360816553, "grad_norm": 0.28125, "learning_rate": 9.686901862508987e-07, "loss": 0.0822, "step": 9090 }, { "epoch": 1.1228762908064505, "grad_norm": 0.16015625, "learning_rate": 9.686033525031719e-07, "loss": 0.0616, "step": 9100 }, { "epoch": 1.1241102207963476, "grad_norm": 0.2294921875, "learning_rate": 9.685164024138678e-07, "loss": 0.0546, "step": 9110 }, { "epoch": 1.1253441507862447, "grad_norm": 0.23828125, "learning_rate": 9.684293360045746e-07, "loss": 0.0709, "step": 9120 }, { "epoch": 1.126578080776142, "grad_norm": 0.1767578125, "learning_rate": 9.683421532969077e-07, "loss": 0.0644, "step": 9130 }, { "epoch": 1.1278120107660392, "grad_norm": 0.236328125, "learning_rate": 9.682548543125128e-07, "loss": 0.0691, "step": 9140 }, { "epoch": 1.1290459407559363, "grad_norm": 0.298828125, "learning_rate": 9.681674390730638e-07, "loss": 0.0866, "step": 9150 }, { "epoch": 1.1302798707458335, "grad_norm": 0.40234375, "learning_rate": 9.680799076002634e-07, "loss": 0.0676, "step": 9160 }, { "epoch": 1.1315138007357308, "grad_norm": 0.26171875, "learning_rate": 9.679922599158434e-07, "loss": 0.0682, "step": 9170 }, { "epoch": 1.132747730725628, "grad_norm": 0.1533203125, "learning_rate": 9.679044960415644e-07, "loss": 0.0763, "step": 9180 }, { "epoch": 1.133981660715525, "grad_norm": 0.30078125, "learning_rate": 9.678166159992156e-07, "loss": 0.0869, "step": 9190 }, { "epoch": 1.1352155907054224, "grad_norm": 0.1923828125, "learning_rate": 9.677286198106154e-07, "loss": 0.0683, "step": 9200 }, { "epoch": 1.1364495206953196, "grad_norm": 0.28125, "learning_rate": 9.676405074976108e-07, "loss": 0.0809, "step": 9210 }, { "epoch": 1.1376834506852167, "grad_norm": 0.208984375, "learning_rate": 9.67552279082078e-07, "loss": 0.0806, "step": 9220 }, { "epoch": 1.1389173806751138, "grad_norm": 0.1513671875, "learning_rate": 9.674639345859212e-07, "loss": 0.0809, "step": 9230 }, { "epoch": 1.1401513106650112, "grad_norm": 0.17578125, "learning_rate": 9.673754740310745e-07, "loss": 0.0544, "step": 9240 }, { "epoch": 1.1413852406549083, "grad_norm": 0.119140625, "learning_rate": 9.672868974394998e-07, "loss": 0.0514, "step": 9250 }, { "epoch": 1.1426191706448054, "grad_norm": 0.361328125, "learning_rate": 9.671982048331885e-07, "loss": 0.0662, "step": 9260 }, { "epoch": 1.1438531006347028, "grad_norm": 0.2314453125, "learning_rate": 9.671093962341608e-07, "loss": 0.0929, "step": 9270 }, { "epoch": 1.1450870306246, "grad_norm": 0.208984375, "learning_rate": 9.670204716644652e-07, "loss": 0.0833, "step": 9280 }, { "epoch": 1.146320960614497, "grad_norm": 0.208984375, "learning_rate": 9.669314311461795e-07, "loss": 0.0811, "step": 9290 }, { "epoch": 1.1475548906043944, "grad_norm": 0.388671875, "learning_rate": 9.668422747014096e-07, "loss": 0.0769, "step": 9300 }, { "epoch": 1.1487888205942915, "grad_norm": 0.265625, "learning_rate": 9.667530023522911e-07, "loss": 0.0524, "step": 9310 }, { "epoch": 1.1500227505841887, "grad_norm": 0.3515625, "learning_rate": 9.666636141209878e-07, "loss": 0.0707, "step": 9320 }, { "epoch": 1.151256680574086, "grad_norm": 0.158203125, "learning_rate": 9.665741100296922e-07, "loss": 0.0756, "step": 9330 }, { "epoch": 1.1524906105639832, "grad_norm": 0.2119140625, "learning_rate": 9.66484490100626e-07, "loss": 0.0768, "step": 9340 }, { "epoch": 1.1537245405538803, "grad_norm": 0.12890625, "learning_rate": 9.663947543560393e-07, "loss": 0.0585, "step": 9350 }, { "epoch": 1.1549584705437774, "grad_norm": 0.232421875, "learning_rate": 9.663049028182111e-07, "loss": 0.0673, "step": 9360 }, { "epoch": 1.1561924005336748, "grad_norm": 0.1845703125, "learning_rate": 9.66214935509449e-07, "loss": 0.0776, "step": 9370 }, { "epoch": 1.157426330523572, "grad_norm": 0.22265625, "learning_rate": 9.661248524520897e-07, "loss": 0.0796, "step": 9380 }, { "epoch": 1.158660260513469, "grad_norm": 0.31640625, "learning_rate": 9.660346536684983e-07, "loss": 0.0643, "step": 9390 }, { "epoch": 1.1598941905033664, "grad_norm": 0.2890625, "learning_rate": 9.659443391810685e-07, "loss": 0.0639, "step": 9400 }, { "epoch": 1.1611281204932635, "grad_norm": 0.220703125, "learning_rate": 9.65853909012223e-07, "loss": 0.0606, "step": 9410 }, { "epoch": 1.1623620504831607, "grad_norm": 0.2431640625, "learning_rate": 9.657633631844136e-07, "loss": 0.0734, "step": 9420 }, { "epoch": 1.1635959804730578, "grad_norm": 0.32421875, "learning_rate": 9.656727017201198e-07, "loss": 0.0524, "step": 9430 }, { "epoch": 1.1648299104629551, "grad_norm": 0.2392578125, "learning_rate": 9.655819246418508e-07, "loss": 0.0682, "step": 9440 }, { "epoch": 1.1660638404528523, "grad_norm": 0.177734375, "learning_rate": 9.65491031972144e-07, "loss": 0.0766, "step": 9450 }, { "epoch": 1.1672977704427496, "grad_norm": 0.1591796875, "learning_rate": 9.654000237335656e-07, "loss": 0.0646, "step": 9460 }, { "epoch": 1.1685317004326468, "grad_norm": 0.2890625, "learning_rate": 9.653088999487104e-07, "loss": 0.0739, "step": 9470 }, { "epoch": 1.169765630422544, "grad_norm": 0.26171875, "learning_rate": 9.652176606402023e-07, "loss": 0.0738, "step": 9480 }, { "epoch": 1.170999560412441, "grad_norm": 0.2392578125, "learning_rate": 9.651263058306932e-07, "loss": 0.0784, "step": 9490 }, { "epoch": 1.1722334904023384, "grad_norm": 0.203125, "learning_rate": 9.65034835542864e-07, "loss": 0.0584, "step": 9500 }, { "epoch": 1.1734674203922355, "grad_norm": 0.2080078125, "learning_rate": 9.649432497994243e-07, "loss": 0.0547, "step": 9510 }, { "epoch": 1.1747013503821326, "grad_norm": 0.271484375, "learning_rate": 9.648515486231129e-07, "loss": 0.0814, "step": 9520 }, { "epoch": 1.17593528037203, "grad_norm": 0.171875, "learning_rate": 9.647597320366961e-07, "loss": 0.0684, "step": 9530 }, { "epoch": 1.1771692103619271, "grad_norm": 0.16015625, "learning_rate": 9.646678000629698e-07, "loss": 0.0743, "step": 9540 }, { "epoch": 1.1784031403518243, "grad_norm": 0.287109375, "learning_rate": 9.645757527247582e-07, "loss": 0.0706, "step": 9550 }, { "epoch": 1.1796370703417214, "grad_norm": 0.306640625, "learning_rate": 9.644835900449142e-07, "loss": 0.0615, "step": 9560 }, { "epoch": 1.1808710003316187, "grad_norm": 0.2158203125, "learning_rate": 9.64391312046319e-07, "loss": 0.0619, "step": 9570 }, { "epoch": 1.1821049303215159, "grad_norm": 0.2890625, "learning_rate": 9.642989187518831e-07, "loss": 0.0755, "step": 9580 }, { "epoch": 1.183338860311413, "grad_norm": 0.1845703125, "learning_rate": 9.642064101845454e-07, "loss": 0.0559, "step": 9590 }, { "epoch": 1.1845727903013104, "grad_norm": 0.220703125, "learning_rate": 9.641137863672727e-07, "loss": 0.0704, "step": 9600 }, { "epoch": 1.1858067202912075, "grad_norm": 0.28515625, "learning_rate": 9.640210473230613e-07, "loss": 0.0521, "step": 9610 }, { "epoch": 1.1870406502811046, "grad_norm": 0.2216796875, "learning_rate": 9.63928193074936e-07, "loss": 0.0821, "step": 9620 }, { "epoch": 1.188274580271002, "grad_norm": 0.130859375, "learning_rate": 9.638352236459498e-07, "loss": 0.0781, "step": 9630 }, { "epoch": 1.189508510260899, "grad_norm": 0.1357421875, "learning_rate": 9.637421390591846e-07, "loss": 0.0647, "step": 9640 }, { "epoch": 1.1907424402507962, "grad_norm": 0.42578125, "learning_rate": 9.636489393377507e-07, "loss": 0.0668, "step": 9650 }, { "epoch": 1.1919763702406936, "grad_norm": 0.185546875, "learning_rate": 9.63555624504787e-07, "loss": 0.0503, "step": 9660 }, { "epoch": 1.1932103002305907, "grad_norm": 0.177734375, "learning_rate": 9.634621945834615e-07, "loss": 0.0736, "step": 9670 }, { "epoch": 1.1944442302204878, "grad_norm": 0.189453125, "learning_rate": 9.633686495969698e-07, "loss": 0.0652, "step": 9680 }, { "epoch": 1.195678160210385, "grad_norm": 0.169921875, "learning_rate": 9.632749895685365e-07, "loss": 0.0638, "step": 9690 }, { "epoch": 1.1969120902002823, "grad_norm": 0.19921875, "learning_rate": 9.631812145214155e-07, "loss": 0.0526, "step": 9700 }, { "epoch": 1.1981460201901795, "grad_norm": 0.369140625, "learning_rate": 9.630873244788882e-07, "loss": 0.0642, "step": 9710 }, { "epoch": 1.1993799501800766, "grad_norm": 0.33984375, "learning_rate": 9.62993319464265e-07, "loss": 0.0711, "step": 9720 }, { "epoch": 1.200613880169974, "grad_norm": 0.1826171875, "learning_rate": 9.628991995008848e-07, "loss": 0.0582, "step": 9730 }, { "epoch": 1.201847810159871, "grad_norm": 0.2158203125, "learning_rate": 9.628049646121153e-07, "loss": 0.0662, "step": 9740 }, { "epoch": 1.2030817401497682, "grad_norm": 0.154296875, "learning_rate": 9.62710614821352e-07, "loss": 0.0665, "step": 9750 }, { "epoch": 1.2043156701396653, "grad_norm": 0.236328125, "learning_rate": 9.626161501520198e-07, "loss": 0.0691, "step": 9760 }, { "epoch": 1.2055496001295627, "grad_norm": 0.1767578125, "learning_rate": 9.625215706275716e-07, "loss": 0.0721, "step": 9770 }, { "epoch": 1.2067835301194598, "grad_norm": 0.369140625, "learning_rate": 9.62426876271489e-07, "loss": 0.0789, "step": 9780 }, { "epoch": 1.208017460109357, "grad_norm": 0.21875, "learning_rate": 9.62332067107282e-07, "loss": 0.0615, "step": 9790 }, { "epoch": 1.2092513900992543, "grad_norm": 0.181640625, "learning_rate": 9.622371431584892e-07, "loss": 0.0626, "step": 9800 }, { "epoch": 1.2104853200891514, "grad_norm": 0.294921875, "learning_rate": 9.621421044486777e-07, "loss": 0.0841, "step": 9810 }, { "epoch": 1.2117192500790486, "grad_norm": 0.15625, "learning_rate": 9.62046951001443e-07, "loss": 0.0764, "step": 9820 }, { "epoch": 1.212953180068946, "grad_norm": 0.21875, "learning_rate": 9.619516828404089e-07, "loss": 0.0654, "step": 9830 }, { "epoch": 1.214187110058843, "grad_norm": 0.400390625, "learning_rate": 9.618562999892282e-07, "loss": 0.0714, "step": 9840 }, { "epoch": 1.2154210400487402, "grad_norm": 0.2109375, "learning_rate": 9.61760802471582e-07, "loss": 0.0694, "step": 9850 }, { "epoch": 1.2166549700386375, "grad_norm": 0.318359375, "learning_rate": 9.616651903111793e-07, "loss": 0.0672, "step": 9860 }, { "epoch": 1.2178889000285347, "grad_norm": 0.1923828125, "learning_rate": 9.615694635317585e-07, "loss": 0.0633, "step": 9870 }, { "epoch": 1.2191228300184318, "grad_norm": 0.25390625, "learning_rate": 9.61473622157086e-07, "loss": 0.0689, "step": 9880 }, { "epoch": 1.220356760008329, "grad_norm": 0.11962890625, "learning_rate": 9.613776662109558e-07, "loss": 0.0627, "step": 9890 }, { "epoch": 1.2215906899982263, "grad_norm": 0.1611328125, "learning_rate": 9.612815957171922e-07, "loss": 0.0578, "step": 9900 }, { "epoch": 1.2228246199881234, "grad_norm": 0.22265625, "learning_rate": 9.611854106996465e-07, "loss": 0.0627, "step": 9910 }, { "epoch": 1.2240585499780205, "grad_norm": 0.2392578125, "learning_rate": 9.610891111821987e-07, "loss": 0.0652, "step": 9920 }, { "epoch": 1.225292479967918, "grad_norm": 0.2119140625, "learning_rate": 9.609926971887576e-07, "loss": 0.0646, "step": 9930 }, { "epoch": 1.226526409957815, "grad_norm": 0.404296875, "learning_rate": 9.6089616874326e-07, "loss": 0.0657, "step": 9940 }, { "epoch": 1.2277603399477122, "grad_norm": 0.3984375, "learning_rate": 9.607995258696716e-07, "loss": 0.0673, "step": 9950 }, { "epoch": 1.2289942699376093, "grad_norm": 0.1806640625, "learning_rate": 9.607027685919859e-07, "loss": 0.0691, "step": 9960 }, { "epoch": 1.2302281999275066, "grad_norm": 0.3046875, "learning_rate": 9.606058969342254e-07, "loss": 0.0786, "step": 9970 }, { "epoch": 1.2314621299174038, "grad_norm": 0.34375, "learning_rate": 9.605089109204405e-07, "loss": 0.0612, "step": 9980 }, { "epoch": 1.2326960599073011, "grad_norm": 0.2265625, "learning_rate": 9.604118105747104e-07, "loss": 0.0636, "step": 9990 }, { "epoch": 1.2339299898971983, "grad_norm": 0.2314453125, "learning_rate": 9.603145959211422e-07, "loss": 0.0551, "step": 10000 }, { "epoch": 1.2339299898971983, "eval_exact_match": 0.6549079754601227, "eval_has_answer_correct": 0.5742793791574279, "eval_no_answer_correct": 0.835820895522388, "step": 10000 }, { "epoch": 1.235187056074406, "grad_norm": 0.216796875, "learning_rate": 9.60217266983872e-07, "loss": 0.0889, "step": 10010 }, { "epoch": 1.2364209860643032, "grad_norm": 0.2412109375, "learning_rate": 9.601198237870638e-07, "loss": 0.0667, "step": 10020 }, { "epoch": 1.2376549160542003, "grad_norm": 0.2080078125, "learning_rate": 9.600222663549102e-07, "loss": 0.0599, "step": 10030 }, { "epoch": 1.2388888460440977, "grad_norm": 0.2216796875, "learning_rate": 9.59924594711632e-07, "loss": 0.0647, "step": 10040 }, { "epoch": 1.2401227760339948, "grad_norm": 0.1748046875, "learning_rate": 9.598268088814783e-07, "loss": 0.0676, "step": 10050 }, { "epoch": 1.241356706023892, "grad_norm": 0.1982421875, "learning_rate": 9.59728908888727e-07, "loss": 0.0623, "step": 10060 }, { "epoch": 1.242590636013789, "grad_norm": 0.2431640625, "learning_rate": 9.596308947576838e-07, "loss": 0.0741, "step": 10070 }, { "epoch": 1.2438245660036864, "grad_norm": 0.283203125, "learning_rate": 9.59532766512683e-07, "loss": 0.0728, "step": 10080 }, { "epoch": 1.2450584959935835, "grad_norm": 0.1484375, "learning_rate": 9.594345241780872e-07, "loss": 0.074, "step": 10090 }, { "epoch": 1.2462924259834807, "grad_norm": 0.2412109375, "learning_rate": 9.593361677782874e-07, "loss": 0.0603, "step": 10100 }, { "epoch": 1.247526355973378, "grad_norm": 0.1904296875, "learning_rate": 9.59237697337703e-07, "loss": 0.064, "step": 10110 }, { "epoch": 1.2487602859632752, "grad_norm": 0.22265625, "learning_rate": 9.591391128807811e-07, "loss": 0.07, "step": 10120 }, { "epoch": 1.2499942159531723, "grad_norm": 0.275390625, "learning_rate": 9.59040414431998e-07, "loss": 0.0662, "step": 10130 }, { "epoch": 1.2512281459430696, "grad_norm": 0.1904296875, "learning_rate": 9.589416020158577e-07, "loss": 0.0651, "step": 10140 }, { "epoch": 1.2524620759329668, "grad_norm": 0.1455078125, "learning_rate": 9.588426756568924e-07, "loss": 0.067, "step": 10150 }, { "epoch": 1.253696005922864, "grad_norm": 0.21484375, "learning_rate": 9.587436353796633e-07, "loss": 0.0724, "step": 10160 }, { "epoch": 1.2549299359127613, "grad_norm": 0.1396484375, "learning_rate": 9.58644481208759e-07, "loss": 0.0699, "step": 10170 }, { "epoch": 1.2561638659026584, "grad_norm": 0.27734375, "learning_rate": 9.585452131687972e-07, "loss": 0.0623, "step": 10180 }, { "epoch": 1.2573977958925555, "grad_norm": 0.158203125, "learning_rate": 9.584458312844232e-07, "loss": 0.0744, "step": 10190 }, { "epoch": 1.2586317258824526, "grad_norm": 0.328125, "learning_rate": 9.58346335580311e-07, "loss": 0.0757, "step": 10200 }, { "epoch": 1.25986565587235, "grad_norm": 0.2158203125, "learning_rate": 9.582467260811626e-07, "loss": 0.0783, "step": 10210 }, { "epoch": 1.2610995858622471, "grad_norm": 0.1640625, "learning_rate": 9.581470028117085e-07, "loss": 0.0749, "step": 10220 }, { "epoch": 1.2623335158521443, "grad_norm": 0.203125, "learning_rate": 9.58047165796707e-07, "loss": 0.0754, "step": 10230 }, { "epoch": 1.2635674458420416, "grad_norm": 0.1865234375, "learning_rate": 9.579472150609453e-07, "loss": 0.0756, "step": 10240 }, { "epoch": 1.2648013758319387, "grad_norm": 0.140625, "learning_rate": 9.578471506292382e-07, "loss": 0.0524, "step": 10250 }, { "epoch": 1.2660353058218359, "grad_norm": 0.24609375, "learning_rate": 9.577469725264292e-07, "loss": 0.0735, "step": 10260 }, { "epoch": 1.267269235811733, "grad_norm": 0.36328125, "learning_rate": 9.576466807773898e-07, "loss": 0.0636, "step": 10270 }, { "epoch": 1.2685031658016304, "grad_norm": 0.1474609375, "learning_rate": 9.575462754070194e-07, "loss": 0.0715, "step": 10280 }, { "epoch": 1.2697370957915275, "grad_norm": 0.146484375, "learning_rate": 9.574457564402463e-07, "loss": 0.0645, "step": 10290 }, { "epoch": 1.2709710257814248, "grad_norm": 0.234375, "learning_rate": 9.573451239020268e-07, "loss": 0.0496, "step": 10300 }, { "epoch": 1.272204955771322, "grad_norm": 0.298828125, "learning_rate": 9.572443778173447e-07, "loss": 0.0729, "step": 10310 }, { "epoch": 1.273438885761219, "grad_norm": 0.263671875, "learning_rate": 9.57143518211213e-07, "loss": 0.078, "step": 10320 }, { "epoch": 1.2746728157511162, "grad_norm": 0.259765625, "learning_rate": 9.570425451086722e-07, "loss": 0.0727, "step": 10330 }, { "epoch": 1.2759067457410136, "grad_norm": 0.298828125, "learning_rate": 9.569414585347913e-07, "loss": 0.0785, "step": 10340 }, { "epoch": 1.2771406757309107, "grad_norm": 0.234375, "learning_rate": 9.568402585146674e-07, "loss": 0.0453, "step": 10350 }, { "epoch": 1.2783746057208079, "grad_norm": 0.251953125, "learning_rate": 9.567389450734255e-07, "loss": 0.073, "step": 10360 }, { "epoch": 1.2796085357107052, "grad_norm": 0.232421875, "learning_rate": 9.56637518236219e-07, "loss": 0.0843, "step": 10370 }, { "epoch": 1.2808424657006023, "grad_norm": 0.173828125, "learning_rate": 9.5653597802823e-07, "loss": 0.0733, "step": 10380 }, { "epoch": 1.2820763956904995, "grad_norm": 0.19921875, "learning_rate": 9.564343244746678e-07, "loss": 0.0713, "step": 10390 }, { "epoch": 1.2833103256803966, "grad_norm": 0.2294921875, "learning_rate": 9.5633255760077e-07, "loss": 0.0572, "step": 10400 }, { "epoch": 1.284544255670294, "grad_norm": 0.259765625, "learning_rate": 9.56230677431803e-07, "loss": 0.0633, "step": 10410 }, { "epoch": 1.285778185660191, "grad_norm": 0.4140625, "learning_rate": 9.561286839930606e-07, "loss": 0.0867, "step": 10420 }, { "epoch": 1.2870121156500884, "grad_norm": 0.2294921875, "learning_rate": 9.560265773098652e-07, "loss": 0.0661, "step": 10430 }, { "epoch": 1.2882460456399856, "grad_norm": 0.1259765625, "learning_rate": 9.55924357407567e-07, "loss": 0.0648, "step": 10440 }, { "epoch": 1.2894799756298827, "grad_norm": 0.2578125, "learning_rate": 9.558220243115445e-07, "loss": 0.0583, "step": 10450 }, { "epoch": 1.2907139056197798, "grad_norm": 0.232421875, "learning_rate": 9.557195780472042e-07, "loss": 0.0618, "step": 10460 }, { "epoch": 1.291947835609677, "grad_norm": 0.1884765625, "learning_rate": 9.55617018639981e-07, "loss": 0.0933, "step": 10470 }, { "epoch": 1.2931817655995743, "grad_norm": 0.294921875, "learning_rate": 9.555143461153375e-07, "loss": 0.0755, "step": 10480 }, { "epoch": 1.2944156955894714, "grad_norm": 0.19140625, "learning_rate": 9.554115604987643e-07, "loss": 0.0622, "step": 10490 }, { "epoch": 1.2956496255793688, "grad_norm": 0.1142578125, "learning_rate": 9.553086618157804e-07, "loss": 0.0762, "step": 10500 }, { "epoch": 1.296883555569266, "grad_norm": 0.2265625, "learning_rate": 9.55205650091933e-07, "loss": 0.0665, "step": 10510 }, { "epoch": 1.298117485559163, "grad_norm": 0.1572265625, "learning_rate": 9.55102525352797e-07, "loss": 0.0775, "step": 10520 }, { "epoch": 1.2993514155490602, "grad_norm": 0.1875, "learning_rate": 9.54999287623975e-07, "loss": 0.0694, "step": 10530 }, { "epoch": 1.3005853455389575, "grad_norm": 0.25, "learning_rate": 9.54895936931099e-07, "loss": 0.0797, "step": 10540 }, { "epoch": 1.3018192755288547, "grad_norm": 0.22265625, "learning_rate": 9.54792473299828e-07, "loss": 0.0818, "step": 10550 }, { "epoch": 1.3030532055187518, "grad_norm": 0.365234375, "learning_rate": 9.546888967558487e-07, "loss": 0.0725, "step": 10560 }, { "epoch": 1.3042871355086492, "grad_norm": 0.1796875, "learning_rate": 9.545852073248765e-07, "loss": 0.0734, "step": 10570 }, { "epoch": 1.3055210654985463, "grad_norm": 0.19921875, "learning_rate": 9.544814050326552e-07, "loss": 0.0861, "step": 10580 }, { "epoch": 1.3067549954884434, "grad_norm": 0.2470703125, "learning_rate": 9.543774899049557e-07, "loss": 0.0697, "step": 10590 }, { "epoch": 1.3079889254783406, "grad_norm": 0.166015625, "learning_rate": 9.542734619675775e-07, "loss": 0.0635, "step": 10600 }, { "epoch": 1.309222855468238, "grad_norm": 0.212890625, "learning_rate": 9.541693212463476e-07, "loss": 0.0589, "step": 10610 }, { "epoch": 1.310456785458135, "grad_norm": 0.15625, "learning_rate": 9.540650677671217e-07, "loss": 0.0643, "step": 10620 }, { "epoch": 1.3116907154480324, "grad_norm": 0.28125, "learning_rate": 9.53960701555783e-07, "loss": 0.06, "step": 10630 }, { "epoch": 1.3129246454379295, "grad_norm": 0.1923828125, "learning_rate": 9.538562226382428e-07, "loss": 0.0503, "step": 10640 }, { "epoch": 1.3141585754278267, "grad_norm": 0.490234375, "learning_rate": 9.537516310404404e-07, "loss": 0.0623, "step": 10650 }, { "epoch": 1.3153925054177238, "grad_norm": 0.2099609375, "learning_rate": 9.536469267883431e-07, "loss": 0.0687, "step": 10660 }, { "epoch": 1.3166264354076211, "grad_norm": 0.306640625, "learning_rate": 9.535421099079462e-07, "loss": 0.083, "step": 10670 }, { "epoch": 1.3178603653975183, "grad_norm": 0.10595703125, "learning_rate": 9.534371804252726e-07, "loss": 0.0628, "step": 10680 }, { "epoch": 1.3190942953874154, "grad_norm": 0.2060546875, "learning_rate": 9.53332138366374e-07, "loss": 0.0762, "step": 10690 }, { "epoch": 1.3203282253773128, "grad_norm": 0.1484375, "learning_rate": 9.532269837573291e-07, "loss": 0.0565, "step": 10700 }, { "epoch": 1.32156215536721, "grad_norm": 0.177734375, "learning_rate": 9.531217166242449e-07, "loss": 0.0626, "step": 10710 }, { "epoch": 1.322796085357107, "grad_norm": 0.283203125, "learning_rate": 9.530163369932567e-07, "loss": 0.0649, "step": 10720 }, { "epoch": 1.3240300153470042, "grad_norm": 0.1513671875, "learning_rate": 9.529108448905272e-07, "loss": 0.065, "step": 10730 }, { "epoch": 1.3252639453369015, "grad_norm": 0.359375, "learning_rate": 9.528052403422474e-07, "loss": 0.0786, "step": 10740 }, { "epoch": 1.3264978753267986, "grad_norm": 0.330078125, "learning_rate": 9.52699523374636e-07, "loss": 0.0589, "step": 10750 }, { "epoch": 1.3277318053166958, "grad_norm": 0.1279296875, "learning_rate": 9.525936940139396e-07, "loss": 0.0639, "step": 10760 }, { "epoch": 1.3289657353065931, "grad_norm": 0.259765625, "learning_rate": 9.524877522864328e-07, "loss": 0.0635, "step": 10770 }, { "epoch": 1.3301996652964903, "grad_norm": 0.25, "learning_rate": 9.523816982184181e-07, "loss": 0.0809, "step": 10780 }, { "epoch": 1.3314335952863874, "grad_norm": 0.1943359375, "learning_rate": 9.522755318362259e-07, "loss": 0.064, "step": 10790 }, { "epoch": 1.3326675252762845, "grad_norm": 0.271484375, "learning_rate": 9.521692531662145e-07, "loss": 0.0593, "step": 10800 }, { "epoch": 1.3339014552661819, "grad_norm": 0.359375, "learning_rate": 9.520628622347697e-07, "loss": 0.0559, "step": 10810 }, { "epoch": 1.335135385256079, "grad_norm": 0.349609375, "learning_rate": 9.519563590683059e-07, "loss": 0.0686, "step": 10820 }, { "epoch": 1.3363693152459764, "grad_norm": 0.2470703125, "learning_rate": 9.518497436932649e-07, "loss": 0.0768, "step": 10830 }, { "epoch": 1.3376032452358735, "grad_norm": 0.181640625, "learning_rate": 9.517430161361162e-07, "loss": 0.0624, "step": 10840 }, { "epoch": 1.3388371752257706, "grad_norm": 0.1826171875, "learning_rate": 9.516361764233576e-07, "loss": 0.067, "step": 10850 }, { "epoch": 1.3400711052156677, "grad_norm": 0.1669921875, "learning_rate": 9.515292245815143e-07, "loss": 0.0581, "step": 10860 }, { "epoch": 1.341305035205565, "grad_norm": 0.146484375, "learning_rate": 9.514221606371398e-07, "loss": 0.065, "step": 10870 }, { "epoch": 1.3425389651954622, "grad_norm": 0.19921875, "learning_rate": 9.513149846168149e-07, "loss": 0.0823, "step": 10880 }, { "epoch": 1.3437728951853594, "grad_norm": 0.2197265625, "learning_rate": 9.512076965471489e-07, "loss": 0.0668, "step": 10890 }, { "epoch": 1.3450068251752567, "grad_norm": 0.2294921875, "learning_rate": 9.511002964547783e-07, "loss": 0.0748, "step": 10900 }, { "epoch": 1.3462407551651538, "grad_norm": 0.1474609375, "learning_rate": 9.509927843663675e-07, "loss": 0.0497, "step": 10910 }, { "epoch": 1.347474685155051, "grad_norm": 0.1982421875, "learning_rate": 9.508851603086092e-07, "loss": 0.076, "step": 10920 }, { "epoch": 1.348708615144948, "grad_norm": 0.2021484375, "learning_rate": 9.507774243082233e-07, "loss": 0.0756, "step": 10930 }, { "epoch": 1.3499425451348455, "grad_norm": 0.359375, "learning_rate": 9.506695763919579e-07, "loss": 0.0479, "step": 10940 }, { "epoch": 1.3511764751247426, "grad_norm": 0.2451171875, "learning_rate": 9.505616165865885e-07, "loss": 0.0524, "step": 10950 }, { "epoch": 1.35241040511464, "grad_norm": 0.2421875, "learning_rate": 9.504535449189188e-07, "loss": 0.0749, "step": 10960 }, { "epoch": 1.353644335104537, "grad_norm": 0.287109375, "learning_rate": 9.503453614157801e-07, "loss": 0.0867, "step": 10970 }, { "epoch": 1.3548782650944342, "grad_norm": 0.2158203125, "learning_rate": 9.502370661040314e-07, "loss": 0.0685, "step": 10980 }, { "epoch": 1.3561121950843313, "grad_norm": 0.1494140625, "learning_rate": 9.501286590105595e-07, "loss": 0.0723, "step": 10990 }, { "epoch": 1.3573461250742285, "grad_norm": 0.349609375, "learning_rate": 9.500201401622789e-07, "loss": 0.0601, "step": 11000 }, { "epoch": 1.3573461250742285, "eval_exact_match": 0.6579754601226994, "eval_has_answer_correct": 0.5787139689578714, "eval_no_answer_correct": 0.835820895522388, "step": 11000 }, { "epoch": 1.3585800550641258, "grad_norm": 0.1640625, "learning_rate": 9.499115095861321e-07, "loss": 0.0523, "step": 11010 }, { "epoch": 1.359813985054023, "grad_norm": 0.26953125, "learning_rate": 9.498027673090888e-07, "loss": 0.0738, "step": 11020 }, { "epoch": 1.3610479150439203, "grad_norm": 0.1767578125, "learning_rate": 9.49693913358147e-07, "loss": 0.0747, "step": 11030 }, { "epoch": 1.3622818450338174, "grad_norm": 0.302734375, "learning_rate": 9.495849477603321e-07, "loss": 0.0635, "step": 11040 }, { "epoch": 1.3635157750237146, "grad_norm": 0.1484375, "learning_rate": 9.494758705426976e-07, "loss": 0.082, "step": 11050 }, { "epoch": 1.3647497050136117, "grad_norm": 0.169921875, "learning_rate": 9.493666817323242e-07, "loss": 0.0584, "step": 11060 }, { "epoch": 1.365983635003509, "grad_norm": 0.1591796875, "learning_rate": 9.492573813563205e-07, "loss": 0.0682, "step": 11070 }, { "epoch": 1.3672175649934062, "grad_norm": 0.271484375, "learning_rate": 9.491479694418229e-07, "loss": 0.083, "step": 11080 }, { "epoch": 1.3684514949833033, "grad_norm": 0.1533203125, "learning_rate": 9.490384460159953e-07, "loss": 0.0736, "step": 11090 }, { "epoch": 1.3696854249732007, "grad_norm": 0.1865234375, "learning_rate": 9.489288111060297e-07, "loss": 0.0675, "step": 11100 }, { "epoch": 1.3709193549630978, "grad_norm": 0.2412109375, "learning_rate": 9.488190647391453e-07, "loss": 0.0683, "step": 11110 }, { "epoch": 1.372153284952995, "grad_norm": 0.17578125, "learning_rate": 9.487092069425892e-07, "loss": 0.0629, "step": 11120 }, { "epoch": 1.373387214942892, "grad_norm": 0.232421875, "learning_rate": 9.485992377436361e-07, "loss": 0.0726, "step": 11130 }, { "epoch": 1.3746211449327894, "grad_norm": 0.2119140625, "learning_rate": 9.484891571695884e-07, "loss": 0.0711, "step": 11140 }, { "epoch": 1.3758550749226865, "grad_norm": 0.185546875, "learning_rate": 9.483789652477762e-07, "loss": 0.0609, "step": 11150 }, { "epoch": 1.377089004912584, "grad_norm": 0.2021484375, "learning_rate": 9.482686620055571e-07, "loss": 0.0589, "step": 11160 }, { "epoch": 1.378322934902481, "grad_norm": 0.1845703125, "learning_rate": 9.481582474703165e-07, "loss": 0.0742, "step": 11170 }, { "epoch": 1.3795568648923782, "grad_norm": 0.1953125, "learning_rate": 9.480477216694673e-07, "loss": 0.0651, "step": 11180 }, { "epoch": 1.3807907948822753, "grad_norm": 0.1484375, "learning_rate": 9.479370846304499e-07, "loss": 0.0596, "step": 11190 }, { "epoch": 1.3820247248721724, "grad_norm": 0.271484375, "learning_rate": 9.478263363807329e-07, "loss": 0.0769, "step": 11200 }, { "epoch": 1.3832586548620698, "grad_norm": 0.2197265625, "learning_rate": 9.477154769478118e-07, "loss": 0.0656, "step": 11210 }, { "epoch": 1.384492584851967, "grad_norm": 0.328125, "learning_rate": 9.476045063592102e-07, "loss": 0.0672, "step": 11220 }, { "epoch": 1.3857265148418643, "grad_norm": 0.1611328125, "learning_rate": 9.474934246424788e-07, "loss": 0.0794, "step": 11230 }, { "epoch": 1.3869604448317614, "grad_norm": 0.296875, "learning_rate": 9.473822318251965e-07, "loss": 0.0709, "step": 11240 }, { "epoch": 1.3881943748216585, "grad_norm": 0.1259765625, "learning_rate": 9.472709279349693e-07, "loss": 0.0631, "step": 11250 }, { "epoch": 1.3894283048115557, "grad_norm": 0.291015625, "learning_rate": 9.47159512999431e-07, "loss": 0.055, "step": 11260 }, { "epoch": 1.390662234801453, "grad_norm": 0.2197265625, "learning_rate": 9.470479870462431e-07, "loss": 0.0819, "step": 11270 }, { "epoch": 1.3918961647913501, "grad_norm": 0.25390625, "learning_rate": 9.469363501030942e-07, "loss": 0.0848, "step": 11280 }, { "epoch": 1.3931300947812473, "grad_norm": 0.142578125, "learning_rate": 9.468246021977008e-07, "loss": 0.0588, "step": 11290 }, { "epoch": 1.3943640247711446, "grad_norm": 0.212890625, "learning_rate": 9.467127433578072e-07, "loss": 0.0666, "step": 11300 }, { "epoch": 1.3955979547610418, "grad_norm": 0.1796875, "learning_rate": 9.466007736111845e-07, "loss": 0.0623, "step": 11310 }, { "epoch": 1.396831884750939, "grad_norm": 0.25390625, "learning_rate": 9.46488692985632e-07, "loss": 0.0923, "step": 11320 }, { "epoch": 1.398065814740836, "grad_norm": 0.361328125, "learning_rate": 9.463765015089764e-07, "loss": 0.0799, "step": 11330 }, { "epoch": 1.3992997447307334, "grad_norm": 0.17578125, "learning_rate": 9.462641992090715e-07, "loss": 0.0685, "step": 11340 }, { "epoch": 1.4005336747206305, "grad_norm": 0.1728515625, "learning_rate": 9.461517861137994e-07, "loss": 0.054, "step": 11350 }, { "epoch": 1.4017676047105279, "grad_norm": 0.1845703125, "learning_rate": 9.460392622510687e-07, "loss": 0.0655, "step": 11360 }, { "epoch": 1.403001534700425, "grad_norm": 0.2080078125, "learning_rate": 9.459266276488163e-07, "loss": 0.0619, "step": 11370 }, { "epoch": 1.4042354646903221, "grad_norm": 0.2353515625, "learning_rate": 9.458138823350064e-07, "loss": 0.0787, "step": 11380 }, { "epoch": 1.4054693946802193, "grad_norm": 0.1806640625, "learning_rate": 9.457010263376305e-07, "loss": 0.0529, "step": 11390 }, { "epoch": 1.4067033246701166, "grad_norm": 0.189453125, "learning_rate": 9.455880596847078e-07, "loss": 0.0636, "step": 11400 }, { "epoch": 1.4079372546600137, "grad_norm": 0.234375, "learning_rate": 9.454749824042848e-07, "loss": 0.0594, "step": 11410 }, { "epoch": 1.4091711846499109, "grad_norm": 0.275390625, "learning_rate": 9.453617945244357e-07, "loss": 0.0808, "step": 11420 }, { "epoch": 1.4104051146398082, "grad_norm": 0.1796875, "learning_rate": 9.452484960732616e-07, "loss": 0.0621, "step": 11430 }, { "epoch": 1.4116390446297054, "grad_norm": 0.119140625, "learning_rate": 9.45135087078892e-07, "loss": 0.0578, "step": 11440 }, { "epoch": 1.4128729746196025, "grad_norm": 0.1708984375, "learning_rate": 9.450215675694829e-07, "loss": 0.0566, "step": 11450 }, { "epoch": 1.4141069046094996, "grad_norm": 0.16015625, "learning_rate": 9.449079375732183e-07, "loss": 0.0594, "step": 11460 }, { "epoch": 1.415340834599397, "grad_norm": 0.1298828125, "learning_rate": 9.447941971183093e-07, "loss": 0.0549, "step": 11470 }, { "epoch": 1.416574764589294, "grad_norm": 0.236328125, "learning_rate": 9.446803462329947e-07, "loss": 0.0756, "step": 11480 }, { "epoch": 1.4178086945791912, "grad_norm": 0.25, "learning_rate": 9.445663849455407e-07, "loss": 0.0605, "step": 11490 }, { "epoch": 1.4190426245690886, "grad_norm": 0.1474609375, "learning_rate": 9.444523132842407e-07, "loss": 0.0543, "step": 11500 }, { "epoch": 1.4202765545589857, "grad_norm": 0.146484375, "learning_rate": 9.443381312774156e-07, "loss": 0.0629, "step": 11510 }, { "epoch": 1.4215104845488828, "grad_norm": 0.1318359375, "learning_rate": 9.442238389534137e-07, "loss": 0.0556, "step": 11520 }, { "epoch": 1.42274441453878, "grad_norm": 0.236328125, "learning_rate": 9.441094363406108e-07, "loss": 0.0812, "step": 11530 }, { "epoch": 1.4239783445286773, "grad_norm": 0.169921875, "learning_rate": 9.4399492346741e-07, "loss": 0.0597, "step": 11540 }, { "epoch": 1.4252122745185745, "grad_norm": 0.2080078125, "learning_rate": 9.438803003622417e-07, "loss": 0.072, "step": 11550 }, { "epoch": 1.4264462045084718, "grad_norm": 0.16796875, "learning_rate": 9.437655670535636e-07, "loss": 0.0592, "step": 11560 }, { "epoch": 1.427680134498369, "grad_norm": 0.23828125, "learning_rate": 9.43650723569861e-07, "loss": 0.0577, "step": 11570 }, { "epoch": 1.428914064488266, "grad_norm": 0.1396484375, "learning_rate": 9.435357699396466e-07, "loss": 0.075, "step": 11580 }, { "epoch": 1.4301479944781632, "grad_norm": 0.23828125, "learning_rate": 9.434207061914601e-07, "loss": 0.0746, "step": 11590 }, { "epoch": 1.4313819244680606, "grad_norm": 0.1611328125, "learning_rate": 9.433055323538686e-07, "loss": 0.0632, "step": 11600 }, { "epoch": 1.4326158544579577, "grad_norm": 0.2275390625, "learning_rate": 9.43190248455467e-07, "loss": 0.0795, "step": 11610 }, { "epoch": 1.4338497844478548, "grad_norm": 0.26953125, "learning_rate": 9.430748545248768e-07, "loss": 0.064, "step": 11620 }, { "epoch": 1.4350837144377522, "grad_norm": 0.205078125, "learning_rate": 9.429593505907476e-07, "loss": 0.0746, "step": 11630 }, { "epoch": 1.4363176444276493, "grad_norm": 0.279296875, "learning_rate": 9.428437366817557e-07, "loss": 0.0632, "step": 11640 }, { "epoch": 1.4375515744175464, "grad_norm": 0.1904296875, "learning_rate": 9.427280128266049e-07, "loss": 0.0706, "step": 11650 }, { "epoch": 1.4387855044074436, "grad_norm": 0.29296875, "learning_rate": 9.426121790540263e-07, "loss": 0.0681, "step": 11660 }, { "epoch": 1.440019434397341, "grad_norm": 0.173828125, "learning_rate": 9.424962353927784e-07, "loss": 0.071, "step": 11670 }, { "epoch": 1.441253364387238, "grad_norm": 0.294921875, "learning_rate": 9.423801818716468e-07, "loss": 0.0657, "step": 11680 }, { "epoch": 1.4424872943771354, "grad_norm": 0.2353515625, "learning_rate": 9.422640185194446e-07, "loss": 0.0581, "step": 11690 }, { "epoch": 1.4437212243670325, "grad_norm": 0.283203125, "learning_rate": 9.421477453650117e-07, "loss": 0.076, "step": 11700 }, { "epoch": 1.4449551543569297, "grad_norm": 0.2412109375, "learning_rate": 9.420313624372161e-07, "loss": 0.0697, "step": 11710 }, { "epoch": 1.4461890843468268, "grad_norm": 0.2197265625, "learning_rate": 9.419148697649519e-07, "loss": 0.0742, "step": 11720 }, { "epoch": 1.447423014336724, "grad_norm": 0.14453125, "learning_rate": 9.417982673771416e-07, "loss": 0.067, "step": 11730 }, { "epoch": 1.4486569443266213, "grad_norm": 0.1875, "learning_rate": 9.416815553027344e-07, "loss": 0.0708, "step": 11740 }, { "epoch": 1.4498908743165184, "grad_norm": 0.15234375, "learning_rate": 9.415647335707065e-07, "loss": 0.0575, "step": 11750 }, { "epoch": 1.4511248043064158, "grad_norm": 0.298828125, "learning_rate": 9.414478022100618e-07, "loss": 0.0758, "step": 11760 }, { "epoch": 1.452358734296313, "grad_norm": 0.1884765625, "learning_rate": 9.41330761249831e-07, "loss": 0.0774, "step": 11770 }, { "epoch": 1.45359266428621, "grad_norm": 0.1884765625, "learning_rate": 9.412136107190724e-07, "loss": 0.0699, "step": 11780 }, { "epoch": 1.4548265942761072, "grad_norm": 0.20703125, "learning_rate": 9.410963506468713e-07, "loss": 0.0648, "step": 11790 }, { "epoch": 1.4560605242660045, "grad_norm": 0.166015625, "learning_rate": 9.409789810623401e-07, "loss": 0.0642, "step": 11800 }, { "epoch": 1.4572944542559017, "grad_norm": 0.14453125, "learning_rate": 9.408615019946187e-07, "loss": 0.0502, "step": 11810 }, { "epoch": 1.4585283842457988, "grad_norm": 0.10107421875, "learning_rate": 9.407439134728736e-07, "loss": 0.0525, "step": 11820 }, { "epoch": 1.4597623142356961, "grad_norm": 0.2080078125, "learning_rate": 9.406262155262994e-07, "loss": 0.0754, "step": 11830 }, { "epoch": 1.4609962442255933, "grad_norm": 0.2490234375, "learning_rate": 9.405084081841168e-07, "loss": 0.0703, "step": 11840 }, { "epoch": 1.4622301742154904, "grad_norm": 0.201171875, "learning_rate": 9.403904914755745e-07, "loss": 0.0654, "step": 11850 }, { "epoch": 1.4634641042053875, "grad_norm": 0.1630859375, "learning_rate": 9.402724654299481e-07, "loss": 0.0651, "step": 11860 }, { "epoch": 1.4646980341952849, "grad_norm": 0.25, "learning_rate": 9.4015433007654e-07, "loss": 0.0675, "step": 11870 }, { "epoch": 1.465931964185182, "grad_norm": 0.2734375, "learning_rate": 9.400360854446803e-07, "loss": 0.0763, "step": 11880 }, { "epoch": 1.4671658941750794, "grad_norm": 0.1611328125, "learning_rate": 9.399177315637257e-07, "loss": 0.0764, "step": 11890 }, { "epoch": 1.4683998241649765, "grad_norm": 0.263671875, "learning_rate": 9.397992684630605e-07, "loss": 0.0603, "step": 11900 }, { "epoch": 1.4696337541548736, "grad_norm": 0.1953125, "learning_rate": 9.396806961720958e-07, "loss": 0.0701, "step": 11910 }, { "epoch": 1.4708676841447708, "grad_norm": 0.125, "learning_rate": 9.395620147202698e-07, "loss": 0.0633, "step": 11920 }, { "epoch": 1.4721016141346681, "grad_norm": 0.451171875, "learning_rate": 9.39443224137048e-07, "loss": 0.0668, "step": 11930 }, { "epoch": 1.4733355441245652, "grad_norm": 0.234375, "learning_rate": 9.393243244519231e-07, "loss": 0.0754, "step": 11940 }, { "epoch": 1.4745694741144624, "grad_norm": 0.2177734375, "learning_rate": 9.392053156944144e-07, "loss": 0.0539, "step": 11950 }, { "epoch": 1.4758034041043597, "grad_norm": 0.201171875, "learning_rate": 9.390861978940685e-07, "loss": 0.0721, "step": 11960 }, { "epoch": 1.4770373340942569, "grad_norm": 0.310546875, "learning_rate": 9.389669710804595e-07, "loss": 0.0795, "step": 11970 }, { "epoch": 1.478271264084154, "grad_norm": 0.259765625, "learning_rate": 9.388476352831879e-07, "loss": 0.0699, "step": 11980 }, { "epoch": 1.4795051940740511, "grad_norm": 0.267578125, "learning_rate": 9.387281905318817e-07, "loss": 0.0699, "step": 11990 }, { "epoch": 1.4807391240639485, "grad_norm": 0.130859375, "learning_rate": 9.386086368561956e-07, "loss": 0.0548, "step": 12000 }, { "epoch": 1.4807391240639485, "eval_exact_match": 0.6533742331288344, "eval_has_answer_correct": 0.5764966740576497, "eval_no_answer_correct": 0.8258706467661692, "step": 12000 }, { "epoch": 1.4819730540538456, "grad_norm": 0.330078125, "learning_rate": 9.38488974285812e-07, "loss": 0.0528, "step": 12010 }, { "epoch": 1.4832069840437427, "grad_norm": 0.28515625, "learning_rate": 9.383692028504395e-07, "loss": 0.0738, "step": 12020 }, { "epoch": 1.48444091403364, "grad_norm": 0.162109375, "learning_rate": 9.382493225798141e-07, "loss": 0.0685, "step": 12030 }, { "epoch": 1.4856748440235372, "grad_norm": 0.1748046875, "learning_rate": 9.38129333503699e-07, "loss": 0.062, "step": 12040 }, { "epoch": 1.4869087740134344, "grad_norm": 0.2177734375, "learning_rate": 9.380092356518844e-07, "loss": 0.0735, "step": 12050 }, { "epoch": 1.4881427040033315, "grad_norm": 0.291015625, "learning_rate": 9.37889029054187e-07, "loss": 0.0655, "step": 12060 }, { "epoch": 1.4893766339932288, "grad_norm": 0.2890625, "learning_rate": 9.377687137404512e-07, "loss": 0.073, "step": 12070 }, { "epoch": 1.490610563983126, "grad_norm": 0.212890625, "learning_rate": 9.376482897405477e-07, "loss": 0.0672, "step": 12080 }, { "epoch": 1.4918444939730233, "grad_norm": 0.265625, "learning_rate": 9.375277570843749e-07, "loss": 0.0725, "step": 12090 }, { "epoch": 1.4930784239629205, "grad_norm": 0.2099609375, "learning_rate": 9.374071158018575e-07, "loss": 0.0693, "step": 12100 }, { "epoch": 1.4943123539528176, "grad_norm": 0.181640625, "learning_rate": 9.372863659229476e-07, "loss": 0.0651, "step": 12110 }, { "epoch": 1.4955462839427147, "grad_norm": 0.15234375, "learning_rate": 9.371655074776242e-07, "loss": 0.0672, "step": 12120 }, { "epoch": 1.496780213932612, "grad_norm": 0.2392578125, "learning_rate": 9.370445404958931e-07, "loss": 0.0752, "step": 12130 }, { "epoch": 1.4980141439225092, "grad_norm": 0.1328125, "learning_rate": 9.369234650077873e-07, "loss": 0.0556, "step": 12140 }, { "epoch": 1.4992480739124063, "grad_norm": 0.328125, "learning_rate": 9.368022810433663e-07, "loss": 0.0698, "step": 12150 }, { "epoch": 1.5004820039023037, "grad_norm": 0.1630859375, "learning_rate": 9.366809886327169e-07, "loss": 0.0556, "step": 12160 }, { "epoch": 1.5017159338922008, "grad_norm": 0.248046875, "learning_rate": 9.36559587805953e-07, "loss": 0.0801, "step": 12170 }, { "epoch": 1.502949863882098, "grad_norm": 0.150390625, "learning_rate": 9.364380785932149e-07, "loss": 0.0664, "step": 12180 }, { "epoch": 1.504183793871995, "grad_norm": 0.31640625, "learning_rate": 9.3631646102467e-07, "loss": 0.0737, "step": 12190 }, { "epoch": 1.5054177238618924, "grad_norm": 0.23046875, "learning_rate": 9.361947351305128e-07, "loss": 0.076, "step": 12200 }, { "epoch": 1.5066516538517896, "grad_norm": 0.283203125, "learning_rate": 9.360729009409646e-07, "loss": 0.0575, "step": 12210 }, { "epoch": 1.507885583841687, "grad_norm": 0.1669921875, "learning_rate": 9.359509584862735e-07, "loss": 0.0836, "step": 12220 }, { "epoch": 1.509119513831584, "grad_norm": 0.1494140625, "learning_rate": 9.358289077967145e-07, "loss": 0.0612, "step": 12230 }, { "epoch": 1.5103534438214812, "grad_norm": 0.33984375, "learning_rate": 9.357067489025894e-07, "loss": 0.08, "step": 12240 }, { "epoch": 1.5115873738113783, "grad_norm": 0.1943359375, "learning_rate": 9.355844818342271e-07, "loss": 0.0492, "step": 12250 }, { "epoch": 1.5128213038012754, "grad_norm": 0.40625, "learning_rate": 9.354621066219831e-07, "loss": 0.0719, "step": 12260 }, { "epoch": 1.5140552337911728, "grad_norm": 0.265625, "learning_rate": 9.3533962329624e-07, "loss": 0.0761, "step": 12270 }, { "epoch": 1.51528916378107, "grad_norm": 0.197265625, "learning_rate": 9.352170318874071e-07, "loss": 0.0578, "step": 12280 }, { "epoch": 1.5165230937709673, "grad_norm": 0.2177734375, "learning_rate": 9.350943324259204e-07, "loss": 0.0817, "step": 12290 }, { "epoch": 1.5177570237608644, "grad_norm": 0.2080078125, "learning_rate": 9.349715249422429e-07, "loss": 0.0657, "step": 12300 }, { "epoch": 1.5189909537507615, "grad_norm": 0.318359375, "learning_rate": 9.348486094668646e-07, "loss": 0.0661, "step": 12310 }, { "epoch": 1.5202248837406587, "grad_norm": 0.2119140625, "learning_rate": 9.347255860303018e-07, "loss": 0.0608, "step": 12320 }, { "epoch": 1.5214588137305558, "grad_norm": 0.2275390625, "learning_rate": 9.346024546630982e-07, "loss": 0.0713, "step": 12330 }, { "epoch": 1.5226927437204532, "grad_norm": 0.35546875, "learning_rate": 9.344792153958238e-07, "loss": 0.0716, "step": 12340 }, { "epoch": 1.5239266737103505, "grad_norm": 0.279296875, "learning_rate": 9.343558682590755e-07, "loss": 0.0687, "step": 12350 }, { "epoch": 1.5251606037002476, "grad_norm": 0.169921875, "learning_rate": 9.342324132834772e-07, "loss": 0.069, "step": 12360 }, { "epoch": 1.5263945336901448, "grad_norm": 0.255859375, "learning_rate": 9.341088504996795e-07, "loss": 0.0649, "step": 12370 }, { "epoch": 1.527628463680042, "grad_norm": 0.1708984375, "learning_rate": 9.339851799383597e-07, "loss": 0.0498, "step": 12380 }, { "epoch": 1.528862393669939, "grad_norm": 0.2275390625, "learning_rate": 9.338614016302218e-07, "loss": 0.0545, "step": 12390 }, { "epoch": 1.5300963236598364, "grad_norm": 0.228515625, "learning_rate": 9.337375156059965e-07, "loss": 0.0786, "step": 12400 }, { "epoch": 1.5313302536497335, "grad_norm": 0.1328125, "learning_rate": 9.336135218964415e-07, "loss": 0.0522, "step": 12410 }, { "epoch": 1.5325641836396309, "grad_norm": 0.3828125, "learning_rate": 9.334894205323411e-07, "loss": 0.0627, "step": 12420 }, { "epoch": 1.533798113629528, "grad_norm": 0.1552734375, "learning_rate": 9.333652115445063e-07, "loss": 0.0809, "step": 12430 }, { "epoch": 1.5350320436194251, "grad_norm": 0.2109375, "learning_rate": 9.332408949637749e-07, "loss": 0.0841, "step": 12440 }, { "epoch": 1.5362659736093223, "grad_norm": 0.455078125, "learning_rate": 9.331164708210113e-07, "loss": 0.0645, "step": 12450 }, { "epoch": 1.5374999035992194, "grad_norm": 0.2060546875, "learning_rate": 9.329919391471066e-07, "loss": 0.07, "step": 12460 }, { "epoch": 1.5387338335891168, "grad_norm": 0.333984375, "learning_rate": 9.328672999729788e-07, "loss": 0.0726, "step": 12470 }, { "epoch": 1.5399677635790139, "grad_norm": 0.1171875, "learning_rate": 9.327425533295723e-07, "loss": 0.0616, "step": 12480 }, { "epoch": 1.5412016935689112, "grad_norm": 0.11328125, "learning_rate": 9.326176992478583e-07, "loss": 0.0671, "step": 12490 }, { "epoch": 1.5424356235588084, "grad_norm": 0.3203125, "learning_rate": 9.324927377588349e-07, "loss": 0.061, "step": 12500 }, { "epoch": 1.5436695535487055, "grad_norm": 0.30078125, "learning_rate": 9.323676688935265e-07, "loss": 0.0638, "step": 12510 }, { "epoch": 1.5449034835386026, "grad_norm": 0.150390625, "learning_rate": 9.322424926829844e-07, "loss": 0.0767, "step": 12520 }, { "epoch": 1.5461374135285, "grad_norm": 0.33984375, "learning_rate": 9.321172091582865e-07, "loss": 0.063, "step": 12530 }, { "epoch": 1.5473713435183971, "grad_norm": 0.197265625, "learning_rate": 9.319918183505373e-07, "loss": 0.07, "step": 12540 }, { "epoch": 1.5486052735082945, "grad_norm": 0.1875, "learning_rate": 9.318663202908679e-07, "loss": 0.0724, "step": 12550 }, { "epoch": 1.5498392034981916, "grad_norm": 0.212890625, "learning_rate": 9.317407150104361e-07, "loss": 0.0576, "step": 12560 }, { "epoch": 1.5510731334880887, "grad_norm": 0.2314453125, "learning_rate": 9.316150025404264e-07, "loss": 0.0738, "step": 12570 }, { "epoch": 1.5523070634779859, "grad_norm": 0.1376953125, "learning_rate": 9.314891829120495e-07, "loss": 0.0727, "step": 12580 }, { "epoch": 1.553540993467883, "grad_norm": 0.255859375, "learning_rate": 9.313632561565437e-07, "loss": 0.0604, "step": 12590 }, { "epoch": 1.5547749234577803, "grad_norm": 0.25390625, "learning_rate": 9.312372223051724e-07, "loss": 0.0712, "step": 12600 }, { "epoch": 1.5560088534476775, "grad_norm": 0.181640625, "learning_rate": 9.311110813892269e-07, "loss": 0.066, "step": 12610 }, { "epoch": 1.5572427834375748, "grad_norm": 0.458984375, "learning_rate": 9.309848334400245e-07, "loss": 0.0807, "step": 12620 }, { "epoch": 1.558476713427472, "grad_norm": 0.1923828125, "learning_rate": 9.308584784889093e-07, "loss": 0.0802, "step": 12630 }, { "epoch": 1.559710643417369, "grad_norm": 0.2490234375, "learning_rate": 9.307320165672514e-07, "loss": 0.08, "step": 12640 }, { "epoch": 1.5609445734072662, "grad_norm": 0.125, "learning_rate": 9.306054477064483e-07, "loss": 0.0653, "step": 12650 }, { "epoch": 1.5621785033971634, "grad_norm": 0.1875, "learning_rate": 9.304787719379235e-07, "loss": 0.0649, "step": 12660 }, { "epoch": 1.5634124333870607, "grad_norm": 0.1708984375, "learning_rate": 9.303519892931271e-07, "loss": 0.0717, "step": 12670 }, { "epoch": 1.564646363376958, "grad_norm": 0.11865234375, "learning_rate": 9.302250998035359e-07, "loss": 0.0629, "step": 12680 }, { "epoch": 1.5658802933668552, "grad_norm": 0.1533203125, "learning_rate": 9.300981035006532e-07, "loss": 0.0762, "step": 12690 }, { "epoch": 1.5671142233567523, "grad_norm": 0.337890625, "learning_rate": 9.299710004160086e-07, "loss": 0.0646, "step": 12700 }, { "epoch": 1.5683481533466495, "grad_norm": 0.26953125, "learning_rate": 9.298437905811585e-07, "loss": 0.0627, "step": 12710 }, { "epoch": 1.5695820833365466, "grad_norm": 0.20703125, "learning_rate": 9.297164740276855e-07, "loss": 0.0571, "step": 12720 }, { "epoch": 1.570816013326444, "grad_norm": 0.2890625, "learning_rate": 9.295890507871991e-07, "loss": 0.0693, "step": 12730 }, { "epoch": 1.572049943316341, "grad_norm": 0.1533203125, "learning_rate": 9.294615208913348e-07, "loss": 0.064, "step": 12740 }, { "epoch": 1.5732838733062384, "grad_norm": 0.150390625, "learning_rate": 9.293338843717551e-07, "loss": 0.0671, "step": 12750 }, { "epoch": 1.5745178032961356, "grad_norm": 0.2470703125, "learning_rate": 9.292061412601486e-07, "loss": 0.0713, "step": 12760 }, { "epoch": 1.5757517332860327, "grad_norm": 0.26171875, "learning_rate": 9.290782915882302e-07, "loss": 0.0687, "step": 12770 }, { "epoch": 1.5769856632759298, "grad_norm": 0.1513671875, "learning_rate": 9.28950335387742e-07, "loss": 0.0733, "step": 12780 }, { "epoch": 1.578219593265827, "grad_norm": 0.185546875, "learning_rate": 9.288222726904519e-07, "loss": 0.0731, "step": 12790 }, { "epoch": 1.5794535232557243, "grad_norm": 0.322265625, "learning_rate": 9.286941035281543e-07, "loss": 0.0747, "step": 12800 }, { "epoch": 1.5806874532456214, "grad_norm": 0.193359375, "learning_rate": 9.285658279326701e-07, "loss": 0.065, "step": 12810 }, { "epoch": 1.5819213832355188, "grad_norm": 0.259765625, "learning_rate": 9.284374459358469e-07, "loss": 0.0797, "step": 12820 }, { "epoch": 1.583155313225416, "grad_norm": 0.1953125, "learning_rate": 9.283089575695582e-07, "loss": 0.0821, "step": 12830 }, { "epoch": 1.584389243215313, "grad_norm": 0.166015625, "learning_rate": 9.281803628657046e-07, "loss": 0.0707, "step": 12840 }, { "epoch": 1.5856231732052102, "grad_norm": 0.24609375, "learning_rate": 9.280516618562123e-07, "loss": 0.0617, "step": 12850 }, { "epoch": 1.5868571031951073, "grad_norm": 0.150390625, "learning_rate": 9.279228545730346e-07, "loss": 0.0727, "step": 12860 }, { "epoch": 1.5880910331850047, "grad_norm": 0.3203125, "learning_rate": 9.277939410481505e-07, "loss": 0.0738, "step": 12870 }, { "epoch": 1.589324963174902, "grad_norm": 0.3828125, "learning_rate": 9.276649213135662e-07, "loss": 0.0833, "step": 12880 }, { "epoch": 1.5905588931647991, "grad_norm": 0.2353515625, "learning_rate": 9.275357954013136e-07, "loss": 0.0865, "step": 12890 }, { "epoch": 1.5917928231546963, "grad_norm": 0.263671875, "learning_rate": 9.27406563343451e-07, "loss": 0.0625, "step": 12900 }, { "epoch": 1.5930267531445934, "grad_norm": 0.18359375, "learning_rate": 9.272772251720635e-07, "loss": 0.0789, "step": 12910 }, { "epoch": 1.5942606831344905, "grad_norm": 0.2099609375, "learning_rate": 9.271477809192623e-07, "loss": 0.0833, "step": 12920 }, { "epoch": 1.595494613124388, "grad_norm": 0.1484375, "learning_rate": 9.270182306171848e-07, "loss": 0.0612, "step": 12930 }, { "epoch": 1.596728543114285, "grad_norm": 0.1640625, "learning_rate": 9.268885742979948e-07, "loss": 0.0536, "step": 12940 }, { "epoch": 1.5979624731041824, "grad_norm": 0.263671875, "learning_rate": 9.267588119938825e-07, "loss": 0.0632, "step": 12950 }, { "epoch": 1.5991964030940795, "grad_norm": 0.361328125, "learning_rate": 9.266289437370645e-07, "loss": 0.072, "step": 12960 }, { "epoch": 1.6004303330839766, "grad_norm": 0.2177734375, "learning_rate": 9.264989695597833e-07, "loss": 0.0739, "step": 12970 }, { "epoch": 1.6016642630738738, "grad_norm": 0.2265625, "learning_rate": 9.263688894943085e-07, "loss": 0.0647, "step": 12980 }, { "epoch": 1.602898193063771, "grad_norm": 0.181640625, "learning_rate": 9.26238703572935e-07, "loss": 0.0549, "step": 12990 }, { "epoch": 1.6041321230536683, "grad_norm": 0.2158203125, "learning_rate": 9.261084118279846e-07, "loss": 0.0609, "step": 13000 }, { "epoch": 1.6041321230536683, "eval_exact_match": 0.651840490797546, "eval_has_answer_correct": 0.5720620842572062, "eval_no_answer_correct": 0.8308457711442786, "step": 13000 }, { "epoch": 1.6053660530435654, "grad_norm": 0.1630859375, "learning_rate": 9.259780142918053e-07, "loss": 0.0598, "step": 13010 }, { "epoch": 1.6065999830334627, "grad_norm": 0.328125, "learning_rate": 9.258475109967712e-07, "loss": 0.074, "step": 13020 }, { "epoch": 1.6078339130233599, "grad_norm": 0.1455078125, "learning_rate": 9.257169019752826e-07, "loss": 0.0726, "step": 13030 }, { "epoch": 1.609067843013257, "grad_norm": 0.1630859375, "learning_rate": 9.255861872597665e-07, "loss": 0.08, "step": 13040 }, { "epoch": 1.6103017730031541, "grad_norm": 0.2109375, "learning_rate": 9.254553668826759e-07, "loss": 0.0655, "step": 13050 }, { "epoch": 1.6115357029930513, "grad_norm": 0.2119140625, "learning_rate": 9.253244408764897e-07, "loss": 0.0539, "step": 13060 }, { "epoch": 1.6127696329829486, "grad_norm": 0.154296875, "learning_rate": 9.251934092737133e-07, "loss": 0.0696, "step": 13070 }, { "epoch": 1.614003562972846, "grad_norm": 0.224609375, "learning_rate": 9.250622721068784e-07, "loss": 0.0755, "step": 13080 }, { "epoch": 1.615237492962743, "grad_norm": 0.2158203125, "learning_rate": 9.249310294085429e-07, "loss": 0.0772, "step": 13090 }, { "epoch": 1.6164714229526402, "grad_norm": 0.296875, "learning_rate": 9.247996812112906e-07, "loss": 0.0707, "step": 13100 }, { "epoch": 1.6177053529425374, "grad_norm": 0.205078125, "learning_rate": 9.24668227547732e-07, "loss": 0.0664, "step": 13110 }, { "epoch": 1.6189392829324345, "grad_norm": 0.2734375, "learning_rate": 9.245366684505034e-07, "loss": 0.0737, "step": 13120 }, { "epoch": 1.6201732129223319, "grad_norm": 0.232421875, "learning_rate": 9.244050039522672e-07, "loss": 0.0702, "step": 13130 }, { "epoch": 1.621407142912229, "grad_norm": 0.181640625, "learning_rate": 9.242732340857123e-07, "loss": 0.068, "step": 13140 }, { "epoch": 1.6226410729021263, "grad_norm": 0.2158203125, "learning_rate": 9.241413588835536e-07, "loss": 0.0591, "step": 13150 }, { "epoch": 1.6238750028920235, "grad_norm": 0.232421875, "learning_rate": 9.240093783785323e-07, "loss": 0.0613, "step": 13160 }, { "epoch": 1.6251089328819206, "grad_norm": 0.2255859375, "learning_rate": 9.238772926034153e-07, "loss": 0.0626, "step": 13170 }, { "epoch": 1.6263428628718177, "grad_norm": 0.2060546875, "learning_rate": 9.23745101590996e-07, "loss": 0.0668, "step": 13180 }, { "epoch": 1.6275767928617149, "grad_norm": 0.2119140625, "learning_rate": 9.236128053740941e-07, "loss": 0.0674, "step": 13190 }, { "epoch": 1.6288107228516122, "grad_norm": 0.228515625, "learning_rate": 9.23480403985555e-07, "loss": 0.0625, "step": 13200 }, { "epoch": 1.6300446528415096, "grad_norm": 0.255859375, "learning_rate": 9.233478974582503e-07, "loss": 0.0808, "step": 13210 }, { "epoch": 1.6312785828314067, "grad_norm": 0.1513671875, "learning_rate": 9.232152858250781e-07, "loss": 0.0828, "step": 13220 }, { "epoch": 1.6325125128213038, "grad_norm": 0.2734375, "learning_rate": 9.230825691189619e-07, "loss": 0.0656, "step": 13230 }, { "epoch": 1.633746442811201, "grad_norm": 0.244140625, "learning_rate": 9.229497473728522e-07, "loss": 0.0681, "step": 13240 }, { "epoch": 1.634980372801098, "grad_norm": 0.1416015625, "learning_rate": 9.228168206197245e-07, "loss": 0.0582, "step": 13250 }, { "epoch": 1.6362143027909954, "grad_norm": 0.1865234375, "learning_rate": 9.226837888925812e-07, "loss": 0.0815, "step": 13260 }, { "epoch": 1.6374482327808926, "grad_norm": 0.2255859375, "learning_rate": 9.225506522244506e-07, "loss": 0.0687, "step": 13270 }, { "epoch": 1.63868216277079, "grad_norm": 0.1689453125, "learning_rate": 9.224174106483867e-07, "loss": 0.0769, "step": 13280 }, { "epoch": 1.639916092760687, "grad_norm": 0.228515625, "learning_rate": 9.2228406419747e-07, "loss": 0.0672, "step": 13290 }, { "epoch": 1.6411500227505842, "grad_norm": 0.181640625, "learning_rate": 9.221506129048067e-07, "loss": 0.071, "step": 13300 }, { "epoch": 1.6423839527404813, "grad_norm": 0.1591796875, "learning_rate": 9.220170568035292e-07, "loss": 0.0664, "step": 13310 }, { "epoch": 1.6436178827303785, "grad_norm": 0.2138671875, "learning_rate": 9.218833959267957e-07, "loss": 0.0721, "step": 13320 }, { "epoch": 1.6448518127202758, "grad_norm": 0.1923828125, "learning_rate": 9.21749630307791e-07, "loss": 0.0777, "step": 13330 }, { "epoch": 1.646085742710173, "grad_norm": 0.1337890625, "learning_rate": 9.216157599797252e-07, "loss": 0.079, "step": 13340 }, { "epoch": 1.6473196727000703, "grad_norm": 0.5078125, "learning_rate": 9.214817849758347e-07, "loss": 0.068, "step": 13350 }, { "epoch": 1.6485536026899674, "grad_norm": 0.16796875, "learning_rate": 9.213477053293819e-07, "loss": 0.081, "step": 13360 }, { "epoch": 1.6497875326798646, "grad_norm": 0.271484375, "learning_rate": 9.212135210736552e-07, "loss": 0.0779, "step": 13370 }, { "epoch": 1.6510214626697617, "grad_norm": 0.142578125, "learning_rate": 9.210792322419689e-07, "loss": 0.0742, "step": 13380 }, { "epoch": 1.6522553926596588, "grad_norm": 0.216796875, "learning_rate": 9.209448388676635e-07, "loss": 0.0607, "step": 13390 }, { "epoch": 1.6534893226495562, "grad_norm": 0.25, "learning_rate": 9.20810340984105e-07, "loss": 0.0612, "step": 13400 }, { "epoch": 1.6547232526394535, "grad_norm": 0.2294921875, "learning_rate": 9.206757386246855e-07, "loss": 0.0785, "step": 13410 }, { "epoch": 1.6559571826293507, "grad_norm": 0.2734375, "learning_rate": 9.205410318228235e-07, "loss": 0.0841, "step": 13420 }, { "epoch": 1.6571911126192478, "grad_norm": 0.123046875, "learning_rate": 9.204062206119627e-07, "loss": 0.0669, "step": 13430 }, { "epoch": 1.658425042609145, "grad_norm": 0.2470703125, "learning_rate": 9.202713050255735e-07, "loss": 0.0641, "step": 13440 }, { "epoch": 1.659658972599042, "grad_norm": 0.2314453125, "learning_rate": 9.201362850971514e-07, "loss": 0.0697, "step": 13450 }, { "epoch": 1.6608929025889394, "grad_norm": 0.275390625, "learning_rate": 9.200011608602182e-07, "loss": 0.0669, "step": 13460 }, { "epoch": 1.6621268325788365, "grad_norm": 0.1630859375, "learning_rate": 9.198659323483222e-07, "loss": 0.0619, "step": 13470 }, { "epoch": 1.6633607625687339, "grad_norm": 0.169921875, "learning_rate": 9.197305995950362e-07, "loss": 0.0606, "step": 13480 }, { "epoch": 1.664594692558631, "grad_norm": 0.2001953125, "learning_rate": 9.195951626339602e-07, "loss": 0.0658, "step": 13490 }, { "epoch": 1.6658286225485281, "grad_norm": 0.2041015625, "learning_rate": 9.194596214987193e-07, "loss": 0.0607, "step": 13500 }, { "epoch": 1.6670625525384253, "grad_norm": 0.2197265625, "learning_rate": 9.19323976222965e-07, "loss": 0.0572, "step": 13510 }, { "epoch": 1.6682964825283224, "grad_norm": 0.2294921875, "learning_rate": 9.191882268403741e-07, "loss": 0.077, "step": 13520 }, { "epoch": 1.6695304125182198, "grad_norm": 0.158203125, "learning_rate": 9.190523733846494e-07, "loss": 0.073, "step": 13530 }, { "epoch": 1.670764342508117, "grad_norm": 0.1904296875, "learning_rate": 9.1891641588952e-07, "loss": 0.073, "step": 13540 }, { "epoch": 1.6719982724980142, "grad_norm": 0.306640625, "learning_rate": 9.187803543887403e-07, "loss": 0.06, "step": 13550 }, { "epoch": 1.6732322024879114, "grad_norm": 0.1025390625, "learning_rate": 9.186441889160906e-07, "loss": 0.061, "step": 13560 }, { "epoch": 1.6744661324778085, "grad_norm": 0.1748046875, "learning_rate": 9.185079195053772e-07, "loss": 0.0622, "step": 13570 }, { "epoch": 1.6757000624677056, "grad_norm": 0.32421875, "learning_rate": 9.183715461904323e-07, "loss": 0.0717, "step": 13580 }, { "epoch": 1.6769339924576028, "grad_norm": 0.3203125, "learning_rate": 9.182350690051132e-07, "loss": 0.0732, "step": 13590 }, { "epoch": 1.6781679224475001, "grad_norm": 0.30859375, "learning_rate": 9.180984879833042e-07, "loss": 0.0645, "step": 13600 }, { "epoch": 1.6794018524373975, "grad_norm": 0.1396484375, "learning_rate": 9.179618031589143e-07, "loss": 0.0663, "step": 13610 }, { "epoch": 1.6806357824272946, "grad_norm": 0.2177734375, "learning_rate": 9.178250145658787e-07, "loss": 0.0591, "step": 13620 }, { "epoch": 1.6818697124171917, "grad_norm": 0.2158203125, "learning_rate": 9.176881222381583e-07, "loss": 0.0607, "step": 13630 }, { "epoch": 1.6831036424070889, "grad_norm": 0.279296875, "learning_rate": 9.175511262097397e-07, "loss": 0.0746, "step": 13640 }, { "epoch": 1.684337572396986, "grad_norm": 0.20703125, "learning_rate": 9.174140265146355e-07, "loss": 0.0651, "step": 13650 }, { "epoch": 1.6855715023868834, "grad_norm": 0.2060546875, "learning_rate": 9.172768231868838e-07, "loss": 0.0795, "step": 13660 }, { "epoch": 1.6868054323767805, "grad_norm": 0.21875, "learning_rate": 9.171395162605485e-07, "loss": 0.0698, "step": 13670 }, { "epoch": 1.6880393623666778, "grad_norm": 0.263671875, "learning_rate": 9.17002105769719e-07, "loss": 0.0732, "step": 13680 }, { "epoch": 1.689273292356575, "grad_norm": 0.189453125, "learning_rate": 9.168645917485109e-07, "loss": 0.0787, "step": 13690 }, { "epoch": 1.690507222346472, "grad_norm": 0.259765625, "learning_rate": 9.167269742310651e-07, "loss": 0.0746, "step": 13700 }, { "epoch": 1.6917411523363692, "grad_norm": 0.287109375, "learning_rate": 9.165892532515484e-07, "loss": 0.0696, "step": 13710 }, { "epoch": 1.6929750823262664, "grad_norm": 0.185546875, "learning_rate": 9.164514288441531e-07, "loss": 0.0724, "step": 13720 }, { "epoch": 1.6942090123161637, "grad_norm": 0.2255859375, "learning_rate": 9.163135010430972e-07, "loss": 0.0759, "step": 13730 }, { "epoch": 1.695442942306061, "grad_norm": 0.2890625, "learning_rate": 9.161754698826247e-07, "loss": 0.0545, "step": 13740 }, { "epoch": 1.6966768722959582, "grad_norm": 0.236328125, "learning_rate": 9.160373353970048e-07, "loss": 0.0739, "step": 13750 }, { "epoch": 1.6979108022858553, "grad_norm": 0.17578125, "learning_rate": 9.158990976205327e-07, "loss": 0.0673, "step": 13760 }, { "epoch": 1.6991447322757525, "grad_norm": 0.4375, "learning_rate": 9.157607565875292e-07, "loss": 0.0922, "step": 13770 }, { "epoch": 1.7003786622656496, "grad_norm": 0.271484375, "learning_rate": 9.156223123323404e-07, "loss": 0.0764, "step": 13780 }, { "epoch": 1.701612592255547, "grad_norm": 0.169921875, "learning_rate": 9.154837648893384e-07, "loss": 0.0565, "step": 13790 }, { "epoch": 1.702846522245444, "grad_norm": 0.16015625, "learning_rate": 9.153451142929207e-07, "loss": 0.0804, "step": 13800 }, { "epoch": 1.7040804522353414, "grad_norm": 0.1923828125, "learning_rate": 9.152063605775107e-07, "loss": 0.071, "step": 13810 }, { "epoch": 1.7053143822252386, "grad_norm": 0.408203125, "learning_rate": 9.150675037775571e-07, "loss": 0.0718, "step": 13820 }, { "epoch": 1.7065483122151357, "grad_norm": 0.28515625, "learning_rate": 9.149285439275341e-07, "loss": 0.0656, "step": 13830 }, { "epoch": 1.7077822422050328, "grad_norm": 0.25, "learning_rate": 9.147894810619421e-07, "loss": 0.073, "step": 13840 }, { "epoch": 1.70901617219493, "grad_norm": 0.189453125, "learning_rate": 9.146503152153063e-07, "loss": 0.0685, "step": 13850 }, { "epoch": 1.7102501021848273, "grad_norm": 0.294921875, "learning_rate": 9.145110464221781e-07, "loss": 0.074, "step": 13860 }, { "epoch": 1.7114840321747244, "grad_norm": 0.201171875, "learning_rate": 9.143716747171339e-07, "loss": 0.0683, "step": 13870 }, { "epoch": 1.7127179621646218, "grad_norm": 0.11767578125, "learning_rate": 9.142322001347763e-07, "loss": 0.0683, "step": 13880 }, { "epoch": 1.713951892154519, "grad_norm": 0.3359375, "learning_rate": 9.140926227097327e-07, "loss": 0.0855, "step": 13890 }, { "epoch": 1.715185822144416, "grad_norm": 0.1337890625, "learning_rate": 9.139529424766567e-07, "loss": 0.0718, "step": 13900 }, { "epoch": 1.7164197521343132, "grad_norm": 0.119140625, "learning_rate": 9.13813159470227e-07, "loss": 0.0596, "step": 13910 }, { "epoch": 1.7176536821242103, "grad_norm": 0.265625, "learning_rate": 9.136732737251478e-07, "loss": 0.0762, "step": 13920 }, { "epoch": 1.7188876121141077, "grad_norm": 0.24609375, "learning_rate": 9.135332852761492e-07, "loss": 0.081, "step": 13930 }, { "epoch": 1.720121542104005, "grad_norm": 0.1953125, "learning_rate": 9.133931941579867e-07, "loss": 0.0661, "step": 13940 }, { "epoch": 1.7213554720939022, "grad_norm": 0.2275390625, "learning_rate": 9.132530004054407e-07, "loss": 0.0706, "step": 13950 }, { "epoch": 1.7225894020837993, "grad_norm": 0.25, "learning_rate": 9.131127040533178e-07, "loss": 0.0627, "step": 13960 }, { "epoch": 1.7238233320736964, "grad_norm": 0.18359375, "learning_rate": 9.129723051364497e-07, "loss": 0.0772, "step": 13970 }, { "epoch": 1.7250572620635936, "grad_norm": 0.34765625, "learning_rate": 9.128318036896939e-07, "loss": 0.0774, "step": 13980 }, { "epoch": 1.726291192053491, "grad_norm": 0.29296875, "learning_rate": 9.126911997479327e-07, "loss": 0.064, "step": 13990 }, { "epoch": 1.727525122043388, "grad_norm": 0.17578125, "learning_rate": 9.125504933460748e-07, "loss": 0.0706, "step": 14000 }, { "epoch": 1.727525122043388, "eval_exact_match": 0.656441717791411, "eval_has_answer_correct": 0.5809312638580931, "eval_no_answer_correct": 0.8258706467661692, "step": 14000 }, { "epoch": 1.7287590520332854, "grad_norm": 0.126953125, "learning_rate": 9.124096845190533e-07, "loss": 0.0532, "step": 14010 }, { "epoch": 1.7299929820231825, "grad_norm": 0.1689453125, "learning_rate": 9.122687733018275e-07, "loss": 0.063, "step": 14020 }, { "epoch": 1.7312269120130797, "grad_norm": 0.2060546875, "learning_rate": 9.121277597293818e-07, "loss": 0.0633, "step": 14030 }, { "epoch": 1.7324608420029768, "grad_norm": 0.1787109375, "learning_rate": 9.119866438367262e-07, "loss": 0.0584, "step": 14040 }, { "epoch": 1.733694771992874, "grad_norm": 0.27734375, "learning_rate": 9.118454256588956e-07, "loss": 0.0633, "step": 14050 }, { "epoch": 1.7349287019827713, "grad_norm": 0.212890625, "learning_rate": 9.117041052309511e-07, "loss": 0.0605, "step": 14060 }, { "epoch": 1.7361626319726684, "grad_norm": 0.1923828125, "learning_rate": 9.115626825879786e-07, "loss": 0.0732, "step": 14070 }, { "epoch": 1.7373965619625658, "grad_norm": 0.142578125, "learning_rate": 9.114211577650895e-07, "loss": 0.0591, "step": 14080 }, { "epoch": 1.7386304919524629, "grad_norm": 0.208984375, "learning_rate": 9.112795307974205e-07, "loss": 0.0676, "step": 14090 }, { "epoch": 1.73986442194236, "grad_norm": 0.3125, "learning_rate": 9.111378017201339e-07, "loss": 0.0728, "step": 14100 }, { "epoch": 1.7410983519322571, "grad_norm": 0.1396484375, "learning_rate": 9.109959705684171e-07, "loss": 0.0568, "step": 14110 }, { "epoch": 1.7423322819221543, "grad_norm": 0.27734375, "learning_rate": 9.108540373774829e-07, "loss": 0.1002, "step": 14120 }, { "epoch": 1.7435662119120516, "grad_norm": 0.228515625, "learning_rate": 9.107120021825698e-07, "loss": 0.076, "step": 14130 }, { "epoch": 1.744800141901949, "grad_norm": 0.1416015625, "learning_rate": 9.105698650189411e-07, "loss": 0.0699, "step": 14140 }, { "epoch": 1.7460340718918461, "grad_norm": 0.1416015625, "learning_rate": 9.104276259218855e-07, "loss": 0.0504, "step": 14150 }, { "epoch": 1.7472680018817432, "grad_norm": 0.1904296875, "learning_rate": 9.102852849267173e-07, "loss": 0.0701, "step": 14160 }, { "epoch": 1.7485019318716404, "grad_norm": 0.25, "learning_rate": 9.101428420687757e-07, "loss": 0.0694, "step": 14170 }, { "epoch": 1.7497358618615375, "grad_norm": 0.248046875, "learning_rate": 9.10000297383426e-07, "loss": 0.0693, "step": 14180 }, { "epoch": 1.7509697918514349, "grad_norm": 0.1943359375, "learning_rate": 9.098576509060576e-07, "loss": 0.0745, "step": 14190 }, { "epoch": 1.752203721841332, "grad_norm": 0.1728515625, "learning_rate": 9.09714902672086e-07, "loss": 0.0508, "step": 14200 }, { "epoch": 1.7534376518312293, "grad_norm": 0.142578125, "learning_rate": 9.095720527169517e-07, "loss": 0.0486, "step": 14210 }, { "epoch": 1.7546715818211265, "grad_norm": 0.1552734375, "learning_rate": 9.094291010761206e-07, "loss": 0.0693, "step": 14220 }, { "epoch": 1.7559055118110236, "grad_norm": 0.2177734375, "learning_rate": 9.092860477850834e-07, "loss": 0.0547, "step": 14230 }, { "epoch": 1.7571394418009207, "grad_norm": 0.3046875, "learning_rate": 9.09142892879357e-07, "loss": 0.066, "step": 14240 }, { "epoch": 1.7583733717908179, "grad_norm": 0.23046875, "learning_rate": 9.089996363944823e-07, "loss": 0.0587, "step": 14250 }, { "epoch": 1.7596073017807152, "grad_norm": 0.1953125, "learning_rate": 9.088562783660263e-07, "loss": 0.0534, "step": 14260 }, { "epoch": 1.7608412317706126, "grad_norm": 0.1904296875, "learning_rate": 9.087128188295809e-07, "loss": 0.0753, "step": 14270 }, { "epoch": 1.7620751617605097, "grad_norm": 0.1591796875, "learning_rate": 9.085692578207631e-07, "loss": 0.0715, "step": 14280 }, { "epoch": 1.7633090917504068, "grad_norm": 0.12451171875, "learning_rate": 9.084255953752155e-07, "loss": 0.0482, "step": 14290 }, { "epoch": 1.764543021740304, "grad_norm": 0.3046875, "learning_rate": 9.082818315286054e-07, "loss": 0.0684, "step": 14300 }, { "epoch": 1.765776951730201, "grad_norm": 0.31640625, "learning_rate": 9.081379663166256e-07, "loss": 0.0844, "step": 14310 }, { "epoch": 1.7670108817200985, "grad_norm": 0.359375, "learning_rate": 9.079939997749939e-07, "loss": 0.0561, "step": 14320 }, { "epoch": 1.7682448117099956, "grad_norm": 0.16796875, "learning_rate": 9.078499319394532e-07, "loss": 0.0745, "step": 14330 }, { "epoch": 1.769478741699893, "grad_norm": 0.16015625, "learning_rate": 9.077057628457719e-07, "loss": 0.09, "step": 14340 }, { "epoch": 1.77071267168979, "grad_norm": 0.140625, "learning_rate": 9.075614925297431e-07, "loss": 0.0642, "step": 14350 }, { "epoch": 1.7719466016796872, "grad_norm": 0.283203125, "learning_rate": 9.074171210271853e-07, "loss": 0.05, "step": 14360 }, { "epoch": 1.7731805316695843, "grad_norm": 0.203125, "learning_rate": 9.072726483739419e-07, "loss": 0.0835, "step": 14370 }, { "epoch": 1.7744144616594815, "grad_norm": 0.1572265625, "learning_rate": 9.071280746058819e-07, "loss": 0.077, "step": 14380 }, { "epoch": 1.7756483916493788, "grad_norm": 0.11376953125, "learning_rate": 9.069833997588989e-07, "loss": 0.0616, "step": 14390 }, { "epoch": 1.776882321639276, "grad_norm": 0.26953125, "learning_rate": 9.068386238689117e-07, "loss": 0.0607, "step": 14400 }, { "epoch": 1.7781162516291733, "grad_norm": 0.166015625, "learning_rate": 9.066937469718642e-07, "loss": 0.0664, "step": 14410 }, { "epoch": 1.7793501816190704, "grad_norm": 0.1943359375, "learning_rate": 9.065487691037255e-07, "loss": 0.0667, "step": 14420 }, { "epoch": 1.7805841116089676, "grad_norm": 0.263671875, "learning_rate": 9.064036903004899e-07, "loss": 0.0853, "step": 14430 }, { "epoch": 1.7818180415988647, "grad_norm": 0.31640625, "learning_rate": 9.062585105981762e-07, "loss": 0.0568, "step": 14440 }, { "epoch": 1.7830519715887618, "grad_norm": 0.244140625, "learning_rate": 9.061132300328289e-07, "loss": 0.0733, "step": 14450 }, { "epoch": 1.7842859015786592, "grad_norm": 0.1962890625, "learning_rate": 9.059678486405171e-07, "loss": 0.0653, "step": 14460 }, { "epoch": 1.7855198315685565, "grad_norm": 0.138671875, "learning_rate": 9.05822366457335e-07, "loss": 0.0771, "step": 14470 }, { "epoch": 1.7867537615584537, "grad_norm": 0.396484375, "learning_rate": 9.056767835194021e-07, "loss": 0.0888, "step": 14480 }, { "epoch": 1.7879876915483508, "grad_norm": 0.26171875, "learning_rate": 9.055310998628627e-07, "loss": 0.0682, "step": 14490 }, { "epoch": 1.789221621538248, "grad_norm": 0.1767578125, "learning_rate": 9.05385315523886e-07, "loss": 0.0631, "step": 14500 }, { "epoch": 1.790455551528145, "grad_norm": 0.26171875, "learning_rate": 9.052394305386663e-07, "loss": 0.0728, "step": 14510 }, { "epoch": 1.7916894815180424, "grad_norm": 0.1728515625, "learning_rate": 9.050934449434229e-07, "loss": 0.0751, "step": 14520 }, { "epoch": 1.7929234115079395, "grad_norm": 0.232421875, "learning_rate": 9.049473587744004e-07, "loss": 0.0766, "step": 14530 }, { "epoch": 1.794157341497837, "grad_norm": 0.1865234375, "learning_rate": 9.048011720678676e-07, "loss": 0.0671, "step": 14540 }, { "epoch": 1.795391271487734, "grad_norm": 0.22265625, "learning_rate": 9.04654884860119e-07, "loss": 0.0621, "step": 14550 }, { "epoch": 1.7966252014776312, "grad_norm": 0.1748046875, "learning_rate": 9.045084971874737e-07, "loss": 0.0646, "step": 14560 }, { "epoch": 1.7978591314675283, "grad_norm": 0.2890625, "learning_rate": 9.043620090862758e-07, "loss": 0.0643, "step": 14570 }, { "epoch": 1.7990930614574254, "grad_norm": 0.27734375, "learning_rate": 9.042154205928945e-07, "loss": 0.0757, "step": 14580 }, { "epoch": 1.8003269914473228, "grad_norm": 0.1806640625, "learning_rate": 9.040687317437234e-07, "loss": 0.0663, "step": 14590 }, { "epoch": 1.80156092143722, "grad_norm": 0.1748046875, "learning_rate": 9.039219425751819e-07, "loss": 0.0562, "step": 14600 }, { "epoch": 1.8027948514271173, "grad_norm": 0.2578125, "learning_rate": 9.037750531237132e-07, "loss": 0.0896, "step": 14610 }, { "epoch": 1.8040287814170144, "grad_norm": 0.294921875, "learning_rate": 9.036280634257865e-07, "loss": 0.0927, "step": 14620 }, { "epoch": 1.8052627114069115, "grad_norm": 0.279296875, "learning_rate": 9.034809735178952e-07, "loss": 0.0643, "step": 14630 }, { "epoch": 1.8064966413968087, "grad_norm": 0.193359375, "learning_rate": 9.033337834365576e-07, "loss": 0.0632, "step": 14640 }, { "epoch": 1.8077305713867058, "grad_norm": 0.2216796875, "learning_rate": 9.031864932183172e-07, "loss": 0.0624, "step": 14650 }, { "epoch": 1.8089645013766031, "grad_norm": 0.2255859375, "learning_rate": 9.030391028997424e-07, "loss": 0.0553, "step": 14660 }, { "epoch": 1.8101984313665005, "grad_norm": 0.283203125, "learning_rate": 9.028916125174258e-07, "loss": 0.0669, "step": 14670 }, { "epoch": 1.8114323613563976, "grad_norm": 0.2099609375, "learning_rate": 9.027440221079853e-07, "loss": 0.0649, "step": 14680 }, { "epoch": 1.8126662913462948, "grad_norm": 0.193359375, "learning_rate": 9.02596331708064e-07, "loss": 0.0732, "step": 14690 }, { "epoch": 1.8139002213361919, "grad_norm": 0.1923828125, "learning_rate": 9.024485413543294e-07, "loss": 0.0608, "step": 14700 }, { "epoch": 1.815134151326089, "grad_norm": 0.2421875, "learning_rate": 9.023006510834735e-07, "loss": 0.0687, "step": 14710 }, { "epoch": 1.8163680813159864, "grad_norm": 0.2021484375, "learning_rate": 9.021526609322137e-07, "loss": 0.0662, "step": 14720 }, { "epoch": 1.8176020113058835, "grad_norm": 0.306640625, "learning_rate": 9.020045709372919e-07, "loss": 0.0725, "step": 14730 }, { "epoch": 1.8188359412957809, "grad_norm": 0.1865234375, "learning_rate": 9.018563811354749e-07, "loss": 0.073, "step": 14740 }, { "epoch": 1.820069871285678, "grad_norm": 0.197265625, "learning_rate": 9.017080915635543e-07, "loss": 0.0599, "step": 14750 }, { "epoch": 1.8213038012755751, "grad_norm": 0.267578125, "learning_rate": 9.015597022583463e-07, "loss": 0.0588, "step": 14760 }, { "epoch": 1.8225377312654722, "grad_norm": 0.283203125, "learning_rate": 9.01411213256692e-07, "loss": 0.0782, "step": 14770 }, { "epoch": 1.8237716612553694, "grad_norm": 0.2294921875, "learning_rate": 9.012626245954571e-07, "loss": 0.0937, "step": 14780 }, { "epoch": 1.8250055912452667, "grad_norm": 0.1767578125, "learning_rate": 9.011139363115323e-07, "loss": 0.079, "step": 14790 }, { "epoch": 1.8262395212351639, "grad_norm": 0.33984375, "learning_rate": 9.009651484418331e-07, "loss": 0.0617, "step": 14800 }, { "epoch": 1.8274734512250612, "grad_norm": 0.33984375, "learning_rate": 9.008162610232989e-07, "loss": 0.0771, "step": 14810 }, { "epoch": 1.8287073812149583, "grad_norm": 0.15625, "learning_rate": 9.00667274092895e-07, "loss": 0.0921, "step": 14820 }, { "epoch": 1.8299413112048555, "grad_norm": 0.2412109375, "learning_rate": 9.005181876876106e-07, "loss": 0.0619, "step": 14830 }, { "epoch": 1.8311752411947526, "grad_norm": 0.3359375, "learning_rate": 9.003690018444599e-07, "loss": 0.0682, "step": 14840 }, { "epoch": 1.83240917118465, "grad_norm": 0.181640625, "learning_rate": 9.002197166004816e-07, "loss": 0.0676, "step": 14850 }, { "epoch": 1.833643101174547, "grad_norm": 0.251953125, "learning_rate": 9.000703319927394e-07, "loss": 0.0711, "step": 14860 }, { "epoch": 1.8348770311644444, "grad_norm": 0.1689453125, "learning_rate": 8.999208480583213e-07, "loss": 0.07, "step": 14870 }, { "epoch": 1.8361109611543416, "grad_norm": 0.166015625, "learning_rate": 8.9977126483434e-07, "loss": 0.0636, "step": 14880 }, { "epoch": 1.8373448911442387, "grad_norm": 0.2060546875, "learning_rate": 8.996215823579335e-07, "loss": 0.0652, "step": 14890 }, { "epoch": 1.8385788211341358, "grad_norm": 0.162109375, "learning_rate": 8.994718006662635e-07, "loss": 0.0769, "step": 14900 }, { "epoch": 1.839812751124033, "grad_norm": 0.228515625, "learning_rate": 8.993219197965169e-07, "loss": 0.067, "step": 14910 }, { "epoch": 1.8410466811139303, "grad_norm": 0.267578125, "learning_rate": 8.991719397859049e-07, "loss": 0.0627, "step": 14920 }, { "epoch": 1.8422806111038275, "grad_norm": 0.13671875, "learning_rate": 8.990218606716637e-07, "loss": 0.064, "step": 14930 }, { "epoch": 1.8435145410937248, "grad_norm": 0.1748046875, "learning_rate": 8.988716824910539e-07, "loss": 0.0642, "step": 14940 }, { "epoch": 1.844748471083622, "grad_norm": 0.1982421875, "learning_rate": 8.987214052813603e-07, "loss": 0.0753, "step": 14950 }, { "epoch": 1.845982401073519, "grad_norm": 0.2353515625, "learning_rate": 8.985710290798933e-07, "loss": 0.0609, "step": 14960 }, { "epoch": 1.8472163310634162, "grad_norm": 0.384765625, "learning_rate": 8.984205539239868e-07, "loss": 0.0722, "step": 14970 }, { "epoch": 1.8484502610533133, "grad_norm": 0.150390625, "learning_rate": 8.98269979851e-07, "loss": 0.0532, "step": 14980 }, { "epoch": 1.8496841910432107, "grad_norm": 0.2021484375, "learning_rate": 8.981193068983162e-07, "loss": 0.0709, "step": 14990 }, { "epoch": 1.850918121033108, "grad_norm": 0.1513671875, "learning_rate": 8.979685351033434e-07, "loss": 0.0592, "step": 15000 }, { "epoch": 1.850918121033108, "eval_exact_match": 0.6625766871165644, "eval_has_answer_correct": 0.5853658536585366, "eval_no_answer_correct": 0.835820895522388, "step": 15000 }, { "epoch": 1.8521520510230052, "grad_norm": 0.328125, "learning_rate": 8.978176645035144e-07, "loss": 0.0555, "step": 15010 }, { "epoch": 1.8533859810129023, "grad_norm": 0.1025390625, "learning_rate": 8.976666951362861e-07, "loss": 0.0635, "step": 15020 }, { "epoch": 1.8546199110027994, "grad_norm": 0.216796875, "learning_rate": 8.975156270391401e-07, "loss": 0.0711, "step": 15030 }, { "epoch": 1.8558538409926966, "grad_norm": 0.1591796875, "learning_rate": 8.973644602495827e-07, "loss": 0.052, "step": 15040 }, { "epoch": 1.857087770982594, "grad_norm": 0.224609375, "learning_rate": 8.972131948051445e-07, "loss": 0.0671, "step": 15050 }, { "epoch": 1.858321700972491, "grad_norm": 0.193359375, "learning_rate": 8.970618307433806e-07, "loss": 0.0665, "step": 15060 }, { "epoch": 1.8595556309623884, "grad_norm": 0.279296875, "learning_rate": 8.969103681018705e-07, "loss": 0.0592, "step": 15070 }, { "epoch": 1.8607895609522855, "grad_norm": 0.1923828125, "learning_rate": 8.967588069182183e-07, "loss": 0.0724, "step": 15080 }, { "epoch": 1.8620234909421827, "grad_norm": 0.27734375, "learning_rate": 8.966071472300529e-07, "loss": 0.0658, "step": 15090 }, { "epoch": 1.8632574209320798, "grad_norm": 0.177734375, "learning_rate": 8.964553890750269e-07, "loss": 0.0549, "step": 15100 }, { "epoch": 1.864491350921977, "grad_norm": 0.2490234375, "learning_rate": 8.963035324908179e-07, "loss": 0.0854, "step": 15110 }, { "epoch": 1.8657252809118743, "grad_norm": 0.1806640625, "learning_rate": 8.96151577515128e-07, "loss": 0.0668, "step": 15120 }, { "epoch": 1.8669592109017714, "grad_norm": 0.400390625, "learning_rate": 8.959995241856831e-07, "loss": 0.0634, "step": 15130 }, { "epoch": 1.8681931408916688, "grad_norm": 0.2060546875, "learning_rate": 8.958473725402342e-07, "loss": 0.0749, "step": 15140 }, { "epoch": 1.869427070881566, "grad_norm": 0.46875, "learning_rate": 8.956951226165566e-07, "loss": 0.0668, "step": 15150 }, { "epoch": 1.870661000871463, "grad_norm": 0.1572265625, "learning_rate": 8.955427744524495e-07, "loss": 0.0812, "step": 15160 }, { "epoch": 1.8718949308613602, "grad_norm": 0.1552734375, "learning_rate": 8.953903280857371e-07, "loss": 0.0852, "step": 15170 }, { "epoch": 1.8731288608512573, "grad_norm": 0.2216796875, "learning_rate": 8.952377835542677e-07, "loss": 0.0559, "step": 15180 }, { "epoch": 1.8743627908411546, "grad_norm": 0.140625, "learning_rate": 8.95085140895914e-07, "loss": 0.0675, "step": 15190 }, { "epoch": 1.875596720831052, "grad_norm": 0.353515625, "learning_rate": 8.949324001485728e-07, "loss": 0.0726, "step": 15200 }, { "epoch": 1.8768306508209491, "grad_norm": 0.267578125, "learning_rate": 8.947795613501656e-07, "loss": 0.0752, "step": 15210 }, { "epoch": 1.8780645808108463, "grad_norm": 0.2216796875, "learning_rate": 8.946266245386385e-07, "loss": 0.0644, "step": 15220 }, { "epoch": 1.8792985108007434, "grad_norm": 0.197265625, "learning_rate": 8.944735897519613e-07, "loss": 0.0525, "step": 15230 }, { "epoch": 1.8805324407906405, "grad_norm": 0.1689453125, "learning_rate": 8.943204570281284e-07, "loss": 0.064, "step": 15240 }, { "epoch": 1.8817663707805379, "grad_norm": 0.248046875, "learning_rate": 8.941672264051585e-07, "loss": 0.055, "step": 15250 }, { "epoch": 1.883000300770435, "grad_norm": 0.255859375, "learning_rate": 8.940138979210948e-07, "loss": 0.0625, "step": 15260 }, { "epoch": 1.8842342307603324, "grad_norm": 0.33203125, "learning_rate": 8.938604716140045e-07, "loss": 0.0706, "step": 15270 }, { "epoch": 1.8854681607502295, "grad_norm": 0.2021484375, "learning_rate": 8.937069475219794e-07, "loss": 0.0696, "step": 15280 }, { "epoch": 1.8867020907401266, "grad_norm": 0.146484375, "learning_rate": 8.935533256831351e-07, "loss": 0.0728, "step": 15290 }, { "epoch": 1.8879360207300238, "grad_norm": 0.263671875, "learning_rate": 8.933996061356119e-07, "loss": 0.0705, "step": 15300 }, { "epoch": 1.8891699507199209, "grad_norm": 0.15625, "learning_rate": 8.932457889175742e-07, "loss": 0.0646, "step": 15310 }, { "epoch": 1.8904038807098182, "grad_norm": 0.3203125, "learning_rate": 8.930918740672108e-07, "loss": 0.0619, "step": 15320 }, { "epoch": 1.8916378106997154, "grad_norm": 0.1923828125, "learning_rate": 8.929378616227345e-07, "loss": 0.063, "step": 15330 }, { "epoch": 1.8928717406896127, "grad_norm": 0.29296875, "learning_rate": 8.927837516223823e-07, "loss": 0.0831, "step": 15340 }, { "epoch": 1.8941056706795099, "grad_norm": 0.302734375, "learning_rate": 8.926295441044158e-07, "loss": 0.0658, "step": 15350 }, { "epoch": 1.895339600669407, "grad_norm": 0.291015625, "learning_rate": 8.924752391071204e-07, "loss": 0.0818, "step": 15360 }, { "epoch": 1.8965735306593041, "grad_norm": 0.201171875, "learning_rate": 8.92320836668806e-07, "loss": 0.0553, "step": 15370 }, { "epoch": 1.8978074606492012, "grad_norm": 0.26953125, "learning_rate": 8.921663368278064e-07, "loss": 0.0557, "step": 15380 }, { "epoch": 1.8990413906390986, "grad_norm": 0.1748046875, "learning_rate": 8.920117396224799e-07, "loss": 0.0664, "step": 15390 }, { "epoch": 1.900275320628996, "grad_norm": 0.4375, "learning_rate": 8.918570450912087e-07, "loss": 0.0729, "step": 15400 }, { "epoch": 1.901509250618893, "grad_norm": 0.1787109375, "learning_rate": 8.917022532723994e-07, "loss": 0.0706, "step": 15410 }, { "epoch": 1.9027431806087902, "grad_norm": 0.162109375, "learning_rate": 8.915473642044826e-07, "loss": 0.0552, "step": 15420 }, { "epoch": 1.9039771105986874, "grad_norm": 0.07666015625, "learning_rate": 8.913923779259129e-07, "loss": 0.0522, "step": 15430 }, { "epoch": 1.9052110405885845, "grad_norm": 0.146484375, "learning_rate": 8.912372944751695e-07, "loss": 0.0508, "step": 15440 }, { "epoch": 1.9064449705784818, "grad_norm": 0.1611328125, "learning_rate": 8.910821138907553e-07, "loss": 0.0778, "step": 15450 }, { "epoch": 1.907678900568379, "grad_norm": 0.203125, "learning_rate": 8.909268362111975e-07, "loss": 0.0559, "step": 15460 }, { "epoch": 1.9089128305582763, "grad_norm": 0.12060546875, "learning_rate": 8.907714614750472e-07, "loss": 0.0696, "step": 15470 }, { "epoch": 1.9101467605481735, "grad_norm": 0.1376953125, "learning_rate": 8.906159897208799e-07, "loss": 0.0561, "step": 15480 }, { "epoch": 1.9113806905380706, "grad_norm": 0.2294921875, "learning_rate": 8.90460420987295e-07, "loss": 0.071, "step": 15490 }, { "epoch": 1.9126146205279677, "grad_norm": 0.26171875, "learning_rate": 8.903047553129161e-07, "loss": 0.0803, "step": 15500 }, { "epoch": 1.9138485505178648, "grad_norm": 0.2421875, "learning_rate": 8.901489927363907e-07, "loss": 0.0678, "step": 15510 }, { "epoch": 1.9150824805077622, "grad_norm": 0.255859375, "learning_rate": 8.899931332963905e-07, "loss": 0.076, "step": 15520 }, { "epoch": 1.9163164104976596, "grad_norm": 0.193359375, "learning_rate": 8.898371770316111e-07, "loss": 0.0664, "step": 15530 }, { "epoch": 1.9175503404875567, "grad_norm": 0.171875, "learning_rate": 8.896811239807724e-07, "loss": 0.0764, "step": 15540 }, { "epoch": 1.9187842704774538, "grad_norm": 0.2138671875, "learning_rate": 8.895249741826181e-07, "loss": 0.0644, "step": 15550 }, { "epoch": 1.920018200467351, "grad_norm": 0.189453125, "learning_rate": 8.893687276759158e-07, "loss": 0.0609, "step": 15560 }, { "epoch": 1.921252130457248, "grad_norm": 0.291015625, "learning_rate": 8.892123844994577e-07, "loss": 0.064, "step": 15570 }, { "epoch": 1.9224860604471454, "grad_norm": 0.205078125, "learning_rate": 8.890559446920591e-07, "loss": 0.0698, "step": 15580 }, { "epoch": 1.9237199904370426, "grad_norm": 0.2158203125, "learning_rate": 8.888994082925603e-07, "loss": 0.0591, "step": 15590 }, { "epoch": 1.92495392042694, "grad_norm": 0.1865234375, "learning_rate": 8.887427753398247e-07, "loss": 0.0581, "step": 15600 }, { "epoch": 1.926187850416837, "grad_norm": 0.2001953125, "learning_rate": 8.8858604587274e-07, "loss": 0.0597, "step": 15610 }, { "epoch": 1.9274217804067342, "grad_norm": 0.1474609375, "learning_rate": 8.884292199302181e-07, "loss": 0.0611, "step": 15620 }, { "epoch": 1.9286557103966313, "grad_norm": 0.2138671875, "learning_rate": 8.882722975511947e-07, "loss": 0.0742, "step": 15630 }, { "epoch": 1.9298896403865284, "grad_norm": 0.21484375, "learning_rate": 8.881152787746292e-07, "loss": 0.0531, "step": 15640 }, { "epoch": 1.9311235703764258, "grad_norm": 0.373046875, "learning_rate": 8.879581636395053e-07, "loss": 0.0704, "step": 15650 }, { "epoch": 1.932357500366323, "grad_norm": 0.1396484375, "learning_rate": 8.878009521848303e-07, "loss": 0.0428, "step": 15660 }, { "epoch": 1.9335914303562203, "grad_norm": 0.224609375, "learning_rate": 8.876436444496356e-07, "loss": 0.0743, "step": 15670 }, { "epoch": 1.9348253603461174, "grad_norm": 0.177734375, "learning_rate": 8.874862404729766e-07, "loss": 0.064, "step": 15680 }, { "epoch": 1.9360592903360145, "grad_norm": 0.1328125, "learning_rate": 8.873287402939323e-07, "loss": 0.066, "step": 15690 }, { "epoch": 1.9372932203259117, "grad_norm": 0.1298828125, "learning_rate": 8.871711439516058e-07, "loss": 0.0676, "step": 15700 }, { "epoch": 1.9385271503158088, "grad_norm": 0.193359375, "learning_rate": 8.870134514851242e-07, "loss": 0.0802, "step": 15710 }, { "epoch": 1.9397610803057062, "grad_norm": 0.318359375, "learning_rate": 8.868556629336379e-07, "loss": 0.0667, "step": 15720 }, { "epoch": 1.9409950102956035, "grad_norm": 0.22265625, "learning_rate": 8.866977783363218e-07, "loss": 0.07, "step": 15730 }, { "epoch": 1.9422289402855006, "grad_norm": 0.16015625, "learning_rate": 8.865397977323745e-07, "loss": 0.0618, "step": 15740 }, { "epoch": 1.9434628702753978, "grad_norm": 0.33203125, "learning_rate": 8.863817211610182e-07, "loss": 0.0626, "step": 15750 }, { "epoch": 1.944696800265295, "grad_norm": 0.251953125, "learning_rate": 8.862235486614988e-07, "loss": 0.081, "step": 15760 }, { "epoch": 1.945930730255192, "grad_norm": 0.1650390625, "learning_rate": 8.860652802730867e-07, "loss": 0.0833, "step": 15770 }, { "epoch": 1.9471646602450894, "grad_norm": 0.171875, "learning_rate": 8.859069160350754e-07, "loss": 0.0559, "step": 15780 }, { "epoch": 1.9483985902349865, "grad_norm": 0.41796875, "learning_rate": 8.857484559867825e-07, "loss": 0.077, "step": 15790 }, { "epoch": 1.9496325202248839, "grad_norm": 0.1611328125, "learning_rate": 8.855899001675495e-07, "loss": 0.0609, "step": 15800 }, { "epoch": 1.950866450214781, "grad_norm": 0.18359375, "learning_rate": 8.854312486167413e-07, "loss": 0.0486, "step": 15810 }, { "epoch": 1.9521003802046781, "grad_norm": 0.376953125, "learning_rate": 8.85272501373747e-07, "loss": 0.079, "step": 15820 }, { "epoch": 1.9533343101945753, "grad_norm": 0.283203125, "learning_rate": 8.851136584779792e-07, "loss": 0.0527, "step": 15830 }, { "epoch": 1.9545682401844724, "grad_norm": 0.158203125, "learning_rate": 8.849547199688744e-07, "loss": 0.0628, "step": 15840 }, { "epoch": 1.9558021701743697, "grad_norm": 0.283203125, "learning_rate": 8.847956858858925e-07, "loss": 0.0675, "step": 15850 }, { "epoch": 1.9570361001642669, "grad_norm": 0.208984375, "learning_rate": 8.846365562685176e-07, "loss": 0.0607, "step": 15860 }, { "epoch": 1.9582700301541642, "grad_norm": 0.2333984375, "learning_rate": 8.844773311562572e-07, "loss": 0.0705, "step": 15870 }, { "epoch": 1.9595039601440614, "grad_norm": 0.2578125, "learning_rate": 8.843180105886425e-07, "loss": 0.0628, "step": 15880 }, { "epoch": 1.9607378901339585, "grad_norm": 0.27734375, "learning_rate": 8.841585946052288e-07, "loss": 0.0713, "step": 15890 }, { "epoch": 1.9619718201238556, "grad_norm": 0.220703125, "learning_rate": 8.839990832455945e-07, "loss": 0.0702, "step": 15900 }, { "epoch": 1.9632057501137528, "grad_norm": 0.2412109375, "learning_rate": 8.838394765493422e-07, "loss": 0.0481, "step": 15910 }, { "epoch": 1.96443968010365, "grad_norm": 0.1787109375, "learning_rate": 8.836797745560977e-07, "loss": 0.082, "step": 15920 }, { "epoch": 1.9656736100935475, "grad_norm": 0.353515625, "learning_rate": 8.835199773055108e-07, "loss": 0.0661, "step": 15930 }, { "epoch": 1.9669075400834446, "grad_norm": 0.3984375, "learning_rate": 8.833600848372548e-07, "loss": 0.0686, "step": 15940 }, { "epoch": 1.9681414700733417, "grad_norm": 0.291015625, "learning_rate": 8.83200097191027e-07, "loss": 0.0666, "step": 15950 }, { "epoch": 1.9693754000632389, "grad_norm": 0.2392578125, "learning_rate": 8.830400144065476e-07, "loss": 0.0616, "step": 15960 }, { "epoch": 1.970609330053136, "grad_norm": 0.25, "learning_rate": 8.82879836523561e-07, "loss": 0.0544, "step": 15970 }, { "epoch": 1.9718432600430333, "grad_norm": 0.1025390625, "learning_rate": 8.827195635818351e-07, "loss": 0.0659, "step": 15980 }, { "epoch": 1.9730771900329305, "grad_norm": 0.1279296875, "learning_rate": 8.825591956211614e-07, "loss": 0.0613, "step": 15990 }, { "epoch": 1.9743111200228278, "grad_norm": 0.1669921875, "learning_rate": 8.823987326813546e-07, "loss": 0.0611, "step": 16000 }, { "epoch": 1.9743111200228278, "eval_exact_match": 0.6579754601226994, "eval_has_answer_correct": 0.5787139689578714, "eval_no_answer_correct": 0.835820895522388, "step": 16000 }, { "epoch": 1.975545050012725, "grad_norm": 0.1943359375, "learning_rate": 8.822381748022538e-07, "loss": 0.0629, "step": 16010 }, { "epoch": 1.976778980002622, "grad_norm": 0.32421875, "learning_rate": 8.820775220237209e-07, "loss": 0.0585, "step": 16020 }, { "epoch": 1.9780129099925192, "grad_norm": 0.23828125, "learning_rate": 8.819167743856418e-07, "loss": 0.0696, "step": 16030 }, { "epoch": 1.9792468399824164, "grad_norm": 0.154296875, "learning_rate": 8.817559319279256e-07, "loss": 0.0626, "step": 16040 }, { "epoch": 1.9804807699723137, "grad_norm": 0.17578125, "learning_rate": 8.815949946905053e-07, "loss": 0.0646, "step": 16050 }, { "epoch": 1.981714699962211, "grad_norm": 0.28125, "learning_rate": 8.814339627133373e-07, "loss": 0.0655, "step": 16060 }, { "epoch": 1.9829486299521082, "grad_norm": 0.328125, "learning_rate": 8.812728360364013e-07, "loss": 0.0783, "step": 16070 }, { "epoch": 1.9841825599420053, "grad_norm": 0.37890625, "learning_rate": 8.81111614699701e-07, "loss": 0.0618, "step": 16080 }, { "epoch": 1.9854164899319025, "grad_norm": 0.2119140625, "learning_rate": 8.809502987432631e-07, "loss": 0.0791, "step": 16090 }, { "epoch": 1.9866504199217996, "grad_norm": 0.2119140625, "learning_rate": 8.80788888207138e-07, "loss": 0.0661, "step": 16100 }, { "epoch": 1.987884349911697, "grad_norm": 0.50390625, "learning_rate": 8.806273831313997e-07, "loss": 0.0619, "step": 16110 }, { "epoch": 1.989118279901594, "grad_norm": 0.1708984375, "learning_rate": 8.804657835561456e-07, "loss": 0.0645, "step": 16120 }, { "epoch": 1.9903522098914914, "grad_norm": 0.2578125, "learning_rate": 8.803040895214961e-07, "loss": 0.0741, "step": 16130 }, { "epoch": 1.9915861398813886, "grad_norm": 0.310546875, "learning_rate": 8.801423010675962e-07, "loss": 0.0667, "step": 16140 }, { "epoch": 1.9928200698712857, "grad_norm": 0.2021484375, "learning_rate": 8.799804182346129e-07, "loss": 0.0527, "step": 16150 }, { "epoch": 1.9940539998611828, "grad_norm": 0.302734375, "learning_rate": 8.798184410627378e-07, "loss": 0.0635, "step": 16160 }, { "epoch": 1.99528792985108, "grad_norm": 0.216796875, "learning_rate": 8.796563695921852e-07, "loss": 0.0761, "step": 16170 }, { "epoch": 1.9965218598409773, "grad_norm": 0.365234375, "learning_rate": 8.794942038631933e-07, "loss": 0.0605, "step": 16180 }, { "epoch": 1.9977557898308744, "grad_norm": 0.25390625, "learning_rate": 8.793319439160233e-07, "loss": 0.0668, "step": 16190 }, { "epoch": 1.9989897198207718, "grad_norm": 0.298828125, "learning_rate": 8.7916958979096e-07, "loss": 0.0724, "step": 16200 }, { "epoch": 2.000223649810669, "grad_norm": 0.30859375, "learning_rate": 8.790071415283115e-07, "loss": 0.0654, "step": 16210 }, { "epoch": 2.001457579800566, "grad_norm": 0.1455078125, "learning_rate": 8.788445991684095e-07, "loss": 0.0631, "step": 16220 }, { "epoch": 2.002691509790463, "grad_norm": 0.236328125, "learning_rate": 8.786819627516089e-07, "loss": 0.0627, "step": 16230 }, { "epoch": 2.0039254397803603, "grad_norm": 0.1533203125, "learning_rate": 8.785192323182877e-07, "loss": 0.0593, "step": 16240 }, { "epoch": 2.005159369770258, "grad_norm": 0.36328125, "learning_rate": 8.783564079088476e-07, "loss": 0.0655, "step": 16250 }, { "epoch": 2.006393299760155, "grad_norm": 0.212890625, "learning_rate": 8.781934895637136e-07, "loss": 0.0594, "step": 16260 }, { "epoch": 2.007627229750052, "grad_norm": 0.2080078125, "learning_rate": 8.780304773233337e-07, "loss": 0.0633, "step": 16270 }, { "epoch": 2.0088611597399493, "grad_norm": 0.2099609375, "learning_rate": 8.778673712281797e-07, "loss": 0.082, "step": 16280 }, { "epoch": 2.0100950897298464, "grad_norm": 0.2080078125, "learning_rate": 8.777041713187463e-07, "loss": 0.0662, "step": 16290 }, { "epoch": 2.0113290197197435, "grad_norm": 0.12255859375, "learning_rate": 8.775408776355516e-07, "loss": 0.0657, "step": 16300 }, { "epoch": 2.0125629497096407, "grad_norm": 0.2265625, "learning_rate": 8.773774902191372e-07, "loss": 0.0612, "step": 16310 }, { "epoch": 2.0137968796995382, "grad_norm": 0.1337890625, "learning_rate": 8.772140091100676e-07, "loss": 0.0578, "step": 16320 }, { "epoch": 2.0150308096894354, "grad_norm": 0.41015625, "learning_rate": 8.770504343489309e-07, "loss": 0.0705, "step": 16330 }, { "epoch": 2.0162647396793325, "grad_norm": 0.2890625, "learning_rate": 8.768867659763381e-07, "loss": 0.0725, "step": 16340 }, { "epoch": 2.0174986696692296, "grad_norm": 0.1826171875, "learning_rate": 8.767230040329241e-07, "loss": 0.0632, "step": 16350 }, { "epoch": 2.0187325996591268, "grad_norm": 0.28125, "learning_rate": 8.76559148559346e-07, "loss": 0.0712, "step": 16360 }, { "epoch": 2.019966529649024, "grad_norm": 0.10791015625, "learning_rate": 8.76395199596285e-07, "loss": 0.0753, "step": 16370 }, { "epoch": 2.021200459638921, "grad_norm": 0.1796875, "learning_rate": 8.76231157184445e-07, "loss": 0.0723, "step": 16380 }, { "epoch": 2.0224343896288186, "grad_norm": 0.1337890625, "learning_rate": 8.760670213645536e-07, "loss": 0.0667, "step": 16390 }, { "epoch": 2.0236683196187157, "grad_norm": 0.2353515625, "learning_rate": 8.759027921773613e-07, "loss": 0.0517, "step": 16400 }, { "epoch": 2.024902249608613, "grad_norm": 0.349609375, "learning_rate": 8.757384696636416e-07, "loss": 0.0595, "step": 16410 }, { "epoch": 2.02613617959851, "grad_norm": 0.1591796875, "learning_rate": 8.755740538641911e-07, "loss": 0.0525, "step": 16420 }, { "epoch": 2.027370109588407, "grad_norm": 0.265625, "learning_rate": 8.754095448198304e-07, "loss": 0.0691, "step": 16430 }, { "epoch": 2.0286040395783043, "grad_norm": 0.2578125, "learning_rate": 8.752449425714023e-07, "loss": 0.0589, "step": 16440 }, { "epoch": 2.029837969568202, "grad_norm": 0.142578125, "learning_rate": 8.750802471597731e-07, "loss": 0.064, "step": 16450 }, { "epoch": 2.031071899558099, "grad_norm": 0.2265625, "learning_rate": 8.749154586258322e-07, "loss": 0.0513, "step": 16460 }, { "epoch": 2.032305829547996, "grad_norm": 0.1787109375, "learning_rate": 8.747505770104924e-07, "loss": 0.0757, "step": 16470 }, { "epoch": 2.0335397595378932, "grad_norm": 0.224609375, "learning_rate": 8.745856023546891e-07, "loss": 0.0682, "step": 16480 }, { "epoch": 2.0347736895277904, "grad_norm": 0.1982421875, "learning_rate": 8.744205346993811e-07, "loss": 0.0649, "step": 16490 }, { "epoch": 2.0360076195176875, "grad_norm": 0.1943359375, "learning_rate": 8.742553740855505e-07, "loss": 0.0697, "step": 16500 }, { "epoch": 2.0372415495075846, "grad_norm": 0.2021484375, "learning_rate": 8.74090120554202e-07, "loss": 0.0534, "step": 16510 }, { "epoch": 2.038475479497482, "grad_norm": 0.240234375, "learning_rate": 8.739247741463636e-07, "loss": 0.0729, "step": 16520 }, { "epoch": 2.0397094094873793, "grad_norm": 0.216796875, "learning_rate": 8.737593349030863e-07, "loss": 0.0825, "step": 16530 }, { "epoch": 2.0409433394772765, "grad_norm": 0.2080078125, "learning_rate": 8.735938028654442e-07, "loss": 0.0581, "step": 16540 }, { "epoch": 2.0421772694671736, "grad_norm": 0.1328125, "learning_rate": 8.734281780745347e-07, "loss": 0.07, "step": 16550 }, { "epoch": 2.0434111994570707, "grad_norm": 0.1826171875, "learning_rate": 8.732624605714778e-07, "loss": 0.0606, "step": 16560 }, { "epoch": 2.044645129446968, "grad_norm": 0.279296875, "learning_rate": 8.730966503974166e-07, "loss": 0.0874, "step": 16570 }, { "epoch": 2.045879059436865, "grad_norm": 0.2177734375, "learning_rate": 8.729307475935174e-07, "loss": 0.0534, "step": 16580 }, { "epoch": 2.0471129894267626, "grad_norm": 0.2041015625, "learning_rate": 8.727647522009693e-07, "loss": 0.0623, "step": 16590 }, { "epoch": 2.0483469194166597, "grad_norm": 0.2392578125, "learning_rate": 8.725986642609848e-07, "loss": 0.0827, "step": 16600 }, { "epoch": 2.049580849406557, "grad_norm": 0.48046875, "learning_rate": 8.724324838147985e-07, "loss": 0.0621, "step": 16610 }, { "epoch": 2.050814779396454, "grad_norm": 0.357421875, "learning_rate": 8.72266210903669e-07, "loss": 0.0858, "step": 16620 }, { "epoch": 2.052048709386351, "grad_norm": 0.23828125, "learning_rate": 8.720998455688772e-07, "loss": 0.0802, "step": 16630 }, { "epoch": 2.053282639376248, "grad_norm": 0.15234375, "learning_rate": 8.719333878517273e-07, "loss": 0.0602, "step": 16640 }, { "epoch": 2.054516569366146, "grad_norm": 0.75390625, "learning_rate": 8.717668377935457e-07, "loss": 0.0687, "step": 16650 }, { "epoch": 2.055750499356043, "grad_norm": 0.2041015625, "learning_rate": 8.71600195435683e-07, "loss": 0.0638, "step": 16660 }, { "epoch": 2.05698442934594, "grad_norm": 0.2431640625, "learning_rate": 8.714334608195116e-07, "loss": 0.0638, "step": 16670 }, { "epoch": 2.058218359335837, "grad_norm": 0.240234375, "learning_rate": 8.712666339864274e-07, "loss": 0.0662, "step": 16680 }, { "epoch": 2.0594522893257343, "grad_norm": 0.1748046875, "learning_rate": 8.710997149778487e-07, "loss": 0.0659, "step": 16690 }, { "epoch": 2.0606862193156315, "grad_norm": 0.2412109375, "learning_rate": 8.709327038352174e-07, "loss": 0.0742, "step": 16700 }, { "epoch": 2.0619201493055286, "grad_norm": 0.220703125, "learning_rate": 8.707656005999975e-07, "loss": 0.0632, "step": 16710 }, { "epoch": 2.063154079295426, "grad_norm": 0.345703125, "learning_rate": 8.705984053136765e-07, "loss": 0.0657, "step": 16720 }, { "epoch": 2.0643880092853233, "grad_norm": 0.134765625, "learning_rate": 8.704311180177644e-07, "loss": 0.0744, "step": 16730 }, { "epoch": 2.0656219392752204, "grad_norm": 0.20703125, "learning_rate": 8.70263738753794e-07, "loss": 0.0562, "step": 16740 }, { "epoch": 2.0668558692651176, "grad_norm": 0.185546875, "learning_rate": 8.700962675633212e-07, "loss": 0.0854, "step": 16750 }, { "epoch": 2.0680897992550147, "grad_norm": 0.134765625, "learning_rate": 8.699287044879244e-07, "loss": 0.0561, "step": 16760 }, { "epoch": 2.069323729244912, "grad_norm": 0.1494140625, "learning_rate": 8.697610495692054e-07, "loss": 0.0773, "step": 16770 }, { "epoch": 2.070557659234809, "grad_norm": 0.16015625, "learning_rate": 8.69593302848788e-07, "loss": 0.0736, "step": 16780 }, { "epoch": 2.0717915892247065, "grad_norm": 0.24609375, "learning_rate": 8.694254643683192e-07, "loss": 0.062, "step": 16790 }, { "epoch": 2.0730255192146037, "grad_norm": 0.2099609375, "learning_rate": 8.69257534169469e-07, "loss": 0.0593, "step": 16800 }, { "epoch": 2.074259449204501, "grad_norm": 0.240234375, "learning_rate": 8.690895122939297e-07, "loss": 0.0516, "step": 16810 }, { "epoch": 2.075493379194398, "grad_norm": 0.193359375, "learning_rate": 8.689213987834168e-07, "loss": 0.0751, "step": 16820 }, { "epoch": 2.076727309184295, "grad_norm": 0.212890625, "learning_rate": 8.687531936796683e-07, "loss": 0.0768, "step": 16830 }, { "epoch": 2.077961239174192, "grad_norm": 0.2392578125, "learning_rate": 8.685848970244449e-07, "loss": 0.0654, "step": 16840 }, { "epoch": 2.0791951691640898, "grad_norm": 0.1435546875, "learning_rate": 8.684165088595304e-07, "loss": 0.0641, "step": 16850 }, { "epoch": 2.080429099153987, "grad_norm": 0.181640625, "learning_rate": 8.682480292267305e-07, "loss": 0.0672, "step": 16860 }, { "epoch": 2.081663029143884, "grad_norm": 0.1728515625, "learning_rate": 8.680794581678746e-07, "loss": 0.0624, "step": 16870 }, { "epoch": 2.082896959133781, "grad_norm": 0.294921875, "learning_rate": 8.679107957248143e-07, "loss": 0.0672, "step": 16880 }, { "epoch": 2.0841308891236783, "grad_norm": 0.1787109375, "learning_rate": 8.677420419394238e-07, "loss": 0.0708, "step": 16890 }, { "epoch": 2.0853648191135754, "grad_norm": 0.1689453125, "learning_rate": 8.675731968536002e-07, "loss": 0.0584, "step": 16900 }, { "epoch": 2.0865987491034725, "grad_norm": 0.2734375, "learning_rate": 8.67404260509263e-07, "loss": 0.0539, "step": 16910 }, { "epoch": 2.08783267909337, "grad_norm": 0.392578125, "learning_rate": 8.67235232948355e-07, "loss": 0.0655, "step": 16920 }, { "epoch": 2.0890666090832672, "grad_norm": 0.2099609375, "learning_rate": 8.670661142128407e-07, "loss": 0.0655, "step": 16930 }, { "epoch": 2.0903005390731644, "grad_norm": 0.1630859375, "learning_rate": 8.668969043447079e-07, "loss": 0.0704, "step": 16940 }, { "epoch": 2.0915344690630615, "grad_norm": 0.279296875, "learning_rate": 8.667276033859668e-07, "loss": 0.0546, "step": 16950 }, { "epoch": 2.0927683990529586, "grad_norm": 0.271484375, "learning_rate": 8.665582113786505e-07, "loss": 0.0677, "step": 16960 }, { "epoch": 2.0940023290428558, "grad_norm": 0.1708984375, "learning_rate": 8.663887283648142e-07, "loss": 0.0754, "step": 16970 }, { "epoch": 2.0952362590327533, "grad_norm": 0.1962890625, "learning_rate": 8.66219154386536e-07, "loss": 0.0835, "step": 16980 }, { "epoch": 2.0964701890226505, "grad_norm": 0.150390625, "learning_rate": 8.660494894859165e-07, "loss": 0.057, "step": 16990 }, { "epoch": 2.0977041190125476, "grad_norm": 0.173828125, "learning_rate": 8.658797337050791e-07, "loss": 0.0565, "step": 17000 }, { "epoch": 2.0977041190125476, "eval_exact_match": 0.6579754601226994, "eval_has_answer_correct": 0.5764966740576497, "eval_no_answer_correct": 0.8407960199004975, "step": 17000 }, { "epoch": 2.0989380490024447, "grad_norm": 0.205078125, "learning_rate": 8.657098870861695e-07, "loss": 0.052, "step": 17010 }, { "epoch": 2.100171978992342, "grad_norm": 0.2236328125, "learning_rate": 8.655399496713562e-07, "loss": 0.0676, "step": 17020 }, { "epoch": 2.101405908982239, "grad_norm": 0.251953125, "learning_rate": 8.653699215028296e-07, "loss": 0.0636, "step": 17030 }, { "epoch": 2.102639838972136, "grad_norm": 0.181640625, "learning_rate": 8.651998026228036e-07, "loss": 0.055, "step": 17040 }, { "epoch": 2.1038737689620337, "grad_norm": 0.24609375, "learning_rate": 8.65029593073514e-07, "loss": 0.0634, "step": 17050 }, { "epoch": 2.105107698951931, "grad_norm": 0.1767578125, "learning_rate": 8.648592928972191e-07, "loss": 0.0681, "step": 17060 }, { "epoch": 2.106341628941828, "grad_norm": 0.21875, "learning_rate": 8.646889021362e-07, "loss": 0.0599, "step": 17070 }, { "epoch": 2.107575558931725, "grad_norm": 0.1630859375, "learning_rate": 8.6451842083276e-07, "loss": 0.0603, "step": 17080 }, { "epoch": 2.1088094889216222, "grad_norm": 0.3125, "learning_rate": 8.64347849029225e-07, "loss": 0.0789, "step": 17090 }, { "epoch": 2.1100434189115194, "grad_norm": 0.162109375, "learning_rate": 8.641771867679435e-07, "loss": 0.0592, "step": 17100 }, { "epoch": 2.1112773489014165, "grad_norm": 0.328125, "learning_rate": 8.640064340912864e-07, "loss": 0.0488, "step": 17110 }, { "epoch": 2.112511278891314, "grad_norm": 0.287109375, "learning_rate": 8.638355910416468e-07, "loss": 0.0663, "step": 17120 }, { "epoch": 2.113745208881211, "grad_norm": 0.162109375, "learning_rate": 8.636646576614405e-07, "loss": 0.0603, "step": 17130 }, { "epoch": 2.1149791388711083, "grad_norm": 0.263671875, "learning_rate": 8.634936339931056e-07, "loss": 0.0658, "step": 17140 }, { "epoch": 2.1162130688610055, "grad_norm": 0.330078125, "learning_rate": 8.633225200791027e-07, "loss": 0.0682, "step": 17150 }, { "epoch": 2.1174469988509026, "grad_norm": 0.53515625, "learning_rate": 8.631513159619149e-07, "loss": 0.0611, "step": 17160 }, { "epoch": 2.1186809288407997, "grad_norm": 0.16796875, "learning_rate": 8.629800216840475e-07, "loss": 0.0742, "step": 17170 }, { "epoch": 2.1199148588306973, "grad_norm": 0.19140625, "learning_rate": 8.628086372880281e-07, "loss": 0.0704, "step": 17180 }, { "epoch": 2.1211487888205944, "grad_norm": 0.2001953125, "learning_rate": 8.626371628164069e-07, "loss": 0.0764, "step": 17190 }, { "epoch": 2.1223827188104916, "grad_norm": 0.166015625, "learning_rate": 8.624655983117563e-07, "loss": 0.064, "step": 17200 }, { "epoch": 2.1236166488003887, "grad_norm": 0.134765625, "learning_rate": 8.622939438166714e-07, "loss": 0.0625, "step": 17210 }, { "epoch": 2.124850578790286, "grad_norm": 0.2060546875, "learning_rate": 8.621221993737693e-07, "loss": 0.0658, "step": 17220 }, { "epoch": 2.126084508780183, "grad_norm": 0.55859375, "learning_rate": 8.619503650256894e-07, "loss": 0.0902, "step": 17230 }, { "epoch": 2.12731843877008, "grad_norm": 0.2490234375, "learning_rate": 8.617784408150935e-07, "loss": 0.07, "step": 17240 }, { "epoch": 2.1285523687599777, "grad_norm": 0.15234375, "learning_rate": 8.616064267846659e-07, "loss": 0.0688, "step": 17250 }, { "epoch": 2.129786298749875, "grad_norm": 0.1474609375, "learning_rate": 8.614343229771131e-07, "loss": 0.0571, "step": 17260 }, { "epoch": 2.131020228739772, "grad_norm": 0.2490234375, "learning_rate": 8.612621294351635e-07, "loss": 0.0716, "step": 17270 }, { "epoch": 2.132254158729669, "grad_norm": 0.373046875, "learning_rate": 8.610898462015683e-07, "loss": 0.0626, "step": 17280 }, { "epoch": 2.133488088719566, "grad_norm": 0.1357421875, "learning_rate": 8.60917473319101e-07, "loss": 0.0684, "step": 17290 }, { "epoch": 2.1347220187094633, "grad_norm": 0.25, "learning_rate": 8.607450108305569e-07, "loss": 0.068, "step": 17300 }, { "epoch": 2.135955948699361, "grad_norm": 0.1728515625, "learning_rate": 8.605724587787537e-07, "loss": 0.0618, "step": 17310 }, { "epoch": 2.137189878689258, "grad_norm": 0.2021484375, "learning_rate": 8.603998172065316e-07, "loss": 0.0846, "step": 17320 }, { "epoch": 2.138423808679155, "grad_norm": 0.185546875, "learning_rate": 8.602270861567529e-07, "loss": 0.0592, "step": 17330 }, { "epoch": 2.1396577386690523, "grad_norm": 0.1630859375, "learning_rate": 8.60054265672302e-07, "loss": 0.0865, "step": 17340 }, { "epoch": 2.1408916686589494, "grad_norm": 0.2021484375, "learning_rate": 8.598813557960853e-07, "loss": 0.0576, "step": 17350 }, { "epoch": 2.1421255986488466, "grad_norm": 0.390625, "learning_rate": 8.597083565710321e-07, "loss": 0.0489, "step": 17360 }, { "epoch": 2.1433595286387437, "grad_norm": 0.1328125, "learning_rate": 8.595352680400932e-07, "loss": 0.0655, "step": 17370 }, { "epoch": 2.1445934586286413, "grad_norm": 0.17578125, "learning_rate": 8.593620902462417e-07, "loss": 0.0664, "step": 17380 }, { "epoch": 2.1458273886185384, "grad_norm": 0.46484375, "learning_rate": 8.591888232324732e-07, "loss": 0.0758, "step": 17390 }, { "epoch": 2.1470613186084355, "grad_norm": 0.232421875, "learning_rate": 8.590154670418051e-07, "loss": 0.0748, "step": 17400 }, { "epoch": 2.1482952485983327, "grad_norm": 0.1923828125, "learning_rate": 8.588420217172771e-07, "loss": 0.0458, "step": 17410 }, { "epoch": 2.14952917858823, "grad_norm": 0.1767578125, "learning_rate": 8.586684873019512e-07, "loss": 0.0567, "step": 17420 }, { "epoch": 2.150763108578127, "grad_norm": 0.19921875, "learning_rate": 8.584948638389111e-07, "loss": 0.0658, "step": 17430 }, { "epoch": 2.151997038568024, "grad_norm": 0.1962890625, "learning_rate": 8.583211513712627e-07, "loss": 0.0661, "step": 17440 }, { "epoch": 2.1532309685579216, "grad_norm": 0.251953125, "learning_rate": 8.581473499421343e-07, "loss": 0.0567, "step": 17450 }, { "epoch": 2.1544648985478188, "grad_norm": 0.2197265625, "learning_rate": 8.579734595946762e-07, "loss": 0.0568, "step": 17460 }, { "epoch": 2.155698828537716, "grad_norm": 0.193359375, "learning_rate": 8.577994803720605e-07, "loss": 0.0748, "step": 17470 }, { "epoch": 2.156932758527613, "grad_norm": 0.171875, "learning_rate": 8.576254123174816e-07, "loss": 0.0597, "step": 17480 }, { "epoch": 2.15816668851751, "grad_norm": 0.26171875, "learning_rate": 8.57451255474156e-07, "loss": 0.0594, "step": 17490 }, { "epoch": 2.1594006185074073, "grad_norm": 0.271484375, "learning_rate": 8.572770098853221e-07, "loss": 0.073, "step": 17500 }, { "epoch": 2.1606345484973044, "grad_norm": 0.15625, "learning_rate": 8.571026755942402e-07, "loss": 0.061, "step": 17510 }, { "epoch": 2.161868478487202, "grad_norm": 0.271484375, "learning_rate": 8.56928252644193e-07, "loss": 0.0722, "step": 17520 }, { "epoch": 2.163102408477099, "grad_norm": 0.201171875, "learning_rate": 8.567537410784851e-07, "loss": 0.0625, "step": 17530 }, { "epoch": 2.1643363384669962, "grad_norm": 0.1357421875, "learning_rate": 8.565791409404427e-07, "loss": 0.0658, "step": 17540 }, { "epoch": 2.1655702684568934, "grad_norm": 0.173828125, "learning_rate": 8.564044522734146e-07, "loss": 0.0746, "step": 17550 }, { "epoch": 2.1668041984467905, "grad_norm": 0.21484375, "learning_rate": 8.562296751207711e-07, "loss": 0.054, "step": 17560 }, { "epoch": 2.1680381284366876, "grad_norm": 0.1962890625, "learning_rate": 8.560548095259048e-07, "loss": 0.0586, "step": 17570 }, { "epoch": 2.169272058426585, "grad_norm": 0.220703125, "learning_rate": 8.558798555322302e-07, "loss": 0.0723, "step": 17580 }, { "epoch": 2.1705059884164823, "grad_norm": 0.25390625, "learning_rate": 8.557048131831832e-07, "loss": 0.0821, "step": 17590 }, { "epoch": 2.1717399184063795, "grad_norm": 0.1591796875, "learning_rate": 8.555296825222227e-07, "loss": 0.0608, "step": 17600 }, { "epoch": 2.1729738483962766, "grad_norm": 0.27734375, "learning_rate": 8.553544635928285e-07, "loss": 0.0602, "step": 17610 }, { "epoch": 2.1742077783861737, "grad_norm": 0.2431640625, "learning_rate": 8.551791564385027e-07, "loss": 0.0706, "step": 17620 }, { "epoch": 2.175441708376071, "grad_norm": 0.294921875, "learning_rate": 8.550037611027696e-07, "loss": 0.0786, "step": 17630 }, { "epoch": 2.1766756383659684, "grad_norm": 0.2734375, "learning_rate": 8.54828277629175e-07, "loss": 0.0686, "step": 17640 }, { "epoch": 2.1779095683558656, "grad_norm": 0.1767578125, "learning_rate": 8.546527060612869e-07, "loss": 0.0526, "step": 17650 }, { "epoch": 2.1791434983457627, "grad_norm": 0.2158203125, "learning_rate": 8.544770464426948e-07, "loss": 0.0616, "step": 17660 }, { "epoch": 2.18037742833566, "grad_norm": 0.181640625, "learning_rate": 8.543012988170101e-07, "loss": 0.0796, "step": 17670 }, { "epoch": 2.181611358325557, "grad_norm": 0.166015625, "learning_rate": 8.541254632278665e-07, "loss": 0.0603, "step": 17680 }, { "epoch": 2.182845288315454, "grad_norm": 0.216796875, "learning_rate": 8.539495397189191e-07, "loss": 0.069, "step": 17690 }, { "epoch": 2.1840792183053512, "grad_norm": 0.60546875, "learning_rate": 8.53773528333845e-07, "loss": 0.06, "step": 17700 }, { "epoch": 2.185313148295249, "grad_norm": 0.1455078125, "learning_rate": 8.53597429116343e-07, "loss": 0.0799, "step": 17710 }, { "epoch": 2.186547078285146, "grad_norm": 0.150390625, "learning_rate": 8.534212421101339e-07, "loss": 0.0822, "step": 17720 }, { "epoch": 2.187781008275043, "grad_norm": 0.2734375, "learning_rate": 8.532449673589601e-07, "loss": 0.0801, "step": 17730 }, { "epoch": 2.18901493826494, "grad_norm": 0.1953125, "learning_rate": 8.530686049065858e-07, "loss": 0.0768, "step": 17740 }, { "epoch": 2.1902488682548373, "grad_norm": 0.28515625, "learning_rate": 8.528921547967974e-07, "loss": 0.065, "step": 17750 }, { "epoch": 2.1914827982447345, "grad_norm": 0.2451171875, "learning_rate": 8.527156170734021e-07, "loss": 0.0638, "step": 17760 }, { "epoch": 2.1927167282346316, "grad_norm": 0.365234375, "learning_rate": 8.525389917802299e-07, "loss": 0.0875, "step": 17770 }, { "epoch": 2.193950658224529, "grad_norm": 0.1611328125, "learning_rate": 8.523622789611321e-07, "loss": 0.0629, "step": 17780 }, { "epoch": 2.1951845882144263, "grad_norm": 0.2890625, "learning_rate": 8.521854786599816e-07, "loss": 0.0801, "step": 17790 }, { "epoch": 2.1964185182043234, "grad_norm": 0.291015625, "learning_rate": 8.520085909206731e-07, "loss": 0.0714, "step": 17800 }, { "epoch": 2.1976524481942206, "grad_norm": 0.33203125, "learning_rate": 8.51831615787123e-07, "loss": 0.0537, "step": 17810 }, { "epoch": 2.1988863781841177, "grad_norm": 0.1630859375, "learning_rate": 8.516545533032697e-07, "loss": 0.0646, "step": 17820 }, { "epoch": 2.200120308174015, "grad_norm": 0.267578125, "learning_rate": 8.51477403513073e-07, "loss": 0.0579, "step": 17830 }, { "epoch": 2.201354238163912, "grad_norm": 0.1650390625, "learning_rate": 8.513001664605144e-07, "loss": 0.0619, "step": 17840 }, { "epoch": 2.2025881681538095, "grad_norm": 0.205078125, "learning_rate": 8.511228421895968e-07, "loss": 0.0728, "step": 17850 }, { "epoch": 2.2038220981437067, "grad_norm": 0.2236328125, "learning_rate": 8.509454307443454e-07, "loss": 0.069, "step": 17860 }, { "epoch": 2.205056028133604, "grad_norm": 0.169921875, "learning_rate": 8.507679321688064e-07, "loss": 0.0826, "step": 17870 }, { "epoch": 2.206289958123501, "grad_norm": 0.1318359375, "learning_rate": 8.505903465070481e-07, "loss": 0.0626, "step": 17880 }, { "epoch": 2.207523888113398, "grad_norm": 0.158203125, "learning_rate": 8.504126738031601e-07, "loss": 0.0537, "step": 17890 }, { "epoch": 2.208757818103295, "grad_norm": 0.11474609375, "learning_rate": 8.502349141012539e-07, "loss": 0.0627, "step": 17900 }, { "epoch": 2.2099917480931928, "grad_norm": 0.251953125, "learning_rate": 8.500570674454624e-07, "loss": 0.0628, "step": 17910 }, { "epoch": 2.21122567808309, "grad_norm": 0.12158203125, "learning_rate": 8.498791338799399e-07, "loss": 0.0608, "step": 17920 }, { "epoch": 2.212459608072987, "grad_norm": 0.1318359375, "learning_rate": 8.497011134488629e-07, "loss": 0.0666, "step": 17930 }, { "epoch": 2.213693538062884, "grad_norm": 0.1484375, "learning_rate": 8.495230061964287e-07, "loss": 0.0619, "step": 17940 }, { "epoch": 2.2149274680527813, "grad_norm": 0.1796875, "learning_rate": 8.493448121668567e-07, "loss": 0.0614, "step": 17950 }, { "epoch": 2.2161613980426784, "grad_norm": 0.73046875, "learning_rate": 8.491665314043878e-07, "loss": 0.0638, "step": 17960 }, { "epoch": 2.2173953280325756, "grad_norm": 0.1689453125, "learning_rate": 8.48988163953284e-07, "loss": 0.0665, "step": 17970 }, { "epoch": 2.218629258022473, "grad_norm": 0.232421875, "learning_rate": 8.488097098578293e-07, "loss": 0.0695, "step": 17980 }, { "epoch": 2.2198631880123703, "grad_norm": 0.248046875, "learning_rate": 8.486311691623291e-07, "loss": 0.077, "step": 17990 }, { "epoch": 2.2210971180022674, "grad_norm": 0.357421875, "learning_rate": 8.484525419111103e-07, "loss": 0.0568, "step": 18000 }, { "epoch": 2.2210971180022674, "eval_exact_match": 0.651840490797546, "eval_has_answer_correct": 0.5676274944567627, "eval_no_answer_correct": 0.8407960199004975, "step": 18000 }, { "epoch": 2.2223310479921645, "grad_norm": 0.189453125, "learning_rate": 8.482738281485209e-07, "loss": 0.0567, "step": 18010 }, { "epoch": 2.2235649779820617, "grad_norm": 0.298828125, "learning_rate": 8.48095027918931e-07, "loss": 0.0824, "step": 18020 }, { "epoch": 2.224798907971959, "grad_norm": 0.28125, "learning_rate": 8.479161412667318e-07, "loss": 0.0581, "step": 18030 }, { "epoch": 2.2260328379618564, "grad_norm": 0.1494140625, "learning_rate": 8.47737168236336e-07, "loss": 0.0562, "step": 18040 }, { "epoch": 2.2272667679517535, "grad_norm": 0.26171875, "learning_rate": 8.475581088721778e-07, "loss": 0.0562, "step": 18050 }, { "epoch": 2.2285006979416506, "grad_norm": 0.1767578125, "learning_rate": 8.47378963218713e-07, "loss": 0.0583, "step": 18060 }, { "epoch": 2.2297346279315478, "grad_norm": 0.357421875, "learning_rate": 8.471997313204182e-07, "loss": 0.0627, "step": 18070 }, { "epoch": 2.230968557921445, "grad_norm": 0.234375, "learning_rate": 8.470204132217921e-07, "loss": 0.0754, "step": 18080 }, { "epoch": 2.232202487911342, "grad_norm": 0.248046875, "learning_rate": 8.468410089673546e-07, "loss": 0.0787, "step": 18090 }, { "epoch": 2.233436417901239, "grad_norm": 0.19921875, "learning_rate": 8.466615186016468e-07, "loss": 0.0817, "step": 18100 }, { "epoch": 2.2346703478911367, "grad_norm": 0.166015625, "learning_rate": 8.464819421692316e-07, "loss": 0.0452, "step": 18110 }, { "epoch": 2.235904277881034, "grad_norm": 0.2578125, "learning_rate": 8.463022797146926e-07, "loss": 0.0797, "step": 18120 }, { "epoch": 2.237138207870931, "grad_norm": 0.1904296875, "learning_rate": 8.461225312826354e-07, "loss": 0.0686, "step": 18130 }, { "epoch": 2.238372137860828, "grad_norm": 0.2255859375, "learning_rate": 8.459426969176866e-07, "loss": 0.0653, "step": 18140 }, { "epoch": 2.2396060678507252, "grad_norm": 0.39453125, "learning_rate": 8.457627766644944e-07, "loss": 0.0802, "step": 18150 }, { "epoch": 2.2408399978406224, "grad_norm": 0.25390625, "learning_rate": 8.455827705677277e-07, "loss": 0.0618, "step": 18160 }, { "epoch": 2.2420739278305195, "grad_norm": 0.296875, "learning_rate": 8.454026786720776e-07, "loss": 0.0724, "step": 18170 }, { "epoch": 2.243307857820417, "grad_norm": 0.1630859375, "learning_rate": 8.45222501022256e-07, "loss": 0.0767, "step": 18180 }, { "epoch": 2.244541787810314, "grad_norm": 0.203125, "learning_rate": 8.450422376629959e-07, "loss": 0.0721, "step": 18190 }, { "epoch": 2.2457757178002113, "grad_norm": 0.326171875, "learning_rate": 8.448618886390521e-07, "loss": 0.0664, "step": 18200 }, { "epoch": 2.2470096477901085, "grad_norm": 0.2099609375, "learning_rate": 8.446814539952002e-07, "loss": 0.0486, "step": 18210 }, { "epoch": 2.2482435777800056, "grad_norm": 0.1376953125, "learning_rate": 8.445009337762374e-07, "loss": 0.0522, "step": 18220 }, { "epoch": 2.2494775077699027, "grad_norm": 0.412109375, "learning_rate": 8.44320328026982e-07, "loss": 0.0626, "step": 18230 }, { "epoch": 2.2507114377598, "grad_norm": 0.26171875, "learning_rate": 8.441396367922734e-07, "loss": 0.0767, "step": 18240 }, { "epoch": 2.2519453677496974, "grad_norm": 0.1767578125, "learning_rate": 8.439588601169724e-07, "loss": 0.0697, "step": 18250 }, { "epoch": 2.2531792977395946, "grad_norm": 0.25390625, "learning_rate": 8.437779980459613e-07, "loss": 0.0534, "step": 18260 }, { "epoch": 2.2544132277294917, "grad_norm": 0.248046875, "learning_rate": 8.435970506241427e-07, "loss": 0.0666, "step": 18270 }, { "epoch": 2.255647157719389, "grad_norm": 0.1474609375, "learning_rate": 8.434160178964416e-07, "loss": 0.0664, "step": 18280 }, { "epoch": 2.256881087709286, "grad_norm": 0.20703125, "learning_rate": 8.432348999078029e-07, "loss": 0.0832, "step": 18290 }, { "epoch": 2.258115017699183, "grad_norm": 0.1669921875, "learning_rate": 8.430536967031938e-07, "loss": 0.0627, "step": 18300 }, { "epoch": 2.2593489476890807, "grad_norm": 0.234375, "learning_rate": 8.428724083276019e-07, "loss": 0.0634, "step": 18310 }, { "epoch": 2.260582877678978, "grad_norm": 0.291015625, "learning_rate": 8.426910348260364e-07, "loss": 0.0809, "step": 18320 }, { "epoch": 2.261816807668875, "grad_norm": 0.2451171875, "learning_rate": 8.425095762435273e-07, "loss": 0.0867, "step": 18330 }, { "epoch": 2.263050737658772, "grad_norm": 0.2265625, "learning_rate": 8.423280326251259e-07, "loss": 0.069, "step": 18340 }, { "epoch": 2.264284667648669, "grad_norm": 0.171875, "learning_rate": 8.421464040159047e-07, "loss": 0.0809, "step": 18350 }, { "epoch": 2.2655185976385663, "grad_norm": 0.19140625, "learning_rate": 8.41964690460957e-07, "loss": 0.0651, "step": 18360 }, { "epoch": 2.266752527628464, "grad_norm": 0.1572265625, "learning_rate": 8.417828920053975e-07, "loss": 0.0734, "step": 18370 }, { "epoch": 2.267986457618361, "grad_norm": 0.208984375, "learning_rate": 8.416010086943619e-07, "loss": 0.0846, "step": 18380 }, { "epoch": 2.269220387608258, "grad_norm": 0.1865234375, "learning_rate": 8.414190405730069e-07, "loss": 0.067, "step": 18390 }, { "epoch": 2.2704543175981553, "grad_norm": 0.337890625, "learning_rate": 8.412369876865102e-07, "loss": 0.0709, "step": 18400 }, { "epoch": 2.2716882475880524, "grad_norm": 0.2333984375, "learning_rate": 8.410548500800705e-07, "loss": 0.06, "step": 18410 }, { "epoch": 2.2729221775779496, "grad_norm": 0.177734375, "learning_rate": 8.40872627798908e-07, "loss": 0.0613, "step": 18420 }, { "epoch": 2.2741561075678467, "grad_norm": 0.142578125, "learning_rate": 8.406903208882634e-07, "loss": 0.0652, "step": 18430 }, { "epoch": 2.2753900375577443, "grad_norm": 0.162109375, "learning_rate": 8.405079293933985e-07, "loss": 0.0615, "step": 18440 }, { "epoch": 2.2766239675476414, "grad_norm": 0.33984375, "learning_rate": 8.403254533595962e-07, "loss": 0.0761, "step": 18450 }, { "epoch": 2.2778578975375385, "grad_norm": 0.26953125, "learning_rate": 8.401428928321607e-07, "loss": 0.057, "step": 18460 }, { "epoch": 2.2790918275274357, "grad_norm": 0.28515625, "learning_rate": 8.399602478564165e-07, "loss": 0.065, "step": 18470 }, { "epoch": 2.280325757517333, "grad_norm": 0.173828125, "learning_rate": 8.397775184777094e-07, "loss": 0.0624, "step": 18480 }, { "epoch": 2.28155968750723, "grad_norm": 0.1484375, "learning_rate": 8.395947047414065e-07, "loss": 0.0688, "step": 18490 }, { "epoch": 2.282793617497127, "grad_norm": 0.27734375, "learning_rate": 8.394118066928951e-07, "loss": 0.0635, "step": 18500 }, { "epoch": 2.2840275474870246, "grad_norm": 0.212890625, "learning_rate": 8.392288243775842e-07, "loss": 0.0602, "step": 18510 }, { "epoch": 2.2852614774769218, "grad_norm": 0.1826171875, "learning_rate": 8.390457578409034e-07, "loss": 0.0633, "step": 18520 }, { "epoch": 2.286495407466819, "grad_norm": 0.185546875, "learning_rate": 8.38862607128303e-07, "loss": 0.0532, "step": 18530 }, { "epoch": 2.287729337456716, "grad_norm": 0.150390625, "learning_rate": 8.386793722852542e-07, "loss": 0.0713, "step": 18540 }, { "epoch": 2.288963267446613, "grad_norm": 0.1396484375, "learning_rate": 8.384960533572496e-07, "loss": 0.0601, "step": 18550 }, { "epoch": 2.2901971974365103, "grad_norm": 0.34765625, "learning_rate": 8.383126503898024e-07, "loss": 0.0506, "step": 18560 }, { "epoch": 2.2914311274264074, "grad_norm": 0.203125, "learning_rate": 8.381291634284462e-07, "loss": 0.0811, "step": 18570 }, { "epoch": 2.292665057416305, "grad_norm": 0.287109375, "learning_rate": 8.379455925187363e-07, "loss": 0.0848, "step": 18580 }, { "epoch": 2.293898987406202, "grad_norm": 0.203125, "learning_rate": 8.377619377062482e-07, "loss": 0.0824, "step": 18590 }, { "epoch": 2.2951329173960993, "grad_norm": 0.37890625, "learning_rate": 8.375781990365784e-07, "loss": 0.0773, "step": 18600 }, { "epoch": 2.2963668473859964, "grad_norm": 0.2138671875, "learning_rate": 8.373943765553444e-07, "loss": 0.0687, "step": 18610 }, { "epoch": 2.2976007773758935, "grad_norm": 0.197265625, "learning_rate": 8.372104703081842e-07, "loss": 0.0738, "step": 18620 }, { "epoch": 2.2988347073657907, "grad_norm": 0.212890625, "learning_rate": 8.370264803407569e-07, "loss": 0.0614, "step": 18630 }, { "epoch": 2.300068637355688, "grad_norm": 0.474609375, "learning_rate": 8.368424066987423e-07, "loss": 0.0718, "step": 18640 }, { "epoch": 2.3013025673455854, "grad_norm": 0.2294921875, "learning_rate": 8.366582494278407e-07, "loss": 0.0635, "step": 18650 }, { "epoch": 2.3025364973354825, "grad_norm": 0.244140625, "learning_rate": 8.364740085737736e-07, "loss": 0.0629, "step": 18660 }, { "epoch": 2.3037704273253796, "grad_norm": 0.2412109375, "learning_rate": 8.362896841822829e-07, "loss": 0.0503, "step": 18670 }, { "epoch": 2.3050043573152768, "grad_norm": 0.2197265625, "learning_rate": 8.361052762991314e-07, "loss": 0.0859, "step": 18680 }, { "epoch": 2.306238287305174, "grad_norm": 0.19921875, "learning_rate": 8.359207849701026e-07, "loss": 0.0653, "step": 18690 }, { "epoch": 2.3074722172950715, "grad_norm": 0.33984375, "learning_rate": 8.357362102410007e-07, "loss": 0.0654, "step": 18700 }, { "epoch": 2.3087061472849686, "grad_norm": 0.296875, "learning_rate": 8.355515521576508e-07, "loss": 0.0723, "step": 18710 }, { "epoch": 2.3099400772748657, "grad_norm": 0.197265625, "learning_rate": 8.353668107658983e-07, "loss": 0.0637, "step": 18720 }, { "epoch": 2.311174007264763, "grad_norm": 0.162109375, "learning_rate": 8.351819861116097e-07, "loss": 0.0721, "step": 18730 }, { "epoch": 2.31240793725466, "grad_norm": 0.26953125, "learning_rate": 8.349970782406717e-07, "loss": 0.0661, "step": 18740 }, { "epoch": 2.313641867244557, "grad_norm": 0.322265625, "learning_rate": 8.348120871989923e-07, "loss": 0.0744, "step": 18750 }, { "epoch": 2.3148757972344542, "grad_norm": 0.244140625, "learning_rate": 8.346270130324995e-07, "loss": 0.0637, "step": 18760 }, { "epoch": 2.316109727224352, "grad_norm": 0.18359375, "learning_rate": 8.344418557871424e-07, "loss": 0.0689, "step": 18770 }, { "epoch": 2.317343657214249, "grad_norm": 0.228515625, "learning_rate": 8.342566155088905e-07, "loss": 0.0718, "step": 18780 }, { "epoch": 2.318577587204146, "grad_norm": 0.189453125, "learning_rate": 8.340712922437339e-07, "loss": 0.0747, "step": 18790 }, { "epoch": 2.319811517194043, "grad_norm": 0.2294921875, "learning_rate": 8.338858860376833e-07, "loss": 0.06, "step": 18800 }, { "epoch": 2.3210454471839403, "grad_norm": 0.1708984375, "learning_rate": 8.337003969367704e-07, "loss": 0.0619, "step": 18810 }, { "epoch": 2.3222793771738375, "grad_norm": 0.44921875, "learning_rate": 8.335148249870469e-07, "loss": 0.0702, "step": 18820 }, { "epoch": 2.3235133071637346, "grad_norm": 0.498046875, "learning_rate": 8.333291702345853e-07, "loss": 0.0602, "step": 18830 }, { "epoch": 2.324747237153632, "grad_norm": 0.1796875, "learning_rate": 8.331434327254784e-07, "loss": 0.0801, "step": 18840 }, { "epoch": 2.3259811671435293, "grad_norm": 0.169921875, "learning_rate": 8.329576125058405e-07, "loss": 0.072, "step": 18850 }, { "epoch": 2.3272150971334264, "grad_norm": 0.2734375, "learning_rate": 8.327717096218052e-07, "loss": 0.0582, "step": 18860 }, { "epoch": 2.3284490271233236, "grad_norm": 0.1806640625, "learning_rate": 8.325857241195275e-07, "loss": 0.0634, "step": 18870 }, { "epoch": 2.3296829571132207, "grad_norm": 0.201171875, "learning_rate": 8.323996560451822e-07, "loss": 0.0763, "step": 18880 }, { "epoch": 2.330916887103118, "grad_norm": 0.16796875, "learning_rate": 8.322135054449655e-07, "loss": 0.0722, "step": 18890 }, { "epoch": 2.332150817093015, "grad_norm": 0.251953125, "learning_rate": 8.32027272365093e-07, "loss": 0.0712, "step": 18900 }, { "epoch": 2.3333847470829125, "grad_norm": 0.38671875, "learning_rate": 8.318409568518018e-07, "loss": 0.0704, "step": 18910 }, { "epoch": 2.3346186770728097, "grad_norm": 0.296875, "learning_rate": 8.316545589513488e-07, "loss": 0.0509, "step": 18920 }, { "epoch": 2.335852607062707, "grad_norm": 0.185546875, "learning_rate": 8.314680787100116e-07, "loss": 0.0886, "step": 18930 }, { "epoch": 2.337086537052604, "grad_norm": 0.3828125, "learning_rate": 8.312815161740883e-07, "loss": 0.0779, "step": 18940 }, { "epoch": 2.338320467042501, "grad_norm": 0.150390625, "learning_rate": 8.310948713898973e-07, "loss": 0.0641, "step": 18950 }, { "epoch": 2.339554397032398, "grad_norm": 0.294921875, "learning_rate": 8.309081444037774e-07, "loss": 0.0622, "step": 18960 }, { "epoch": 2.3407883270222953, "grad_norm": 0.2158203125, "learning_rate": 8.307213352620881e-07, "loss": 0.0959, "step": 18970 }, { "epoch": 2.342022257012193, "grad_norm": 0.232421875, "learning_rate": 8.305344440112087e-07, "loss": 0.0613, "step": 18980 }, { "epoch": 2.34325618700209, "grad_norm": 0.2734375, "learning_rate": 8.303474706975395e-07, "loss": 0.0586, "step": 18990 }, { "epoch": 2.344490116991987, "grad_norm": 0.12890625, "learning_rate": 8.30160415367501e-07, "loss": 0.066, "step": 19000 }, { "epoch": 2.344490116991987, "eval_exact_match": 0.656441717791411, "eval_has_answer_correct": 0.5787139689578714, "eval_no_answer_correct": 0.8308457711442786, "step": 19000 }, { "epoch": 2.3457240469818843, "grad_norm": 0.1650390625, "learning_rate": 8.299732780675338e-07, "loss": 0.071, "step": 19010 }, { "epoch": 2.3469579769717814, "grad_norm": 0.2109375, "learning_rate": 8.297860588440993e-07, "loss": 0.0692, "step": 19020 }, { "epoch": 2.348191906961679, "grad_norm": 0.291015625, "learning_rate": 8.295987577436786e-07, "loss": 0.062, "step": 19030 }, { "epoch": 2.349425836951576, "grad_norm": 0.15234375, "learning_rate": 8.294113748127738e-07, "loss": 0.0626, "step": 19040 }, { "epoch": 2.3506597669414733, "grad_norm": 0.2412109375, "learning_rate": 8.29223910097907e-07, "loss": 0.0667, "step": 19050 }, { "epoch": 2.3518936969313704, "grad_norm": 0.1669921875, "learning_rate": 8.290363636456206e-07, "loss": 0.0721, "step": 19060 }, { "epoch": 2.3531276269212675, "grad_norm": 0.2001953125, "learning_rate": 8.288487355024774e-07, "loss": 0.0763, "step": 19070 }, { "epoch": 2.3543615569111647, "grad_norm": 0.275390625, "learning_rate": 8.286610257150602e-07, "loss": 0.0843, "step": 19080 }, { "epoch": 2.355595486901062, "grad_norm": 0.158203125, "learning_rate": 8.284732343299725e-07, "loss": 0.0759, "step": 19090 }, { "epoch": 2.3568294168909594, "grad_norm": 0.3203125, "learning_rate": 8.282853613938377e-07, "loss": 0.0777, "step": 19100 }, { "epoch": 2.3580633468808565, "grad_norm": 0.169921875, "learning_rate": 8.280974069532998e-07, "loss": 0.0455, "step": 19110 }, { "epoch": 2.3592972768707536, "grad_norm": 0.142578125, "learning_rate": 8.279093710550225e-07, "loss": 0.0629, "step": 19120 }, { "epoch": 2.3605312068606508, "grad_norm": 0.2392578125, "learning_rate": 8.277212537456902e-07, "loss": 0.0677, "step": 19130 }, { "epoch": 2.361765136850548, "grad_norm": 0.1640625, "learning_rate": 8.275330550720073e-07, "loss": 0.0614, "step": 19140 }, { "epoch": 2.362999066840445, "grad_norm": 0.1630859375, "learning_rate": 8.273447750806985e-07, "loss": 0.0601, "step": 19150 }, { "epoch": 2.364232996830342, "grad_norm": 0.193359375, "learning_rate": 8.271564138185089e-07, "loss": 0.0718, "step": 19160 }, { "epoch": 2.3654669268202397, "grad_norm": 0.234375, "learning_rate": 8.269679713322031e-07, "loss": 0.0768, "step": 19170 }, { "epoch": 2.366700856810137, "grad_norm": 0.306640625, "learning_rate": 8.267794476685664e-07, "loss": 0.0905, "step": 19180 }, { "epoch": 2.367934786800034, "grad_norm": 0.2373046875, "learning_rate": 8.265908428744044e-07, "loss": 0.071, "step": 19190 }, { "epoch": 2.369168716789931, "grad_norm": 0.28515625, "learning_rate": 8.264021569965421e-07, "loss": 0.0591, "step": 19200 }, { "epoch": 2.3704026467798283, "grad_norm": 0.2060546875, "learning_rate": 8.262133900818257e-07, "loss": 0.0521, "step": 19210 }, { "epoch": 2.3716365767697254, "grad_norm": 0.1904296875, "learning_rate": 8.260245421771204e-07, "loss": 0.0778, "step": 19220 }, { "epoch": 2.3728705067596225, "grad_norm": 0.142578125, "learning_rate": 8.258356133293125e-07, "loss": 0.073, "step": 19230 }, { "epoch": 2.37410443674952, "grad_norm": 0.1796875, "learning_rate": 8.256466035853075e-07, "loss": 0.068, "step": 19240 }, { "epoch": 2.3753383667394172, "grad_norm": 0.1416015625, "learning_rate": 8.25457512992032e-07, "loss": 0.0604, "step": 19250 }, { "epoch": 2.3765722967293144, "grad_norm": 0.171875, "learning_rate": 8.252683415964315e-07, "loss": 0.0571, "step": 19260 }, { "epoch": 2.3778062267192115, "grad_norm": 0.181640625, "learning_rate": 8.250790894454723e-07, "loss": 0.0701, "step": 19270 }, { "epoch": 2.3790401567091086, "grad_norm": 0.28125, "learning_rate": 8.248897565861408e-07, "loss": 0.0729, "step": 19280 }, { "epoch": 2.3802740866990058, "grad_norm": 0.369140625, "learning_rate": 8.247003430654433e-07, "loss": 0.0744, "step": 19290 }, { "epoch": 2.381508016688903, "grad_norm": 0.1689453125, "learning_rate": 8.245108489304058e-07, "loss": 0.0718, "step": 19300 }, { "epoch": 2.3827419466788005, "grad_norm": 0.2275390625, "learning_rate": 8.243212742280747e-07, "loss": 0.0597, "step": 19310 }, { "epoch": 2.3839758766686976, "grad_norm": 0.333984375, "learning_rate": 8.241316190055163e-07, "loss": 0.0723, "step": 19320 }, { "epoch": 2.3852098066585947, "grad_norm": 0.1806640625, "learning_rate": 8.239418833098169e-07, "loss": 0.0738, "step": 19330 }, { "epoch": 2.386443736648492, "grad_norm": 0.208984375, "learning_rate": 8.237520671880826e-07, "loss": 0.0651, "step": 19340 }, { "epoch": 2.387677666638389, "grad_norm": 0.23828125, "learning_rate": 8.235621706874398e-07, "loss": 0.0729, "step": 19350 }, { "epoch": 2.388911596628286, "grad_norm": 0.259765625, "learning_rate": 8.233721938550346e-07, "loss": 0.0553, "step": 19360 }, { "epoch": 2.3901455266181837, "grad_norm": 0.251953125, "learning_rate": 8.231821367380334e-07, "loss": 0.059, "step": 19370 }, { "epoch": 2.391379456608081, "grad_norm": 0.2138671875, "learning_rate": 8.229919993836217e-07, "loss": 0.0699, "step": 19380 }, { "epoch": 2.392613386597978, "grad_norm": 0.1259765625, "learning_rate": 8.22801781839006e-07, "loss": 0.0657, "step": 19390 }, { "epoch": 2.393847316587875, "grad_norm": 0.640625, "learning_rate": 8.226114841514119e-07, "loss": 0.0655, "step": 19400 }, { "epoch": 2.395081246577772, "grad_norm": 0.263671875, "learning_rate": 8.224211063680852e-07, "loss": 0.0586, "step": 19410 }, { "epoch": 2.3963151765676693, "grad_norm": 0.1416015625, "learning_rate": 8.222306485362918e-07, "loss": 0.0709, "step": 19420 }, { "epoch": 2.397549106557567, "grad_norm": 0.216796875, "learning_rate": 8.220401107033169e-07, "loss": 0.0648, "step": 19430 }, { "epoch": 2.398783036547464, "grad_norm": 0.12109375, "learning_rate": 8.218494929164662e-07, "loss": 0.0623, "step": 19440 }, { "epoch": 2.400016966537361, "grad_norm": 0.263671875, "learning_rate": 8.216587952230648e-07, "loss": 0.0798, "step": 19450 }, { "epoch": 2.4012508965272583, "grad_norm": 0.39453125, "learning_rate": 8.214680176704578e-07, "loss": 0.0566, "step": 19460 }, { "epoch": 2.4024848265171554, "grad_norm": 0.455078125, "learning_rate": 8.212771603060103e-07, "loss": 0.0628, "step": 19470 }, { "epoch": 2.4037187565070526, "grad_norm": 0.142578125, "learning_rate": 8.210862231771067e-07, "loss": 0.0667, "step": 19480 }, { "epoch": 2.4049526864969497, "grad_norm": 0.1796875, "learning_rate": 8.208952063311518e-07, "loss": 0.0819, "step": 19490 }, { "epoch": 2.4061866164868473, "grad_norm": 0.1943359375, "learning_rate": 8.207041098155699e-07, "loss": 0.0535, "step": 19500 }, { "epoch": 2.4074205464767444, "grad_norm": 0.287109375, "learning_rate": 8.205129336778051e-07, "loss": 0.0523, "step": 19510 }, { "epoch": 2.4086544764666415, "grad_norm": 0.12451171875, "learning_rate": 8.203216779653213e-07, "loss": 0.0791, "step": 19520 }, { "epoch": 2.4098884064565387, "grad_norm": 0.2119140625, "learning_rate": 8.201303427256019e-07, "loss": 0.0636, "step": 19530 }, { "epoch": 2.411122336446436, "grad_norm": 0.27734375, "learning_rate": 8.199389280061506e-07, "loss": 0.0589, "step": 19540 }, { "epoch": 2.412356266436333, "grad_norm": 0.1337890625, "learning_rate": 8.197474338544904e-07, "loss": 0.0686, "step": 19550 }, { "epoch": 2.41359019642623, "grad_norm": 0.2021484375, "learning_rate": 8.195558603181641e-07, "loss": 0.0615, "step": 19560 }, { "epoch": 2.4148241264161276, "grad_norm": 0.30078125, "learning_rate": 8.193642074447343e-07, "loss": 0.0772, "step": 19570 }, { "epoch": 2.416058056406025, "grad_norm": 0.2333984375, "learning_rate": 8.191724752817832e-07, "loss": 0.0719, "step": 19580 }, { "epoch": 2.417291986395922, "grad_norm": 0.18359375, "learning_rate": 8.189806638769127e-07, "loss": 0.0708, "step": 19590 }, { "epoch": 2.418525916385819, "grad_norm": 0.255859375, "learning_rate": 8.187887732777446e-07, "loss": 0.0654, "step": 19600 }, { "epoch": 2.419759846375716, "grad_norm": 0.171875, "learning_rate": 8.185968035319198e-07, "loss": 0.0618, "step": 19610 }, { "epoch": 2.4209937763656133, "grad_norm": 0.34375, "learning_rate": 8.184047546870994e-07, "loss": 0.0761, "step": 19620 }, { "epoch": 2.4222277063555104, "grad_norm": 0.21875, "learning_rate": 8.18212626790964e-07, "loss": 0.0618, "step": 19630 }, { "epoch": 2.423461636345408, "grad_norm": 0.24609375, "learning_rate": 8.180204198912138e-07, "loss": 0.0662, "step": 19640 }, { "epoch": 2.424695566335305, "grad_norm": 0.421875, "learning_rate": 8.178281340355685e-07, "loss": 0.0731, "step": 19650 }, { "epoch": 2.4259294963252023, "grad_norm": 0.298828125, "learning_rate": 8.176357692717675e-07, "loss": 0.0644, "step": 19660 }, { "epoch": 2.4271634263150994, "grad_norm": 0.177734375, "learning_rate": 8.174433256475697e-07, "loss": 0.0817, "step": 19670 }, { "epoch": 2.4283973563049965, "grad_norm": 0.1611328125, "learning_rate": 8.172508032107539e-07, "loss": 0.0723, "step": 19680 }, { "epoch": 2.4296312862948937, "grad_norm": 0.27734375, "learning_rate": 8.170582020091181e-07, "loss": 0.0697, "step": 19690 }, { "epoch": 2.430865216284791, "grad_norm": 0.2080078125, "learning_rate": 8.168655220904799e-07, "loss": 0.0572, "step": 19700 }, { "epoch": 2.4320991462746884, "grad_norm": 0.349609375, "learning_rate": 8.166727635026767e-07, "loss": 0.076, "step": 19710 }, { "epoch": 2.4333330762645855, "grad_norm": 0.23046875, "learning_rate": 8.164799262935651e-07, "loss": 0.0678, "step": 19720 }, { "epoch": 2.4345670062544826, "grad_norm": 0.162109375, "learning_rate": 8.162870105110215e-07, "loss": 0.0562, "step": 19730 }, { "epoch": 2.4358009362443798, "grad_norm": 0.21484375, "learning_rate": 8.160940162029415e-07, "loss": 0.0702, "step": 19740 }, { "epoch": 2.437034866234277, "grad_norm": 0.27734375, "learning_rate": 8.159009434172406e-07, "loss": 0.0761, "step": 19750 }, { "epoch": 2.4382687962241745, "grad_norm": 0.19921875, "learning_rate": 8.157077922018536e-07, "loss": 0.0705, "step": 19760 }, { "epoch": 2.4395027262140716, "grad_norm": 0.44921875, "learning_rate": 8.155145626047344e-07, "loss": 0.0818, "step": 19770 }, { "epoch": 2.4407366562039687, "grad_norm": 0.267578125, "learning_rate": 8.15321254673857e-07, "loss": 0.0607, "step": 19780 }, { "epoch": 2.441970586193866, "grad_norm": 0.2333984375, "learning_rate": 8.151278684572145e-07, "loss": 0.0586, "step": 19790 }, { "epoch": 2.443204516183763, "grad_norm": 0.265625, "learning_rate": 8.149344040028194e-07, "loss": 0.0753, "step": 19800 }, { "epoch": 2.44443844617366, "grad_norm": 0.205078125, "learning_rate": 8.14740861358704e-07, "loss": 0.0603, "step": 19810 }, { "epoch": 2.4456723761635573, "grad_norm": 0.1728515625, "learning_rate": 8.145472405729193e-07, "loss": 0.0705, "step": 19820 }, { "epoch": 2.446906306153455, "grad_norm": 0.23046875, "learning_rate": 8.143535416935361e-07, "loss": 0.0584, "step": 19830 }, { "epoch": 2.448140236143352, "grad_norm": 0.1611328125, "learning_rate": 8.14159764768645e-07, "loss": 0.0557, "step": 19840 }, { "epoch": 2.449374166133249, "grad_norm": 0.21875, "learning_rate": 8.139659098463554e-07, "loss": 0.0704, "step": 19850 }, { "epoch": 2.4506080961231462, "grad_norm": 0.22265625, "learning_rate": 8.137719769747961e-07, "loss": 0.0601, "step": 19860 }, { "epoch": 2.4518420261130434, "grad_norm": 0.19140625, "learning_rate": 8.135779662021158e-07, "loss": 0.072, "step": 19870 }, { "epoch": 2.4530759561029405, "grad_norm": 0.333984375, "learning_rate": 8.133838775764816e-07, "loss": 0.0619, "step": 19880 }, { "epoch": 2.4543098860928376, "grad_norm": 0.2099609375, "learning_rate": 8.131897111460809e-07, "loss": 0.0695, "step": 19890 }, { "epoch": 2.455543816082735, "grad_norm": 0.1376953125, "learning_rate": 8.129954669591199e-07, "loss": 0.0472, "step": 19900 }, { "epoch": 2.4567777460726323, "grad_norm": 0.140625, "learning_rate": 8.128011450638241e-07, "loss": 0.0629, "step": 19910 }, { "epoch": 2.4580116760625295, "grad_norm": 0.30859375, "learning_rate": 8.126067455084385e-07, "loss": 0.0509, "step": 19920 }, { "epoch": 2.4592456060524266, "grad_norm": 0.1630859375, "learning_rate": 8.12412268341227e-07, "loss": 0.0774, "step": 19930 }, { "epoch": 2.4604795360423237, "grad_norm": 0.2216796875, "learning_rate": 8.122177136104733e-07, "loss": 0.0716, "step": 19940 }, { "epoch": 2.461713466032221, "grad_norm": 0.0927734375, "learning_rate": 8.120230813644802e-07, "loss": 0.0493, "step": 19950 }, { "epoch": 2.462947396022118, "grad_norm": 0.20703125, "learning_rate": 8.118283716515693e-07, "loss": 0.0642, "step": 19960 }, { "epoch": 2.4641813260120156, "grad_norm": 0.2119140625, "learning_rate": 8.11633584520082e-07, "loss": 0.0698, "step": 19970 }, { "epoch": 2.4654152560019127, "grad_norm": 0.21875, "learning_rate": 8.114387200183785e-07, "loss": 0.0823, "step": 19980 }, { "epoch": 2.46664918599181, "grad_norm": 0.1708984375, "learning_rate": 8.112437781948387e-07, "loss": 0.0648, "step": 19990 }, { "epoch": 2.467883115981707, "grad_norm": 0.23046875, "learning_rate": 8.110487590978612e-07, "loss": 0.072, "step": 20000 }, { "epoch": 2.467883115981707, "eval_exact_match": 0.6549079754601227, "eval_has_answer_correct": 0.5742793791574279, "eval_no_answer_correct": 0.835820895522388, "step": 20000 }, { "epoch": 2.469117045971604, "grad_norm": 0.388671875, "learning_rate": 8.10853662775864e-07, "loss": 0.0663, "step": 20010 }, { "epoch": 2.470350975961501, "grad_norm": 0.21484375, "learning_rate": 8.106584892772843e-07, "loss": 0.0604, "step": 20020 }, { "epoch": 2.4715849059513983, "grad_norm": 0.27734375, "learning_rate": 8.104632386505783e-07, "loss": 0.0679, "step": 20030 }, { "epoch": 2.472818835941296, "grad_norm": 0.212890625, "learning_rate": 8.102679109442217e-07, "loss": 0.0794, "step": 20040 }, { "epoch": 2.474052765931193, "grad_norm": 0.453125, "learning_rate": 8.100725062067088e-07, "loss": 0.064, "step": 20050 }, { "epoch": 2.47528669592109, "grad_norm": 0.318359375, "learning_rate": 8.098770244865536e-07, "loss": 0.0699, "step": 20060 }, { "epoch": 2.4765206259109873, "grad_norm": 0.189453125, "learning_rate": 8.096814658322888e-07, "loss": 0.0685, "step": 20070 }, { "epoch": 2.4777545559008844, "grad_norm": 0.396484375, "learning_rate": 8.094858302924662e-07, "loss": 0.0842, "step": 20080 }, { "epoch": 2.4789884858907816, "grad_norm": 0.275390625, "learning_rate": 8.092901179156572e-07, "loss": 0.069, "step": 20090 }, { "epoch": 2.480222415880679, "grad_norm": 0.1201171875, "learning_rate": 8.090943287504515e-07, "loss": 0.0676, "step": 20100 }, { "epoch": 2.4814563458705763, "grad_norm": 0.2109375, "learning_rate": 8.088984628454587e-07, "loss": 0.0732, "step": 20110 }, { "epoch": 2.4826902758604734, "grad_norm": 0.21484375, "learning_rate": 8.087025202493066e-07, "loss": 0.0752, "step": 20120 }, { "epoch": 2.4839242058503705, "grad_norm": 0.1787109375, "learning_rate": 8.085065010106427e-07, "loss": 0.0703, "step": 20130 }, { "epoch": 2.4851581358402677, "grad_norm": 0.251953125, "learning_rate": 8.083104051781334e-07, "loss": 0.065, "step": 20140 }, { "epoch": 2.486392065830165, "grad_norm": 0.173828125, "learning_rate": 8.081142328004636e-07, "loss": 0.0708, "step": 20150 }, { "epoch": 2.4876259958200624, "grad_norm": 0.447265625, "learning_rate": 8.07917983926338e-07, "loss": 0.0625, "step": 20160 }, { "epoch": 2.4888599258099595, "grad_norm": 0.255859375, "learning_rate": 8.077216586044798e-07, "loss": 0.0725, "step": 20170 }, { "epoch": 2.4900938557998566, "grad_norm": 0.34375, "learning_rate": 8.075252568836312e-07, "loss": 0.0697, "step": 20180 }, { "epoch": 2.491327785789754, "grad_norm": 0.1240234375, "learning_rate": 8.073287788125535e-07, "loss": 0.0599, "step": 20190 }, { "epoch": 2.492561715779651, "grad_norm": 0.23828125, "learning_rate": 8.071322244400269e-07, "loss": 0.0717, "step": 20200 }, { "epoch": 2.493795645769548, "grad_norm": 0.244140625, "learning_rate": 8.069355938148506e-07, "loss": 0.068, "step": 20210 }, { "epoch": 2.495029575759445, "grad_norm": 0.1953125, "learning_rate": 8.067388869858427e-07, "loss": 0.084, "step": 20220 }, { "epoch": 2.4962635057493427, "grad_norm": 0.30859375, "learning_rate": 8.0654210400184e-07, "loss": 0.0599, "step": 20230 }, { "epoch": 2.49749743573924, "grad_norm": 0.1220703125, "learning_rate": 8.063452449116987e-07, "loss": 0.0583, "step": 20240 }, { "epoch": 2.498731365729137, "grad_norm": 0.33984375, "learning_rate": 8.061483097642934e-07, "loss": 0.0529, "step": 20250 }, { "epoch": 2.499965295719034, "grad_norm": 0.296875, "learning_rate": 8.059512986085177e-07, "loss": 0.0542, "step": 20260 }, { "epoch": 2.5011992257089313, "grad_norm": 0.181640625, "learning_rate": 8.057542114932845e-07, "loss": 0.0604, "step": 20270 }, { "epoch": 2.5024331556988284, "grad_norm": 0.15625, "learning_rate": 8.05557048467525e-07, "loss": 0.0669, "step": 20280 }, { "epoch": 2.5036670856887255, "grad_norm": 0.220703125, "learning_rate": 8.053598095801895e-07, "loss": 0.0652, "step": 20290 }, { "epoch": 2.504901015678623, "grad_norm": 0.291015625, "learning_rate": 8.051624948802471e-07, "loss": 0.0664, "step": 20300 }, { "epoch": 2.5061349456685202, "grad_norm": 0.298828125, "learning_rate": 8.049651044166855e-07, "loss": 0.0698, "step": 20310 }, { "epoch": 2.5073688756584174, "grad_norm": 0.23046875, "learning_rate": 8.047676382385118e-07, "loss": 0.076, "step": 20320 }, { "epoch": 2.5086028056483145, "grad_norm": 0.2119140625, "learning_rate": 8.045700963947513e-07, "loss": 0.0743, "step": 20330 }, { "epoch": 2.5098367356382116, "grad_norm": 0.2177734375, "learning_rate": 8.043724789344483e-07, "loss": 0.0677, "step": 20340 }, { "epoch": 2.5110706656281088, "grad_norm": 0.1943359375, "learning_rate": 8.041747859066662e-07, "loss": 0.0671, "step": 20350 }, { "epoch": 2.512304595618006, "grad_norm": 0.458984375, "learning_rate": 8.039770173604864e-07, "loss": 0.058, "step": 20360 }, { "epoch": 2.5135385256079035, "grad_norm": 0.34375, "learning_rate": 8.037791733450097e-07, "loss": 0.0768, "step": 20370 }, { "epoch": 2.5147724555978006, "grad_norm": 0.1416015625, "learning_rate": 8.035812539093556e-07, "loss": 0.0759, "step": 20380 }, { "epoch": 2.5160063855876977, "grad_norm": 0.1982421875, "learning_rate": 8.033832591026619e-07, "loss": 0.0503, "step": 20390 }, { "epoch": 2.517240315577595, "grad_norm": 0.23046875, "learning_rate": 8.031851889740855e-07, "loss": 0.0689, "step": 20400 }, { "epoch": 2.518474245567492, "grad_norm": 0.1962890625, "learning_rate": 8.029870435728017e-07, "loss": 0.0621, "step": 20410 }, { "epoch": 2.5197081755573896, "grad_norm": 0.244140625, "learning_rate": 8.027888229480049e-07, "loss": 0.054, "step": 20420 }, { "epoch": 2.5209421055472863, "grad_norm": 0.2080078125, "learning_rate": 8.025905271489078e-07, "loss": 0.0623, "step": 20430 }, { "epoch": 2.522176035537184, "grad_norm": 0.1494140625, "learning_rate": 8.023921562247419e-07, "loss": 0.0612, "step": 20440 }, { "epoch": 2.523409965527081, "grad_norm": 0.1708984375, "learning_rate": 8.021937102247576e-07, "loss": 0.0471, "step": 20450 }, { "epoch": 2.524643895516978, "grad_norm": 0.1396484375, "learning_rate": 8.019951891982233e-07, "loss": 0.0518, "step": 20460 }, { "epoch": 2.5258778255068752, "grad_norm": 0.2138671875, "learning_rate": 8.017965931944266e-07, "loss": 0.0738, "step": 20470 }, { "epoch": 2.5271117554967724, "grad_norm": 0.32421875, "learning_rate": 8.015979222626737e-07, "loss": 0.0802, "step": 20480 }, { "epoch": 2.52834568548667, "grad_norm": 0.18359375, "learning_rate": 8.013991764522887e-07, "loss": 0.0634, "step": 20490 }, { "epoch": 2.5295796154765666, "grad_norm": 0.189453125, "learning_rate": 8.012003558126154e-07, "loss": 0.0623, "step": 20500 }, { "epoch": 2.530813545466464, "grad_norm": 0.22265625, "learning_rate": 8.010014603930155e-07, "loss": 0.0751, "step": 20510 }, { "epoch": 2.5320474754563613, "grad_norm": 0.1015625, "learning_rate": 8.008024902428692e-07, "loss": 0.0497, "step": 20520 }, { "epoch": 2.5332814054462585, "grad_norm": 0.1484375, "learning_rate": 8.006034454115754e-07, "loss": 0.0618, "step": 20530 }, { "epoch": 2.5345153354361556, "grad_norm": 0.232421875, "learning_rate": 8.004043259485518e-07, "loss": 0.0612, "step": 20540 }, { "epoch": 2.5357492654260527, "grad_norm": 0.2255859375, "learning_rate": 8.002051319032342e-07, "loss": 0.0798, "step": 20550 }, { "epoch": 2.5369831954159503, "grad_norm": 0.306640625, "learning_rate": 8.00005863325077e-07, "loss": 0.0632, "step": 20560 }, { "epoch": 2.5382171254058474, "grad_norm": 0.345703125, "learning_rate": 7.998065202635535e-07, "loss": 0.0675, "step": 20570 }, { "epoch": 2.5394510553957446, "grad_norm": 0.1748046875, "learning_rate": 7.99607102768155e-07, "loss": 0.0682, "step": 20580 }, { "epoch": 2.5406849853856417, "grad_norm": 0.1884765625, "learning_rate": 7.994076108883917e-07, "loss": 0.0693, "step": 20590 }, { "epoch": 2.541918915375539, "grad_norm": 0.3046875, "learning_rate": 7.992080446737919e-07, "loss": 0.0641, "step": 20600 }, { "epoch": 2.543152845365436, "grad_norm": 0.234375, "learning_rate": 7.990084041739023e-07, "loss": 0.078, "step": 20610 }, { "epoch": 2.544386775355333, "grad_norm": 0.2265625, "learning_rate": 7.988086894382887e-07, "loss": 0.0655, "step": 20620 }, { "epoch": 2.5456207053452307, "grad_norm": 0.330078125, "learning_rate": 7.986089005165346e-07, "loss": 0.0883, "step": 20630 }, { "epoch": 2.546854635335128, "grad_norm": 0.1328125, "learning_rate": 7.984090374582424e-07, "loss": 0.0799, "step": 20640 }, { "epoch": 2.548088565325025, "grad_norm": 0.3671875, "learning_rate": 7.982091003130325e-07, "loss": 0.061, "step": 20650 }, { "epoch": 2.549322495314922, "grad_norm": 0.203125, "learning_rate": 7.980090891305437e-07, "loss": 0.0545, "step": 20660 }, { "epoch": 2.550556425304819, "grad_norm": 0.41796875, "learning_rate": 7.978090039604341e-07, "loss": 0.0676, "step": 20670 }, { "epoch": 2.5517903552947163, "grad_norm": 0.416015625, "learning_rate": 7.976088448523788e-07, "loss": 0.0765, "step": 20680 }, { "epoch": 2.5530242852846134, "grad_norm": 0.1005859375, "learning_rate": 7.974086118560721e-07, "loss": 0.0663, "step": 20690 }, { "epoch": 2.554258215274511, "grad_norm": 0.2060546875, "learning_rate": 7.972083050212266e-07, "loss": 0.0698, "step": 20700 }, { "epoch": 2.555492145264408, "grad_norm": 0.32421875, "learning_rate": 7.970079243975727e-07, "loss": 0.0672, "step": 20710 }, { "epoch": 2.5567260752543053, "grad_norm": 0.33984375, "learning_rate": 7.9680747003486e-07, "loss": 0.0692, "step": 20720 }, { "epoch": 2.5579600052442024, "grad_norm": 0.255859375, "learning_rate": 7.966069419828556e-07, "loss": 0.0681, "step": 20730 }, { "epoch": 2.5591939352340995, "grad_norm": 0.404296875, "learning_rate": 7.964063402913455e-07, "loss": 0.0638, "step": 20740 }, { "epoch": 2.560427865223997, "grad_norm": 0.2490234375, "learning_rate": 7.962056650101332e-07, "loss": 0.064, "step": 20750 }, { "epoch": 2.561661795213894, "grad_norm": 0.357421875, "learning_rate": 7.960049161890413e-07, "loss": 0.0719, "step": 20760 }, { "epoch": 2.5628957252037914, "grad_norm": 0.20703125, "learning_rate": 7.958040938779103e-07, "loss": 0.0696, "step": 20770 }, { "epoch": 2.5641296551936885, "grad_norm": 0.3984375, "learning_rate": 7.956031981265989e-07, "loss": 0.0686, "step": 20780 }, { "epoch": 2.5653635851835856, "grad_norm": 0.265625, "learning_rate": 7.954022289849842e-07, "loss": 0.0823, "step": 20790 }, { "epoch": 2.566597515173483, "grad_norm": 0.181640625, "learning_rate": 7.952011865029613e-07, "loss": 0.0683, "step": 20800 }, { "epoch": 2.56783144516338, "grad_norm": 0.21484375, "learning_rate": 7.950000707304436e-07, "loss": 0.0627, "step": 20810 }, { "epoch": 2.5690653751532775, "grad_norm": 0.15234375, "learning_rate": 7.94798881717363e-07, "loss": 0.0619, "step": 20820 }, { "epoch": 2.570299305143174, "grad_norm": 0.1337890625, "learning_rate": 7.945976195136689e-07, "loss": 0.0781, "step": 20830 }, { "epoch": 2.5715332351330717, "grad_norm": 0.18359375, "learning_rate": 7.943962841693295e-07, "loss": 0.075, "step": 20840 }, { "epoch": 2.572767165122969, "grad_norm": 0.193359375, "learning_rate": 7.941948757343311e-07, "loss": 0.0662, "step": 20850 }, { "epoch": 2.574001095112866, "grad_norm": 0.1845703125, "learning_rate": 7.939933942586776e-07, "loss": 0.0632, "step": 20860 }, { "epoch": 2.575235025102763, "grad_norm": 0.1826171875, "learning_rate": 7.937918397923917e-07, "loss": 0.0708, "step": 20870 }, { "epoch": 2.5764689550926603, "grad_norm": 0.1845703125, "learning_rate": 7.935902123855138e-07, "loss": 0.0771, "step": 20880 }, { "epoch": 2.577702885082558, "grad_norm": 0.3671875, "learning_rate": 7.933885120881027e-07, "loss": 0.0686, "step": 20890 }, { "epoch": 2.578936815072455, "grad_norm": 0.2021484375, "learning_rate": 7.93186738950235e-07, "loss": 0.0709, "step": 20900 }, { "epoch": 2.580170745062352, "grad_norm": 0.2734375, "learning_rate": 7.929848930220054e-07, "loss": 0.0731, "step": 20910 }, { "epoch": 2.5814046750522492, "grad_norm": 0.30859375, "learning_rate": 7.927829743535272e-07, "loss": 0.0711, "step": 20920 }, { "epoch": 2.5826386050421464, "grad_norm": 0.255859375, "learning_rate": 7.925809829949311e-07, "loss": 0.0782, "step": 20930 }, { "epoch": 2.5838725350320435, "grad_norm": 0.2060546875, "learning_rate": 7.923789189963661e-07, "loss": 0.0684, "step": 20940 }, { "epoch": 2.5851064650219406, "grad_norm": 0.2001953125, "learning_rate": 7.921767824079994e-07, "loss": 0.06, "step": 20950 }, { "epoch": 2.586340395011838, "grad_norm": 0.1064453125, "learning_rate": 7.919745732800158e-07, "loss": 0.0711, "step": 20960 }, { "epoch": 2.5875743250017353, "grad_norm": 0.171875, "learning_rate": 7.917722916626187e-07, "loss": 0.0601, "step": 20970 }, { "epoch": 2.5888082549916325, "grad_norm": 0.2734375, "learning_rate": 7.91569937606029e-07, "loss": 0.0654, "step": 20980 }, { "epoch": 2.5900421849815296, "grad_norm": 0.234375, "learning_rate": 7.913675111604856e-07, "loss": 0.0585, "step": 20990 }, { "epoch": 2.5912761149714267, "grad_norm": 0.33984375, "learning_rate": 7.911650123762459e-07, "loss": 0.0633, "step": 21000 }, { "epoch": 2.5912761149714267, "eval_exact_match": 0.6533742331288344, "eval_has_answer_correct": 0.5720620842572062, "eval_no_answer_correct": 0.835820895522388, "step": 21000 }, { "epoch": 2.5925331811486347, "grad_norm": 0.2099609375, "learning_rate": 7.909624413035845e-07, "loss": 0.0488, "step": 21010 }, { "epoch": 2.593767111138532, "grad_norm": 0.201171875, "learning_rate": 7.907597979927945e-07, "loss": 0.0797, "step": 21020 }, { "epoch": 2.595001041128429, "grad_norm": 0.2314453125, "learning_rate": 7.905570824941867e-07, "loss": 0.0801, "step": 21030 }, { "epoch": 2.596234971118326, "grad_norm": 0.1865234375, "learning_rate": 7.903542948580902e-07, "loss": 0.0744, "step": 21040 }, { "epoch": 2.5974689011082233, "grad_norm": 0.181640625, "learning_rate": 7.901514351348513e-07, "loss": 0.0649, "step": 21050 }, { "epoch": 2.5987028310981204, "grad_norm": 0.146484375, "learning_rate": 7.89948503374835e-07, "loss": 0.059, "step": 21060 }, { "epoch": 2.599936761088018, "grad_norm": 0.27734375, "learning_rate": 7.897454996284234e-07, "loss": 0.0759, "step": 21070 }, { "epoch": 2.601170691077915, "grad_norm": 0.302734375, "learning_rate": 7.895424239460169e-07, "loss": 0.0803, "step": 21080 }, { "epoch": 2.6024046210678122, "grad_norm": 0.1728515625, "learning_rate": 7.89339276378034e-07, "loss": 0.0662, "step": 21090 }, { "epoch": 2.6036385510577094, "grad_norm": 0.1943359375, "learning_rate": 7.891360569749106e-07, "loss": 0.0828, "step": 21100 }, { "epoch": 2.6048724810476065, "grad_norm": 0.1474609375, "learning_rate": 7.889327657871004e-07, "loss": 0.0634, "step": 21110 }, { "epoch": 2.6061064110375036, "grad_norm": 0.279296875, "learning_rate": 7.887294028650755e-07, "loss": 0.0669, "step": 21120 }, { "epoch": 2.6073403410274008, "grad_norm": 0.232421875, "learning_rate": 7.885259682593253e-07, "loss": 0.085, "step": 21130 }, { "epoch": 2.6085742710172983, "grad_norm": 0.2001953125, "learning_rate": 7.883224620203568e-07, "loss": 0.0773, "step": 21140 }, { "epoch": 2.6098082010071955, "grad_norm": 0.17578125, "learning_rate": 7.881188841986956e-07, "loss": 0.0698, "step": 21150 }, { "epoch": 2.6110421309970926, "grad_norm": 0.1357421875, "learning_rate": 7.879152348448843e-07, "loss": 0.0529, "step": 21160 }, { "epoch": 2.6122760609869897, "grad_norm": 0.142578125, "learning_rate": 7.877115140094835e-07, "loss": 0.0656, "step": 21170 }, { "epoch": 2.613509990976887, "grad_norm": 0.23828125, "learning_rate": 7.875077217430719e-07, "loss": 0.0681, "step": 21180 }, { "epoch": 2.614743920966784, "grad_norm": 0.287109375, "learning_rate": 7.873038580962453e-07, "loss": 0.066, "step": 21190 }, { "epoch": 2.615977850956681, "grad_norm": 0.271484375, "learning_rate": 7.870999231196176e-07, "loss": 0.0739, "step": 21200 }, { "epoch": 2.6172117809465787, "grad_norm": 0.296875, "learning_rate": 7.868959168638206e-07, "loss": 0.0603, "step": 21210 }, { "epoch": 2.618445710936476, "grad_norm": 0.1962890625, "learning_rate": 7.866918393795032e-07, "loss": 0.0756, "step": 21220 }, { "epoch": 2.619679640926373, "grad_norm": 0.2138671875, "learning_rate": 7.864876907173327e-07, "loss": 0.0738, "step": 21230 }, { "epoch": 2.62091357091627, "grad_norm": 0.2236328125, "learning_rate": 7.862834709279933e-07, "loss": 0.0723, "step": 21240 }, { "epoch": 2.622147500906167, "grad_norm": 0.2578125, "learning_rate": 7.860791800621876e-07, "loss": 0.059, "step": 21250 }, { "epoch": 2.623381430896065, "grad_norm": 0.298828125, "learning_rate": 7.858748181706355e-07, "loss": 0.0543, "step": 21260 }, { "epoch": 2.6246153608859615, "grad_norm": 0.154296875, "learning_rate": 7.856703853040743e-07, "loss": 0.0676, "step": 21270 }, { "epoch": 2.625849290875859, "grad_norm": 0.212890625, "learning_rate": 7.854658815132594e-07, "loss": 0.081, "step": 21280 }, { "epoch": 2.627083220865756, "grad_norm": 0.412109375, "learning_rate": 7.852613068489635e-07, "loss": 0.0727, "step": 21290 }, { "epoch": 2.6283171508556533, "grad_norm": 0.2451171875, "learning_rate": 7.850566613619769e-07, "loss": 0.0813, "step": 21300 }, { "epoch": 2.6295510808455504, "grad_norm": 0.259765625, "learning_rate": 7.848519451031077e-07, "loss": 0.0494, "step": 21310 }, { "epoch": 2.6307850108354476, "grad_norm": 0.1416015625, "learning_rate": 7.846471581231813e-07, "loss": 0.068, "step": 21320 }, { "epoch": 2.632018940825345, "grad_norm": 0.1611328125, "learning_rate": 7.84442300473041e-07, "loss": 0.0707, "step": 21330 }, { "epoch": 2.6332528708152423, "grad_norm": 0.208984375, "learning_rate": 7.84237372203547e-07, "loss": 0.0628, "step": 21340 }, { "epoch": 2.6344868008051394, "grad_norm": 0.404296875, "learning_rate": 7.840323733655778e-07, "loss": 0.0671, "step": 21350 }, { "epoch": 2.6357207307950365, "grad_norm": 0.173828125, "learning_rate": 7.838273040100291e-07, "loss": 0.0567, "step": 21360 }, { "epoch": 2.6369546607849337, "grad_norm": 0.15234375, "learning_rate": 7.83622164187814e-07, "loss": 0.0663, "step": 21370 }, { "epoch": 2.638188590774831, "grad_norm": 0.1630859375, "learning_rate": 7.834169539498631e-07, "loss": 0.0663, "step": 21380 }, { "epoch": 2.639422520764728, "grad_norm": 0.3828125, "learning_rate": 7.832116733471248e-07, "loss": 0.0728, "step": 21390 }, { "epoch": 2.6406564507546255, "grad_norm": 0.234375, "learning_rate": 7.830063224305645e-07, "loss": 0.0648, "step": 21400 }, { "epoch": 2.6418903807445226, "grad_norm": 0.2431640625, "learning_rate": 7.828009012511654e-07, "loss": 0.0556, "step": 21410 }, { "epoch": 2.64312431073442, "grad_norm": 0.2158203125, "learning_rate": 7.825954098599281e-07, "loss": 0.0575, "step": 21420 }, { "epoch": 2.644358240724317, "grad_norm": 0.140625, "learning_rate": 7.823898483078704e-07, "loss": 0.0607, "step": 21430 }, { "epoch": 2.645592170714214, "grad_norm": 0.384765625, "learning_rate": 7.821842166460278e-07, "loss": 0.067, "step": 21440 }, { "epoch": 2.646826100704111, "grad_norm": 0.162109375, "learning_rate": 7.819785149254532e-07, "loss": 0.0685, "step": 21450 }, { "epoch": 2.6480600306940083, "grad_norm": 0.173828125, "learning_rate": 7.817727431972167e-07, "loss": 0.0651, "step": 21460 }, { "epoch": 2.649293960683906, "grad_norm": 0.1982421875, "learning_rate": 7.815669015124058e-07, "loss": 0.0683, "step": 21470 }, { "epoch": 2.650527890673803, "grad_norm": 0.181640625, "learning_rate": 7.813609899221254e-07, "loss": 0.0677, "step": 21480 }, { "epoch": 2.6517618206637, "grad_norm": 0.1474609375, "learning_rate": 7.811550084774981e-07, "loss": 0.0657, "step": 21490 }, { "epoch": 2.6529957506535973, "grad_norm": 0.2119140625, "learning_rate": 7.809489572296631e-07, "loss": 0.0553, "step": 21500 }, { "epoch": 2.6542296806434944, "grad_norm": 0.1533203125, "learning_rate": 7.807428362297777e-07, "loss": 0.0512, "step": 21510 }, { "epoch": 2.6554636106333915, "grad_norm": 0.1865234375, "learning_rate": 7.805366455290162e-07, "loss": 0.0709, "step": 21520 }, { "epoch": 2.6566975406232887, "grad_norm": 0.33203125, "learning_rate": 7.803303851785699e-07, "loss": 0.0582, "step": 21530 }, { "epoch": 2.6579314706131862, "grad_norm": 0.259765625, "learning_rate": 7.801240552296479e-07, "loss": 0.0764, "step": 21540 }, { "epoch": 2.6591654006030834, "grad_norm": 0.271484375, "learning_rate": 7.799176557334764e-07, "loss": 0.0637, "step": 21550 }, { "epoch": 2.6603993305929805, "grad_norm": 0.2177734375, "learning_rate": 7.797111867412986e-07, "loss": 0.0582, "step": 21560 }, { "epoch": 2.6616332605828776, "grad_norm": 0.1904296875, "learning_rate": 7.795046483043755e-07, "loss": 0.0715, "step": 21570 }, { "epoch": 2.6628671905727748, "grad_norm": 0.2392578125, "learning_rate": 7.792980404739847e-07, "loss": 0.0735, "step": 21580 }, { "epoch": 2.6641011205626723, "grad_norm": 0.287109375, "learning_rate": 7.790913633014215e-07, "loss": 0.0619, "step": 21590 }, { "epoch": 2.665335050552569, "grad_norm": 0.384765625, "learning_rate": 7.788846168379986e-07, "loss": 0.0692, "step": 21600 }, { "epoch": 2.6665689805424666, "grad_norm": 0.341796875, "learning_rate": 7.78677801135045e-07, "loss": 0.0671, "step": 21610 }, { "epoch": 2.6678029105323637, "grad_norm": 0.251953125, "learning_rate": 7.784709162439077e-07, "loss": 0.0654, "step": 21620 }, { "epoch": 2.669036840522261, "grad_norm": 0.26953125, "learning_rate": 7.782639622159507e-07, "loss": 0.0673, "step": 21630 }, { "epoch": 2.670270770512158, "grad_norm": 0.3203125, "learning_rate": 7.780569391025551e-07, "loss": 0.0786, "step": 21640 }, { "epoch": 2.671504700502055, "grad_norm": 0.14453125, "learning_rate": 7.778498469551191e-07, "loss": 0.0528, "step": 21650 }, { "epoch": 2.6727386304919527, "grad_norm": 0.2431640625, "learning_rate": 7.776426858250581e-07, "loss": 0.0774, "step": 21660 }, { "epoch": 2.6739725604818494, "grad_norm": 0.2138671875, "learning_rate": 7.774354557638047e-07, "loss": 0.0683, "step": 21670 }, { "epoch": 2.675206490471747, "grad_norm": 0.2490234375, "learning_rate": 7.772281568228082e-07, "loss": 0.0619, "step": 21680 }, { "epoch": 2.676440420461644, "grad_norm": 0.1982421875, "learning_rate": 7.770207890535359e-07, "loss": 0.0677, "step": 21690 }, { "epoch": 2.6776743504515412, "grad_norm": 0.296875, "learning_rate": 7.768133525074714e-07, "loss": 0.0739, "step": 21700 }, { "epoch": 2.6789082804414384, "grad_norm": 0.33984375, "learning_rate": 7.766058472361153e-07, "loss": 0.068, "step": 21710 }, { "epoch": 2.6801422104313355, "grad_norm": 0.205078125, "learning_rate": 7.76398273290986e-07, "loss": 0.0854, "step": 21720 }, { "epoch": 2.681376140421233, "grad_norm": 0.2490234375, "learning_rate": 7.761906307236181e-07, "loss": 0.0664, "step": 21730 }, { "epoch": 2.68261007041113, "grad_norm": 0.2197265625, "learning_rate": 7.759829195855641e-07, "loss": 0.0815, "step": 21740 }, { "epoch": 2.6838440004010273, "grad_norm": 0.181640625, "learning_rate": 7.757751399283928e-07, "loss": 0.0628, "step": 21750 }, { "epoch": 2.6850779303909245, "grad_norm": 0.15625, "learning_rate": 7.755672918036901e-07, "loss": 0.0606, "step": 21760 }, { "epoch": 2.6863118603808216, "grad_norm": 0.240234375, "learning_rate": 7.753593752630595e-07, "loss": 0.0866, "step": 21770 }, { "epoch": 2.6875457903707187, "grad_norm": 0.255859375, "learning_rate": 7.751513903581208e-07, "loss": 0.0666, "step": 21780 }, { "epoch": 2.688779720360616, "grad_norm": 0.412109375, "learning_rate": 7.749433371405112e-07, "loss": 0.0582, "step": 21790 }, { "epoch": 2.6900136503505134, "grad_norm": 0.16015625, "learning_rate": 7.747352156618846e-07, "loss": 0.0782, "step": 21800 }, { "epoch": 2.6912475803404106, "grad_norm": 0.28515625, "learning_rate": 7.74527025973912e-07, "loss": 0.0593, "step": 21810 }, { "epoch": 2.6924815103303077, "grad_norm": 0.2333984375, "learning_rate": 7.743187681282813e-07, "loss": 0.0644, "step": 21820 }, { "epoch": 2.693715440320205, "grad_norm": 0.26953125, "learning_rate": 7.741104421766972e-07, "loss": 0.0611, "step": 21830 }, { "epoch": 2.694949370310102, "grad_norm": 0.42578125, "learning_rate": 7.739020481708814e-07, "loss": 0.0741, "step": 21840 }, { "epoch": 2.696183300299999, "grad_norm": 0.13671875, "learning_rate": 7.736935861625728e-07, "loss": 0.0533, "step": 21850 }, { "epoch": 2.697417230289896, "grad_norm": 0.40234375, "learning_rate": 7.734850562035267e-07, "loss": 0.0702, "step": 21860 }, { "epoch": 2.698651160279794, "grad_norm": 0.1435546875, "learning_rate": 7.732764583455152e-07, "loss": 0.0749, "step": 21870 }, { "epoch": 2.699885090269691, "grad_norm": 0.1865234375, "learning_rate": 7.73067792640328e-07, "loss": 0.0834, "step": 21880 }, { "epoch": 2.701119020259588, "grad_norm": 0.1845703125, "learning_rate": 7.72859059139771e-07, "loss": 0.0653, "step": 21890 }, { "epoch": 2.702352950249485, "grad_norm": 0.33984375, "learning_rate": 7.726502578956668e-07, "loss": 0.0723, "step": 21900 }, { "epoch": 2.7035868802393823, "grad_norm": 0.27734375, "learning_rate": 7.724413889598556e-07, "loss": 0.0637, "step": 21910 }, { "epoch": 2.70482081022928, "grad_norm": 0.25390625, "learning_rate": 7.722324523841938e-07, "loss": 0.0767, "step": 21920 }, { "epoch": 2.7060547402191766, "grad_norm": 0.18359375, "learning_rate": 7.720234482205545e-07, "loss": 0.0705, "step": 21930 }, { "epoch": 2.707288670209074, "grad_norm": 0.185546875, "learning_rate": 7.718143765208278e-07, "loss": 0.0761, "step": 21940 }, { "epoch": 2.7085226001989713, "grad_norm": 0.6171875, "learning_rate": 7.716052373369207e-07, "loss": 0.0685, "step": 21950 }, { "epoch": 2.7097565301888684, "grad_norm": 0.1376953125, "learning_rate": 7.71396030720757e-07, "loss": 0.0857, "step": 21960 }, { "epoch": 2.7109904601787655, "grad_norm": 0.2001953125, "learning_rate": 7.711867567242766e-07, "loss": 0.0676, "step": 21970 }, { "epoch": 2.7122243901686627, "grad_norm": 0.291015625, "learning_rate": 7.709774153994371e-07, "loss": 0.0776, "step": 21980 }, { "epoch": 2.7134583201585603, "grad_norm": 0.2412109375, "learning_rate": 7.707680067982119e-07, "loss": 0.0668, "step": 21990 }, { "epoch": 2.714692250148457, "grad_norm": 0.1416015625, "learning_rate": 7.705585309725917e-07, "loss": 0.0671, "step": 22000 }, { "epoch": 2.714692250148457, "eval_exact_match": 0.6533742331288344, "eval_has_answer_correct": 0.5742793791574279, "eval_no_answer_correct": 0.8308457711442786, "step": 22000 }, { "epoch": 2.7159261801383545, "grad_norm": 0.33203125, "learning_rate": 7.703489879745837e-07, "loss": 0.0681, "step": 22010 }, { "epoch": 2.7171601101282516, "grad_norm": 0.1962890625, "learning_rate": 7.701393778562118e-07, "loss": 0.0586, "step": 22020 }, { "epoch": 2.718394040118149, "grad_norm": 0.21484375, "learning_rate": 7.699297006695162e-07, "loss": 0.0661, "step": 22030 }, { "epoch": 2.719627970108046, "grad_norm": 0.2109375, "learning_rate": 7.697199564665546e-07, "loss": 0.0743, "step": 22040 }, { "epoch": 2.720861900097943, "grad_norm": 0.259765625, "learning_rate": 7.695101452994007e-07, "loss": 0.0564, "step": 22050 }, { "epoch": 2.7220958300878406, "grad_norm": 0.3828125, "learning_rate": 7.693002672201447e-07, "loss": 0.0661, "step": 22060 }, { "epoch": 2.7233297600777377, "grad_norm": 0.2177734375, "learning_rate": 7.69090322280894e-07, "loss": 0.0638, "step": 22070 }, { "epoch": 2.724563690067635, "grad_norm": 0.255859375, "learning_rate": 7.68880310533772e-07, "loss": 0.0684, "step": 22080 }, { "epoch": 2.725797620057532, "grad_norm": 0.26171875, "learning_rate": 7.686702320309191e-07, "loss": 0.0782, "step": 22090 }, { "epoch": 2.727031550047429, "grad_norm": 0.2021484375, "learning_rate": 7.684600868244919e-07, "loss": 0.0677, "step": 22100 }, { "epoch": 2.7282654800373263, "grad_norm": 0.1796875, "learning_rate": 7.682498749666642e-07, "loss": 0.0567, "step": 22110 }, { "epoch": 2.7294994100272234, "grad_norm": 0.19921875, "learning_rate": 7.680395965096254e-07, "loss": 0.0748, "step": 22120 }, { "epoch": 2.730733340017121, "grad_norm": 0.1611328125, "learning_rate": 7.678292515055824e-07, "loss": 0.0603, "step": 22130 }, { "epoch": 2.731967270007018, "grad_norm": 0.197265625, "learning_rate": 7.676188400067583e-07, "loss": 0.0717, "step": 22140 }, { "epoch": 2.7332011999969152, "grad_norm": 0.306640625, "learning_rate": 7.67408362065392e-07, "loss": 0.0806, "step": 22150 }, { "epoch": 2.7344351299868124, "grad_norm": 0.251953125, "learning_rate": 7.671978177337401e-07, "loss": 0.0655, "step": 22160 }, { "epoch": 2.7356690599767095, "grad_norm": 0.1484375, "learning_rate": 7.669872070640749e-07, "loss": 0.0613, "step": 22170 }, { "epoch": 2.7369029899666066, "grad_norm": 0.408203125, "learning_rate": 7.667765301086851e-07, "loss": 0.0711, "step": 22180 }, { "epoch": 2.7381369199565038, "grad_norm": 0.298828125, "learning_rate": 7.665657869198765e-07, "loss": 0.063, "step": 22190 }, { "epoch": 2.7393708499464013, "grad_norm": 0.251953125, "learning_rate": 7.663549775499707e-07, "loss": 0.0712, "step": 22200 }, { "epoch": 2.7406047799362985, "grad_norm": 0.609375, "learning_rate": 7.66144102051306e-07, "loss": 0.0634, "step": 22210 }, { "epoch": 2.7418387099261956, "grad_norm": 0.263671875, "learning_rate": 7.659331604762371e-07, "loss": 0.0735, "step": 22220 }, { "epoch": 2.7430726399160927, "grad_norm": 0.251953125, "learning_rate": 7.657221528771351e-07, "loss": 0.0602, "step": 22230 }, { "epoch": 2.74430656990599, "grad_norm": 0.36328125, "learning_rate": 7.655110793063876e-07, "loss": 0.0723, "step": 22240 }, { "epoch": 2.745540499895887, "grad_norm": 0.154296875, "learning_rate": 7.652999398163984e-07, "loss": 0.0645, "step": 22250 }, { "epoch": 2.746774429885784, "grad_norm": 0.27734375, "learning_rate": 7.650887344595878e-07, "loss": 0.0699, "step": 22260 }, { "epoch": 2.7480083598756817, "grad_norm": 0.27734375, "learning_rate": 7.648774632883923e-07, "loss": 0.0656, "step": 22270 }, { "epoch": 2.749242289865579, "grad_norm": 0.248046875, "learning_rate": 7.64666126355265e-07, "loss": 0.0627, "step": 22280 }, { "epoch": 2.750476219855476, "grad_norm": 0.212890625, "learning_rate": 7.64454723712675e-07, "loss": 0.0723, "step": 22290 }, { "epoch": 2.751710149845373, "grad_norm": 0.1650390625, "learning_rate": 7.642432554131079e-07, "loss": 0.0654, "step": 22300 }, { "epoch": 2.7529440798352702, "grad_norm": 0.17578125, "learning_rate": 7.640317215090657e-07, "loss": 0.0797, "step": 22310 }, { "epoch": 2.754178009825168, "grad_norm": 0.201171875, "learning_rate": 7.638201220530663e-07, "loss": 0.0578, "step": 22320 }, { "epoch": 2.7554119398150645, "grad_norm": 0.21875, "learning_rate": 7.636084570976444e-07, "loss": 0.073, "step": 22330 }, { "epoch": 2.756645869804962, "grad_norm": 0.2578125, "learning_rate": 7.633967266953505e-07, "loss": 0.0628, "step": 22340 }, { "epoch": 2.757879799794859, "grad_norm": 0.2431640625, "learning_rate": 7.631849308987519e-07, "loss": 0.0696, "step": 22350 }, { "epoch": 2.7591137297847563, "grad_norm": 0.412109375, "learning_rate": 7.629730697604313e-07, "loss": 0.0689, "step": 22360 }, { "epoch": 2.7603476597746535, "grad_norm": 0.1650390625, "learning_rate": 7.627611433329885e-07, "loss": 0.0583, "step": 22370 }, { "epoch": 2.7615815897645506, "grad_norm": 0.24609375, "learning_rate": 7.625491516690389e-07, "loss": 0.0571, "step": 22380 }, { "epoch": 2.762815519754448, "grad_norm": 0.189453125, "learning_rate": 7.623370948212144e-07, "loss": 0.065, "step": 22390 }, { "epoch": 2.764049449744345, "grad_norm": 0.43359375, "learning_rate": 7.621249728421631e-07, "loss": 0.0647, "step": 22400 }, { "epoch": 2.7652833797342424, "grad_norm": 0.166015625, "learning_rate": 7.619127857845489e-07, "loss": 0.0673, "step": 22410 }, { "epoch": 2.7665173097241396, "grad_norm": 0.3359375, "learning_rate": 7.617005337010523e-07, "loss": 0.0603, "step": 22420 }, { "epoch": 2.7677512397140367, "grad_norm": 0.18359375, "learning_rate": 7.614882166443697e-07, "loss": 0.0582, "step": 22430 }, { "epoch": 2.768985169703934, "grad_norm": 0.212890625, "learning_rate": 7.612758346672137e-07, "loss": 0.0806, "step": 22440 }, { "epoch": 2.770219099693831, "grad_norm": 0.181640625, "learning_rate": 7.610633878223132e-07, "loss": 0.06, "step": 22450 }, { "epoch": 2.7714530296837285, "grad_norm": 0.345703125, "learning_rate": 7.608508761624127e-07, "loss": 0.0528, "step": 22460 }, { "epoch": 2.7726869596736257, "grad_norm": 0.255859375, "learning_rate": 7.606382997402735e-07, "loss": 0.0679, "step": 22470 }, { "epoch": 2.773920889663523, "grad_norm": 0.212890625, "learning_rate": 7.604256586086723e-07, "loss": 0.0648, "step": 22480 }, { "epoch": 2.77515481965342, "grad_norm": 0.2314453125, "learning_rate": 7.602129528204022e-07, "loss": 0.0704, "step": 22490 }, { "epoch": 2.776388749643317, "grad_norm": 0.1806640625, "learning_rate": 7.600001824282725e-07, "loss": 0.0697, "step": 22500 }, { "epoch": 2.777622679633214, "grad_norm": 0.1552734375, "learning_rate": 7.597873474851082e-07, "loss": 0.0621, "step": 22510 }, { "epoch": 2.7788566096231113, "grad_norm": 0.2890625, "learning_rate": 7.595744480437504e-07, "loss": 0.0846, "step": 22520 }, { "epoch": 2.780090539613009, "grad_norm": 0.255859375, "learning_rate": 7.593614841570563e-07, "loss": 0.0707, "step": 22530 }, { "epoch": 2.781324469602906, "grad_norm": 0.19140625, "learning_rate": 7.591484558778993e-07, "loss": 0.068, "step": 22540 }, { "epoch": 2.782558399592803, "grad_norm": 0.466796875, "learning_rate": 7.589353632591684e-07, "loss": 0.0704, "step": 22550 }, { "epoch": 2.7837923295827003, "grad_norm": 0.185546875, "learning_rate": 7.587222063537688e-07, "loss": 0.0677, "step": 22560 }, { "epoch": 2.7850262595725974, "grad_norm": 0.287109375, "learning_rate": 7.585089852146217e-07, "loss": 0.0691, "step": 22570 }, { "epoch": 2.7862601895624945, "grad_norm": 0.2119140625, "learning_rate": 7.582956998946641e-07, "loss": 0.0839, "step": 22580 }, { "epoch": 2.7874941195523917, "grad_norm": 0.1748046875, "learning_rate": 7.580823504468489e-07, "loss": 0.0663, "step": 22590 }, { "epoch": 2.7887280495422893, "grad_norm": 0.294921875, "learning_rate": 7.57868936924145e-07, "loss": 0.0588, "step": 22600 }, { "epoch": 2.7899619795321864, "grad_norm": 0.2216796875, "learning_rate": 7.576554593795374e-07, "loss": 0.0665, "step": 22610 }, { "epoch": 2.7911959095220835, "grad_norm": 0.1904296875, "learning_rate": 7.574419178660268e-07, "loss": 0.0813, "step": 22620 }, { "epoch": 2.7924298395119806, "grad_norm": 0.11865234375, "learning_rate": 7.572283124366295e-07, "loss": 0.0883, "step": 22630 }, { "epoch": 2.793663769501878, "grad_norm": 0.28515625, "learning_rate": 7.570146431443782e-07, "loss": 0.0678, "step": 22640 }, { "epoch": 2.7948976994917754, "grad_norm": 0.1826171875, "learning_rate": 7.568009100423214e-07, "loss": 0.0608, "step": 22650 }, { "epoch": 2.796131629481672, "grad_norm": 0.2412109375, "learning_rate": 7.565871131835231e-07, "loss": 0.0793, "step": 22660 }, { "epoch": 2.7973655594715696, "grad_norm": 0.21875, "learning_rate": 7.563732526210631e-07, "loss": 0.0701, "step": 22670 }, { "epoch": 2.7985994894614667, "grad_norm": 0.2275390625, "learning_rate": 7.561593284080374e-07, "loss": 0.0731, "step": 22680 }, { "epoch": 2.799833419451364, "grad_norm": 0.150390625, "learning_rate": 7.559453405975578e-07, "loss": 0.0708, "step": 22690 }, { "epoch": 2.801067349441261, "grad_norm": 0.2216796875, "learning_rate": 7.557312892427513e-07, "loss": 0.0736, "step": 22700 }, { "epoch": 2.802301279431158, "grad_norm": 0.1611328125, "learning_rate": 7.555171743967614e-07, "loss": 0.0563, "step": 22710 }, { "epoch": 2.8035352094210557, "grad_norm": 0.57421875, "learning_rate": 7.553029961127468e-07, "loss": 0.0754, "step": 22720 }, { "epoch": 2.8047691394109524, "grad_norm": 0.251953125, "learning_rate": 7.550887544438826e-07, "loss": 0.0804, "step": 22730 }, { "epoch": 2.80600306940085, "grad_norm": 0.271484375, "learning_rate": 7.548744494433587e-07, "loss": 0.0701, "step": 22740 }, { "epoch": 2.807236999390747, "grad_norm": 0.36328125, "learning_rate": 7.546600811643816e-07, "loss": 0.0679, "step": 22750 }, { "epoch": 2.8084709293806442, "grad_norm": 0.23828125, "learning_rate": 7.544456496601728e-07, "loss": 0.0677, "step": 22760 }, { "epoch": 2.8097048593705414, "grad_norm": 0.1748046875, "learning_rate": 7.542311549839704e-07, "loss": 0.0569, "step": 22770 }, { "epoch": 2.8109387893604385, "grad_norm": 0.306640625, "learning_rate": 7.540165971890273e-07, "loss": 0.0666, "step": 22780 }, { "epoch": 2.812172719350336, "grad_norm": 0.263671875, "learning_rate": 7.538019763286125e-07, "loss": 0.0689, "step": 22790 }, { "epoch": 2.813406649340233, "grad_norm": 0.45703125, "learning_rate": 7.535872924560107e-07, "loss": 0.0766, "step": 22800 }, { "epoch": 2.8146405793301303, "grad_norm": 0.1630859375, "learning_rate": 7.533725456245218e-07, "loss": 0.0508, "step": 22810 }, { "epoch": 2.8158745093200275, "grad_norm": 0.19921875, "learning_rate": 7.531577358874619e-07, "loss": 0.0851, "step": 22820 }, { "epoch": 2.8171084393099246, "grad_norm": 0.3125, "learning_rate": 7.529428632981622e-07, "loss": 0.07, "step": 22830 }, { "epoch": 2.8183423692998217, "grad_norm": 0.390625, "learning_rate": 7.527279279099703e-07, "loss": 0.0665, "step": 22840 }, { "epoch": 2.819576299289719, "grad_norm": 0.388671875, "learning_rate": 7.525129297762483e-07, "loss": 0.0845, "step": 22850 }, { "epoch": 2.8208102292796164, "grad_norm": 0.443359375, "learning_rate": 7.522978689503745e-07, "loss": 0.0654, "step": 22860 }, { "epoch": 2.8220441592695136, "grad_norm": 0.322265625, "learning_rate": 7.520827454857432e-07, "loss": 0.0691, "step": 22870 }, { "epoch": 2.8232780892594107, "grad_norm": 0.33203125, "learning_rate": 7.518675594357632e-07, "loss": 0.0756, "step": 22880 }, { "epoch": 2.824512019249308, "grad_norm": 0.1640625, "learning_rate": 7.516523108538598e-07, "loss": 0.062, "step": 22890 }, { "epoch": 2.825745949239205, "grad_norm": 0.162109375, "learning_rate": 7.514369997934731e-07, "loss": 0.0605, "step": 22900 }, { "epoch": 2.826979879229102, "grad_norm": 0.3828125, "learning_rate": 7.512216263080591e-07, "loss": 0.067, "step": 22910 }, { "epoch": 2.8282138092189992, "grad_norm": 0.1728515625, "learning_rate": 7.510061904510894e-07, "loss": 0.071, "step": 22920 }, { "epoch": 2.829447739208897, "grad_norm": 0.3125, "learning_rate": 7.507906922760508e-07, "loss": 0.0823, "step": 22930 }, { "epoch": 2.830681669198794, "grad_norm": 0.1494140625, "learning_rate": 7.505751318364456e-07, "loss": 0.0631, "step": 22940 }, { "epoch": 2.831915599188691, "grad_norm": 0.2109375, "learning_rate": 7.503595091857917e-07, "loss": 0.0674, "step": 22950 }, { "epoch": 2.833149529178588, "grad_norm": 0.294921875, "learning_rate": 7.501438243776226e-07, "loss": 0.0506, "step": 22960 }, { "epoch": 2.8343834591684853, "grad_norm": 0.19140625, "learning_rate": 7.499280774654869e-07, "loss": 0.0714, "step": 22970 }, { "epoch": 2.8356173891583825, "grad_norm": 0.271484375, "learning_rate": 7.497122685029484e-07, "loss": 0.0735, "step": 22980 }, { "epoch": 2.8368513191482796, "grad_norm": 0.1259765625, "learning_rate": 7.49496397543587e-07, "loss": 0.0695, "step": 22990 }, { "epoch": 2.838085249138177, "grad_norm": 0.25390625, "learning_rate": 7.492804646409978e-07, "loss": 0.0652, "step": 23000 }, { "epoch": 2.838085249138177, "eval_exact_match": 0.6533742331288344, "eval_has_answer_correct": 0.5742793791574279, "eval_no_answer_correct": 0.8308457711442786, "step": 23000 }, { "epoch": 2.8393191791280743, "grad_norm": 0.2021484375, "learning_rate": 7.490644698487908e-07, "loss": 0.063, "step": 23010 }, { "epoch": 2.8405531091179714, "grad_norm": 0.23046875, "learning_rate": 7.488484132205916e-07, "loss": 0.0728, "step": 23020 }, { "epoch": 2.8417870391078686, "grad_norm": 0.1787109375, "learning_rate": 7.486322948100416e-07, "loss": 0.0619, "step": 23030 }, { "epoch": 2.8430209690977657, "grad_norm": 0.302734375, "learning_rate": 7.484161146707971e-07, "loss": 0.082, "step": 23040 }, { "epoch": 2.8442548990876633, "grad_norm": 0.1416015625, "learning_rate": 7.481998728565293e-07, "loss": 0.0583, "step": 23050 }, { "epoch": 2.84548882907756, "grad_norm": 0.169921875, "learning_rate": 7.479835694209258e-07, "loss": 0.045, "step": 23060 }, { "epoch": 2.8467227590674575, "grad_norm": 0.2021484375, "learning_rate": 7.477672044176888e-07, "loss": 0.0941, "step": 23070 }, { "epoch": 2.8479566890573547, "grad_norm": 0.21484375, "learning_rate": 7.475507779005357e-07, "loss": 0.0628, "step": 23080 }, { "epoch": 2.849190619047252, "grad_norm": 0.33203125, "learning_rate": 7.473342899231994e-07, "loss": 0.0769, "step": 23090 }, { "epoch": 2.850424549037149, "grad_norm": 0.1875, "learning_rate": 7.471177405394283e-07, "loss": 0.0595, "step": 23100 }, { "epoch": 2.851658479027046, "grad_norm": 0.1669921875, "learning_rate": 7.469011298029854e-07, "loss": 0.0648, "step": 23110 }, { "epoch": 2.8528924090169436, "grad_norm": 0.236328125, "learning_rate": 7.466844577676496e-07, "loss": 0.0607, "step": 23120 }, { "epoch": 2.8541263390068408, "grad_norm": 0.166015625, "learning_rate": 7.464677244872144e-07, "loss": 0.0823, "step": 23130 }, { "epoch": 2.855360268996738, "grad_norm": 0.26171875, "learning_rate": 7.462509300154891e-07, "loss": 0.0769, "step": 23140 }, { "epoch": 2.856594198986635, "grad_norm": 0.2392578125, "learning_rate": 7.460340744062979e-07, "loss": 0.0794, "step": 23150 }, { "epoch": 2.857828128976532, "grad_norm": 0.28515625, "learning_rate": 7.458171577134802e-07, "loss": 0.0729, "step": 23160 }, { "epoch": 2.8590620589664293, "grad_norm": 0.251953125, "learning_rate": 7.456001799908903e-07, "loss": 0.0667, "step": 23170 }, { "epoch": 2.8602959889563264, "grad_norm": 0.205078125, "learning_rate": 7.453831412923983e-07, "loss": 0.0681, "step": 23180 }, { "epoch": 2.861529918946224, "grad_norm": 0.1845703125, "learning_rate": 7.451660416718889e-07, "loss": 0.0783, "step": 23190 }, { "epoch": 2.862763848936121, "grad_norm": 0.1455078125, "learning_rate": 7.449488811832621e-07, "loss": 0.0787, "step": 23200 }, { "epoch": 2.8639977789260183, "grad_norm": 0.2470703125, "learning_rate": 7.447316598804329e-07, "loss": 0.0658, "step": 23210 }, { "epoch": 2.8652317089159154, "grad_norm": 0.1748046875, "learning_rate": 7.445143778173316e-07, "loss": 0.0787, "step": 23220 }, { "epoch": 2.8664656389058125, "grad_norm": 0.236328125, "learning_rate": 7.442970350479034e-07, "loss": 0.0723, "step": 23230 }, { "epoch": 2.8676995688957096, "grad_norm": 0.1904296875, "learning_rate": 7.440796316261087e-07, "loss": 0.0685, "step": 23240 }, { "epoch": 2.868933498885607, "grad_norm": 0.1484375, "learning_rate": 7.438621676059229e-07, "loss": 0.063, "step": 23250 }, { "epoch": 2.8701674288755044, "grad_norm": 0.275390625, "learning_rate": 7.436446430413365e-07, "loss": 0.062, "step": 23260 }, { "epoch": 2.8714013588654015, "grad_norm": 0.21484375, "learning_rate": 7.434270579863548e-07, "loss": 0.0781, "step": 23270 }, { "epoch": 2.8726352888552986, "grad_norm": 0.349609375, "learning_rate": 7.432094124949985e-07, "loss": 0.0706, "step": 23280 }, { "epoch": 2.8738692188451957, "grad_norm": 0.2392578125, "learning_rate": 7.429917066213029e-07, "loss": 0.062, "step": 23290 }, { "epoch": 2.875103148835093, "grad_norm": 0.10400390625, "learning_rate": 7.427739404193188e-07, "loss": 0.064, "step": 23300 }, { "epoch": 2.87633707882499, "grad_norm": 0.291015625, "learning_rate": 7.425561139431116e-07, "loss": 0.0728, "step": 23310 }, { "epoch": 2.877571008814887, "grad_norm": 0.1943359375, "learning_rate": 7.423382272467615e-07, "loss": 0.0748, "step": 23320 }, { "epoch": 2.8788049388047847, "grad_norm": 0.2490234375, "learning_rate": 7.42120280384364e-07, "loss": 0.0645, "step": 23330 }, { "epoch": 2.880038868794682, "grad_norm": 0.287109375, "learning_rate": 7.419022734100296e-07, "loss": 0.0905, "step": 23340 }, { "epoch": 2.881272798784579, "grad_norm": 0.32421875, "learning_rate": 7.416842063778834e-07, "loss": 0.0627, "step": 23350 }, { "epoch": 2.882506728774476, "grad_norm": 0.26953125, "learning_rate": 7.414660793420655e-07, "loss": 0.0675, "step": 23360 }, { "epoch": 2.8837406587643732, "grad_norm": 0.2041015625, "learning_rate": 7.412478923567312e-07, "loss": 0.0638, "step": 23370 }, { "epoch": 2.884974588754271, "grad_norm": 0.21484375, "learning_rate": 7.4102964547605e-07, "loss": 0.0545, "step": 23380 }, { "epoch": 2.8862085187441675, "grad_norm": 0.328125, "learning_rate": 7.408113387542074e-07, "loss": 0.0777, "step": 23390 }, { "epoch": 2.887442448734065, "grad_norm": 0.4921875, "learning_rate": 7.405929722454025e-07, "loss": 0.0805, "step": 23400 }, { "epoch": 2.888676378723962, "grad_norm": 0.2451171875, "learning_rate": 7.4037454600385e-07, "loss": 0.0479, "step": 23410 }, { "epoch": 2.8899103087138593, "grad_norm": 0.255859375, "learning_rate": 7.401560600837793e-07, "loss": 0.0686, "step": 23420 }, { "epoch": 2.8911442387037565, "grad_norm": 0.166015625, "learning_rate": 7.399375145394344e-07, "loss": 0.0692, "step": 23430 }, { "epoch": 2.8923781686936536, "grad_norm": 0.291015625, "learning_rate": 7.397189094250745e-07, "loss": 0.0646, "step": 23440 }, { "epoch": 2.893612098683551, "grad_norm": 0.208984375, "learning_rate": 7.395002447949731e-07, "loss": 0.0789, "step": 23450 }, { "epoch": 2.894846028673448, "grad_norm": 0.1591796875, "learning_rate": 7.39281520703419e-07, "loss": 0.0801, "step": 23460 }, { "epoch": 2.8960799586633454, "grad_norm": 0.25390625, "learning_rate": 7.390627372047152e-07, "loss": 0.0766, "step": 23470 }, { "epoch": 2.8973138886532426, "grad_norm": 0.29296875, "learning_rate": 7.3884389435318e-07, "loss": 0.0674, "step": 23480 }, { "epoch": 2.8985478186431397, "grad_norm": 0.1611328125, "learning_rate": 7.386249922031461e-07, "loss": 0.0671, "step": 23490 }, { "epoch": 2.899781748633037, "grad_norm": 0.2236328125, "learning_rate": 7.38406030808961e-07, "loss": 0.0728, "step": 23500 }, { "epoch": 2.901015678622934, "grad_norm": 0.169921875, "learning_rate": 7.381870102249869e-07, "loss": 0.0484, "step": 23510 }, { "epoch": 2.9022496086128315, "grad_norm": 0.158203125, "learning_rate": 7.379679305056006e-07, "loss": 0.0644, "step": 23520 }, { "epoch": 2.9034835386027287, "grad_norm": 0.193359375, "learning_rate": 7.377487917051938e-07, "loss": 0.0728, "step": 23530 }, { "epoch": 2.904717468592626, "grad_norm": 0.23828125, "learning_rate": 7.375295938781728e-07, "loss": 0.0741, "step": 23540 }, { "epoch": 2.905951398582523, "grad_norm": 0.169921875, "learning_rate": 7.373103370789584e-07, "loss": 0.0666, "step": 23550 }, { "epoch": 2.90718532857242, "grad_norm": 0.193359375, "learning_rate": 7.370910213619862e-07, "loss": 0.0594, "step": 23560 }, { "epoch": 2.908419258562317, "grad_norm": 0.1337890625, "learning_rate": 7.368716467817063e-07, "loss": 0.0818, "step": 23570 }, { "epoch": 2.9096531885522143, "grad_norm": 0.1552734375, "learning_rate": 7.366522133925836e-07, "loss": 0.0701, "step": 23580 }, { "epoch": 2.910887118542112, "grad_norm": 0.1591796875, "learning_rate": 7.364327212490976e-07, "loss": 0.072, "step": 23590 }, { "epoch": 2.912121048532009, "grad_norm": 0.357421875, "learning_rate": 7.362131704057421e-07, "loss": 0.0677, "step": 23600 }, { "epoch": 2.913354978521906, "grad_norm": 0.15625, "learning_rate": 7.359935609170257e-07, "loss": 0.0646, "step": 23610 }, { "epoch": 2.9145889085118033, "grad_norm": 0.150390625, "learning_rate": 7.357738928374715e-07, "loss": 0.0694, "step": 23620 }, { "epoch": 2.9158228385017004, "grad_norm": 0.21875, "learning_rate": 7.355541662216171e-07, "loss": 0.0761, "step": 23630 }, { "epoch": 2.9170567684915976, "grad_norm": 0.3984375, "learning_rate": 7.353343811240149e-07, "loss": 0.0635, "step": 23640 }, { "epoch": 2.9182906984814947, "grad_norm": 0.23828125, "learning_rate": 7.351145375992314e-07, "loss": 0.09, "step": 23650 }, { "epoch": 2.9195246284713923, "grad_norm": 0.294921875, "learning_rate": 7.348946357018479e-07, "loss": 0.0624, "step": 23660 }, { "epoch": 2.9207585584612894, "grad_norm": 0.251953125, "learning_rate": 7.346746754864601e-07, "loss": 0.0616, "step": 23670 }, { "epoch": 2.9219924884511865, "grad_norm": 0.1953125, "learning_rate": 7.344546570076782e-07, "loss": 0.0942, "step": 23680 }, { "epoch": 2.9232264184410837, "grad_norm": 0.2216796875, "learning_rate": 7.342345803201269e-07, "loss": 0.0824, "step": 23690 }, { "epoch": 2.924460348430981, "grad_norm": 0.3359375, "learning_rate": 7.340144454784452e-07, "loss": 0.0852, "step": 23700 }, { "epoch": 2.9256942784208784, "grad_norm": 0.267578125, "learning_rate": 7.337942525372867e-07, "loss": 0.063, "step": 23710 }, { "epoch": 2.926928208410775, "grad_norm": 0.248046875, "learning_rate": 7.335740015513195e-07, "loss": 0.052, "step": 23720 }, { "epoch": 2.9281621384006726, "grad_norm": 0.15625, "learning_rate": 7.333536925752259e-07, "loss": 0.0674, "step": 23730 }, { "epoch": 2.9293960683905698, "grad_norm": 0.2119140625, "learning_rate": 7.331333256637026e-07, "loss": 0.0564, "step": 23740 }, { "epoch": 2.930629998380467, "grad_norm": 0.2236328125, "learning_rate": 7.329129008714607e-07, "loss": 0.0697, "step": 23750 }, { "epoch": 2.931863928370364, "grad_norm": 0.1572265625, "learning_rate": 7.326924182532259e-07, "loss": 0.057, "step": 23760 }, { "epoch": 2.933097858360261, "grad_norm": 0.248046875, "learning_rate": 7.324718778637379e-07, "loss": 0.0658, "step": 23770 }, { "epoch": 2.9343317883501587, "grad_norm": 0.310546875, "learning_rate": 7.322512797577513e-07, "loss": 0.0631, "step": 23780 }, { "epoch": 2.9355657183400554, "grad_norm": 0.1611328125, "learning_rate": 7.320306239900342e-07, "loss": 0.0541, "step": 23790 }, { "epoch": 2.936799648329953, "grad_norm": 0.244140625, "learning_rate": 7.318099106153698e-07, "loss": 0.0637, "step": 23800 }, { "epoch": 2.93803357831985, "grad_norm": 0.265625, "learning_rate": 7.315891396885552e-07, "loss": 0.0564, "step": 23810 }, { "epoch": 2.9392675083097473, "grad_norm": 0.2578125, "learning_rate": 7.313683112644018e-07, "loss": 0.0593, "step": 23820 }, { "epoch": 2.9405014382996444, "grad_norm": 0.177734375, "learning_rate": 7.311474253977354e-07, "loss": 0.0832, "step": 23830 }, { "epoch": 2.9417353682895415, "grad_norm": 0.1826171875, "learning_rate": 7.30926482143396e-07, "loss": 0.0535, "step": 23840 }, { "epoch": 2.942969298279439, "grad_norm": 0.140625, "learning_rate": 7.307054815562379e-07, "loss": 0.0722, "step": 23850 }, { "epoch": 2.9442032282693362, "grad_norm": 0.19140625, "learning_rate": 7.304844236911294e-07, "loss": 0.0613, "step": 23860 }, { "epoch": 2.9454371582592334, "grad_norm": 0.1962890625, "learning_rate": 7.302633086029534e-07, "loss": 0.0786, "step": 23870 }, { "epoch": 2.9466710882491305, "grad_norm": 0.2275390625, "learning_rate": 7.300421363466067e-07, "loss": 0.0814, "step": 23880 }, { "epoch": 2.9479050182390276, "grad_norm": 0.166015625, "learning_rate": 7.298209069770005e-07, "loss": 0.0784, "step": 23890 }, { "epoch": 2.9491389482289248, "grad_norm": 0.26171875, "learning_rate": 7.2959962054906e-07, "loss": 0.0635, "step": 23900 }, { "epoch": 2.950372878218822, "grad_norm": 0.177734375, "learning_rate": 7.293782771177248e-07, "loss": 0.0675, "step": 23910 }, { "epoch": 2.9516068082087195, "grad_norm": 0.2080078125, "learning_rate": 7.291568767379483e-07, "loss": 0.0831, "step": 23920 }, { "epoch": 2.9528407381986166, "grad_norm": 0.2333984375, "learning_rate": 7.289354194646985e-07, "loss": 0.068, "step": 23930 }, { "epoch": 2.9540746681885137, "grad_norm": 0.265625, "learning_rate": 7.287139053529571e-07, "loss": 0.063, "step": 23940 }, { "epoch": 2.955308598178411, "grad_norm": 0.251953125, "learning_rate": 7.2849233445772e-07, "loss": 0.0822, "step": 23950 }, { "epoch": 2.956542528168308, "grad_norm": 0.1708984375, "learning_rate": 7.282707068339973e-07, "loss": 0.0703, "step": 23960 }, { "epoch": 2.957776458158205, "grad_norm": 0.2314453125, "learning_rate": 7.280490225368134e-07, "loss": 0.0595, "step": 23970 }, { "epoch": 2.9590103881481022, "grad_norm": 0.21875, "learning_rate": 7.278272816212064e-07, "loss": 0.0813, "step": 23980 }, { "epoch": 2.960244318138, "grad_norm": 0.189453125, "learning_rate": 7.276054841422283e-07, "loss": 0.0656, "step": 23990 }, { "epoch": 2.961478248127897, "grad_norm": 0.2138671875, "learning_rate": 7.27383630154946e-07, "loss": 0.0757, "step": 24000 }, { "epoch": 2.961478248127897, "eval_exact_match": 0.6533742331288344, "eval_has_answer_correct": 0.5787139689578714, "eval_no_answer_correct": 0.8208955223880597, "step": 24000 }, { "epoch": 2.962712178117794, "grad_norm": 0.2080078125, "learning_rate": 7.271617197144395e-07, "loss": 0.0614, "step": 24010 }, { "epoch": 2.963946108107691, "grad_norm": 0.5234375, "learning_rate": 7.269397528758032e-07, "loss": 0.0792, "step": 24020 }, { "epoch": 2.9651800380975883, "grad_norm": 0.3125, "learning_rate": 7.267177296941455e-07, "loss": 0.0661, "step": 24030 }, { "epoch": 2.9664139680874855, "grad_norm": 0.1630859375, "learning_rate": 7.264956502245888e-07, "loss": 0.0709, "step": 24040 }, { "epoch": 2.9676478980773826, "grad_norm": 0.373046875, "learning_rate": 7.262735145222695e-07, "loss": 0.0641, "step": 24050 }, { "epoch": 2.96888182806728, "grad_norm": 0.173828125, "learning_rate": 7.260513226423377e-07, "loss": 0.0672, "step": 24060 }, { "epoch": 2.9701157580571773, "grad_norm": 0.1611328125, "learning_rate": 7.258290746399579e-07, "loss": 0.0526, "step": 24070 }, { "epoch": 2.9713496880470744, "grad_norm": 0.734375, "learning_rate": 7.256067705703082e-07, "loss": 0.0665, "step": 24080 }, { "epoch": 2.9725836180369716, "grad_norm": 0.16015625, "learning_rate": 7.253844104885805e-07, "loss": 0.0613, "step": 24090 }, { "epoch": 2.9738175480268687, "grad_norm": 0.27734375, "learning_rate": 7.251619944499811e-07, "loss": 0.0662, "step": 24100 }, { "epoch": 2.9750514780167663, "grad_norm": 0.2373046875, "learning_rate": 7.249395225097299e-07, "loss": 0.0618, "step": 24110 }, { "epoch": 2.976285408006663, "grad_norm": 0.1884765625, "learning_rate": 7.247169947230605e-07, "loss": 0.069, "step": 24120 }, { "epoch": 2.9775193379965605, "grad_norm": 0.244140625, "learning_rate": 7.244944111452207e-07, "loss": 0.0777, "step": 24130 }, { "epoch": 2.9787532679864577, "grad_norm": 0.2333984375, "learning_rate": 7.242717718314719e-07, "loss": 0.0602, "step": 24140 }, { "epoch": 2.979987197976355, "grad_norm": 0.1669921875, "learning_rate": 7.240490768370895e-07, "loss": 0.0657, "step": 24150 }, { "epoch": 2.981221127966252, "grad_norm": 0.263671875, "learning_rate": 7.238263262173626e-07, "loss": 0.0569, "step": 24160 }, { "epoch": 2.982455057956149, "grad_norm": 0.205078125, "learning_rate": 7.236035200275943e-07, "loss": 0.0703, "step": 24170 }, { "epoch": 2.9836889879460466, "grad_norm": 0.322265625, "learning_rate": 7.233806583231011e-07, "loss": 0.0796, "step": 24180 }, { "epoch": 2.9849229179359438, "grad_norm": 0.21875, "learning_rate": 7.23157741159214e-07, "loss": 0.0756, "step": 24190 }, { "epoch": 2.986156847925841, "grad_norm": 0.228515625, "learning_rate": 7.229347685912767e-07, "loss": 0.0636, "step": 24200 }, { "epoch": 2.987390777915738, "grad_norm": 0.1845703125, "learning_rate": 7.227117406746478e-07, "loss": 0.0637, "step": 24210 }, { "epoch": 2.988624707905635, "grad_norm": 0.158203125, "learning_rate": 7.224886574646989e-07, "loss": 0.0581, "step": 24220 }, { "epoch": 2.9898586378955323, "grad_norm": 0.1982421875, "learning_rate": 7.222655190168156e-07, "loss": 0.0691, "step": 24230 }, { "epoch": 2.9910925678854294, "grad_norm": 0.23828125, "learning_rate": 7.220423253863973e-07, "loss": 0.0758, "step": 24240 }, { "epoch": 2.992326497875327, "grad_norm": 0.234375, "learning_rate": 7.218190766288568e-07, "loss": 0.0616, "step": 24250 }, { "epoch": 2.993560427865224, "grad_norm": 0.1923828125, "learning_rate": 7.215957727996207e-07, "loss": 0.052, "step": 24260 }, { "epoch": 2.9947943578551213, "grad_norm": 0.142578125, "learning_rate": 7.213724139541293e-07, "loss": 0.0766, "step": 24270 }, { "epoch": 2.9960282878450184, "grad_norm": 0.17578125, "learning_rate": 7.211490001478367e-07, "loss": 0.0722, "step": 24280 }, { "epoch": 2.9972622178349155, "grad_norm": 0.2451171875, "learning_rate": 7.209255314362105e-07, "loss": 0.0707, "step": 24290 }, { "epoch": 2.9984961478248127, "grad_norm": 0.39453125, "learning_rate": 7.207020078747319e-07, "loss": 0.0587, "step": 24300 }, { "epoch": 2.99973007781471, "grad_norm": 0.1494140625, "learning_rate": 7.204784295188958e-07, "loss": 0.0606, "step": 24310 }, { "epoch": 3.0009640078046074, "grad_norm": 0.3125, "learning_rate": 7.202547964242108e-07, "loss": 0.0771, "step": 24320 }, { "epoch": 3.0021979377945045, "grad_norm": 0.2265625, "learning_rate": 7.200311086461988e-07, "loss": 0.0644, "step": 24330 }, { "epoch": 3.0034318677844016, "grad_norm": 0.1396484375, "learning_rate": 7.198073662403954e-07, "loss": 0.0619, "step": 24340 }, { "epoch": 3.0046657977742988, "grad_norm": 0.51171875, "learning_rate": 7.1958356926235e-07, "loss": 0.0725, "step": 24350 }, { "epoch": 3.005899727764196, "grad_norm": 0.271484375, "learning_rate": 7.193597177676251e-07, "loss": 0.0543, "step": 24360 }, { "epoch": 3.007133657754093, "grad_norm": 0.1787109375, "learning_rate": 7.191358118117973e-07, "loss": 0.0567, "step": 24370 }, { "epoch": 3.00836758774399, "grad_norm": 0.18359375, "learning_rate": 7.189118514504564e-07, "loss": 0.0728, "step": 24380 }, { "epoch": 3.0096015177338877, "grad_norm": 0.158203125, "learning_rate": 7.186878367392054e-07, "loss": 0.0597, "step": 24390 }, { "epoch": 3.010835447723785, "grad_norm": 0.25390625, "learning_rate": 7.184637677336612e-07, "loss": 0.0736, "step": 24400 }, { "epoch": 3.012069377713682, "grad_norm": 0.17578125, "learning_rate": 7.182396444894543e-07, "loss": 0.0538, "step": 24410 }, { "epoch": 3.013303307703579, "grad_norm": 0.205078125, "learning_rate": 7.180154670622284e-07, "loss": 0.0614, "step": 24420 }, { "epoch": 3.0145372376934763, "grad_norm": 0.177734375, "learning_rate": 7.177912355076405e-07, "loss": 0.0791, "step": 24430 }, { "epoch": 3.0157711676833734, "grad_norm": 0.1943359375, "learning_rate": 7.175669498813616e-07, "loss": 0.0627, "step": 24440 }, { "epoch": 3.017005097673271, "grad_norm": 0.1611328125, "learning_rate": 7.173426102390755e-07, "loss": 0.071, "step": 24450 }, { "epoch": 3.018239027663168, "grad_norm": 0.2294921875, "learning_rate": 7.171182166364797e-07, "loss": 0.0628, "step": 24460 }, { "epoch": 3.0194729576530652, "grad_norm": 0.13671875, "learning_rate": 7.168937691292852e-07, "loss": 0.0562, "step": 24470 }, { "epoch": 3.0207068876429624, "grad_norm": 0.287109375, "learning_rate": 7.166692677732162e-07, "loss": 0.0829, "step": 24480 }, { "epoch": 3.0219408176328595, "grad_norm": 0.275390625, "learning_rate": 7.164447126240103e-07, "loss": 0.0752, "step": 24490 }, { "epoch": 3.0231747476227566, "grad_norm": 0.27734375, "learning_rate": 7.162201037374185e-07, "loss": 0.0704, "step": 24500 }, { "epoch": 3.0244086776126538, "grad_norm": 0.31640625, "learning_rate": 7.159954411692051e-07, "loss": 0.0648, "step": 24510 }, { "epoch": 3.0256426076025513, "grad_norm": 0.271484375, "learning_rate": 7.157707249751479e-07, "loss": 0.055, "step": 24520 }, { "epoch": 3.0268765375924485, "grad_norm": 0.78125, "learning_rate": 7.155459552110379e-07, "loss": 0.063, "step": 24530 }, { "epoch": 3.0281104675823456, "grad_norm": 0.4609375, "learning_rate": 7.15321131932679e-07, "loss": 0.0792, "step": 24540 }, { "epoch": 3.0293443975722427, "grad_norm": 0.2412109375, "learning_rate": 7.150962551958892e-07, "loss": 0.0693, "step": 24550 }, { "epoch": 3.03057832756214, "grad_norm": 0.2158203125, "learning_rate": 7.148713250564992e-07, "loss": 0.0608, "step": 24560 }, { "epoch": 3.031812257552037, "grad_norm": 0.1953125, "learning_rate": 7.14646341570353e-07, "loss": 0.069, "step": 24570 }, { "epoch": 3.0330461875419346, "grad_norm": 0.1533203125, "learning_rate": 7.144213047933079e-07, "loss": 0.0682, "step": 24580 }, { "epoch": 3.0342801175318317, "grad_norm": 0.19921875, "learning_rate": 7.141962147812345e-07, "loss": 0.0705, "step": 24590 }, { "epoch": 3.035514047521729, "grad_norm": 0.2197265625, "learning_rate": 7.139710715900167e-07, "loss": 0.0814, "step": 24600 }, { "epoch": 3.036747977511626, "grad_norm": 0.2353515625, "learning_rate": 7.137458752755512e-07, "loss": 0.0449, "step": 24610 }, { "epoch": 3.037981907501523, "grad_norm": 0.251953125, "learning_rate": 7.135206258937485e-07, "loss": 0.0778, "step": 24620 }, { "epoch": 3.03921583749142, "grad_norm": 0.408203125, "learning_rate": 7.132953235005319e-07, "loss": 0.0765, "step": 24630 }, { "epoch": 3.0404497674813173, "grad_norm": 0.2578125, "learning_rate": 7.130699681518378e-07, "loss": 0.0625, "step": 24640 }, { "epoch": 3.041683697471215, "grad_norm": 0.1259765625, "learning_rate": 7.128445599036157e-07, "loss": 0.0508, "step": 24650 }, { "epoch": 3.042917627461112, "grad_norm": 0.1552734375, "learning_rate": 7.126190988118287e-07, "loss": 0.0649, "step": 24660 }, { "epoch": 3.044151557451009, "grad_norm": 0.35546875, "learning_rate": 7.123935849324526e-07, "loss": 0.0799, "step": 24670 }, { "epoch": 3.0453854874409063, "grad_norm": 0.142578125, "learning_rate": 7.121680183214764e-07, "loss": 0.0755, "step": 24680 }, { "epoch": 3.0466194174308034, "grad_norm": 0.271484375, "learning_rate": 7.119423990349022e-07, "loss": 0.0683, "step": 24690 }, { "epoch": 3.0478533474207006, "grad_norm": 0.228515625, "learning_rate": 7.117167271287452e-07, "loss": 0.0711, "step": 24700 }, { "epoch": 3.0490872774105977, "grad_norm": 0.263671875, "learning_rate": 7.114910026590335e-07, "loss": 0.0716, "step": 24710 }, { "epoch": 3.0503212074004953, "grad_norm": 0.2099609375, "learning_rate": 7.112652256818086e-07, "loss": 0.0577, "step": 24720 }, { "epoch": 3.0515551373903924, "grad_norm": 0.1162109375, "learning_rate": 7.110393962531248e-07, "loss": 0.064, "step": 24730 }, { "epoch": 3.0527890673802895, "grad_norm": 0.322265625, "learning_rate": 7.108135144290494e-07, "loss": 0.0608, "step": 24740 }, { "epoch": 3.0540229973701867, "grad_norm": 0.154296875, "learning_rate": 7.105875802656629e-07, "loss": 0.068, "step": 24750 }, { "epoch": 3.055256927360084, "grad_norm": 0.125, "learning_rate": 7.103615938190584e-07, "loss": 0.0754, "step": 24760 }, { "epoch": 3.056490857349981, "grad_norm": 0.201171875, "learning_rate": 7.101355551453426e-07, "loss": 0.0615, "step": 24770 }, { "epoch": 3.0577247873398785, "grad_norm": 0.26171875, "learning_rate": 7.099094643006344e-07, "loss": 0.091, "step": 24780 }, { "epoch": 3.0589587173297756, "grad_norm": 0.1611328125, "learning_rate": 7.096833213410664e-07, "loss": 0.06, "step": 24790 }, { "epoch": 3.0601926473196728, "grad_norm": 0.361328125, "learning_rate": 7.094571263227838e-07, "loss": 0.0748, "step": 24800 }, { "epoch": 3.06142657730957, "grad_norm": 0.1650390625, "learning_rate": 7.092308793019443e-07, "loss": 0.0707, "step": 24810 }, { "epoch": 3.062660507299467, "grad_norm": 0.318359375, "learning_rate": 7.090045803347194e-07, "loss": 0.0698, "step": 24820 }, { "epoch": 3.063894437289364, "grad_norm": 0.20703125, "learning_rate": 7.087782294772926e-07, "loss": 0.0539, "step": 24830 }, { "epoch": 3.0651283672792613, "grad_norm": 0.279296875, "learning_rate": 7.085518267858612e-07, "loss": 0.0713, "step": 24840 }, { "epoch": 3.066362297269159, "grad_norm": 0.310546875, "learning_rate": 7.083253723166347e-07, "loss": 0.0744, "step": 24850 }, { "epoch": 3.067596227259056, "grad_norm": 0.2021484375, "learning_rate": 7.080988661258355e-07, "loss": 0.0492, "step": 24860 }, { "epoch": 3.068830157248953, "grad_norm": 0.205078125, "learning_rate": 7.078723082696992e-07, "loss": 0.0704, "step": 24870 }, { "epoch": 3.0700640872388503, "grad_norm": 0.193359375, "learning_rate": 7.076456988044736e-07, "loss": 0.0754, "step": 24880 }, { "epoch": 3.0712980172287474, "grad_norm": 0.2490234375, "learning_rate": 7.074190377864202e-07, "loss": 0.0733, "step": 24890 }, { "epoch": 3.0725319472186445, "grad_norm": 0.294921875, "learning_rate": 7.071923252718126e-07, "loss": 0.0702, "step": 24900 }, { "epoch": 3.073765877208542, "grad_norm": 0.1484375, "learning_rate": 7.069655613169373e-07, "loss": 0.0617, "step": 24910 }, { "epoch": 3.0749998071984392, "grad_norm": 0.224609375, "learning_rate": 7.067387459780939e-07, "loss": 0.0759, "step": 24920 }, { "epoch": 3.0762337371883364, "grad_norm": 0.328125, "learning_rate": 7.065118793115944e-07, "loss": 0.0686, "step": 24930 }, { "epoch": 3.0774676671782335, "grad_norm": 0.15234375, "learning_rate": 7.062849613737637e-07, "loss": 0.0626, "step": 24940 }, { "epoch": 3.0787015971681306, "grad_norm": 0.1943359375, "learning_rate": 7.060579922209395e-07, "loss": 0.078, "step": 24950 }, { "epoch": 3.0799355271580278, "grad_norm": 0.416015625, "learning_rate": 7.05830971909472e-07, "loss": 0.0578, "step": 24960 }, { "epoch": 3.081169457147925, "grad_norm": 0.314453125, "learning_rate": 7.056039004957242e-07, "loss": 0.0758, "step": 24970 }, { "epoch": 3.0824033871378225, "grad_norm": 0.1669921875, "learning_rate": 7.053767780360718e-07, "loss": 0.0712, "step": 24980 }, { "epoch": 3.0836373171277196, "grad_norm": 0.2001953125, "learning_rate": 7.051496045869034e-07, "loss": 0.0687, "step": 24990 }, { "epoch": 3.0848712471176167, "grad_norm": 0.140625, "learning_rate": 7.049223802046198e-07, "loss": 0.0668, "step": 25000 }, { "epoch": 3.0848712471176167, "eval_exact_match": 0.6549079754601227, "eval_has_answer_correct": 0.5742793791574279, "eval_no_answer_correct": 0.835820895522388, "step": 25000 } ], "logging_steps": 10, "max_steps": 65000, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 1000, "total_flos": 6.931430029085e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }