deberta-v3-large-hate / trainer_state.json
Elron's picture
Pushing deberta-v3-large-hate to hub
5b50f24
raw
history blame
No virus
21.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"global_step": 5630,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18,
"learning_rate": 6.937275985663082e-06,
"loss": 0.6362,
"step": 100
},
{
"epoch": 0.18,
"eval_accuracy": 0.7197197079658508,
"eval_loss": 0.5481122136116028,
"eval_runtime": 6.2072,
"eval_samples_per_second": 160.941,
"eval_steps_per_second": 10.149,
"step": 100
},
{
"epoch": 0.36,
"learning_rate": 6.811827956989247e-06,
"loss": 0.4264,
"step": 200
},
{
"epoch": 0.36,
"eval_accuracy": 0.8008008003234863,
"eval_loss": 0.4550396203994751,
"eval_runtime": 6.2195,
"eval_samples_per_second": 160.623,
"eval_steps_per_second": 10.129,
"step": 200
},
{
"epoch": 0.53,
"learning_rate": 6.6863799283154114e-06,
"loss": 0.4174,
"step": 300
},
{
"epoch": 0.53,
"eval_accuracy": 0.7867867946624756,
"eval_loss": 0.452409952878952,
"eval_runtime": 6.2183,
"eval_samples_per_second": 160.655,
"eval_steps_per_second": 10.131,
"step": 300
},
{
"epoch": 0.71,
"learning_rate": 6.560931899641577e-06,
"loss": 0.4197,
"step": 400
},
{
"epoch": 0.71,
"eval_accuracy": 0.7917917966842651,
"eval_loss": 0.4586125910282135,
"eval_runtime": 6.2441,
"eval_samples_per_second": 159.991,
"eval_steps_per_second": 10.09,
"step": 400
},
{
"epoch": 0.89,
"learning_rate": 6.435483870967742e-06,
"loss": 0.3819,
"step": 500
},
{
"epoch": 0.89,
"eval_accuracy": 0.8078078031539917,
"eval_loss": 0.4367608428001404,
"eval_runtime": 6.2213,
"eval_samples_per_second": 160.577,
"eval_steps_per_second": 10.126,
"step": 500
},
{
"epoch": 1.07,
"learning_rate": 6.310035842293907e-06,
"loss": 0.3558,
"step": 600
},
{
"epoch": 1.07,
"eval_accuracy": 0.8068068027496338,
"eval_loss": 0.4524727463722229,
"eval_runtime": 6.2342,
"eval_samples_per_second": 160.246,
"eval_steps_per_second": 10.106,
"step": 600
},
{
"epoch": 1.24,
"learning_rate": 6.184587813620071e-06,
"loss": 0.2982,
"step": 700
},
{
"epoch": 1.24,
"eval_accuracy": 0.792792797088623,
"eval_loss": 0.49992287158966064,
"eval_runtime": 6.206,
"eval_samples_per_second": 160.973,
"eval_steps_per_second": 10.151,
"step": 700
},
{
"epoch": 1.42,
"learning_rate": 6.059139784946236e-06,
"loss": 0.2885,
"step": 800
},
{
"epoch": 1.42,
"eval_accuracy": 0.8108108043670654,
"eval_loss": 0.5129059553146362,
"eval_runtime": 6.2199,
"eval_samples_per_second": 160.613,
"eval_steps_per_second": 10.129,
"step": 800
},
{
"epoch": 1.6,
"learning_rate": 5.933691756272401e-06,
"loss": 0.253,
"step": 900
},
{
"epoch": 1.6,
"eval_accuracy": 0.8208208084106445,
"eval_loss": 0.5872611403465271,
"eval_runtime": 6.2332,
"eval_samples_per_second": 160.27,
"eval_steps_per_second": 10.107,
"step": 900
},
{
"epoch": 1.78,
"learning_rate": 5.8082437275985665e-06,
"loss": 0.3354,
"step": 1000
},
{
"epoch": 1.78,
"eval_accuracy": 0.8178178071975708,
"eval_loss": 0.4244420826435089,
"eval_runtime": 6.2275,
"eval_samples_per_second": 160.417,
"eval_steps_per_second": 10.116,
"step": 1000
},
{
"epoch": 1.95,
"learning_rate": 5.682795698924731e-06,
"loss": 0.3083,
"step": 1100
},
{
"epoch": 1.95,
"eval_accuracy": 0.8058058023452759,
"eval_loss": 0.4852960705757141,
"eval_runtime": 6.2193,
"eval_samples_per_second": 160.63,
"eval_steps_per_second": 10.13,
"step": 1100
},
{
"epoch": 2.13,
"learning_rate": 5.557347670250896e-06,
"loss": 0.2301,
"step": 1200
},
{
"epoch": 2.13,
"eval_accuracy": 0.8018018007278442,
"eval_loss": 0.7208853960037231,
"eval_runtime": 6.2021,
"eval_samples_per_second": 161.075,
"eval_steps_per_second": 10.158,
"step": 1200
},
{
"epoch": 2.31,
"learning_rate": 5.431899641577061e-06,
"loss": 0.2167,
"step": 1300
},
{
"epoch": 2.31,
"eval_accuracy": 0.7777777910232544,
"eval_loss": 0.8089737892150879,
"eval_runtime": 6.2037,
"eval_samples_per_second": 161.034,
"eval_steps_per_second": 10.155,
"step": 1300
},
{
"epoch": 2.49,
"learning_rate": 5.306451612903225e-06,
"loss": 0.1863,
"step": 1400
},
{
"epoch": 2.49,
"eval_accuracy": 0.8038038015365601,
"eval_loss": 0.6812323927879333,
"eval_runtime": 6.2398,
"eval_samples_per_second": 160.102,
"eval_steps_per_second": 10.097,
"step": 1400
},
{
"epoch": 2.66,
"learning_rate": 5.181003584229391e-06,
"loss": 0.2181,
"step": 1500
},
{
"epoch": 2.66,
"eval_accuracy": 0.8138138055801392,
"eval_loss": 0.6958026885986328,
"eval_runtime": 6.2122,
"eval_samples_per_second": 160.812,
"eval_steps_per_second": 10.141,
"step": 1500
},
{
"epoch": 2.84,
"learning_rate": 5.0555555555555555e-06,
"loss": 0.2159,
"step": 1600
},
{
"epoch": 2.84,
"eval_accuracy": 0.8118118047714233,
"eval_loss": 0.6314735412597656,
"eval_runtime": 6.2306,
"eval_samples_per_second": 160.337,
"eval_steps_per_second": 10.111,
"step": 1600
},
{
"epoch": 3.02,
"learning_rate": 4.930107526881721e-06,
"loss": 0.1828,
"step": 1700
},
{
"epoch": 3.02,
"eval_accuracy": 0.8138138055801392,
"eval_loss": 0.7173236608505249,
"eval_runtime": 6.2107,
"eval_samples_per_second": 160.851,
"eval_steps_per_second": 10.144,
"step": 1700
},
{
"epoch": 3.2,
"learning_rate": 4.804659498207885e-06,
"loss": 0.1287,
"step": 1800
},
{
"epoch": 3.2,
"eval_accuracy": 0.8018018007278442,
"eval_loss": 0.9080932140350342,
"eval_runtime": 6.2027,
"eval_samples_per_second": 161.06,
"eval_steps_per_second": 10.157,
"step": 1800
},
{
"epoch": 3.37,
"learning_rate": 4.67921146953405e-06,
"loss": 0.1711,
"step": 1900
},
{
"epoch": 3.37,
"eval_accuracy": 0.8068068027496338,
"eval_loss": 0.8858422040939331,
"eval_runtime": 6.2188,
"eval_samples_per_second": 160.641,
"eval_steps_per_second": 10.131,
"step": 1900
},
{
"epoch": 3.55,
"learning_rate": 4.553763440860215e-06,
"loss": 0.1598,
"step": 2000
},
{
"epoch": 3.55,
"eval_accuracy": 0.8028028011322021,
"eval_loss": 0.7877860069274902,
"eval_runtime": 6.2062,
"eval_samples_per_second": 160.967,
"eval_steps_per_second": 10.151,
"step": 2000
},
{
"epoch": 3.73,
"learning_rate": 4.42831541218638e-06,
"loss": 0.1467,
"step": 2100
},
{
"epoch": 3.73,
"eval_accuracy": 0.7947947978973389,
"eval_loss": 0.900332510471344,
"eval_runtime": 6.2358,
"eval_samples_per_second": 160.203,
"eval_steps_per_second": 10.103,
"step": 2100
},
{
"epoch": 3.91,
"learning_rate": 4.302867383512545e-06,
"loss": 0.127,
"step": 2200
},
{
"epoch": 3.91,
"eval_accuracy": 0.804804801940918,
"eval_loss": 0.9066368341445923,
"eval_runtime": 6.2129,
"eval_samples_per_second": 160.795,
"eval_steps_per_second": 10.14,
"step": 2200
},
{
"epoch": 4.09,
"learning_rate": 4.17741935483871e-06,
"loss": 0.1134,
"step": 2300
},
{
"epoch": 4.09,
"eval_accuracy": 0.8118118047714233,
"eval_loss": 0.9645766615867615,
"eval_runtime": 6.2157,
"eval_samples_per_second": 160.721,
"eval_steps_per_second": 10.136,
"step": 2300
},
{
"epoch": 4.26,
"learning_rate": 4.051971326164874e-06,
"loss": 0.1017,
"step": 2400
},
{
"epoch": 4.26,
"eval_accuracy": 0.804804801940918,
"eval_loss": 0.9778422713279724,
"eval_runtime": 6.2303,
"eval_samples_per_second": 160.346,
"eval_steps_per_second": 10.112,
"step": 2400
},
{
"epoch": 4.44,
"learning_rate": 3.926523297491039e-06,
"loss": 0.085,
"step": 2500
},
{
"epoch": 4.44,
"eval_accuracy": 0.8088088035583496,
"eval_loss": 1.0528582334518433,
"eval_runtime": 6.238,
"eval_samples_per_second": 160.149,
"eval_steps_per_second": 10.099,
"step": 2500
},
{
"epoch": 4.62,
"learning_rate": 3.801075268817204e-06,
"loss": 0.0996,
"step": 2600
},
{
"epoch": 4.62,
"eval_accuracy": 0.8058058023452759,
"eval_loss": 1.0082268714904785,
"eval_runtime": 6.2065,
"eval_samples_per_second": 160.961,
"eval_steps_per_second": 10.151,
"step": 2600
},
{
"epoch": 4.8,
"learning_rate": 3.6756272401433694e-06,
"loss": 0.1054,
"step": 2700
},
{
"epoch": 4.8,
"eval_accuracy": 0.8108108043670654,
"eval_loss": 0.9697705507278442,
"eval_runtime": 6.2348,
"eval_samples_per_second": 160.231,
"eval_steps_per_second": 10.105,
"step": 2700
},
{
"epoch": 4.97,
"learning_rate": 3.5501792114695336e-06,
"loss": 0.1375,
"step": 2800
},
{
"epoch": 4.97,
"eval_accuracy": 0.804804801940918,
"eval_loss": 0.9333746433258057,
"eval_runtime": 6.2109,
"eval_samples_per_second": 160.846,
"eval_steps_per_second": 10.143,
"step": 2800
},
{
"epoch": 5.15,
"learning_rate": 3.4247311827956988e-06,
"loss": 0.0487,
"step": 2900
},
{
"epoch": 5.15,
"eval_accuracy": 0.8108108043670654,
"eval_loss": 1.1273365020751953,
"eval_runtime": 6.2065,
"eval_samples_per_second": 160.961,
"eval_steps_per_second": 10.151,
"step": 2900
},
{
"epoch": 5.33,
"learning_rate": 3.299283154121864e-06,
"loss": 0.0611,
"step": 3000
},
{
"epoch": 5.33,
"eval_accuracy": 0.8058058023452759,
"eval_loss": 1.1528337001800537,
"eval_runtime": 6.2119,
"eval_samples_per_second": 160.821,
"eval_steps_per_second": 10.142,
"step": 3000
},
{
"epoch": 5.51,
"learning_rate": 3.1738351254480286e-06,
"loss": 0.0668,
"step": 3100
},
{
"epoch": 5.51,
"eval_accuracy": 0.8118118047714233,
"eval_loss": 1.0147671699523926,
"eval_runtime": 6.2218,
"eval_samples_per_second": 160.564,
"eval_steps_per_second": 10.126,
"step": 3100
},
{
"epoch": 5.68,
"learning_rate": 3.0483870967741937e-06,
"loss": 0.0582,
"step": 3200
},
{
"epoch": 5.68,
"eval_accuracy": 0.8108108043670654,
"eval_loss": 1.1332666873931885,
"eval_runtime": 6.2186,
"eval_samples_per_second": 160.648,
"eval_steps_per_second": 10.131,
"step": 3200
},
{
"epoch": 5.86,
"learning_rate": 2.9229390681003584e-06,
"loss": 0.0869,
"step": 3300
},
{
"epoch": 5.86,
"eval_accuracy": 0.8088088035583496,
"eval_loss": 1.060727596282959,
"eval_runtime": 6.1932,
"eval_samples_per_second": 161.305,
"eval_steps_per_second": 10.172,
"step": 3300
},
{
"epoch": 6.04,
"learning_rate": 2.797491039426523e-06,
"loss": 0.0623,
"step": 3400
},
{
"epoch": 6.04,
"eval_accuracy": 0.8068068027496338,
"eval_loss": 1.1880476474761963,
"eval_runtime": 6.2192,
"eval_samples_per_second": 160.631,
"eval_steps_per_second": 10.13,
"step": 3400
},
{
"epoch": 6.22,
"learning_rate": 2.6720430107526883e-06,
"loss": 0.0317,
"step": 3500
},
{
"epoch": 6.22,
"eval_accuracy": 0.8008008003234863,
"eval_loss": 1.2836244106292725,
"eval_runtime": 6.2079,
"eval_samples_per_second": 160.925,
"eval_steps_per_second": 10.148,
"step": 3500
},
{
"epoch": 6.39,
"learning_rate": 2.546594982078853e-06,
"loss": 0.0546,
"step": 3600
},
{
"epoch": 6.39,
"eval_accuracy": 0.8058058023452759,
"eval_loss": 1.2147704362869263,
"eval_runtime": 6.2243,
"eval_samples_per_second": 160.501,
"eval_steps_per_second": 10.122,
"step": 3600
},
{
"epoch": 6.57,
"learning_rate": 2.4211469534050177e-06,
"loss": 0.0486,
"step": 3700
},
{
"epoch": 6.57,
"eval_accuracy": 0.8008008003234863,
"eval_loss": 1.334807276725769,
"eval_runtime": 6.1963,
"eval_samples_per_second": 161.225,
"eval_steps_per_second": 10.167,
"step": 3700
},
{
"epoch": 6.75,
"learning_rate": 2.2956989247311828e-06,
"loss": 0.0332,
"step": 3800
},
{
"epoch": 6.75,
"eval_accuracy": 0.8018018007278442,
"eval_loss": 1.3734461069107056,
"eval_runtime": 6.3321,
"eval_samples_per_second": 157.768,
"eval_steps_per_second": 9.949,
"step": 3800
},
{
"epoch": 6.93,
"learning_rate": 2.1702508960573475e-06,
"loss": 0.051,
"step": 3900
},
{
"epoch": 6.93,
"eval_accuracy": 0.7977977991104126,
"eval_loss": 1.2966439723968506,
"eval_runtime": 6.2073,
"eval_samples_per_second": 160.94,
"eval_steps_per_second": 10.149,
"step": 3900
},
{
"epoch": 7.1,
"learning_rate": 2.044802867383512e-06,
"loss": 0.0217,
"step": 4000
},
{
"epoch": 7.1,
"eval_accuracy": 0.804804801940918,
"eval_loss": 1.385273814201355,
"eval_runtime": 6.2117,
"eval_samples_per_second": 160.826,
"eval_steps_per_second": 10.142,
"step": 4000
},
{
"epoch": 7.28,
"learning_rate": 1.9193548387096773e-06,
"loss": 0.0109,
"step": 4100
},
{
"epoch": 7.28,
"eval_accuracy": 0.8068068027496338,
"eval_loss": 1.480326533317566,
"eval_runtime": 6.2106,
"eval_samples_per_second": 160.854,
"eval_steps_per_second": 10.144,
"step": 4100
},
{
"epoch": 7.46,
"learning_rate": 1.793906810035842e-06,
"loss": 0.0345,
"step": 4200
},
{
"epoch": 7.46,
"eval_accuracy": 0.7997997999191284,
"eval_loss": 1.4906260967254639,
"eval_runtime": 6.2002,
"eval_samples_per_second": 161.124,
"eval_steps_per_second": 10.161,
"step": 4200
},
{
"epoch": 7.64,
"learning_rate": 1.6684587813620071e-06,
"loss": 0.0365,
"step": 4300
},
{
"epoch": 7.64,
"eval_accuracy": 0.8028028011322021,
"eval_loss": 1.4347106218338013,
"eval_runtime": 6.2133,
"eval_samples_per_second": 160.783,
"eval_steps_per_second": 10.139,
"step": 4300
},
{
"epoch": 7.82,
"learning_rate": 1.543010752688172e-06,
"loss": 0.0265,
"step": 4400
},
{
"epoch": 7.82,
"eval_accuracy": 0.8128128051757812,
"eval_loss": 1.3976863622665405,
"eval_runtime": 6.224,
"eval_samples_per_second": 160.508,
"eval_steps_per_second": 10.122,
"step": 4400
},
{
"epoch": 7.99,
"learning_rate": 1.417562724014337e-06,
"loss": 0.0257,
"step": 4500
},
{
"epoch": 7.99,
"eval_accuracy": 0.8108108043670654,
"eval_loss": 1.370467185974121,
"eval_runtime": 6.2313,
"eval_samples_per_second": 160.321,
"eval_steps_per_second": 10.11,
"step": 4500
},
{
"epoch": 8.17,
"learning_rate": 1.2921146953405017e-06,
"loss": 0.0036,
"step": 4600
},
{
"epoch": 8.17,
"eval_accuracy": 0.8168168067932129,
"eval_loss": 1.4352822303771973,
"eval_runtime": 6.2072,
"eval_samples_per_second": 160.943,
"eval_steps_per_second": 10.15,
"step": 4600
},
{
"epoch": 8.35,
"learning_rate": 1.1666666666666666e-06,
"loss": 0.0269,
"step": 4700
},
{
"epoch": 8.35,
"eval_accuracy": 0.8068068027496338,
"eval_loss": 1.4826140403747559,
"eval_runtime": 6.2178,
"eval_samples_per_second": 160.669,
"eval_steps_per_second": 10.132,
"step": 4700
},
{
"epoch": 8.53,
"learning_rate": 1.0412186379928315e-06,
"loss": 0.0231,
"step": 4800
},
{
"epoch": 8.53,
"eval_accuracy": 0.8118118047714233,
"eval_loss": 1.4810999631881714,
"eval_runtime": 6.3061,
"eval_samples_per_second": 158.417,
"eval_steps_per_second": 9.99,
"step": 4800
},
{
"epoch": 8.7,
"learning_rate": 9.157706093189965e-07,
"loss": 0.0204,
"step": 4900
},
{
"epoch": 8.7,
"eval_accuracy": 0.8028028011322021,
"eval_loss": 1.5245323181152344,
"eval_runtime": 6.2057,
"eval_samples_per_second": 160.982,
"eval_steps_per_second": 10.152,
"step": 4900
},
{
"epoch": 8.88,
"learning_rate": 7.903225806451612e-07,
"loss": 0.0263,
"step": 5000
},
{
"epoch": 8.88,
"eval_accuracy": 0.8018018007278442,
"eval_loss": 1.5123308897018433,
"eval_runtime": 6.2053,
"eval_samples_per_second": 160.991,
"eval_steps_per_second": 10.153,
"step": 5000
},
{
"epoch": 9.06,
"learning_rate": 6.648745519713261e-07,
"loss": 0.0138,
"step": 5100
},
{
"epoch": 9.06,
"eval_accuracy": 0.8028028011322021,
"eval_loss": 1.51128089427948,
"eval_runtime": 6.2898,
"eval_samples_per_second": 158.83,
"eval_steps_per_second": 10.016,
"step": 5100
},
{
"epoch": 9.24,
"learning_rate": 5.39426523297491e-07,
"loss": 0.0089,
"step": 5200
},
{
"epoch": 9.24,
"eval_accuracy": 0.7977977991104126,
"eval_loss": 1.5846397876739502,
"eval_runtime": 6.2124,
"eval_samples_per_second": 160.808,
"eval_steps_per_second": 10.141,
"step": 5200
},
{
"epoch": 9.41,
"learning_rate": 4.1397849462365595e-07,
"loss": 0.029,
"step": 5300
},
{
"epoch": 9.41,
"eval_accuracy": 0.8008008003234863,
"eval_loss": 1.5361814498901367,
"eval_runtime": 6.2541,
"eval_samples_per_second": 159.736,
"eval_steps_per_second": 10.073,
"step": 5300
},
{
"epoch": 9.59,
"learning_rate": 2.8853046594982076e-07,
"loss": 0.0058,
"step": 5400
},
{
"epoch": 9.59,
"eval_accuracy": 0.8018018007278442,
"eval_loss": 1.5759379863739014,
"eval_runtime": 6.221,
"eval_samples_per_second": 160.585,
"eval_steps_per_second": 10.127,
"step": 5400
},
{
"epoch": 9.77,
"learning_rate": 1.6308243727598568e-07,
"loss": 0.0084,
"step": 5500
},
{
"epoch": 9.77,
"eval_accuracy": 0.8018018007278442,
"eval_loss": 1.5678976774215698,
"eval_runtime": 6.2009,
"eval_samples_per_second": 161.105,
"eval_steps_per_second": 10.16,
"step": 5500
},
{
"epoch": 9.95,
"learning_rate": 3.763440860215054e-08,
"loss": 0.0065,
"step": 5600
},
{
"epoch": 9.95,
"eval_accuracy": 0.8028028011322021,
"eval_loss": 1.568334937095642,
"eval_runtime": 6.2439,
"eval_samples_per_second": 159.996,
"eval_steps_per_second": 10.09,
"step": 5600
},
{
"epoch": 10.0,
"step": 5630,
"total_flos": 4.193719446528e+16,
"train_loss": 0.13640729715885533,
"train_runtime": 2182.3127,
"train_samples_per_second": 41.241,
"train_steps_per_second": 2.58
}
],
"max_steps": 5630,
"num_train_epochs": 10,
"total_flos": 4.193719446528e+16,
"trial_name": null,
"trial_params": null
}