{ "best_metric": 1.0906689167022705, "best_model_checkpoint": "./results/checkpoint-2699", "epoch": 4.0, "eval_steps": 500, "global_step": 10796, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.6147003173828125, "learning_rate": 4.672437244961049e-05, "loss": 0.327, "step": 2699 }, { "epoch": 1.0, "eval_accuracy": 0.7347354138398915, "eval_conf_mat": [ [ 6123, 717 ], [ 2802, 3624 ] ], "eval_f1": 0.673168013374199, "eval_loss": 1.0906689167022705, "eval_precision": 0.8348306841741534, "eval_recall": 0.5639589169000934, "eval_runtime": 50.9369, "eval_samples_per_second": 260.44, "eval_steps_per_second": 8.147, "step": 2699 }, { "epoch": 2.0, "grad_norm": 4.085843563079834, "learning_rate": 4.338691727463831e-05, "loss": 0.14, "step": 5398 }, { "epoch": 2.0, "eval_accuracy": 0.7100105532941354, "eval_conf_mat": [ [ 6405, 435 ], [ 3412, 3014 ] ], "eval_f1": 0.6104303797468354, "eval_loss": 1.5652275085449219, "eval_precision": 0.8738764859379531, "eval_recall": 0.46903205726735137, "eval_runtime": 51.0274, "eval_samples_per_second": 259.978, "eval_steps_per_second": 8.133, "step": 5398 }, { "epoch": 3.0, "grad_norm": 7.21763277053833, "learning_rate": 4.004946209966614e-05, "loss": 0.0959, "step": 8097 }, { "epoch": 3.0, "eval_accuracy": 0.7397105382180009, "eval_conf_mat": [ [ 6322, 518 ], [ 2935, 3491 ] ], "eval_f1": 0.6690943938667945, "eval_loss": 1.292863130569458, "eval_precision": 0.8707907208780244, "eval_recall": 0.5432617491441021, "eval_runtime": 49.9344, "eval_samples_per_second": 265.669, "eval_steps_per_second": 8.311, "step": 8097 }, { "epoch": 4.0, "grad_norm": 18.75651741027832, "learning_rate": 3.6712006924693956e-05, "loss": 0.0676, "step": 10796 }, { "epoch": 4.0, "eval_accuracy": 0.7308156188753203, "eval_conf_mat": [ [ 6358, 482 ], [ 3089, 3337 ] ], "eval_f1": 0.6514397266959493, "eval_loss": 1.5299923419952393, "eval_precision": 0.8737889499869076, "eval_recall": 0.5192966075319017, "eval_runtime": 51.18, "eval_samples_per_second": 259.203, "eval_steps_per_second": 8.109, "step": 10796 } ], "logging_steps": 500, "max_steps": 40485, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 500, "total_flos": 4.192333290845046e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }