barbaroo committed
Commit d4daa3f
1 Parent(s): 7ff0c39

Upload 8 files

adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9cd696e217478d0fd2598209d39fb1a98794e14440ff92a6e3eb44f0cdc3e1c9
+ oid sha256:529b74d808e7406775eb8e3196fe594af28c4ea71ec9f5ae5dba2ee6bc7b58b9
  size 23093424
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4ac3e086bd43e4d2c77c39b96a5efc4ba30f382377f9c12bbb6e02c6a8ca8b59
+ oid sha256:058caa8e6c0fc57c07b779f33b25636610ecf2cf367654b5843fc8dc63021f1f
  size 46298682
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5d46c56b2ff5f1d7e4350bd5a78a3c38071bcb0e540a8783b3d5dcf4123df2f0
+ oid sha256:c792e52c193f0f16c8ed09ef3cbc20ef01c97daab2c1902596f6e68ba633485b
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ab30705be11a7f0d47ae24808b51c36fd3d9958a81ef53b71ea1841770f6e963
+ oid sha256:d2053d27afacc6bc6dded4d38ad7a85f2c6d88921977580cd78f5c5f7e27ff9a
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 2.909609317779541,
- "best_model_checkpoint": "outputs-6_7/checkpoint-48000",
- "epoch": 2.061041169297357,
+ "best_metric": 2.8960378170013428,
+ "best_model_checkpoint": "outputs-6_7/checkpoint-64000",
+ "epoch": 2.7480548923964756,
  "eval_steps": 4000,
- "global_step": 48000,
+ "global_step": 64000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -775,6 +775,262 @@
  "eval_samples_per_second": 14.272,
  "eval_steps_per_second": 3.568,
  "step": 48000
+ },
+ {
+ "epoch": 2.082510348144204,
+ "grad_norm": 0.7904226183891296,
+ "learning_rate": 0.00028752952290977794,
+ "loss": 2.2271,
+ "step": 48500
+ },
+ {
+ "epoch": 2.1039795269910515,
+ "grad_norm": 0.8151206374168396,
+ "learning_rate": 0.0002874006956671104,
+ "loss": 2.2104,
+ "step": 49000
+ },
+ {
+ "epoch": 2.1254487058378992,
+ "grad_norm": 0.6942662596702576,
+ "learning_rate": 0.0002872718684244428,
+ "loss": 2.2197,
+ "step": 49500
+ },
+ {
+ "epoch": 2.1469178846847465,
+ "grad_norm": 0.6846303939819336,
+ "learning_rate": 0.0002871430411817752,
+ "loss": 2.2286,
+ "step": 50000
+ },
+ {
+ "epoch": 2.1683870635315943,
+ "grad_norm": 0.8961315155029297,
+ "learning_rate": 0.0002870142139391076,
+ "loss": 2.2329,
+ "step": 50500
+ },
+ {
+ "epoch": 2.1898562423784416,
+ "grad_norm": 0.5635807514190674,
+ "learning_rate": 0.0002868853866964401,
+ "loss": 2.2216,
+ "step": 51000
+ },
+ {
+ "epoch": 2.211325421225289,
+ "grad_norm": 0.6454870700836182,
+ "learning_rate": 0.00028675655945377247,
+ "loss": 2.2119,
+ "step": 51500
+ },
+ {
+ "epoch": 2.2327946000721366,
+ "grad_norm": 0.5770113468170166,
+ "learning_rate": 0.00028662773221110486,
+ "loss": 2.2216,
+ "step": 52000
+ },
+ {
+ "epoch": 2.2327946000721366,
+ "eval_loss": 2.9131178855895996,
+ "eval_runtime": 170.3262,
+ "eval_samples_per_second": 14.678,
+ "eval_steps_per_second": 3.669,
+ "step": 52000
+ },
+ {
+ "epoch": 2.254263778918984,
+ "grad_norm": 0.6589009165763855,
+ "learning_rate": 0.0002864989049684373,
+ "loss": 2.2237,
+ "step": 52500
+ },
+ {
+ "epoch": 2.2757329577658316,
+ "grad_norm": 0.6595714688301086,
+ "learning_rate": 0.0002863700777257697,
+ "loss": 2.2331,
+ "step": 53000
+ },
+ {
+ "epoch": 2.297202136612679,
+ "grad_norm": 0.5528385639190674,
+ "learning_rate": 0.0002862412504831021,
+ "loss": 2.2316,
+ "step": 53500
+ },
+ {
+ "epoch": 2.318671315459526,
+ "grad_norm": 0.6706179976463318,
+ "learning_rate": 0.00028611242324043455,
+ "loss": 2.2312,
+ "step": 54000
+ },
+ {
+ "epoch": 2.340140494306374,
+ "grad_norm": 0.6599323153495789,
+ "learning_rate": 0.000285983595997767,
+ "loss": 2.2228,
+ "step": 54500
+ },
+ {
+ "epoch": 2.3616096731532212,
+ "grad_norm": 0.7218915820121765,
+ "learning_rate": 0.0002858547687550994,
+ "loss": 2.2269,
+ "step": 55000
+ },
+ {
+ "epoch": 2.3830788520000685,
+ "grad_norm": 0.6501777768135071,
+ "learning_rate": 0.0002857259415124318,
+ "loss": 2.225,
+ "step": 55500
+ },
+ {
+ "epoch": 2.4045480308469163,
+ "grad_norm": 0.6774255037307739,
+ "learning_rate": 0.00028559711426976423,
+ "loss": 2.2193,
+ "step": 56000
+ },
+ {
+ "epoch": 2.4045480308469163,
+ "eval_loss": 2.90551495552063,
+ "eval_runtime": 173.4554,
+ "eval_samples_per_second": 14.413,
+ "eval_steps_per_second": 3.603,
+ "step": 56000
+ },
+ {
+ "epoch": 2.4260172096937636,
+ "grad_norm": 0.667073130607605,
+ "learning_rate": 0.00028546828702709663,
+ "loss": 2.2228,
+ "step": 56500
+ },
+ {
+ "epoch": 2.4474863885406113,
+ "grad_norm": 0.776077389717102,
+ "learning_rate": 0.000285339459784429,
+ "loss": 2.226,
+ "step": 57000
+ },
+ {
+ "epoch": 2.4689555673874586,
+ "grad_norm": 0.7873576879501343,
+ "learning_rate": 0.00028521063254176147,
+ "loss": 2.2219,
+ "step": 57500
+ },
+ {
+ "epoch": 2.490424746234306,
+ "grad_norm": 0.7621210813522339,
+ "learning_rate": 0.0002850818052990939,
+ "loss": 2.2226,
+ "step": 58000
+ },
+ {
+ "epoch": 2.5118939250811536,
+ "grad_norm": 0.774750828742981,
+ "learning_rate": 0.0002849529780564263,
+ "loss": 2.2328,
+ "step": 58500
+ },
+ {
+ "epoch": 2.533363103928001,
+ "grad_norm": 0.707665205001831,
+ "learning_rate": 0.0002848241508137587,
+ "loss": 2.2342,
+ "step": 59000
+ },
+ {
+ "epoch": 2.554832282774848,
+ "grad_norm": 0.7524703741073608,
+ "learning_rate": 0.00028469532357109116,
+ "loss": 2.2296,
+ "step": 59500
+ },
+ {
+ "epoch": 2.576301461621696,
+ "grad_norm": 0.6186488270759583,
+ "learning_rate": 0.00028456649632842355,
+ "loss": 2.2282,
+ "step": 60000
+ },
+ {
+ "epoch": 2.576301461621696,
+ "eval_loss": 2.907226085662842,
+ "eval_runtime": 176.429,
+ "eval_samples_per_second": 14.17,
+ "eval_steps_per_second": 3.543,
+ "step": 60000
+ },
+ {
+ "epoch": 2.5977706404685432,
+ "grad_norm": 0.6811486482620239,
+ "learning_rate": 0.00028443766908575594,
+ "loss": 2.2357,
+ "step": 60500
+ },
+ {
+ "epoch": 2.619239819315391,
+ "grad_norm": 0.7401767373085022,
+ "learning_rate": 0.0002843088418430884,
+ "loss": 2.2264,
+ "step": 61000
+ },
+ {
+ "epoch": 2.6407089981622383,
+ "grad_norm": 0.6240813136100769,
+ "learning_rate": 0.00028418001460042084,
+ "loss": 2.2402,
+ "step": 61500
+ },
+ {
+ "epoch": 2.6621781770090855,
+ "grad_norm": 0.6217384338378906,
+ "learning_rate": 0.00028405118735775324,
+ "loss": 2.2294,
+ "step": 62000
+ },
+ {
+ "epoch": 2.6836473558559333,
+ "grad_norm": 0.5563312768936157,
+ "learning_rate": 0.00028392236011508563,
+ "loss": 2.2344,
+ "step": 62500
+ },
+ {
+ "epoch": 2.7051165347027806,
+ "grad_norm": 0.7275550961494446,
+ "learning_rate": 0.0002837935328724181,
+ "loss": 2.2233,
+ "step": 63000
+ },
+ {
+ "epoch": 2.7265857135496283,
+ "grad_norm": 0.6657426953315735,
+ "learning_rate": 0.00028366470562975047,
+ "loss": 2.2269,
+ "step": 63500
+ },
+ {
+ "epoch": 2.7480548923964756,
+ "grad_norm": 0.5833483934402466,
+ "learning_rate": 0.0002835358783870829,
+ "loss": 2.2234,
+ "step": 64000
+ },
+ {
+ "epoch": 2.7480548923964756,
+ "eval_loss": 2.8960378170013428,
+ "eval_runtime": 165.7901,
+ "eval_samples_per_second": 15.079,
+ "eval_steps_per_second": 3.77,
+ "step": 64000
  }
  ],
  "logging_steps": 500,
@@ -794,7 +1050,7 @@
  "attributes": {}
  }
  },
- "total_flos": 5.807418710561096e+18,
+ "total_flos": 7.740828282336215e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null