load

Browse files

Files changed (16) hide show

README.md +339 -0
added_tokens.json +5 -0
all_results.json +15 -0
config.json +27 -0
eval_results.json +10 -0
generation_config.json +6 -0
pytorch_model-00001-of-00002.bin +3 -0
pytorch_model-00002-of-00002.bin +3 -0
pytorch_model.bin.index.json +298 -0
special_tokens_map.json +10 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +44 -0
train_results.json +8 -0
trainer_state.json +0 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,339 @@

+---
+base_model: llama2_7b_darulm_unigram_init_tie_16_11_23
+tags:
+- generated_from_trainer
+metrics:
+- accuracy
+model-index:
+- name: llama2_7b_darulm_unigram_tie_2e_16_11_23
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# llama2_7b_darulm_unigram_tie_2e_16_11_23
+This model is a fine-tuned version of [llama2_7b_darulm_unigram_init_tie_16_11_23](https://huggingface.co/llama2_7b_darulm_unigram_init_tie_16_11_23) on an unspecified dataset (no dataset name was recorded by the Trainer).
+It achieves the following results on the evaluation set:
+- Loss: 2.7569
+- Accuracy: 0.4617
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 6
+- eval_batch_size: 6
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 16
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 192
+- total_eval_batch_size: 96
+- optimizer: Adam with betas=(0.9,0.95) and epsilon=1e-05
+- lr_scheduler_type: linear
+- num_epochs: 2.0
+### Training results
+| Training Loss | Epoch | Step   | Validation Loss | Accuracy |
+|:-------------:|:-----:|:------:|:---------------:|:--------:|
+| 4.9167        | 0.01  | 1000   | 4.8647          | 0.2686   |
+| 3.9697        | 0.01  | 2000   | 3.9705          | 0.3409   |
+| 3.6398        | 0.02  | 3000   | 3.6476          | 0.3694   |
+| 3.468         | 0.03  | 4000   | 3.4784          | 0.3850   |
+| 3.3567        | 0.04  | 5000   | 3.3733          | 0.3953   |
+| 3.2828        | 0.04  | 6000   | 3.2999          | 0.4026   |
+| 3.2235        | 0.05  | 7000   | 3.2453          | 0.4081   |
+| 3.1898        | 0.06  | 8000   | 3.2028          | 0.4125   |
+| 3.1552        | 0.07  | 9000   | 3.1683          | 0.4160   |
+| 3.1068        | 0.07  | 10000  | 3.1397          | 0.4190   |
+| 3.1019        | 0.08  | 11000  | 3.1152          | 0.4217   |
+| 3.0849        | 0.09  | 12000  | 3.0942          | 0.4239   |
+| 3.0561        | 0.09  | 13000  | 3.0761          | 0.4256   |
+| 3.0429        | 0.1   | 14000  | 3.0595          | 0.4277   |
+| 3.035         | 0.11  | 15000  | 3.0451          | 0.4293   |
+| 3.0077        | 0.12  | 16000  | 3.0322          | 0.4306   |
+| 3.0008        | 0.12  | 17000  | 3.0200          | 0.4320   |
+| 2.9952        | 0.13  | 18000  | 3.0093          | 0.4330   |
+| 2.9825        | 0.14  | 19000  | 2.9996          | 0.4341   |
+| 2.9781        | 0.14  | 20000  | 2.9903          | 0.4351   |
+| 2.957         | 0.15  | 21000  | 2.9821          | 0.4360   |
+| 2.9676        | 0.16  | 22000  | 2.9738          | 0.4368   |
+| 2.9513        | 0.17  | 23000  | 2.9663          | 0.4376   |
+| 2.9475        | 0.17  | 24000  | 2.9594          | 0.4385   |
+| 2.9406        | 0.18  | 25000  | 2.9531          | 0.4391   |
+| 2.9387        | 0.19  | 26000  | 2.9473          | 0.4398   |
+| 2.9353        | 0.2   | 27000  | 2.9416          | 0.4403   |
+| 2.9208        | 0.2   | 28000  | 2.9363          | 0.4411   |
+| 2.9142        | 0.21  | 29000  | 2.9310          | 0.4415   |
+| 2.9167        | 0.22  | 30000  | 2.9265          | 0.4419   |
+| 2.9069        | 0.22  | 31000  | 2.9214          | 0.4425   |
+| 2.9067        | 0.23  | 32000  | 2.9168          | 0.4430   |
+| 2.8978        | 0.24  | 33000  | 2.9128          | 0.4434   |
+| 2.8982        | 0.25  | 34000  | 2.9088          | 0.4438   |
+| 2.8856        | 0.25  | 35000  | 2.9050          | 0.4444   |
+| 2.8981        | 0.26  | 36000  | 2.9013          | 0.4445   |
+| 2.8813        | 0.27  | 37000  | 2.8977          | 0.4450   |
+| 2.8765        | 0.27  | 38000  | 2.8944          | 0.4453   |
+| 2.879         | 0.28  | 39000  | 2.8910          | 0.4458   |
+| 2.8738        | 0.29  | 40000  | 2.8878          | 0.4462   |
+| 2.8671        | 0.3   | 41000  | 2.8851          | 0.4465   |
+| 2.866         | 0.3   | 42000  | 2.8820          | 0.4468   |
+| 2.8561        | 0.31  | 43000  | 2.8791          | 0.4473   |
+| 2.8601        | 0.32  | 44000  | 2.8765          | 0.4477   |
+| 2.8518        | 0.33  | 45000  | 2.8741          | 0.4479   |
+| 2.8577        | 0.33  | 46000  | 2.8713          | 0.4483   |
+| 2.8588        | 0.34  | 47000  | 2.8691          | 0.4484   |
+| 2.8584        | 0.35  | 48000  | 2.8666          | 0.4487   |
+| 2.8527        | 0.35  | 49000  | 2.8646          | 0.4488   |
+| 2.8425        | 0.36  | 50000  | 2.8624          | 0.4490   |
+| 2.8457        | 0.37  | 51000  | 2.8601          | 0.4494   |
+| 2.849         | 0.38  | 52000  | 2.8580          | 0.4496   |
+| 2.8431        | 0.38  | 53000  | 2.8560          | 0.4499   |
+| 2.8463        | 0.39  | 54000  | 2.8540          | 0.4501   |
+| 2.8437        | 0.4   | 55000  | 2.8521          | 0.4504   |
+| 2.845         | 0.41  | 56000  | 2.8505          | 0.4505   |
+| 2.8218        | 0.41  | 57000  | 2.8486          | 0.4508   |
+| 2.8366        | 0.42  | 58000  | 2.8470          | 0.4509   |
+| 2.8339        | 0.43  | 59000  | 2.8453          | 0.4512   |
+| 2.8338        | 0.43  | 60000  | 2.8437          | 0.4511   |
+| 2.8237        | 0.44  | 61000  | 2.8420          | 0.4513   |
+| 2.8334        | 0.45  | 62000  | 2.8405          | 0.4515   |
+| 2.8229        | 0.46  | 63000  | 2.8388          | 0.4518   |
+| 2.8214        | 0.46  | 64000  | 2.8373          | 0.4519   |
+| 2.8245        | 0.47  | 65000  | 2.8356          | 0.4522   |
+| 2.822         | 0.48  | 66000  | 2.8343          | 0.4524   |
+| 2.8139        | 0.48  | 67000  | 2.8331          | 0.4526   |
+| 2.8201        | 0.49  | 68000  | 2.8317          | 0.4526   |
+| 2.8132        | 0.5   | 69000  | 2.8305          | 0.4527   |
+| 2.8138        | 0.51  | 70000  | 2.8290          | 0.4530   |
+| 2.8171        | 0.51  | 71000  | 2.8279          | 0.4530   |
+| 2.8123        | 0.52  | 72000  | 2.8267          | 0.4532   |
+| 2.8118        | 0.53  | 73000  | 2.8255          | 0.4534   |
+| 2.8183        | 0.54  | 74000  | 2.8243          | 0.4536   |
+| 2.8052        | 0.54  | 75000  | 2.8233          | 0.4536   |
+| 2.8101        | 0.55  | 76000  | 2.8220          | 0.4538   |
+| 2.8021        | 0.56  | 77000  | 2.8209          | 0.4540   |
+| 2.8076        | 0.56  | 78000  | 2.8196          | 0.4540   |
+| 2.7937        | 0.57  | 79000  | 2.8190          | 0.4542   |
+| 2.8057        | 0.58  | 80000  | 2.8179          | 0.4541   |
+| 2.8082        | 0.59  | 81000  | 2.8168          | 0.4545   |
+| 2.7986        | 0.59  | 82000  | 2.8157          | 0.4546   |
+| 2.8062        | 0.6   | 83000  | 2.8150          | 0.4545   |
+| 2.7981        | 0.61  | 84000  | 2.8138          | 0.4546   |
+| 2.8041        | 0.61  | 85000  | 2.8130          | 0.4546   |
+| 2.7978        | 0.62  | 86000  | 2.8118          | 0.4549   |
+| 2.8016        | 0.63  | 87000  | 2.8109          | 0.4549   |
+| 2.7901        | 0.64  | 88000  | 2.8099          | 0.4551   |
+| 2.8075        | 0.64  | 89000  | 2.8093          | 0.4553   |
+| 2.7915        | 0.65  | 90000  | 2.8084          | 0.4552   |
+| 2.7916        | 0.66  | 91000  | 2.8074          | 0.4555   |
+| 2.7751        | 0.67  | 92000  | 2.8068          | 0.4554   |
+| 2.7896        | 0.67  | 93000  | 2.8059          | 0.4556   |
+| 2.7886        | 0.68  | 94000  | 2.8051          | 0.4557   |
+| 2.7909        | 0.69  | 95000  | 2.8044          | 0.4557   |
+| 2.7926        | 0.69  | 96000  | 2.8035          | 0.4558   |
+| 2.7931        | 0.7   | 97000  | 2.8028          | 0.4560   |
+| 2.7838        | 0.71  | 98000  | 2.8020          | 0.4562   |
+| 2.779         | 0.72  | 99000  | 2.8014          | 0.4561   |
+| 2.7922        | 0.72  | 100000 | 2.8006          | 0.4562   |
+| 2.7786        | 0.73  | 101000 | 2.7999          | 0.4562   |
+| 2.7791        | 0.74  | 102000 | 2.7992          | 0.4563   |
+| 2.7908        | 0.74  | 103000 | 2.7984          | 0.4565   |
+| 2.7872        | 0.75  | 104000 | 2.7978          | 0.4566   |
+| 2.7763        | 0.76  | 105000 | 2.7972          | 0.4567   |
+| 2.7785        | 0.77  | 106000 | 2.7966          | 0.4568   |
+| 2.7861        | 0.77  | 107000 | 2.7960          | 0.4568   |
+| 2.784         | 0.78  | 108000 | 2.7953          | 0.4570   |
+| 2.7804        | 0.79  | 109000 | 2.7944          | 0.4571   |
+| 2.7828        | 0.8   | 110000 | 2.7940          | 0.4570   |
+| 2.7761        | 0.8   | 111000 | 2.7933          | 0.4571   |
+| 2.7797        | 0.81  | 112000 | 2.7928          | 0.4571   |
+| 2.7792        | 0.82  | 113000 | 2.7922          | 0.4573   |
+| 2.7819        | 0.82  | 114000 | 2.7915          | 0.4573   |
+| 2.7837        | 0.83  | 115000 | 2.7910          | 0.4573   |
+| 2.781         | 0.84  | 116000 | 2.7906          | 0.4575   |
+| 2.7765        | 0.85  | 117000 | 2.7898          | 0.4577   |
+| 2.7778        | 0.85  | 118000 | 2.7895          | 0.4575   |
+| 2.776         | 0.86  | 119000 | 2.7887          | 0.4577   |
+| 2.7719        | 0.87  | 120000 | 2.7883          | 0.4578   |
+| 2.7759        | 0.88  | 121000 | 2.7878          | 0.4579   |
+| 2.7654        | 0.88  | 122000 | 2.7874          | 0.4578   |
+| 2.7661        | 0.89  | 123000 | 2.7868          | 0.4580   |
+| 2.7718        | 0.9   | 124000 | 2.7861          | 0.4580   |
+| 2.7775        | 0.9   | 125000 | 2.7858          | 0.4580   |
+| 2.7835        | 0.91  | 126000 | 2.7855          | 0.4580   |
+| 2.768         | 0.92  | 127000 | 2.7848          | 0.4581   |
+| 2.7701        | 0.93  | 128000 | 2.7843          | 0.4582   |
+| 2.7682        | 0.93  | 129000 | 2.7838          | 0.4583   |
+| 2.7595        | 0.94  | 130000 | 2.7834          | 0.4583   |
+| 2.7627        | 0.95  | 131000 | 2.7831          | 0.4583   |
+| 2.7716        | 0.95  | 132000 | 2.7827          | 0.4584   |
+| 2.7719        | 0.96  | 133000 | 2.7821          | 0.4585   |
+| 2.7723        | 0.97  | 134000 | 2.7816          | 0.4583   |
+| 2.7736        | 0.98  | 135000 | 2.7812          | 0.4585   |
+| 2.7646        | 0.98  | 136000 | 2.7809          | 0.4586   |
+| 2.76          | 0.99  | 137000 | 2.7805          | 0.4586   |
+| 2.7659        | 1.0   | 138000 | 2.7803          | 0.4586   |
+| 2.7604        | 1.01  | 139000 | 2.7799          | 0.4587   |
+| 2.7597        | 1.01  | 140000 | 2.7794          | 0.4587   |
+| 2.7551        | 1.02  | 141000 | 2.7791          | 0.4588   |
+| 2.7619        | 1.03  | 142000 | 2.7788          | 0.4588   |
+| 2.7658        | 1.03  | 143000 | 2.7785          | 0.4589   |
+| 2.751         | 1.04  | 144000 | 2.7781          | 0.4589   |
+| 2.7589        | 1.05  | 145000 | 2.7778          | 0.4590   |
+| 2.7459        | 1.06  | 146000 | 2.7776          | 0.4590   |
+| 2.7646        | 1.06  | 147000 | 2.7771          | 0.4591   |
+| 2.7529        | 1.07  | 148000 | 2.7768          | 0.4589   |
+| 2.7573        | 1.08  | 149000 | 2.7764          | 0.4592   |
+| 2.754         | 1.08  | 150000 | 2.7762          | 0.4591   |
+| 2.7553        | 1.09  | 151000 | 2.7759          | 0.4591   |
+| 2.7485        | 1.1   | 152000 | 2.7755          | 0.4593   |
+| 2.7558        | 1.11  | 153000 | 2.7752          | 0.4593   |
+| 2.7563        | 1.11  | 154000 | 2.7748          | 0.4593   |
+| 2.7557        | 1.12  | 155000 | 2.7747          | 0.4594   |
+| 2.7593        | 1.13  | 156000 | 2.7744          | 0.4592   |
+| 2.752         | 1.14  | 157000 | 2.7741          | 0.4593   |
+| 2.748         | 1.14  | 158000 | 2.7737          | 0.4593   |
+| 2.7549        | 1.15  | 159000 | 2.7735          | 0.4594   |
+| 2.7455        | 1.16  | 160000 | 2.7733          | 0.4596   |
+| 2.7582        | 1.16  | 161000 | 2.7731          | 0.4594   |
+| 2.7532        | 1.17  | 162000 | 2.7728          | 0.4595   |
+| 2.7496        | 1.18  | 163000 | 2.7724          | 0.4595   |
+| 2.75          | 1.19  | 164000 | 2.7721          | 0.4596   |
+| 2.7517        | 1.19  | 165000 | 2.7718          | 0.4597   |
+| 2.7522        | 1.2   | 166000 | 2.7716          | 0.4597   |
+| 2.7514        | 1.21  | 167000 | 2.7713          | 0.4599   |
+| 2.7515        | 1.22  | 168000 | 2.7711          | 0.4598   |
+| 2.7493        | 1.22  | 169000 | 2.7708          | 0.4598   |
+| 2.7491        | 1.23  | 170000 | 2.7705          | 0.4598   |
+| 2.7552        | 1.24  | 171000 | 2.7704          | 0.4599   |
+| 2.7536        | 1.24  | 172000 | 2.7700          | 0.4600   |
+| 2.7485        | 1.25  | 173000 | 2.7697          | 0.4599   |
+| 2.7455        | 1.26  | 174000 | 2.7697          | 0.4599   |
+| 2.7516        | 1.27  | 175000 | 2.7694          | 0.4599   |
+| 2.754         | 1.27  | 176000 | 2.7690          | 0.4600   |
+| 2.7489        | 1.28  | 177000 | 2.7690          | 0.4598   |
+| 2.7491        | 1.29  | 178000 | 2.7686          | 0.4601   |
+| 2.7432        | 1.29  | 179000 | 2.7684          | 0.4600   |
+| 2.7388        | 1.3   | 180000 | 2.7681          | 0.4602   |
+| 2.7501        | 1.31  | 181000 | 2.7679          | 0.4602   |
+| 2.7526        | 1.32  | 182000 | 2.7675          | 0.4603   |
+| 2.7478        | 1.32  | 183000 | 2.7674          | 0.4603   |
+| 2.7491        | 1.33  | 184000 | 2.7670          | 0.4604   |
+| 2.7505        | 1.34  | 185000 | 2.7670          | 0.4604   |
+| 2.7436        | 1.35  | 186000 | 2.7666          | 0.4605   |
+| 2.7389        | 1.35  | 187000 | 2.7665          | 0.4603   |
+| 2.7564        | 1.36  | 188000 | 2.7662          | 0.4604   |
+| 2.7464        | 1.37  | 189000 | 2.7661          | 0.4604   |
+| 2.7459        | 1.37  | 190000 | 2.7659          | 0.4605   |
+| 2.7481        | 1.38  | 191000 | 2.7657          | 0.4605   |
+| 2.7458        | 1.39  | 192000 | 2.7655          | 0.4604   |
+| 2.7427        | 1.4   | 193000 | 2.7653          | 0.4605   |
+| 2.741         | 1.4   | 194000 | 2.7651          | 0.4606   |
+| 2.7488        | 1.41  | 195000 | 2.7649          | 0.4606   |
+| 2.7353        | 1.42  | 196000 | 2.7647          | 0.4605   |
+| 2.7503        | 1.42  | 197000 | 2.7645          | 0.4607   |
+| 2.7446        | 1.43  | 198000 | 2.7644          | 0.4607   |
+| 2.748         | 1.44  | 199000 | 2.7642          | 0.4607   |
+| 2.7394        | 1.45  | 200000 | 2.7641          | 0.4607   |
+| 2.7403        | 1.45  | 201000 | 2.7638          | 0.4607   |
+| 2.7467        | 1.46  | 202000 | 2.7637          | 0.4607   |
+| 2.7532        | 1.47  | 203000 | 2.7635          | 0.4608   |
+| 2.7431        | 1.48  | 204000 | 2.7634          | 0.4609   |
+| 2.7433        | 1.48  | 205000 | 2.7632          | 0.4608   |
+| 2.7436        | 1.49  | 206000 | 2.7630          | 0.4609   |
+| 2.747         | 1.5   | 207000 | 2.7628          | 0.4609   |
+| 2.7395        | 1.5   | 208000 | 2.7626          | 0.4609   |
+| 2.7443        | 1.51  | 209000 | 2.7624          | 0.4609   |
+| 2.7395        | 1.52  | 210000 | 2.7623          | 0.4608   |
+| 2.7353        | 1.53  | 211000 | 2.7621          | 0.4608   |
+| 2.7401        | 1.53  | 212000 | 2.7618          | 0.4610   |
+| 2.7371        | 1.54  | 213000 | 2.7617          | 0.4610   |
+| 2.7458        | 1.55  | 214000 | 2.7616          | 0.4610   |
+| 2.7416        | 1.56  | 215000 | 2.7615          | 0.4611   |
+| 2.7434        | 1.56  | 216000 | 2.7614          | 0.4611   |
+| 2.7456        | 1.57  | 217000 | 2.7614          | 0.4611   |
+| 2.7499        | 1.58  | 218000 | 2.7611          | 0.4611   |
+| 2.744         | 1.58  | 219000 | 2.7609          | 0.4611   |
+| 2.7375        | 1.59  | 220000 | 2.7608          | 0.4611   |
+| 2.7428        | 1.6   | 221000 | 2.7606          | 0.4611   |
+| 2.7442        | 1.61  | 222000 | 2.7606          | 0.4611   |
+| 2.7395        | 1.61  | 223000 | 2.7604          | 0.4612   |
+| 2.7445        | 1.62  | 224000 | 2.7602          | 0.4612   |
+| 2.7394        | 1.63  | 225000 | 2.7602          | 0.4611   |
+| 2.7403        | 1.63  | 226000 | 2.7599          | 0.4612   |
+| 2.738         | 1.64  | 227000 | 2.7599          | 0.4612   |
+| 2.7332        | 1.65  | 228000 | 2.7597          | 0.4613   |
+| 2.7388        | 1.66  | 229000 | 2.7596          | 0.4613   |
+| 2.743         | 1.66  | 230000 | 2.7595          | 0.4613   |
+| 2.7368        | 1.67  | 231000 | 2.7593          | 0.4613   |
+| 2.7426        | 1.68  | 232000 | 2.7592          | 0.4614   |
+| 2.7332        | 1.69  | 233000 | 2.7591          | 0.4614   |
+| 2.7413        | 1.69  | 234000 | 2.7590          | 0.4614   |
+| 2.735         | 1.7   | 235000 | 2.7589          | 0.4613   |
+| 2.7393        | 1.71  | 236000 | 2.7589          | 0.4614   |
+| 2.7382        | 1.71  | 237000 | 2.7587          | 0.4615   |
+| 2.7403        | 1.72  | 238000 | 2.7587          | 0.4615   |
+| 2.7436        | 1.73  | 239000 | 2.7586          | 0.4615   |
+| 2.7422        | 1.74  | 240000 | 2.7585          | 0.4615   |
+| 2.7257        | 1.74  | 241000 | 2.7584          | 0.4614   |
+| 2.7351        | 1.75  | 242000 | 2.7583          | 0.4615   |
+| 2.7391        | 1.76  | 243000 | 2.7582          | 0.4615   |
+| 2.7495        | 1.76  | 244000 | 2.7581          | 0.4615   |
+| 2.7399        | 1.77  | 245000 | 2.7580          | 0.4614   |
+| 2.7435        | 1.78  | 246000 | 2.7580          | 0.4616   |
+| 2.7414        | 1.79  | 247000 | 2.7579          | 0.4615   |
+| 2.7478        | 1.79  | 248000 | 2.7578          | 0.4616   |
+| 2.7299        | 1.8   | 249000 | 2.7577          | 0.4616   |
+| 2.7401        | 1.81  | 250000 | 2.7576          | 0.4616   |
+| 2.7395        | 1.82  | 251000 | 2.7575          | 0.4616   |
+| 2.7399        | 1.82  | 252000 | 2.7574          | 0.4616   |
+| 2.7413        | 1.83  | 253000 | 2.7574          | 0.4616   |
+| 2.7294        | 1.84  | 254000 | 2.7573          | 0.4616   |
+| 2.7329        | 1.84  | 255000 | 2.7572          | 0.4616   |
+| 2.7454        | 1.85  | 256000 | 2.7572          | 0.4617   |
+| 2.7343        | 1.86  | 257000 | 2.7571          | 0.4617   |
+| 2.7356        | 1.87  | 258000 | 2.7571          | 0.4617   |
+| 2.7462        | 1.87  | 259000 | 2.7570          | 0.4617   |
+| 2.7375        | 1.88  | 260000 | 2.7569          | 0.4617   |
+| 2.7368        | 1.89  | 261000 | 2.7569          | 0.4618   |
+| 2.7452        | 1.89  | 262000 | 2.7569          | 0.4617   |
+| 2.7394        | 1.9   | 263000 | 2.7568          | 0.4617   |
+| 2.7378        | 1.91  | 264000 | 2.7568          | 0.4618   |
+| 2.7446        | 1.92  | 265000 | 2.7567          | 0.4618   |
+| 2.7436        | 1.92  | 266000 | 2.7567          | 0.4618   |
+| 2.7505        | 1.93  | 267000 | 2.7567          | 0.4618   |
+| 2.7493        | 1.94  | 268000 | 2.7566          | 0.4618   |
+| 2.7391        | 1.95  | 269000 | 2.7566          | 0.4618   |
+| 2.7431        | 1.95  | 270000 | 2.7566          | 0.4617   |
+| 2.7387        | 1.96  | 271000 | 2.7565          | 0.4618   |
+| 2.741         | 1.97  | 272000 | 2.7565          | 0.4618   |
+| 2.7343        | 1.97  | 273000 | 2.7565          | 0.4618   |
+| 2.7378        | 1.98  | 274000 | 2.7564          | 0.4618   |
+| 2.737         | 1.99  | 275000 | 2.7564          | 0.4618   |
+| 2.7397        | 2.0   | 276000 | 2.7564          | 0.4618   |
+### Framework versions
+- Transformers 4.34.0
+- Pytorch 2.0.1+cu118
+- Datasets 2.14.5
+- Tokenizers 0.14.1

added_tokens.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "</s>": 2,
+  "<s>": 1,
+  "<unk>": 0
+}

all_results.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+    "epoch": 2.0,
+    "eval_accuracy": 0.4617488272495486,
+    "eval_loss": 2.7569446563720703,
+    "eval_runtime": 28.951,
+    "eval_samples": 6483,
+    "eval_samples_per_second": 223.93,
+    "eval_steps_per_second": 2.349,
+    "perplexity": 15.751642670488213,
+    "train_loss": 2.8250040690803138,
+    "train_runtime": 396233.9412,
+    "train_samples": 26545790,
+    "train_samples_per_second": 133.99,
+    "train_steps_per_second": 0.698
+}

config.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+  "_name_or_path": "llama2_7b_darulm_unigram_init_tie_16_11_23",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "max_position_embeddings": 4096,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 32,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.34.0",
+  "use_cache": true,
+  "vocab_size": 32000
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "epoch": 2.0,
+    "eval_accuracy": 0.4617488272495486,
+    "eval_loss": 2.7569446563720703,
+    "eval_runtime": 28.951,
+    "eval_samples": 6483,
+    "eval_samples_per_second": 223.93,
+    "eval_steps_per_second": 2.349,
+    "perplexity": 15.751642670488213
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.34.0"
+}

pytorch_model-00001-of-00002.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:14f788a31291164991fb02871e714813fe2aca163f30e9c26489138d282f8730
+size 9976620122

pytorch_model-00002-of-00002.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:79f1991f97779a57b385ebe3e6e0726d75ab9ad7d6c6b4cbef1c52fce1e7a416
+size 3500310787

pytorch_model.bin.index.json ADDED Viewed

	@@ -0,0 +1,298 @@

+{
+  "metadata": {
+    "total_size": 13476831232
+  },
+  "weight_map": {
+    "lm_head.weight": "pytorch_model-00002-of-00002.bin",
+    "model.embed_tokens.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.30.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.norm.weight": "pytorch_model-00002-of-00002.bin"
+  }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "additional_special_tokens": [
+    "<unk>",
+    "<s>",
+    "</s>"
+  ],
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "unk_token": "<unk>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88fc252bfd0e6a5355d5ae708410bf365241c858dc556dc4744cbe5b011ba3fc
+size 989601

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<unk>",
+    "<s>",
+    "</s>"
+  ],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": null,
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": true
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.0,
+    "train_loss": 2.8250040690803138,
+    "train_runtime": 396233.9412,
+    "train_samples": 26545790,
+    "train_samples_per_second": 133.99,
+    "train_steps_per_second": 0.698
+}

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:052804cced4c5e9bfd207af9c55b076d617163f4c97734a8c512536ecf5a281b
+size 5563