jan-hq committed on
Commit
05dcaf4
1 Parent(s): 53cad53

Upload 2 files

Browse files
Files changed (2) hide show
  1. training_config.yaml +93 -0
  2. training_loss.txt +0 -0
training_config.yaml ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Config for multi-device full finetuning in full_finetune_distributed.py
2
+ # using the 8B model configured under `model` below (torchtune.models.llama3_1.llama3_1_s_8b)
3
+ #
4
+ # This config assumes that you've run the following command before launching
5
+ # this run:
6
+ # tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --hf-token <HF_TOKEN>
7
+ #
8
+ # To launch on 4 devices, run the following command from root:
9
+ # tune run --nproc_per_node 4 full_finetune_distributed --config llama3/8B_full
10
+ #
11
+ # You can add specific overrides through the command line. For example,
12
+ # to override the checkpointer directory while launching training,
13
+ # you can run:
14
+ # tune run --nproc_per_node 4 full_finetune_distributed --config llama3/8B_full checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
15
+ #
16
+ # This config works best when the model is being fine-tuned on 2+ GPUs.
17
+ # Single device full finetuning requires more memory optimizations. It's
18
+ # best to use 8B_full_single_device.yaml for those cases.
19
+ # Tokenizer
20
+ tokenizer:
21
+ _component_: torchtune.models.llama3.llama3_s_tokenizer
22
+ path: ../model_zoo/tokenizer.model
23
+ max_seq_len: 4096
24
+
25
+ # Dataset
26
+ dataset:
27
+ _component_: torchtune.datasets.chat_dataset
28
+ source: homebrewltd/mixed-instruction-speech-whispervq-v3-full
29
+ conversation_style: openai
30
+ max_seq_len: 4096
31
+ split: train
32
+ train_on_input: True
33
+
34
+ seed: 42
35
+ shuffle: True
36
+ # Model Arguments
37
+ model:
38
+ _component_: torchtune.models.llama3_1.llama3_1_s_8b
39
+ # path: model_zoo/Llama3.1_s_8b_init
40
+ checkpointer:
41
+ _component_: torchtune.utils.FullModelHFCheckpointerSaveSteps
42
+ checkpoint_dir: ../model_zoo/llama3.1-s-base
43
+ checkpoint_files: [
44
+ model-00001-of-00004.safetensors,
45
+ model-00002-of-00004.safetensors,
46
+ model-00003-of-00004.safetensors,
47
+ model-00004-of-00004.safetensors,
48
+ ]
49
+ recipe_checkpoint: null
50
+ output_dir: ../model_zoo/llama3-s-instruct-v1
51
+ model_type: LLAMA3
52
+ resume_from_checkpoint: False
53
+ save_every_n_steps: 1000
54
+ max_checkpoints: 3
55
+ # Fine-tuning arguments
56
+ batch_size: 4
57
+ epochs: 1
58
+ max_steps_per_epoch: null
59
+ gradient_accumulation_steps: 8
60
+ compile: False
61
+ # Optimizer and Scheduler
62
+ optimizer:
63
+ _component_: torch.optim.AdamW # To use Adam-mini instead, change this to torchtune.modules.optimizer.Adam_mini
64
+ weight_decay: 0.005
65
+ lr: 7e-5
66
+ fused: True
67
+ lr_scheduler:
68
+ _component_: torchtune.modules.get_cosine_schedule_with_warmup
69
+ num_warmup_steps: 73
70
+
71
+ loss:
72
+ _component_: torch.nn.CrossEntropyLoss
73
+
74
+ fsdp:
75
+ cpu_offload: False
76
+
77
+ # Training env
78
+ device: cuda
79
+ dtype: bf16
80
+
81
+ # Memory management
82
+ enable_activation_checkpointing: True
83
+ memory_efficient_fsdp_wrap: True
84
+ ac_mode: 'selective'
85
+
86
+
87
+ # Logging
88
+ metric_logger:
89
+ _component_: torchtune.utils.metric_logging.DiskLogger
90
+ log_dir: ${output_dir}
91
+ output_dir: ../model_zoo/Llama3-instruct-log-v1/
92
+ log_every_n_steps: 1
93
+ log_peak_memory_stats: False
training_loss.txt ADDED
The diff for this file is too large to render. See raw diff