bachtieuthuan committed on
Commit
77bb0e1
1 Parent(s): 89c8736

Upload training_config.yml with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_config.yml +87 -0
training_config.yml ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ _component_: models.lora_mmllama3_8b
3
+ lora_attn_modules:
4
+ - q_proj
5
+ - v_proj
6
+ apply_lora_to_mlp: false
7
+ apply_lora_to_output: false
8
+ lora_rank: 16
9
+ lora_alpha: 32
10
+ perception_tokens: 2
11
+ use_clip: false
12
+ tokenizer:
13
+ _component_: models.a2a_tokenizer
14
+ path: models/tokenizer.model
15
+ freeze_layers:
16
+ _component_: torchtune.utils.Freeze_Layers
17
+ num_layers: 8
18
+ checkpointer:
19
+ _component_: torchtune.utils.FullModelMetaCheckpointer
20
+ checkpoint_dir: tiresome5/
21
+ checkpoint_files:
22
+ - meta_model_0.pt
23
+ adapter_checkpoint: null
24
+ recipe_checkpoint: null
25
+ output_dir: output_checkpoints/experiment_1
26
+ model_type: LLAMA3
27
+ use_freeze_layers: true
28
+ resume_from_checkpoint: false
29
+ interim_checkpoint_steps: 20000
30
+ interim_gen_steps: null
31
+ max_new_tokens: 100
32
+ temperature: 0.6
33
+ top_k: 225
34
+ dataset:
35
+ _component_: ds.EvenBatcher
36
+ buffer_size: 1000
37
+ dataset:
38
+ _component_: ds.RoundRobinDataset
39
+ datasets:
40
+ - _component_: ds.CaptionInstructDataset
41
+ dataset_path: ds/sam_llava/output.parquet
42
+ train_on_input: false
43
+ seed: null
44
+ shuffle: true
45
+ batch_size: 4
46
+ optimizer:
47
+ _component_: torch.optim.AdamW
48
+ weight_decay: 0.0001
49
+ lr: 0.0001
50
+ betas:
51
+ - 0.9
52
+ - 0.998
53
+ lr_scheduler:
54
+ _component_: torchtune.modules.get_cosine_schedule_with_warmup
55
+ num_warmup_steps: 500
56
+ loss:
57
+ _component_: torch.nn.CrossEntropyLoss
58
+ grad_clip:
59
+ _component_: torch.nn.utils.clip_grad_norm  # NOTE(review): deprecated in PyTorch — the current API is torch.nn.utils.clip_grad_norm_; confirm the component resolver before renaming
60
+ max_norm: 2.0
61
+ norm_type: 2
62
+ epochs: 6
63
+ max_steps_per_epoch: null
64
+ gradient_accumulation_steps: 32
65
+ compile: true
66
+ output_dir: /tmp/lora_finetune_output
67
+ metric_logger:
68
+ _component_: torchtune.utils.metric_logging.DiskLogger
69
+ log_dir: ${output_dir}
70
+ log_every_n_steps: null
71
+ device: cuda
72
+ dtype: bf16
73
+ enable_activation_checkpointing: false
74
+ profiler:
75
+ _component_: torchtune.utils.profiler
76
+ enabled: true
77
+ inference:
78
+ prompt_template: 'Video:
79
+
80
+ {video}
81
+
82
+ Caption the previous video.'
83
+ max_new_tokens: 300
84
+ temperature: 0.6
85
+ top_k: 300
86
+ quantizer: null
87
+ gradient-accumulation-steps: 32  # NOTE(review): duplicate of gradient_accumulation_steps above (same value); hyphenated key looks like a stray CLI-flag spelling — remove after confirming nothing reads it