derek-thomas (HF staff) committed
Commit 2beb7b1 (1 parent: bf52501)

Update app.py

Files changed (1)
  1. app.py +139 -31
app.py CHANGED
@@ -65,47 +65,155 @@ with gr.Blocks() as demo:
     with gr.Tabs():
         # Memory Calculation Tab
         with gr.TabItem("Memory Calculation"):
-            hf_model_name_or_path = gr.Textbox(label="HuggingFace Model Name or Path (optional)", value="")
-            num_gpus = gr.Number(label="Number of GPUs", value=1)
-            tensor_parallel_size = gr.Number(label="Tensor Parallel Size", value=1)
-            pipeline_parallel_size = gr.Number(label="Pipeline Parallel Size", value=1)
-            batch_size_per_gpu = gr.Number(label="Batch Size per GPU", value=8)
-            sequence_length = gr.Number(label="Sequence Length", value=2048)
-            vocab_size = gr.Number(label="Vocab Size", value=51200)
-            hidden_size = gr.Number(label="Hidden Size", value=6144)
-            num_attention_heads = gr.Number(label="Number of Attention Heads", value=64)
-            num_layers = gr.Number(label="Number of Layers", value=44)
-            ffn_expansion_factor = gr.Number(label="FFN Expansion Factor", value=4)
-            is_mixed_precision = gr.Checkbox(label="Mixed Precision", value=True)
-            misc_mem_gib = gr.Number(label="Misc Memory Overhead (GiB)", value=5)
+            hf_model_name_or_path = gr.Textbox(
+                label="HuggingFace Model Name or Path",
+                info="Name of the HuggingFace Hub repository or the local file path for it"
+            )
+            num_gpus = gr.Number(
+                label="Number of GPUs",
+                value=1,
+                info="Number of GPUs used for training"
+            )
+            tensor_parallel_size = gr.Number(
+                label="Tensor Parallel Size",
+                value=1,
+                info="Tensor parallel degree (1 if not used)"
+            )
+            pipeline_parallel_size = gr.Number(
+                label="Pipeline Parallel Size",
+                value=1,
+                info="Pipeline parallel degree (1 if not used)"
+            )
+            batch_size_per_gpu = gr.Number(
+                label="Batch Size per GPU",
+                value=8,
+                info="Batch size per GPU"
+            )
+            sequence_length = gr.Number(
+                label="Sequence Length",
+                value=2048,
+                info="Sequence length used for training"
+            )
+            vocab_size = gr.Number(
+                label="Vocab Size",
+                value=51200,
+                info="How many tokens are in the embedding layer"
+            )
+            hidden_size = gr.Number(
+                label="Hidden Size",
+                value=6144,
+                info="Dimension of the model's hidden size"
+            )
+            num_attention_heads = gr.Number(
+                label="Number of Attention Heads",
+                value=64,
+                info="Number of attention heads used in the model"
+            )
+            num_layers = gr.Number(
+                label="Number of Layers",
+                value=44,
+                info="Number of transformer layers used in the model"
+            )
+            ffn_expansion_factor = gr.Number(
+                label="FFN Expansion Factor",
+                value=4,
+                info="How much the MLP hidden size expands"
+            )
+            is_mixed_precision = gr.Checkbox(
+                label="Mixed Precision",
+                value=True,
+                info="Whether mixed precision is enabled"
+            )
+            misc_mem_gib = gr.Number(
+                label="Miscellaneous Memory Overhead (GiB)",
+                value=5,
+                info="Miscellaneous memory overhead per GPU by DL frameworks, communication libraries, etc."
+            )
 
             memory_result = gr.Textbox(label="Memory Calculation Result", interactive=False)
             calc_memory_button = gr.Button("Calculate Memory")
-            calc_memory_button.click(calc_mem,
-                                     inputs=[hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib],
-                                     outputs=memory_result)
+            calc_memory_button.click(
+                calc_mem,
+                inputs=[
+                    hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib
+                ],
+                outputs=memory_result
+            )
 
-            hf_model_name_or_path.change(fn=update_from_hf_model,
+            hf_model_name_or_path.change(
+                fn=update_from_hf_model,
                 inputs=[hf_model_name_or_path],
-                outputs=[num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length, memory_result])
+                outputs=[num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length, memory_result]
+            )
 
         # Parameter Calculation Tab
         with gr.TabItem("Parameter Calculation"):
-            hf_model_name_or_path = gr.Textbox(label="HuggingFace Model Name or Path (optional)", value="")
-            vocab_size = gr.Number(label="Vocab Size", value=51200)
-            tied_embeddings = gr.Checkbox(label="Tied Embeddings", value=False)
-            hidden_size = gr.Number(label="Hidden Size", value=6144)
-            sequence_length = gr.Number(label="Sequence Length", value=2048)
-            num_layers = gr.Number(label="Number of Layers", value=44)
-            ffn_expansion_factor = gr.Number(label="FFN Expansion Factor", value=4)
-            num_mlp_linears = gr.Number(label="Number of Linear Layers per MLP Block", value=2)
-            kv_size_ratio = gr.Number(label="KV Size Ratio", value=1.0)
+            hf_model_name_or_path = gr.Textbox(
+                label="HuggingFace Model Name or Path",
+                info="Name of the HuggingFace Hub repository or the local file path for it"
+            )
+            vocab_size = gr.Number(
+                label="Vocab Size",
+                value=51200,
+                info="How many tokens are in the embedding layer"
+            )
+            tied_embeddings = gr.Checkbox(
+                label="Tied Embeddings",
+                value=False,
+                info="Whether embeddings are tied (shared between input and output)"
+            )
+            hidden_size = gr.Number(
+                label="Hidden Size",
+                value=6144,
+                info="Dimension of the model's hidden size"
+            )
+            sequence_length = gr.Number(
+                label="Sequence Length",
+                value=2048,
+                info="Sequence length used for training"
+            )
+            num_layers = gr.Number(
+                label="Number of Layers",
+                value=44,
+                info="Number of transformer layers used in the model"
+            )
+            ffn_expansion_factor = gr.Number(
+                label="FFN Expansion Factor",
+                value=4,
+                info="How much the MLP hidden size expands"
+            )
+            num_mlp_linears = gr.Number(
+                label="Number of Linear Layers per MLP Block",
+                value=2,
+                info="How many linear layers per MLP block"
+            )
+            kv_size_ratio = gr.Number(
+                label="KV Size Ratio",
+                value=1.0,
+                info="Ratio of total query heads to key/value heads. 1.0 for MHA, 1/num_attention_heads for MQA"
+            )
 
             with gr.Accordion("MoE Parameters", open=False):
-                moe = gr.Checkbox(label="MoE", value=False)
-                num_experts = gr.Number(label="Number of Experts", value=8)
-                expert_interval = gr.Number(label="Expert Interval", value=1)
-                topk = gr.Number(label="Top k Routing", value=1)
+                moe = gr.Checkbox(
+                    label="MoE",
+                    value=False,
+                    info="Whether the model is MoE"
+                )
+                num_experts = gr.Number(
+                    label="Number of Experts",
+                    value=8,
+                    info="Number of experts for MoE"
+                )
+                expert_interval = gr.Number(
+                    label="Expert Interval",
+                    value=1,
+                    info="Expert interval for MoE"
+                )
+                topk = gr.Number(
+                    label="Top k Routing",
+                    value=1,
+                    info="Top k routing for MoE"
+                )
 
             param_result = gr.Textbox(label="Parameter Calculation Result", interactive=False)
             calc_param_button = gr.Button("Calculate Parameters")
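Note on the hf_model_name_or_path.change(...) wiring in the diff: update_from_hf_model is defined earlier in app.py and is not part of this commit. As a rough sketch only (assuming the callback reads the repository's config via transformers.AutoConfig; the actual implementation may differ), such a handler returns one value per component in the outputs list, in the same order:

# Hypothetical sketch, not the code from this commit: the real update_from_hf_model
# lives earlier in app.py. It must return one value per component in
# outputs=[num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length, memory_result].
import gradio as gr
from transformers import AutoConfig

def update_from_hf_model(name_or_path: str):
    try:
        config = AutoConfig.from_pretrained(name_or_path)
    except Exception as e:
        # Leave the numeric fields unchanged and surface the error in the result box.
        return (gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
                f"Could not load config for '{name_or_path}': {e}")
    return (
        config.num_hidden_layers,                          # -> Number of Layers
        config.hidden_size,                                # -> Hidden Size
        config.num_attention_heads,                        # -> Number of Attention Heads
        config.vocab_size,                                 # -> Vocab Size
        getattr(config, "max_position_embeddings", 2048),  # -> Sequence Length
        "",                                                # clear any previous memory result
    )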