derek-thomas HF staff committed on
Commit
ef8c30b
1 Parent(s): befd20b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -43
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import gradio as gr
2
  import math
 
3
 
4
- # Helper function to pretty-print message sizes
5
  def convert_params(params):
6
  if params == 0:
7
  return "0"
@@ -11,57 +12,78 @@ def convert_params(params):
11
  s = round(params / p, 2)
12
  return "%s %s" % (s, size_name[i])
13
 
14
# ---- Transformer Parameter Calculation ---- #
def calc_params(vocab_size, tied_embeddings, hidden_size, sequence_length, num_layers, moe, num_experts, expert_interval, topk, ffn_expansion_factor, num_mlp_linears, kv_size_ratio):
    """Break down a transformer LM's parameter count into its components.

    Returns a multi-line string listing embedding, attention, FFN,
    (optionally) MoE gating, and total parameter counts, each
    pretty-printed via convert_params.
    """
    # Tied input/output embeddings share one matrix; untied models need two.
    embedding_params = (1 if tied_embeddings else 2) * hidden_size * vocab_size
    position_embedding_params = hidden_size * sequence_length
    # Attention projections scaled by the key/value width ratio (GQA/MQA).
    attention_params = int(2 * (1 + kv_size_ratio) * num_layers * hidden_size * hidden_size)
    layernorm_params = 13 * num_layers * hidden_size

    if moe:
        # Every `expert_interval`-th layer is a mixture-of-experts layer.
        num_expert_layers = num_layers / expert_interval
        ffn_expert_params = num_mlp_linears * ffn_expansion_factor * num_expert_layers * num_experts * hidden_size * hidden_size
        ffn_dense_params = num_mlp_linears * ffn_expansion_factor * (num_layers - num_expert_layers) * hidden_size * hidden_size
        ffn_params = ffn_expert_params + ffn_dense_params
        # Router/gating weights: one score per expert per expert layer.
        gating_params = num_expert_layers * hidden_size * num_experts
    else:
        ffn_params = num_mlp_linears * ffn_expansion_factor * num_layers * hidden_size * hidden_size

    total_params = embedding_params + attention_params + ffn_params + position_embedding_params + layernorm_params
    if moe:
        total_params += gating_params

    # NOTE(review): `topk` is accepted but never used in this count — it does
    # not change the stored parameter total, only per-token compute.
    result = f"""
    Embedding parameters: {convert_params(embedding_params)}
    Attention parameters: {convert_params(attention_params)}
    FFN parameters: {convert_params(ffn_params)}
    {'Gating parameters: ' + convert_params(gating_params) if moe else ''}
    Total Params in the Model: {convert_params(total_params)}
    """
    return result
46
-
47
# ---- Memory Calculation Code (from the second script) ---- #
def calc_mem(args):
    """Return a string with the estimated per-GPU training memory in GiB.

    `args` is any object exposing the model-architecture, parallelism, and
    precision attributes read below (argparse-Namespace style).
    """
    # Data-parallel degree; computed here but not used in the estimate below.
    dp_degree = args.num_gpus / (args.tensor_parallel_size * args.pipeline_parallel_size)

    # Parameter counts for each component of the network.
    embed_params = 2 * args.vocab_size * args.hidden_size
    positional_params = args.hidden_size * args.sequence_length
    ln_params = 8 * args.hidden_size * args.num_layers + (2 * args.hidden_size)
    attention_params = int(2 * (1 + args.kv_size_ratio) * args.num_layers * args.hidden_size * args.hidden_size)
    mlp_params = args.num_mlp_linears * args.num_layers * args.hidden_size * args.ffn_expansion_factor * args.hidden_size
    total_params = embed_params + positional_params + ln_params + attention_params + mlp_params

    # Mixed precision stores weights in the low-precision format.
    if args.is_mixed_precision:
        bytes_per_param = args.low_prec_bytes_per_val
    else:
        bytes_per_param = args.high_prec_bytes_per_val

    # Shard model memory across tensor- and pipeline-parallel ranks, then
    # convert bytes -> GiB and add the fixed per-GPU overhead.
    model_mem = total_params * bytes_per_param
    per_gpu_model_mem = model_mem / (args.tensor_parallel_size * args.pipeline_parallel_size)
    per_gpu_mem_gib = per_gpu_model_mem / 1024**3 + args.misc_mem_gib

    return f"Per-GPU Memory Required for Training: {per_gpu_mem_gib:.2f} GiB"
63
 
64
- # Gradio Interface
65
  with gr.Blocks() as demo:
66
  with gr.Tabs():
67
  with gr.TabItem("Parameter Calculation"):
@@ -101,6 +123,6 @@ with gr.Blocks() as demo:
101
 
102
  memory_result = gr.Textbox(label="Memory Calculation Result", interactive=False)
103
  calc_memory_button = gr.Button("Calculate Memory")
104
- calc_memory_button.click(calc_mem, inputs=[num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib], outputs=memory_result)
105
 
106
  demo.launch()
 
1
  import gradio as gr
2
  import math
3
+ from transformers import AutoConfig # Required for Hugging Face integration
4
 
5
+ # ---- Helper Functions ---- #
6
  def convert_params(params):
7
  if params == 0:
8
  return "0"
 
12
  s = round(params / p, 2)
13
  return "%s %s" % (s, size_name[i])
14
 
15
# Set defaults for missing arguments
def set_defaults(args, defaults):
    """Fill every attribute of `args` that is currently None from `defaults`.

    Mutates `args` in place and returns it for call chaining.
    """
    for name in defaults:
        if getattr(args, name) is None:
            setattr(args, name, defaults[name])
    return args
21
+
22
# Fill a single attribute from the HF config, falling back to the default
def set_if_none(args, key, config, config_key, defaults):
    """If `args.<key>` is None, set it to `config[config_key]` (or
    `defaults[key]` when the config lacks that key). Non-None values
    on `args` are left untouched. Returns `args`.
    """
    current = getattr(args, key)
    if current is None:
        fallback = defaults[key]
        setattr(args, key, config.get(config_key, fallback))
    return args
27
+
28
# Get Hugging Face model arguments
def get_hf_model_args(args, defaults):
    """Populate architecture fields on `args` from a Hugging Face config.

    When `args.hf_model_name_or_path` is set, the fetched config values
    overwrite num_layers/hidden_size/num_attention_heads/vocab_size/
    sequence_length outright (user-supplied values are ignored).
    Remaining None fields are then filled from `defaults`.

    NOTE(review): `set_if_none` exists but is not used here — confirm
    whether user-entered values should take precedence over the config.

    Raises gr.Error when the config cannot be fetched.
    """
    if args.hf_model_name_or_path:
        try:
            config = AutoConfig.from_pretrained(args.hf_model_name_or_path, trust_remote_code=True).to_dict()
        except Exception as e:
            raise gr.Error(f"Error fetching Hugging Face model: {str(e)}")

        # Map our argument names to the HF config keys they come from.
        field_map = {
            "num_layers": "num_hidden_layers",
            "hidden_size": "hidden_size",
            "num_attention_heads": "num_attention_heads",
            "vocab_size": "vocab_size",
            "sequence_length": "max_position_embeddings",
        }
        for arg_name, cfg_key in field_map.items():
            setattr(args, arg_name, config.get(cfg_key, defaults[arg_name]))

    return set_defaults(args, defaults)
44
+
45
# ---- Memory Calculation ---- #
def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib, kv_size_ratio=1.0):
    """Estimate the per-GPU memory (GiB) required to train a transformer LM.

    If `hf_model_name_or_path` is given, architecture fields are pulled from
    that model's Hugging Face config; remaining None fields fall back to the
    defaults below. Returns a human-readable result string.

    NOTE(review): `batch_size_per_gpu` and `num_attention_heads` are accepted
    from the UI but not used in this estimate — confirm whether activation
    memory should also be modeled.
    """
    # Defaults used when neither the UI nor the HF config supplies a value.
    defaults = {
        "num_layers": 44,
        "hidden_size": 6144,
        "num_attention_heads": 64,
        "vocab_size": 51200,
        "sequence_length": 2048,
        "ffn_expansion_factor": 4,
    }

    # Simple attribute bag simulating argparse-style parsed arguments.
    class Args:
        def __init__(self, **kwargs):
            for key, value in kwargs.items():
                setattr(self, key, value)

    args = Args(hf_model_name_or_path=hf_model_name_or_path, num_gpus=num_gpus, tensor_parallel_size=tensor_parallel_size,
                pipeline_parallel_size=pipeline_parallel_size, batch_size_per_gpu=batch_size_per_gpu, sequence_length=sequence_length,
                vocab_size=vocab_size, hidden_size=hidden_size, num_attention_heads=num_attention_heads, num_layers=num_layers,
                ffn_expansion_factor=ffn_expansion_factor, is_mixed_precision=is_mixed_precision, misc_mem_gib=misc_mem_gib)

    # Fetch Hugging Face model args if a model is provided.
    args = get_hf_model_args(args, defaults)

    # Parameter counts for each component of the network.
    embed_params = 2 * args.vocab_size * args.hidden_size
    positional_params = args.hidden_size * args.sequence_length
    ln_params = 8 * args.hidden_size * args.num_layers + (2 * args.hidden_size)
    # BUG FIX: attention parameters scale with the key/value width ratio
    # (1.0 for standard multi-head attention, i.e. 4*L*h^2 total), not the
    # FFN expansion factor, which the previous revision pasted in here.
    attention_params = int(2 * (1 + kv_size_ratio) * args.num_layers * args.hidden_size * args.hidden_size)
    mlp_params = args.ffn_expansion_factor * args.num_layers * args.hidden_size * args.hidden_size
    total_params = embed_params + positional_params + ln_params + attention_params + mlp_params

    # Mixed precision keeps weights in 2-byte (fp16/bf16) values; else fp32.
    bytes_per_param = 2 if args.is_mixed_precision else 4
    model_mem = total_params * bytes_per_param
    # Shard across tensor/pipeline ranks, convert bytes -> GiB, add overhead.
    per_gpu_mem_gib = (model_mem / (args.tensor_parallel_size * args.pipeline_parallel_size)) / 1024**3 + args.misc_mem_gib

    return f"Per-GPU Memory Required for Training: {per_gpu_mem_gib:.2f} GiB"
85
 
86
+ # ---- Gradio Interface ---- #
87
  with gr.Blocks() as demo:
88
  with gr.Tabs():
89
  with gr.TabItem("Parameter Calculation"):
 
123
 
124
  memory_result = gr.Textbox(label="Memory Calculation Result", interactive=False)
125
  calc_memory_button = gr.Button("Calculate Memory")
126
+ calc_memory_button.click(calc_mem, inputs=[hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib], outputs=memory_result)
127
 
128
  demo.launch()