derek-thomas (HF staff) committed
Commit 4db8e8b
Parent: 4d4bbe8

Update app.py

Files changed (1):
  1. app.py +32 -27
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 from transformers import AutoConfig # Required for Hugging Face integration
 from calc_params import calc_params # Import calc_params from the new file
+import math
 
 # ---- Helper Functions ---- #
 def get_hf_model_args(hf_model_name_or_path):
@@ -320,35 +321,39 @@ with gr.Blocks() as demo:
         gr.Markdown("""
         ## FLOP Calculation
 
-        FLOP Calculation estimates the number of floating point operations (FLOPs) for training or inference of a model.
-        Provide the necessary model hyperparameters and click 'Calculate FLOPs' to get a result.
+        FLOP Calculation calculates the number of theoretical FLOPs required to train a model on t tokens.
+        See [Transformers Math 101](https://blog.eleuther.ai/transformer-math/) for more details on how FLOPs are calculated.
+        Other good resources that we consulted are the [Chinchilla Paper](https://arxiv.org/abs/2203.15556) and
+        [Efficient Large-Scale Language Model Training on GPU Clusters Using Megatron-LM](https://people.eecs.berkeley.edu/~matei/papers/2021/sc_megatron_lm.pdf).
         """)
         with gr.Row():
-            with gr.Column():
-                hf_model_name_or_path = gr.Textbox(
-                    label="HuggingFace Model Name or Path",
-                    info="Name of the HuggingFace Hub repository or the local file path for it"
-                )
-                vocab_size = gr.Number(
-                    label="Vocab Size",
-                    value=51200,
-                    info="How many tokens are in the embedding layer"
-                )
-                hidden_size = gr.Number(
-                    label="Hidden Size",
-                    value=6144,
-                    info="Dimension of the model's hidden size"
-                )
-                sequence_length = gr.Number(
-                    label="Sequence Length",
-                    value=2048,
-                    info="Sequence length used for training"
-                )
-                num_layers = gr.Number(
-                    label="Number of Layers",
-                    value=44,
-                    info="Number of transformer layers used in the model"
-                )
+            with gr.Column("Generatable"):
+                with gr.Group():
+                    hf_model_name_or_path = gr.Textbox(
+                        label="HuggingFace Model Name or Path",
+                        info="Name of the HuggingFace Hub repository or the local file path for it"
+                    )
+                    vocab_size = gr.Number(
+                        label="Vocab Size",
+                        value=51200,
+                        info="How many tokens are in the embedding layer"
+                    )
+                    hidden_size = gr.Number(
+                        label="Hidden Size",
+                        value=6144,
+                        info="Dimension of the model's hidden size"
+                    )
+                    sequence_length = gr.Number(
+                        label="Sequence Length",
+                        value=2048,
+                        info="Sequence length used for training"
+                    )
+                    num_layers = gr.Number(
+                        label="Number of Layers",
+                        value=44,
+                        info="Number of transformer layers used in the model"
+                    )
+            with gr.Column("Generatable"):
                 kv_size_ratio = gr.Number(
                     label="KV Size Ratio",
                     value=1.0,
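
Note on the FLOP estimate referenced in the new Markdown: Transformers Math 101 and the Chinchilla paper both work from the rule of thumb that training costs roughly 6 FLOPs per parameter per token (about 2*N*D for the forward pass and 4*N*D for the backward pass, for N parameters and D tokens). The sketch below illustrates only that approximation; the function name approx_training_flops and the example figures are made up for illustration, and the app's actual formula lives in calc_params.py, which is not shown in this diff.

def approx_training_flops(num_params: float, num_tokens: float) -> float:
    # Rule-of-thumb estimate from Transformer Math 101 / Chinchilla:
    # ~2*N*D FLOPs for the forward pass plus ~4*N*D for the backward pass.
    return 6.0 * num_params * num_tokens

# Illustrative usage: a 6.7B-parameter model trained on 300B tokens.
flops = approx_training_flops(6.7e9, 300e9)
pf_days = flops / (1e15 * 86400)  # convert to petaFLOP/s-days
print(f"{flops:.3e} FLOPs (~{pf_days:.0f} PF-days)")

This rule of thumb ignores attention terms that scale with sequence length; the Megatron-LM paper linked in the commit gives the more detailed per-layer accounting.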