inoid committed
Commit
2158f85
1 Parent(s): 496d856

Use environment variables with the os.environ function

Files changed (2)
  1. app.py +1 -0
  2. spanish_medica_llm.py +3 -4
app.py CHANGED
@@ -40,6 +40,7 @@ def evaluate_model():
     return(f"Evaluate Model {os.environ.get('HF_LLM_MODEL_ID')} from dataset {os.environ.get('HF_LLM_DATASET_ID')}")
 
 
+
 def train_model(*inputs):
     if "IS_SHARED_UI" in os.environ:
         raise gr.Error("This Space only works in duplicated instances")
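For reference, a minimal sketch (not part of the commit) of how the os.environ lookups used in app.py behave. The variable names HF_LLM_MODEL_ID, HF_LLM_DATASET_ID and IS_SHARED_UI come from the diff above; the default fallback values and the plain RuntimeError are illustrative assumptions.

import os

# os.environ.get returns None (or a supplied default) when a variable is unset,
# while `"KEY" in os.environ` only checks for presence without reading the value.
model_id = os.environ.get("HF_LLM_MODEL_ID", "unknown-model")        # default value is an assumption
dataset_id = os.environ.get("HF_LLM_DATASET_ID", "unknown-dataset")  # default value is an assumption

if "IS_SHARED_UI" in os.environ:
    # app.py raises gr.Error here; a plain exception keeps this sketch dependency-free.
    raise RuntimeError("This Space only works in duplicated instances")

print(f"Evaluate Model {model_id} from dataset {dataset_id}")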
spanish_medica_llm.py CHANGED
@@ -25,7 +25,7 @@ from transformers import (
 from accelerate import FullyShardedDataParallelPlugin, Accelerator
 from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig
 from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
-import wandb
+#import wandb
 from trl import SFTTrainer
 
 from huggingface_hub import login
@@ -504,7 +504,7 @@ def configAndRunTraining(basemodel, dataset, eval_dataset, tokenizer):
     push_to_hub = True,
     hub_private_repo = False,
     hub_model_id = HUB_MODEL_ID,
-    warmup_steps =5,
+    warmup_steps = 5,
     per_device_train_batch_size = MICRO_BATCH_SIZE,
     per_device_eval_batch_size=1,
     #gradient_checkpointing=True,
@@ -518,8 +518,7 @@ def configAndRunTraining(basemodel, dataset, eval_dataset, tokenizer):
     save_steps = 50, # Save checkpoints every 50 steps
     evaluation_strategy = "steps", # Evaluate the model every logging step
     eval_steps = 50, # Evaluate and save checkpoints every 50 steps
-    do_eval = True, # Perform evaluation at the end of training
-    #report_to="wandb", # Comment this out if you don't want to use weights & baises
+    do_eval = True, # Perform evaluation at the end of training
     run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}" , # Name of the W&B run (optional)
     fp16=True, #Set for GPU T4 for more powerful GPU as G-100 or another change to false and bf16 parameter
     bf16=False
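A hedged sketch of how the TrainingArguments fields touched by this commit fit together, assuming the standard transformers TrainingArguments API. MICRO_BATCH_SIZE, HUB_MODEL_ID, run_name and output_dir are placeholders standing in for values defined elsewhere in spanish_medica_llm.py, not values taken from the commit.

from datetime import datetime
from transformers import TrainingArguments

MICRO_BATCH_SIZE = 4          # placeholder; the real value is defined elsewhere in the script
HUB_MODEL_ID = "user/model"   # placeholder Hub repo id
run_name = "spanish-medica-llm"  # placeholder run name

training_args = TrainingArguments(
    output_dir="./outputs",               # placeholder output directory
    push_to_hub=True,
    hub_private_repo=False,
    hub_model_id=HUB_MODEL_ID,
    warmup_steps=5,                       # the commit only reformats this line (adds a space)
    per_device_train_batch_size=MICRO_BATCH_SIZE,
    per_device_eval_batch_size=1,
    save_steps=50,                        # save checkpoints every 50 steps
    evaluation_strategy="steps",          # evaluate every eval_steps
    eval_steps=50,
    do_eval=True,                         # kept; only the commented report_to="wandb" line was dropped
    run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}",
    fp16=True,                            # suits a T4; on a more capable GPU switch to bf16 instead
    bf16=False,
)

With wandb no longer imported and the report_to="wandb" comment removed, the run relies on the default reporting backends; run_name is still passed but only matters if a tracker such as W&B is re-enabled.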