# File size: 1,789 Bytes
# 550665c

# Data-processing and model-selection settings, grouped under the section
# headings below. Booleans use the canonical lowercase form.
RetroDataModelArguments:

    # --- DataArguments ---
    max_seq_length: 512
    max_answer_length: 30
    doc_stride: 128
    return_token_type_ids: true
    pad_to_max_length: true
    preprocessing_num_workers: 5
    overwrite_cache: false
    version_2_with_negative: true
    null_score_diff_threshold: 0.0
    rear_threshold: 0.0
    n_best_size: 20
    use_choice_logits: false
    start_n_top: -1
    end_n_top: -1
    beta1: 1
    beta2: 1
    best_cof: 1

    # --- SketchModelArguments ---
    # Uses the same checkpoint name as the intensive model below.
    sketch_model_name: google/electra-base-discriminator
    sketch_architectures: ElectraForSequenceClassification

    # --- IntensiveModelArguments ---
    intensive_model_name: google/electra-base-discriminator
    intensive_model_mode: transfer
    intensive_architectures: ElectraForQuestionAnsweringAVPool

# Trainer settings. The key names match Hugging Face Transformers
# `TrainingArguments` (presumably how this section is consumed — confirm
# against the loader).
TrainingArguments:
    # report_to: wandb
    run_name: squadv2-electra-base-sketch,squadv2-electra-base-intensive
    output_dir: outputs
    overwrite_output_dir: false
    # Written with a decimal point and a signed exponent on purpose: YAML 1.1
    # loaders such as PyYAML resolve a bare `2e-5` as the string "2e-5",
    # not as a float.
    learning_rate: 2.0e-5
    evaluation_strategy: epoch
    save_strategy: steps  # Save checkpoints every specified number of steps
    save_steps: 5000  # Save model checkpoints every 5000 steps
    save_total_limit: 2  # Maximum number of checkpoints to keep
    # load_best_model_at_end: true  # Left disabled to avoid loading the best model at the end
    # NOTE(review): if load_best_model_at_end is ever re-enabled, the
    # Transformers docs require the save and evaluation strategies to match;
    # they currently differ (steps vs. epoch) — confirm before enabling.
    # checkpoint_dir is not set here; checkpoints go under output_dir.
    # logging_dir is not set here; the library default applies.
    per_device_train_batch_size: 512
    per_device_eval_batch_size: 512
    num_train_epochs: 10.0
    # metric_for_best_model is not needed for resuming from checkpoints.
    no_cuda: false
    fp16: true
    warmup_ratio: 0.1
    weight_decay: 0.01