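# NOTE: the following lines are residue from the web file viewer this config
# was copied from; they are not configuration and are kept only as comments.
# Spaces:
# Paused
# Paused
# File size: 799 Bytes
# 424a94c |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |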
# Model settings: every key below belongs to the `model` mapping.
model:
  arch: video_llama
  model_type: pretrain_vicuna
  freeze_vit: true
  freeze_qformer: true
  max_txt_len: 512
  # Must stay quoted: an unquoted `#` would start a YAML comment.
  end_sym: "###"
  low_resource: false

  frozen_llama_proj: false

  # Checkpoint locations, all inside the Video-LLaMA-2-7B-Finetuned directory.
  # NOTE(review): these are relative paths; confirm the working directory the
  # loader resolves them against.
  llama_model: "Video-LLaMA-2-7B-Finetuned/llama-2-7b-chat-hf"
  imagebind_ckpt_path: "Video-LLaMA-2-7B-Finetuned"
  # NOTE(review): `ckpt` / `ckpt_2` appear to be the VL and AL branch weights
  # (per the file names); confirm which branch each key feeds.
  ckpt: "Video-LLaMA-2-7B-Finetuned/VL_LLaMA_2_7B_Finetuned.pth"
  ckpt_2: "Video-LLaMA-2-7B-Finetuned/AL_LLaMA_2_7B_Finetuned.pth"

  equip_audio_branch: true  # whether to equip the audio branch
  fusion_head_layers: 2
  max_frame_pos: 32
  fusion_header_type: "seqTransf"
# Dataset settings. Each processor carries its own `train` mapping; nesting
# them keeps the two `train` / `name` keys from colliding as duplicates.
# NOTE(review): nesting reconstructed from key order; confirm that `n_frms`
# and `image_size` belong to vis_processor.train.
datasets:
  webvid:
    vis_processor:
      train:
        name: "alpro_video_eval"
        n_frms: 8
        image_size: 224
    text_processor:
      train:
        name: "blip_caption"
# Run settings: selects the task to execute.
run:
  task: video_text_pretrain