{ "_name_or_path": "latif98/videomae-base-finetuned-isl-numbers", "architectures": [ "VideoMAEForVideoClassification" ], "attention_probs_dropout_prob": 0.0, "decoder_hidden_size": 384, "decoder_intermediate_size": 1536, "decoder_num_attention_heads": 6, "decoder_num_hidden_layers": 4, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "0", "1": "1", "2": "2", "3": "3", "4": "4", "5": "5", "6": "6", "7": "7", "8": "8", "9": "9", "10": "10", "11": "A", "12": "B", "13": "C", "14": "D", "15": "E", "16": "F", "17": "G", "18": "H", "19": "I", "20": "J", "21": "K", "22": "L", "23": "M", "24": "N", "25": "O", "26": "P", "27": "Q", "28": "R", "29": "S", "30": "T", "31": "U", "32": "V", "33": "W", "34": "X", "35": "Y", "36": "Z" }, "image_size": 224, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": 0, "1": 1, "10": 10, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7, "8": 8, "9": 9, "A": 11, "B": 12, "C": 13, "D": 14, "E": 15, "F": 16, "G": 17, "H": 18, "I": 19, "J": 20, "K": 21, "L": 22, "M": 23, "N": 24, "O": 25, "P": 26, "Q": 27, "R": 28, "S": 29, "T": 30, "U": 31, "V": 32, "W": 33, "X": 34, "Y": 35, "Z": 36 }, "layer_norm_eps": 1e-12, "model_type": "videomae", "norm_pix_loss": true, "num_attention_heads": 12, "num_channels": 3, "num_frames": 16, "num_hidden_layers": 12, "patch_size": 16, "problem_type": "single_label_classification", "qkv_bias": true, "torch_dtype": "float32", "transformers_version": "4.40.0", "tubelet_size": 2, "use_mean_pooling": false }