EduardoPacheco committed on
Commit
b979318
1 Parent(s): 80a16fe

Upload GroundingDinoForObjectDetection

Browse files
Files changed (2) hide show
  1. config.json +22 -21
  2. model.safetensors +3 -0
config.json CHANGED
@@ -2,20 +2,28 @@
2
  "activation_dropout": 0.0,
3
  "activation_function": "relu",
4
  "architectures": [
5
- "GroundingDINOForObjectDetection"
6
  ],
7
  "attention_dropout": 0.0,
8
  "auxiliary_loss": false,
9
- "backbone": "swin",
10
  "backbone_config": {
 
11
  "depths": [
12
  2,
13
  2,
14
  18,
15
  2
16
  ],
 
17
  "embed_dim": 128,
 
 
 
 
18
  "image_size": 384,
 
 
 
19
  "model_type": "swin",
20
  "num_heads": [
21
  4,
@@ -23,6 +31,7 @@
23
  16,
24
  32
25
  ],
 
26
  "out_features": [
27
  "stage2",
28
  "stage3",
@@ -33,19 +42,20 @@
33
  3,
34
  4
35
  ],
 
 
 
36
  "window_size": 12
37
  },
38
- "bbox_cost": 5,
39
- "bbox_loss_coefficient": 5,
40
- "class_cost": 1,
41
  "d_model": 256,
42
  "decoder_attention_heads": 8,
43
  "decoder_bbox_embed_share": true,
44
  "decoder_ffn_dim": 2048,
45
  "decoder_layers": 6,
46
  "decoder_n_points": 4,
47
- "dice_loss_coefficient": 1,
48
- "dilation": false,
49
  "disable_custom_kernels": false,
50
  "dropout": 0.1,
51
  "embedding_init_target": true,
@@ -53,35 +63,26 @@
53
  "encoder_ffn_dim": 2048,
54
  "encoder_layers": 6,
55
  "encoder_n_points": 4,
56
- "eos_coefficient": 0.1,
57
  "focal_alpha": 0.25,
58
  "fusion_dropout": 0.0,
59
  "fusion_droppath": 0.1,
60
- "giou_cost": 2,
61
- "giou_loss_coefficient": 2,
 
62
  "is_encoder_decoder": true,
63
- "mask_loss_coefficient": 1,
64
- "max_position_embeddings": 1024,
65
  "max_text_len": 256,
66
  "model_type": "grounding-dino",
67
- "num_channels": 3,
68
  "num_feature_levels": 4,
69
  "num_queries": 900,
70
  "position_embedding_type": "sine",
71
  "positional_embedding_temperature": 20,
72
  "query_dim": 4,
73
- "sub_sentence_present": true,
74
  "text_backbone_config": {
75
  "model_type": "grounding-dino-text-prenet"
76
  },
77
  "text_enhancer_dropout": 0.0,
78
  "torch_dtype": "float32",
79
- "transformers_version": "4.33.0.dev0",
80
  "two_stage": true,
81
- "two_stage_bbox_embed_share": false,
82
- "two_stage_class_embed_share": false,
83
- "two_stage_num_proposals": 900,
84
- "use_pretrained_backbone": true,
85
- "use_timm_backbone": false,
86
- "with_box_refine": true
87
  }
 
2
  "activation_dropout": 0.0,
3
  "activation_function": "relu",
4
  "architectures": [
5
+ "GroundingDinoForObjectDetection"
6
  ],
7
  "attention_dropout": 0.0,
8
  "auxiliary_loss": false,
 
9
  "backbone_config": {
10
+ "attention_probs_dropout_prob": 0.0,
11
  "depths": [
12
  2,
13
  2,
14
  18,
15
  2
16
  ],
17
+ "drop_path_rate": 0.1,
18
  "embed_dim": 128,
19
+ "encoder_stride": 32,
20
+ "hidden_act": "gelu",
21
+ "hidden_dropout_prob": 0.0,
22
+ "hidden_size": 1024,
23
  "image_size": 384,
24
+ "initializer_range": 0.02,
25
+ "layer_norm_eps": 1e-05,
26
+ "mlp_ratio": 4.0,
27
  "model_type": "swin",
28
  "num_heads": [
29
  4,
 
31
  16,
32
  32
33
  ],
34
+ "num_layers": 4,
35
  "out_features": [
36
  "stage2",
37
  "stage3",
 
42
  3,
43
  4
44
  ],
45
+ "patch_size": 4,
46
+ "qkv_bias": true,
47
+ "use_absolute_embeddings": false,
48
  "window_size": 12
49
  },
50
+ "bbox_cost": 5.0,
51
+ "bbox_loss_coefficient": 5.0,
52
+ "class_cost": 1.0,
53
  "d_model": 256,
54
  "decoder_attention_heads": 8,
55
  "decoder_bbox_embed_share": true,
56
  "decoder_ffn_dim": 2048,
57
  "decoder_layers": 6,
58
  "decoder_n_points": 4,
 
 
59
  "disable_custom_kernels": false,
60
  "dropout": 0.1,
61
  "embedding_init_target": true,
 
63
  "encoder_ffn_dim": 2048,
64
  "encoder_layers": 6,
65
  "encoder_n_points": 4,
 
66
  "focal_alpha": 0.25,
67
  "fusion_dropout": 0.0,
68
  "fusion_droppath": 0.1,
69
+ "giou_cost": 2.0,
70
+ "giou_loss_coefficient": 2.0,
71
+ "init_std": 0.02,
72
  "is_encoder_decoder": true,
 
 
73
  "max_text_len": 256,
74
  "model_type": "grounding-dino",
 
75
  "num_feature_levels": 4,
76
  "num_queries": 900,
77
  "position_embedding_type": "sine",
78
  "positional_embedding_temperature": 20,
79
  "query_dim": 4,
 
80
  "text_backbone_config": {
81
  "model_type": "grounding-dino-text-prenet"
82
  },
83
  "text_enhancer_dropout": 0.0,
84
  "torch_dtype": "float32",
85
+ "transformers_version": "4.36.0.dev0",
86
  "two_stage": true,
87
+ "two_stage_bbox_embed_share": false
 
 
 
 
 
88
  }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78cf02dda8891982a76e42194e84d7173329c2ef116937fa65bf6723c74fa89d
3
+ size 935754584