Commit 7652cc1 by xmrt (1 parent: b675724)

Files changed (1):
  1. main.py  +92 -82

main.py CHANGED
@@ -182,115 +182,125 @@ def pose2dhand(video, kpt_threshold):
 
     return "".join(out_file)
 
-def run_UI():
-    with gr.Blocks() as demo:
-        with gr.Column():
-            with gr.Tab("Upload video"):
-                with gr.Column():
-                    with gr.Row():
-                        with gr.Column():
-                            video_input = gr.Video(source="upload", type="filepath", height=612)
-                            # Insert slider with kpt_thr
-                            file_kpthr = gr.Slider(0, 1, value=0.3, label='Keypoint threshold')
-
-                            submit_pose_file = gr.Button("Make 2d pose estimation", variant="primary")
-                            submit_pose3d_file = gr.Button("Make 3d pose estimation", variant="primary")
-                            submit_hand_file = gr.Button("Make 2d hand estimation", variant="primary")
-                            submit_detect_file = gr.Button("Detect and track objects", variant="primary")
-
-                    with gr.Row():
-                        video_output1 = gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True)
-                        video_output2 = gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)
-                        video_output3 = gr.PlayableVideo(height=512, label = "Estimate human hand poses", show_label=True)
-                        video_output4 = gr.Video(height=512, label = "Detection and tracking", show_label=True, format="mp4")
-
-            with gr.Tab("Record video with webcam"):
-
-                with gr.Column():
-                    with gr.Row():
-                        with gr.Column():
-                            webcam_input = gr.Video(source="webcam", height=612)
-
-                            web_kpthr = gr.Slider(0, 1, value=0.3, label='Keypoint threshold')
-
-                            submit_pose_web = gr.Button("Make 2d pose estimation", variant="primary")
-                            submit_pose3d_web = gr.Button("Make 3d pose estimation", variant="primary")
-                            submit_hand_web = gr.Button("Make 2d hand estimation", variant="primary")
-                            submit_detect_web = gr.Button("Detect and track objects", variant="primary")
-                    with gr.Row():
-                        webcam_output1 = gr.PlayableVideo(height=716, label = "Estimate human 2d poses", show_label=True)
-                        webcam_output2 = gr.PlayableVideo(height=716, label = "Estimate human 3d poses", show_label=True)
-                        webcam_output3 = gr.PlayableVideo(height=716, label = "Estimate human hand position", show_label=True)
-                        webcam_output4 = gr.Video(height=716, label = "Detection and tracking", show_label=True, format="mp4")
-
-            with gr.Tab("General information"):
-                gr.Markdown("""
-\n # Information about the models
-
-\n ## Pose models:
-
-\n All the pose estimation models come from the library [MMpose](https://github.com/open-mmlab/mmpose). It is a library for human pose estimation that provides pre-trained models for 2D and 3D pose estimation.
-
-\n The 2D pose model is used for estimating the 2D coordinates of human body joints from an image or a video frame. The model uses a convolutional neural network (CNN) to predict the joint locations and their confidence scores.
-
-\n The 2D hand model is a specialized version of the 2D pose model that is designed for hand pose estimation. It uses a similar CNN architecture to the 2D pose model but is trained specifically for detecting the joints in the hand.
-
-\n The 3D pose model is used for estimating the 3D coordinates of human body joints from an image or a video frame. The model uses a combination of 2D pose estimation and depth estimation to infer the 3D joint locations.
-
-\n
-
-\n ## Detection and tracking:
-
-\n The tracking method in Ultralytics' YOLOv8 model is used for object tracking in videos. It takes a video file or a camera stream as input and returns the tracked objects in each frame. The method uses the COCO dataset classes for object detection and tracking.
-
-\n The COCO dataset contains 80 classes of objects such as person, car, bicycle, etc. See https://docs.ultralytics.com/datasets/detect/coco/ for all available classes. The tracking method uses the COCO classes to detect and track the objects in the video frames. The tracked objects are represented as bounding boxes with labels indicating the class of the object.""")
-                gr.Markdown("You can load the keypoints in Python in the following way: ")
-                gr.Code(
-                    value="""def hello_world():
-    return "Hello, world!"
-
-print(hello_world())""",
-                    language="python",
-                    interactive=True,
-                    show_label=False,
-                )
-
+block = gr.Blocks()
+with block:
+    with gr.Column():
+        with gr.Tab("Upload video"):
+            with gr.Column():
+                with gr.Row():
+                    with gr.Column():
+                        video_input = gr.Video(source="upload", type="filepath", height=612)
+                        # Insert slider with kpt_thr
+                        file_kpthr = gr.Slider(0, 1, value=0.3, label='Keypoint threshold')
+
+                        submit_pose_file = gr.Button("Make 2d pose estimation", variant="primary")
+                        submit_pose3d_file = gr.Button("Make 3d pose estimation", variant="primary")
+                        submit_hand_file = gr.Button("Make 2d hand estimation", variant="primary")
+                        submit_detect_file = gr.Button("Detect and track objects", variant="primary")
+
+                with gr.Row():
+                    video_output1 = gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True)
+                    video_output2 = gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)
+                    video_output3 = gr.PlayableVideo(height=512, label = "Estimate human hand poses", show_label=True)
+                    video_output4 = gr.Video(height=512, label = "Detection and tracking", show_label=True, format="mp4")
+
+        with gr.Tab("Record video with webcam"):
+
+            with gr.Column():
+                with gr.Row():
+                    with gr.Column():
+                        webcam_input = gr.Video(source="webcam", height=612)
+
+                        web_kpthr = gr.Slider(0, 1, value=0.3, label='Keypoint threshold')
+
+                        submit_pose_web = gr.Button("Make 2d pose estimation", variant="primary")
+                        submit_pose3d_web = gr.Button("Make 3d pose estimation", variant="primary")
+                        submit_hand_web = gr.Button("Make 2d hand estimation", variant="primary")
+                        submit_detect_web = gr.Button("Detect and track objects", variant="primary")
+                with gr.Row():
+                    webcam_output1 = gr.PlayableVideo(height=716, label = "Estimate human 2d poses", show_label=True)
+                    webcam_output2 = gr.PlayableVideo(height=716, label = "Estimate human 3d poses", show_label=True)
+                    webcam_output3 = gr.PlayableVideo(height=716, label = "Estimate human hand position", show_label=True)
+                    webcam_output4 = gr.Video(height=716, label = "Detection and tracking", show_label=True, format="mp4")
+
+        with gr.Tab("General information"):
+            gr.Markdown("""
+\n # Information about the models
+
+\n ## Pose models:
+
+\n All the pose estimation models come from the library [MMpose](https://github.com/open-mmlab/mmpose). It is a library for human pose estimation that provides pre-trained models for 2D and 3D pose estimation.
+
+\n The 2D pose model is used for estimating the 2D coordinates of human body joints from an image or a video frame. The model uses a convolutional neural network (CNN) to predict the joint locations and their confidence scores.
+
+\n The 2D hand model is a specialized version of the 2D pose model that is designed for hand pose estimation. It uses a similar CNN architecture to the 2D pose model but is trained specifically for detecting the joints in the hand.
+
+\n The 3D pose model is used for estimating the 3D coordinates of human body joints from an image or a video frame. The model uses a combination of 2D pose estimation and depth estimation to infer the 3D joint locations.
+
+\n
+
+\n ## Detection and tracking:
+
+\n The tracking method in Ultralytics' YOLOv8 model is used for object tracking in videos. It takes a video file or a camera stream as input and returns the tracked objects in each frame. The method uses the COCO dataset classes for object detection and tracking.
+
+\n The COCO dataset contains 80 classes of objects such as person, car, bicycle, etc. See https://docs.ultralytics.com/datasets/detect/coco/ for all available classes. The tracking method uses the COCO classes to detect and track the objects in the video frames. The tracked objects are represented as bounding boxes with labels indicating the class of the object.""")
+            gr.Markdown("You can load the keypoints in Python in the following way: ")
+            gr.Code(
+                value="""def hello_world():
+    return "Hello, world!"
+
+print(hello_world())""",
+                language="python",
+                interactive=True,
+                show_label=False,
+            )
+
 
         # From file
         submit_pose_file.click(fn=pose2d,
                                inputs= [video_input, file_kpthr],
-                               outputs = video_output1)
+                               outputs = video_output1,
+                               queue=False)
 
         submit_pose3d_file.click(fn=pose3d,
                                  inputs= video_input,
-                                 outputs = video_output2)
+                                 outputs = video_output2,
+                                 queue=False)
 
         submit_hand_file.click(fn=pose2dhand,
                                inputs= [video_input, file_kpthr],
-                               outputs = video_output3)
+                               outputs = video_output3,
+                               queue=False)
 
         submit_detect_file.click(fn=show_tracking,
                                  inputs= video_input,
-                                 outputs = video_output4)
+                                 outputs = video_output4,
+                                 queue=False)
 
         # Web
         submit_pose_web.click(fn=pose2d,
                               inputs = [webcam_input, web_kpthr],
-                              outputs = webcam_output1)
+                              outputs = webcam_output1,
+                              queue=False)
 
         submit_pose3d_web.click(fn=pose3d,
                                 inputs= webcam_input,
-                                outputs = webcam_output2)
+                                outputs = webcam_output2,
+                                queue=False)
 
         submit_hand_web.click(fn=pose2dhand,
                               inputs= [webcam_input, web_kpthr],
-                              outputs = webcam_output3)
+                              outputs = webcam_output3,
+                              queue=False)
 
         submit_detect_web.click(fn=show_tracking,
                                 inputs= webcam_input,
-                                outputs = webcam_output4)
+                                outputs = webcam_output4,
+                                queue=False)
 
-    demo.launch(server_name="0.0.0.0", server_port=7860)
 
 if __name__ == "__main__":
-    run_UI()
+    block.queue()
+    block.launch(server_name="0.0.0.0", server_port=7860)
+
+
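
In short, the hunk drops the run_UI() wrapper in favor of a module-level gr.Blocks() context, marks every .click() event with queue=False, and enables the app-level queue before launching. A minimal, self-contained sketch of that pattern, assuming the same Gradio 3.x API that main.py already uses; process_video is a hypothetical stand-in for handlers such as pose2d or show_tracking:

    # Sketch of the Blocks + queue pattern adopted by this commit (names are placeholders).
    import gradio as gr

    def process_video(video_path, kpt_thr):
        # Placeholder handler: a real one would run inference and return an output video path.
        return video_path

    block = gr.Blocks()

    with block:
        with gr.Column():
            video_input = gr.Video()
            kpt_slider = gr.Slider(0, 1, value=0.3, label="Keypoint threshold")
            run_button = gr.Button("Run", variant="primary")
            video_output = gr.PlayableVideo(label="Result", show_label=True)

        # queue=False lets this event bypass the request queue enabled below.
        run_button.click(fn=process_video,
                         inputs=[video_input, kpt_slider],
                         outputs=video_output,
                         queue=False)

    if __name__ == "__main__":
        block.queue()  # enable the app-level request queue
        block.launch(server_name="0.0.0.0", server_port=7860)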
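
The "General information" tab in the diff describes the MMPose models only in prose. For reference, a hedged sketch of how such models are typically driven through MMPose's high-level inferencer; this is not code from this commit, and it assumes the mmpose (>= 1.0) package is installed and that "input_video.mp4" is a placeholder path:

    # Sketch only: typical MMPose 1.x inference on a video.
    from mmpose.apis import MMPoseInferencer

    # "human" selects a default 2D body-keypoint model; pose3d="human3d" would
    # select a 3D lifting model instead.
    inferencer = MMPoseInferencer("human")

    # The inferencer yields one result dict per frame.
    for frame_result in inferencer("input_video.mp4", show=False):
        predictions = frame_result["predictions"]  # per-frame keypoints and confidence scores
        print(predictions)

The per-joint confidence scores in these predictions are what the kpt_threshold slider in the UI filters on.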
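
Likewise, the tracking text refers to the built-in tracker of Ultralytics YOLOv8. A hedged sketch of the usual call, again not part of this commit; it assumes the ultralytics package is installed, with "yolov8n.pt" as the smallest pretrained checkpoint and "video.mp4" as a placeholder input path:

    # Sketch only: Ultralytics YOLOv8 tracking over a video.
    from ultralytics import YOLO

    model = YOLO("yolov8n.pt")

    # persist=True keeps track IDs stable across frames; labels follow the COCO class set.
    results = model.track(source="video.mp4", persist=True)

    for frame in results:
        boxes = frame.boxes  # bounding boxes with class indices and track IDs
        print(boxes.cls, boxes.id)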