|
|
|
|
|
|
|
import mmpose |
|
from mmpose.apis import MMPoseInferencer |
|
|
|
|
|
from ultralytics import YOLO |
|
import torch |
|
|
|
|
|
import gradio as gr |
|
|
|
|
|
import os |
|
import glob |
|
import uuid |
|
|
|
|
|
import numpy as np |
|
import cv2 |
|
|
|
print("[INFO]: Imported modules!")

# Build one MMPose inferencer per task. Constructing these fetches the model
# weights if not cached (hence the "Downloaded models!" log line below).
human = MMPoseInferencer("human")  # 2D human body pose
hand = MMPoseInferencer("hand")  # 2D hand keypoints
human3d = MMPoseInferencer(pose3d="human3d")  # 3D human pose
track_model = YOLO('yolov8n.pt')  # YOLOv8-nano for detection + tracking

# Map UI task labels to the model that serves them.
# NOTE(review): the click handlers below reference the models directly, so
# this mapping looks unused in the visible code — confirm before removing.
inferencers = {"Estimate human 2d poses":human, "Estimate human 2d hand poses":hand, "Estimate human 3d poses":human3d, "Detect and track":track_model}

print("[INFO]: Downloaded models!")
|
|
|
def tracking(video, model, boxes=True):
    """Run *model* over *video* and return its results.

    Args:
        video: Input video path, forwarded unchanged to ``model``.
        model: Callable that performs detection/tracking (e.g. ``YOLO.track``).
        boxes: Whether bounding boxes should be rendered (default ``True``).

    Returns:
        Whatever ``model`` returns for the given video.
    """
    print("[INFO] Loading model...")
    print("[INFO] Starting tracking!")
    # Pure delegation — no post-processing happens here.
    return model(video, boxes=boxes)
|
|
|
def show_tracking(video_content):
    """Detect and track objects in a video and write an annotated copy.

    Args:
        video_content: Filesystem path of the input video.

    Returns:
        Path of the annotated output video ("track.mp4").
    """
    video = cv2.VideoCapture(video_content)

    # Run YOLO tracking over the whole clip; one result object per frame.
    video_track = tracking(video_content, track_model.track)

    out_file = "track.mp4"
    print("[INFO]: TRACK", out_file)

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    # Fall back to 30 fps when the container reports no frame rate (0.0),
    # which would otherwise produce an unplayable output file.
    fps = video.get(cv2.CAP_PROP_FPS) or 30
    # Frame size is taken from the first tracked frame's original image.
    height, width, _ = video_track[0][0].orig_img.shape
    size = (width, height)

    out_track = cv2.VideoWriter(out_file, fourcc, fps, size)
    try:
        for frame_track in video_track:
            # plot() renders boxes/track ids onto the frame (BGR ndarray).
            out_track.write(frame_track[0].plot())
    finally:
        # Release writer and reader even if a frame fails to encode.
        out_track.release()
        video.release()
        cv2.destroyAllWindows()

    # Fixed: this used to print once per frame inside the loop.
    print("[INFO] Done with frames")

    return out_file
|
|
|
|
|
def pose3d(video):
    """Estimate 3D human poses in *video* and return the rendered output.

    Args:
        video: Filesystem path of the input video.

    Returns:
        List of file paths produced in the visualisation directory.
    """
    # Unique per-request sub-directory next to the uploaded video, so
    # concurrent requests never overwrite each other's output.
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join(os.path.dirname(video), add_dir)
    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)

    # Fixed: device was hard-coded to "cuda", which crashes on CPU-only hosts.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    result_generator = human3d(video,
                               vis_out_dir=vis_out_dir,
                               thickness=2,
                               return_vis=True,
                               rebase_keypoint_height=True,
                               device=device)

    # The inferencer is lazy; exhaust it so the visualisation gets written.
    for _ in result_generator:
        pass

    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)

    return out_file
|
|
|
|
|
def pose2d(video):
    """Estimate 2D human poses in *video* and return the rendered output.

    Args:
        video: Filesystem path of the input video.

    Returns:
        List of file paths produced in the visualisation directory.
    """
    # Unique per-request sub-directory next to the uploaded video, so
    # concurrent requests never overwrite each other's output.
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join(os.path.dirname(video), add_dir)
    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)

    # Fixed: device was hard-coded to "cuda", which crashes on CPU-only hosts.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    result_generator = human(video,
                             vis_out_dir=vis_out_dir,
                             thickness=2,
                             return_vis=True,
                             rebase_keypoint_height=True,
                             device=device)

    # The inferencer is lazy; exhaust it so the visualisation gets written.
    for _ in result_generator:
        pass

    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)

    return out_file
|
|
|
|
|
def pose2dhand(video):
    """Estimate 2D hand poses in *video* and return the rendered output.

    Args:
        video: Filesystem path of the input video.

    Returns:
        List of file paths produced in the visualisation directory.
    """
    # Unique per-request sub-directory next to the uploaded video, so
    # concurrent requests never overwrite each other's output.
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join(os.path.dirname(video), add_dir)
    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)

    # Fixed: vis_out_dir was immediately clobbered with a second bare uuid4(),
    # sending output to a random directory under CWD instead of the path
    # computed (and logged) above — inconsistent with pose2d/pose3d.

    # Fixed: device was hard-coded to "cuda", which crashes on CPU-only hosts.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    result_generator = hand(video,
                            vis_out_dir=vis_out_dir,
                            return_vis=True,
                            thickness=2,
                            rebase_keypoint_height=True,
                            device=device)

    # The inferencer is lazy; exhaust it so the visualisation gets written.
    for _ in result_generator:
        pass

    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)

    return out_file
|
|
|
|
|
|
|
# Gradio UI: two tabs (file upload / webcam recording), each with one button
# per task and one video player per task's output.
with gr.Blocks() as demo:
    with gr.Column():
        with gr.Tab("Upload video"):
            with gr.Row():
                with gr.Column():
                    video_input = gr.Video(source="upload", type="filepath", height=512)
                    # NOTE(review): the threshold sliders are never passed to
                    # the pose functions — currently display-only. Confirm
                    # whether they should be wired into the inferencer calls.
                    file_kpthr = gr.Slider(0, 1, value=0.3)
                    submit_pose_file = gr.Button("Make 2d pose estimation")
                    submit_pose3d_file = gr.Button("Make 3d pose estimation")
                    submit_hand_file = gr.Button("Make 2d hand estimation")
                    submit_detect_file = gr.Button("Detect and track objects")
                video_output1 = gr.Video(height=512)
                video_output2 = gr.Video(height=512)
                video_output3 = gr.Video(height=512)
                video_output4 = gr.Video(height=512)

        with gr.Tab("Record video with webcam"):
            with gr.Row():
                with gr.Column():
                    webcam_input = gr.Video(source="webcam", height=512)
                    web_kpthr = gr.Slider(0, 1, value=0.3)
                    submit_pose_web = gr.Button("Make 2d pose estimation")
                    submit_pose3d_web = gr.Button("Make 3d pose estimation")
                    submit_hand_web = gr.Button("Make 2d hand estimation")
                    submit_detect_web = gr.Button("Detect and track objects")
                webcam_output1 = gr.Video(height=512)
                webcam_output2 = gr.Video(height=512)
                webcam_output3 = gr.Video(height=512)
                webcam_output4 = gr.Video(height=512)

    # Upload tab wiring.
    submit_pose_file.click(fn=pose2d,
                           inputs=video_input,
                           outputs=video_output1)

    submit_pose3d_file.click(fn=pose3d,
                             inputs=video_input,
                             outputs=video_output2)

    submit_hand_file.click(fn=pose2dhand,
                           inputs=video_input,
                           outputs=video_output3)

    submit_detect_file.click(fn=show_tracking,
                             inputs=video_input,
                             outputs=video_output4)

    # Webcam tab wiring.
    # Fixed: hand and detect results previously rendered into the UPLOAD
    # tab's players (video_output3/video_output4); they now target the
    # webcam tab's own players.
    submit_pose_web.click(fn=pose2d,
                          inputs=webcam_input,
                          outputs=webcam_output1)

    submit_pose3d_web.click(fn=pose3d,
                            inputs=webcam_input,
                            outputs=webcam_output2)

    submit_hand_web.click(fn=pose2dhand,
                          inputs=webcam_input,
                          outputs=webcam_output3)

    submit_detect_web.click(fn=show_tracking,
                            inputs=webcam_input,
                            outputs=webcam_output4)

demo.launch(server_name="0.0.0.0", server_port=7860)
|
|