|
|
|
|
|
|
|
import mmpose |
|
from mmpose.apis import MMPoseInferencer |
|
|
|
|
|
from ultralytics import YOLO |
|
import torch |
|
|
|
|
|
import gradio as gr |
|
import moviepy.editor as moviepy |
|
|
|
|
|
|
|
import os |
|
import glob |
|
import uuid |
|
|
|
|
|
import numpy as np |
|
import cv2 |
|
|
|
print("[INFO]: Imported modules!")

# Pose inferencers are built once at import time; the handler functions
# below reference these module-level instances.
human = MMPoseInferencer("human")
hand = MMPoseInferencer("hand")
human3d = MMPoseInferencer(pose3d="human3d")

# YOLO model loaded from the 'yolov8n.pt' weights, used by show_tracking.
track_model = YOLO('yolov8n.pt')

# Task label -> model instance serving that task.
inferencers = {
    "Estimate human 2d poses": human,
    "Estimate human 2d hand poses": hand,
    "Estimate human 3d poses": human3d,
    "Detect and track": track_model,
}

print("[INFO]: Downloaded models!")
|
|
|
def check_extension(video: str) -> str:
    """Return a path to an ``.mp4`` version of *video*.

    Files that already carry an ``.mp4`` extension (compared
    case-insensitively) are returned untouched. Anything else is re-encoded
    with moviepy into a sibling file that has the same base name and an
    ``.mp4`` suffix.

    Args:
        video: Path to the input video file.

    Returns:
        Path to the ``.mp4`` file (the input path itself when no conversion
        was needed).
    """
    file_name, file_extension = os.path.splitext(video)

    # Compare by value: an identity check (`is not`) against a string literal
    # is effectively always true and would re-encode every input.
    if file_extension.lower() != ".mp4":
        target = file_name + ".mp4"
        # The context manager closes the reader once the file is written.
        with moviepy.VideoFileClip(video) as clip:
            clip.write_videofile(target)
        video = target

    return video
|
|
|
|
|
|
|
|
|
|
|
def tracking(video, model, boxes=True):
    """Run *model* on *video* and hand back whatever it returns.

    Args:
        video: Input forwarded as the first positional argument to *model*.
        model: Callable invoked as ``model(video, boxes=boxes)``.
        boxes: Forwarded unchanged as the ``boxes`` keyword argument.

    Returns:
        The return value of the *model* call.
    """
    for message in ("[INFO] Loading model...", "[INFO] Starting tracking!"):
        print(message)

    return model(video, boxes=boxes)
|
|
|
def show_tracking(video_content):
    """Track objects in a video and write an annotated copy to disk.

    Args:
        video_content: Path to the input video file.

    Returns:
        Path of the annotated video ("track.mp4").
    """
    # The capture is only needed to read the source frame rate, so release
    # it as soon as that value has been fetched.
    video = cv2.VideoCapture(video_content)
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
    finally:
        video.release()

    video_track = tracking(video_content, track_model.track)

    out_file = "track.mp4"
    print("[INFO]: TRACK", out_file)

    # Read the frame size from the per-frame result itself. Per the
    # ultralytics Results API, indexing a result with [0] narrows it to its
    # first detection (and presumably fails on frames without detections),
    # so the result object is used directly here and when plotting.
    height, width, _ = video_track[0].orig_img.shape
    size = (width, height)

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out_track = cv2.VideoWriter(out_file, fourcc, fps, size)
    try:
        # Write every annotated frame, not just the last one.
        for frame_track in video_track:
            out_track.write(frame_track.plot())
    finally:
        # Always finalise the output file, even if plotting fails midway.
        out_track.release()

    print("[INFO] Done with frames")

    cv2.destroyAllWindows()

    return out_file
|
|
|
|
|
def pose3d(video):
    """Run 3D human pose estimation on a video and return the rendered file.

    Args:
        video: Path to the input video; passed through ``check_extension``
            first so the inferencer receives an ``.mp4`` path.

    Returns:
        Path of the ``.mp4`` visualisation found in the run's output
        directory, or an empty string when none was written.
    """
    video = check_extension(video)

    # Each call gets its own uniquely named output directory next to the
    # input video, so the glob below only sees this run's files.
    vis_out_dir = os.path.join(os.path.dirname(video), str(uuid.uuid4()))
    os.makedirs(vis_out_dir)

    result_generator = human3d(
        video,
        vis_out_dir=vis_out_dir,
        thickness=2,
        return_vis=True,
        rebase_keypoint_height=True,
        device="cuda",
    )

    # Drain the generator so every frame is processed, without keeping the
    # per-frame results (and their visualisations) in memory.
    for _ in result_generator:
        pass

    out_files = sorted(glob.glob(os.path.join(vis_out_dir, "*.mp4")))

    # Return a single valid path rather than concatenating several matches.
    return out_files[0] if out_files else ""
|
|
|
|
|
def pose2d(video, kpt_threshold):
    """Run 2D human pose estimation on a video and return the rendered file.

    Args:
        video: Path to the input video; passed through ``check_extension``
            first so the inferencer receives an ``.mp4`` path.
        kpt_threshold: Value forwarded to the inferencer as ``kpt_thr``.

    Returns:
        Path of the ``.mp4`` visualisation found in the run's output
        directory, or an empty string when none was written.
    """
    video = check_extension(video)

    # Each call gets its own uniquely named output directory next to the
    # input video, so the glob below only sees this run's files.
    vis_out_dir = os.path.join(os.path.dirname(video), str(uuid.uuid4()))
    os.makedirs(vis_out_dir)

    result_generator = human(
        video,
        vis_out_dir=vis_out_dir,
        return_vis=True,
        thickness=2,
        rebase_keypoint_height=True,
        kpt_thr=kpt_threshold,
        device="cuda",
    )

    # Drain the generator so every frame is processed, without keeping the
    # per-frame results (and their visualisations) in memory.
    for _ in result_generator:
        pass

    out_files = sorted(glob.glob(os.path.join(vis_out_dir, "*.mp4")))

    # Return a single valid path rather than concatenating several matches.
    return out_files[0] if out_files else ""
|
|
|
|
|
def pose2dhand(video, kpt_threshold):
    """Run 2D hand pose estimation on a video and return the rendered file.

    Args:
        video: Path to the input video; passed through ``check_extension``
            first so the inferencer receives an ``.mp4`` path.
        kpt_threshold: Value forwarded to the inferencer as ``kpt_thr``.

    Returns:
        Path of the ``.mp4`` visualisation found in the run's output
        directory, or an empty string when none was written.
    """
    video = check_extension(video)

    # Each call gets its own uniquely named output directory next to the
    # input video, so the glob below only sees this run's files.
    vis_out_dir = os.path.join(os.path.dirname(video), str(uuid.uuid4()))
    os.makedirs(vis_out_dir)

    result_generator = hand(
        video,
        vis_out_dir=vis_out_dir,
        return_vis=True,
        thickness=2,
        rebase_keypoint_height=True,
        kpt_thr=kpt_threshold,
        device="cuda",
    )

    # Drain the generator so every frame is processed, without keeping the
    # per-frame results (and their visualisations) in memory.
    for _ in result_generator:
        pass

    out_files = sorted(glob.glob(os.path.join(vis_out_dir, "*.mp4")))

    # Return a single valid path rather than concatenating several matches.
    return out_files[0] if out_files else ""
|
|
|
def run_UI():
    """Build the Gradio interface and serve it on 0.0.0.0:7860.

    The UI has two tabs, one for uploaded videos and one for webcam
    recordings. Each tab offers four buttons wired to ``pose2d``, ``pose3d``,
    ``pose2dhand`` and ``show_tracking``, with one output video per task.
    """
    with gr.Blocks() as demo:
        with gr.Column():
            with gr.Tab("Upload video"):
                with gr.Row():
                    with gr.Column():
                        video_input = gr.Video(source="upload", type="filepath", height=612)

                        # The slider value is passed to the pose functions as
                        # the keypoint threshold, so it uses the same 0.1-1
                        # range and 0.3 start value as the webcam tab.
                        file_kpthr = gr.Slider(
                            minimum=0.1,
                            maximum=1,
                            step=1e-3,
                            value=0.3,
                            label='Keypoint threshold',
                        )

                        submit_pose_file = gr.Button("Make 2d pose estimation")
                        submit_pose3d_file = gr.Button("Make 3d pose estimation")
                        submit_hand_file = gr.Button("Make 2d hand estimation")
                        submit_detect_file = gr.Button("Detect and track objects")
                    with gr.Column():
                        video_output1 = gr.PlayableVideo(height=512, label="Estimate human 2d poses", show_label=True)
                        video_output2 = gr.PlayableVideo(height=512, label="Estimate human 3d poses", show_label=True)
                        video_output3 = gr.PlayableVideo(height=512, label="Estimate human hand poses", show_label=True)
                        video_output4 = gr.Video(height=512, label="Detection and tracking", show_label=True, format="mp4")

            with gr.Tab("Record video with webcam"):
                # gr.Column must be instantiated to be usable as a context
                # manager; the bare class fails in the `with` statement.
                with gr.Column():
                    with gr.Row():
                        with gr.Column():
                            webcam_input = gr.Video(source="webcam", height=612)

                            # The step must be smaller than the 0.1-1 range
                            # for the slider to be adjustable.
                            web_kpthr = gr.Slider(
                                minimum=0.1,
                                maximum=1,
                                step=1e-3,
                                value=0.3,
                                label='Keypoint threshold',
                            )

                            submit_pose_web = gr.Button("Make 2d pose estimation")
                            submit_pose3d_web = gr.Button("Make 3d pose estimation")
                            submit_hand_web = gr.Button("Make 2d hand estimation")
                            submit_detect_web = gr.Button("Detect and track objects")
                    with gr.Row():
                        webcam_output1 = gr.PlayableVideo(height=512, label="Estimate human 2d poses", show_label=True)
                        webcam_output2 = gr.PlayableVideo(height=512, label="Estimate human 3d poses", show_label=True)
                        webcam_output3 = gr.PlayableVideo(height=512, label="Estimate human hand position", show_label=True)
                        webcam_output4 = gr.Video(height=512, label="Detection and tracking", show_label=True, format="mp4")

        # Event wiring for the "Upload video" tab.
        submit_pose_file.click(fn=pose2d,
                               inputs=[video_input, file_kpthr],
                               outputs=video_output1)

        submit_pose3d_file.click(fn=pose3d,
                                 inputs=video_input,
                                 outputs=video_output2)

        submit_hand_file.click(fn=pose2dhand,
                               inputs=[video_input, file_kpthr],
                               outputs=video_output3)

        submit_detect_file.click(fn=show_tracking,
                                 inputs=video_input,
                                 outputs=video_output4)

        # Event wiring for the "Record video with webcam" tab.
        submit_pose_web.click(fn=pose2d,
                              inputs=[webcam_input, web_kpthr],
                              outputs=webcam_output1)

        submit_pose3d_web.click(fn=pose3d,
                                inputs=webcam_input,
                                outputs=webcam_output2)

        submit_hand_web.click(fn=pose2dhand,
                              inputs=[webcam_input, web_kpthr],
                              outputs=webcam_output3)

        submit_detect_web.click(fn=show_tracking,
                                inputs=webcam_input,
                                outputs=webcam_output4)

    demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
# Start the demo only when this file is executed as a script.
if __name__ == "__main__":
    run_UI()
|
|