# Pose inferencing
import mmpose
from mmpose.apis import MMPoseInferencer

# Ultralytics
from ultralytics import YOLO
import torch

# Gradio
import gradio as gr

# System and files
import os
import glob
import uuid

# Image manipulation
import numpy as np
import cv2

print("[INFO]: Imported modules!")

# Build the inferencers once at startup so every request reuses them
human = MMPoseInferencer("human")
hand = MMPoseInferencer("hand")
human3d = MMPoseInferencer(pose3d="human3d")
track_model = YOLO("yolov8n.pt")  # Load an official Ultralytics detection model

# Defining inferencer models to look up in functions
inferencers = {
    "Estimate human 2d poses": human,
    "Estimate human 2d hand poses": hand,
    "Estimate human 3d poses": human3d,
    "Detect and track": track_model,
}

print("[INFO]: Downloaded models!")


def tracking(video, model, boxes=True):
    print("[INFO] Starting tracking!")
    # https://docs.ultralytics.com/modes/predict/
    # `model` is a bound Ultralytics method such as `track_model.track`;
    # for a video path it returns one Results object per frame.
    annotated_frames = model(video, boxes=boxes)
    return annotated_frames


def show_tracking(video_content):
    video = cv2.VideoCapture(video_content)

    # Track
    video_track = tracking(video_content, track_model.track)

    # Prepare to save video
    out_file = "track.mp4"
    print("[INFO]: TRACK", out_file)

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4 video
    fps = video.get(cv2.CAP_PROP_FPS)
    height, width, _ = video_track[0].orig_img.shape
    size = (width, height)

    out_track = cv2.VideoWriter(out_file, fourcc, fps, size)

    # Go through the frames and write them to the output video
    for frame_track in video_track:
        result_track = frame_track.plot()  # BGR numpy array with predictions drawn
        out_track.write(result_track)
    print("[INFO] Done with frames")

    out_track.release()
    video.release()
    cv2.destroyAllWindows()  # Closing window

    return out_file


def pose3d(video):
    # Write visualizations into a unique directory next to the uploaded video
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join(os.path.dirname(video), add_dir)
    os.makedirs(vis_out_dir)
    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)

    result_generator = human3d(
        video,
        vis_out_dir=vis_out_dir,
        thickness=2,
        return_vis=True,
        rebase_keypoint_height=True,
        device="cuda",
    )
    # Consume the generator so the visualization video gets written to disk
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4")) + glob.glob(os.path.join(vis_out_dir, "*.webm"))
    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
    # Return the first rendered video found
    return out_file[0]


def pose2d(video):
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join(os.path.dirname(video), add_dir)
    os.makedirs(vis_out_dir)
    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)

    result_generator = human(
        video,
        vis_out_dir=vis_out_dir,
        return_vis=True,
        thickness=2,
        rebase_keypoint_height=True,
        # kpt_thr=kpt_thr,  # the UI sliders are not wired in yet; see the sketch at the end of the script
        device="cuda",
    )
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4")) + glob.glob(os.path.join(vis_out_dir, "*.webm"))
    # Return the first rendered video found
    return out_file[0]
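
# ---------------------------------------------------------------------------
# pose2d, pose3d, and pose2dhand (below) all repeat the same bookkeeping:
# create a unique output directory, drain the inferencer generator, then glob
# for the rendered video. A minimal refactor sketch using only the calls
# already made above; the helper name `run_pose_inferencer` is ours and is
# not wired into the UI.
# ---------------------------------------------------------------------------
def run_pose_inferencer(inferencer, video, **call_kwargs):
    # Unique directory next to the uploaded video, as in pose2d/pose3d
    vis_out_dir = os.path.join(os.path.dirname(video), str(uuid.uuid4()))
    os.makedirs(vis_out_dir)

    # Drain the generator so MMPose writes the visualization to disk
    results = list(inferencer(video, vis_out_dir=vis_out_dir, **call_kwargs))

    # MMPose writes .mp4 or .webm depending on the available codec
    out_files = glob.glob(os.path.join(vis_out_dir, "*.mp4")) + glob.glob(os.path.join(vis_out_dir, "*.webm"))
    return out_files[0]

# Example call (equivalent to pose2dhand below):
# out = run_pose_inferencer(hand, video, return_vis=True, thickness=2, device="cuda")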

def pose2dhand(video):
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join(os.path.dirname(video), add_dir)
    os.makedirs(vis_out_dir)
    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)

    result_generator = hand(
        video,
        vis_out_dir=vis_out_dir,
        return_vis=True,
        thickness=2,
        rebase_keypoint_height=True,
        device="cuda",
    )
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4")) + glob.glob(os.path.join(vis_out_dir, "*.webm"))
    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
    # Return the first rendered video found
    return out_file[0]


if __name__ == "__main__":
    with gr.Blocks() as demo:
        with gr.Column():
            with gr.Tab("Upload video"):
                with gr.Row():
                    with gr.Column():
                        video_input = gr.Video(source="upload", type="filepath", height=512)
                        # Keypoint confidence slider (see the wiring sketch at the end of the script)
                        file_kpthr = gr.Slider(
                            0, 1, value=0.3, step=0.05,
                            label="Keypoint threshold",
                            info="Minimum confidence for a keypoint to be drawn",
                        )
                        submit_pose_file = gr.Button("Make 2d pose estimation")
                        submit_pose3d_file = gr.Button("Make 3d pose estimation")
                        submit_hand_file = gr.Button("Make 2d hand estimation")
                        submit_detect_file = gr.Button("Detect and track objects")
                        video_output1 = gr.PlayableVideo(height=512, label="Estimate human 2d poses", show_label=True)
                        video_output2 = gr.PlayableVideo(height=512, label="Estimate human 3d poses", show_label=True)
                        video_output3 = gr.PlayableVideo(height=512, label="Estimate human hand poses", show_label=True)
                        video_output4 = gr.Video(height=512)

            with gr.Tab("Record video with webcam"):
                with gr.Row():
                    with gr.Column():
                        webcam_input = gr.Video(source="webcam", height=512)
                        web_kpthr = gr.Slider(
                            0, 1, value=0.3, step=0.05,
                            label="Keypoint threshold",
                            info="Minimum confidence for a keypoint to be drawn",
                        )
                        submit_pose_web = gr.Button("Make 2d pose estimation")
                        submit_pose3d_web = gr.Button("Make 3d pose estimation")
                        submit_hand_web = gr.Button("Make 2d hand estimation")
                        submit_detect_web = gr.Button("Detect and track objects")
                        webcam_output1 = gr.PlayableVideo(height=512, label="Estimate human 2d poses", show_label=True)
                        webcam_output2 = gr.PlayableVideo(height=512, label="Estimate human 3d poses", show_label=True)
                        webcam_output3 = gr.PlayableVideo(height=512, label="Estimate human hand poses", show_label=True)
                        webcam_output4 = gr.Video(height=512)

        # From file
        submit_pose_file.click(fn=pose2d, inputs=video_input, outputs=video_output1)
        submit_pose3d_file.click(fn=pose3d, inputs=video_input, outputs=video_output2)
        submit_hand_file.click(fn=pose2dhand, inputs=video_input, outputs=video_output3)
        submit_detect_file.click(fn=show_tracking, inputs=video_input, outputs=video_output4)

        # From webcam
        submit_pose_web.click(fn=pose2d, inputs=webcam_input, outputs=webcam_output1)
        submit_pose3d_web.click(fn=pose3d, inputs=webcam_input, outputs=webcam_output2)
        submit_hand_web.click(fn=pose2dhand, inputs=webcam_input, outputs=webcam_output3)
        submit_detect_web.click(fn=show_tracking, inputs=webcam_input, outputs=webcam_output4)

    demo.launch(server_name="0.0.0.0", server_port=7860)
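
# ---------------------------------------------------------------------------
# Hooking up the keypoint-threshold sliders (sketch).
# The MMPose call in pose2d carries a commented-out `kpt_thr` argument, and
# the sliders (file_kpthr, web_kpthr) currently feed nothing. To honor them,
# each handler would take a second parameter and the .click() wiring would
# pass the slider component alongside the video, e.g. (untested sketch):
#
#   def pose2d(video, kpt_thr):
#       ...
#       result_generator = human(video, vis_out_dir=vis_out_dir,
#                                return_vis=True, thickness=2,
#                                kpt_thr=kpt_thr, device="cuda")
#       ...
#
#   submit_pose_file.click(fn=pose2d,
#                          inputs=[video_input, file_kpthr],
#                          outputs=video_output1)
# ---------------------------------------------------------------------------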