Spaces:

NN-BRD
/

MMpose

Build error

File size: 6,607 Bytes



# Pose inferencing
import mmpose
from mmpose.apis import MMPoseInferencer

# Ultralytics
from ultralytics import YOLO
import torch

# Gradio
import gradio as gr

# System and files
import os
import glob
import uuid

# Image manipulation
import numpy as np
import cv2

print("[INFO]: Imported modules!")

# MMPose inferencers, one per supported pose-estimation task
human = MMPoseInferencer("human")
hand = MMPoseInferencer("hand")
human3d = MMPoseInferencer(pose3d="human3d")

# Ultralytics: load an official Detect model for detection and tracking
track_model = YOLO('yolov8n.pt')

# Lookup table mapping each task label to the model that serves it
inferencers = {
    "Estimate human 2d poses": human,
    "Estimate human 2d hand poses": hand,
    "Estimate human 3d poses": human3d,
    "Detect and track": track_model,
}

print("[INFO]: Downloaded models!")

def tracking(video, model, boxes=True):
    """Run ``model`` on ``video`` and return whatever the model returns.

    Args:
        video: The source handed to the model as its first positional
            argument (the caller in this file passes a video file path).
        model: A callable invoked as ``model(video, boxes=boxes)``.
        boxes: Forwarded unchanged to the model as the ``boxes`` keyword.

    Returns:
        The model's return value, unmodified.
    """
    # Both status lines are emitted up front; no model is loaded here, the
    # caller supplies an already constructed one.
    for status in ("[INFO] Loading model...", "[INFO] Starting tracking!"):
        print(status)

    # https://docs.ultralytics.com/modes/predict/
    return model(video, boxes=boxes)

def show_tracking(video_content):
    """Track objects in a video and write the annotated frames to an MP4 file.

    Args:
        video_content: Path of the input video file.

    Returns:
        The path of the annotated output video (``"track.mp4"``).

    Raises:
        ValueError: If tracking produced no frames for the input.
    """
    # Track
    video_track = tracking(video_content, track_model.track)
    if not video_track:
        raise ValueError("Tracking returned no frames for the given video")

    # Prepare to save video
    out_file = "track.mp4"
    print("[INFO]: TRACK", out_file)

    # The capture is only needed to read the source frame rate; release it
    # even if reading the property fails.
    video = cv2.VideoCapture(video_content)
    try:
        # Fall back to a sane default when the container reports 0 FPS,
        # which would otherwise produce an unplayable file.
        fps = video.get(cv2.CAP_PROP_FPS) or 30.0
    finally:
        video.release()

    # Each element of video_track is the result for one frame. Indexing it
    # again with [0] would keep only the first detection and fail on frames
    # without any detection, so the per-frame result is used directly.
    height, width = video_track[0].orig_img.shape[:2]
    size = (width, height)

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4 video
    out_track = cv2.VideoWriter(out_file, fourcc, fps, size)
    try:
        # Write every annotated frame (previously only the last one was
        # written because the write call sat outside the loop).
        for frame_track in video_track:
            # plot() returns a BGR numpy array with the predictions drawn
            out_track.write(frame_track.plot())
    finally:
        out_track.release()
    print("[INFO] Done with frames")

    # No GUI windows are opened by this function, so there is nothing to
    # destroy afterwards.
    return out_file


def _run_pose_inferencer(inferencer, video):
    """Run ``inferencer`` on ``video`` and return the path of its visualization.

    Args:
        inferencer: Callable invoked as
            ``inferencer(video, vis_out_dir=..., thickness=2,
            rebase_keypoint_height=True, device="cuda")`` whose result is
            iterated to completion.
        video: Path of the input video file.

    Returns:
        Path (``str``) of the first file, in sorted order, found in the
        freshly created output directory.

    Raises:
        ValueError: If ``video`` is empty or ``None``.
        FileNotFoundError: If no file was written to the output directory.
    """
    if not video:
        raise ValueError("No input video was provided")

    # Unique directory next to the input so separate requests never share
    # output. os.path handles bare filenames (no directory part) correctly,
    # where joining on "/" would have pointed at the filesystem root.
    vis_out_dir = os.path.join(os.path.dirname(video), str(uuid.uuid4()))
    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)

    # NOTE(review): device is passed per call exactly as before; whether the
    # inferencer honours it at call time is not verified here.
    result_generator = inferencer(video,
                                  vis_out_dir=vis_out_dir,
                                  thickness=2,
                                  rebase_keypoint_height=True,
                                  device="cuda")

    # The results themselves are not used; the iterable is drained so that
    # processing runs to the end before the output directory is inspected.
    for _ in result_generator:
        pass

    out_files = sorted(glob.glob(os.path.join(vis_out_dir, "*")))
    if not out_files:
        raise FileNotFoundError(
            f"No visualization was written to {vis_out_dir}")

    # Return a single path rather than the glob list: the Gradio video
    # output these handlers feed expects one file path.
    out_file = out_files[0]
    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
    return out_file


def pose3d(video):
    """Estimate 3d human poses for ``video``; return the visualization path."""
    return _run_pose_inferencer(human3d, video)


def pose2d(video):
    """Estimate 2d human poses for ``video``; return the visualization path."""
    return _run_pose_inferencer(human, video)


def pose2dhand(video):
    """Estimate 2d hand poses for ``video``; return the visualization path."""
    return _run_pose_inferencer(hand, video)



# UI layout: two tabs (file upload and webcam recording), each with its own
# input video, four action buttons and its own output video.
with gr.Blocks() as demo:
    with gr.Column():
        with gr.Tab("Upload video"):
            with gr.Row():
                with gr.Column():
                    video_input = gr.Video(source="upload", type="filepath", height=512)

                    submit_pose_file = gr.Button("Make 2d pose estimation")
                    submit_pose3d_file = gr.Button("Make 3d pose estimation")
                    submit_hand_file = gr.Button("Make 2d hand estimation")
                    submit_detect_file = gr.Button("Detect and track objects")

                video_output = gr.Video(height=512)

        with gr.Tab("Record video with webcam"):
            with gr.Row():
                with gr.Column():
                    webcam_input = gr.Video(source="webcam", height=512)

                    submit_pose_web = gr.Button("Make 2d pose estimation")
                    submit_pose3d_web = gr.Button("Make 3d pose estimation")
                    submit_hand_web = gr.Button("Make 2d hand estimation")
                    submit_detect_web = gr.Button("Detect and track objects")

                webcam_output = gr.Video(height=512)

    # From file: every button reads the uploaded video and writes to the
    # output player of the upload tab.
    for button, handler in (
        (submit_pose_file, pose2d),
        (submit_pose3d_file, pose3d),
        (submit_hand_file, pose2dhand),
        (submit_detect_file, show_tracking),
    ):
        button.click(fn=handler, inputs=video_input, outputs=video_output)

    # Web: the webcam buttons must use the webcam tab's own components.
    # They were previously wired to the upload tab's input/output, so a
    # recorded video was never processed or shown.
    for button, handler in (
        (submit_pose_web, pose2d),
        (submit_pose3d_web, pose3d),
        (submit_hand_web, pose2dhand),
        (submit_detect_web, show_tracking),
    ):
        button.click(fn=handler, inputs=webcam_input, outputs=webcam_output)

demo.launch()