# MMpose / main.py
# Pose inferencing
import mmpose
from mmpose.apis import MMPoseInferencer
# Ultralytics
from ultralytics import YOLO
import torch
# Gradio
import gradio as gr
import moviepy.editor as moviepy
# System and files
import os
import glob
import uuid
# Image manipulation
import numpy as np
import cv2
print("[INFO]: Imported modules!")
# Pose inferencers ("human", "hand", and "human3d" are MMPose model aliases)
human = MMPoseInferencer("human")
hand = MMPoseInferencer("hand")
human3d = MMPoseInferencer(pose3d="human3d")

# Ultralytics: load an official Detect model (weights download on first use)
track_model = YOLO('yolov8n.pt')

print("[INFO]: Downloaded models!")
def check_extension(video):
    # Extract the file name and extension
    file_name, file_extension = os.path.splitext(video)
    print(file_extension)

    # Re-encode to .mp4 if the file has another extension
    if file_extension != ".mp4":
        clip = moviepy.VideoFileClip(video)
        video = file_name + ".mp4"
        clip.write_videofile(video)

    return video
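# Example (hypothetical file name): check_extension("demo.avi") writes
# "demo.mp4" next to the original and returns the new path; an .mp4 input
# is returned unchanged.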
def tracking(video, model, boxes=True):
    print("[INFO] Is cuda available? ", torch.cuda.is_available())
    print("[INFO] Starting tracking!")

    # Perform tracking with the model (official or custom)
    # https://docs.ultralytics.com/modes/predict/
    annotated_frame = model(video, boxes=boxes)

    return annotated_frame
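# Note: with an entry point such as track_model.track, the call above returns
# a list of ultralytics Results objects, one per frame, which show_tracking
# below iterates over.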
def show_tracking(video_content):
    # https://docs.ultralytics.com/datasets/detect/coco/
    video = cv2.VideoCapture(video_content)

    # Track
    video_track = tracking(video_content, track_model.track)

    # Prepare to save the annotated video
    out_file = "track.mp4"
    print("[INFO]: TRACK", out_file)

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4 video
    fps = video.get(cv2.CAP_PROP_FPS)
    height, width, _ = video_track[0][0].orig_img.shape
    size = (width, height)
    out_track = cv2.VideoWriter(out_file, fourcc, fps, size)

    # Go through the frames and write them to the output video
    for frame_track in video_track:
        result_track = frame_track[0].plot()  # Plot a BGR numpy array of predictions
        out_track.write(result_track)
    print("[INFO] Done with frames")

    out_track.release()
    video.release()
    cv2.destroyAllWindows()  # Close any OpenCV windows

    return out_file
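# Example (hypothetical path): show_tracking("clips/walk.mp4") writes the
# annotated video to "track.mp4" in the working directory and returns that path.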
def pose3d(video):
    video = check_extension(video)

    # Define a new unique output folder
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join(os.path.dirname(video), add_dir)
    os.makedirs(vis_out_dir)

    result_generator = human3d(video,
                               vis_out_dir=vis_out_dir,
                               thickness=2,
                               return_vis=True,
                               rebase_keypoint_height=True,
                               device="cuda")

    # Exhaust the generator so the visualization is written to disk
    results = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
    return "".join(out_file)
def pose2d(video, kpt_threshold):
    video = check_extension(video)

    # Define a new unique output folder
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join(os.path.dirname(video), add_dir)
    os.makedirs(vis_out_dir)

    result_generator = human(video,
                             vis_out_dir=vis_out_dir,
                             return_vis=True,
                             thickness=2,
                             rebase_keypoint_height=True,
                             kpt_thr=kpt_threshold,
                             device="cuda")

    # Exhaust the generator so the visualization is written to disk
    results = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
    return "".join(out_file)
def pose2dhand(video, kpt_threshold):
    video = check_extension(video)

    # Define a new unique output folder
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join(os.path.dirname(video), add_dir)
    os.makedirs(vis_out_dir)

    result_generator = hand(video,
                            vis_out_dir=vis_out_dir,
                            return_vis=True,
                            thickness=2,
                            rebase_keypoint_height=True,
                            kpt_thr=kpt_threshold,
                            device="cuda")

    # Exhaust the generator so the visualization is written to disk
    results = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
    return "".join(out_file)
def run_UI():
    with gr.Blocks() as demo:
        with gr.Column():
            with gr.Tab("Upload video"):
                with gr.Row():
                    with gr.Column():
                        video_input = gr.Video(source="upload", type="filepath", height=612)
                        # Slider for the keypoint confidence threshold
                        file_kpthr = gr.Slider(minimum=0.1, maximum=1, step=1e-3, value=0.3, label="Keypoint threshold")
                        submit_pose_file = gr.Button("Make 2d pose estimation")
                        submit_pose3d_file = gr.Button("Make 3d pose estimation")
                        submit_hand_file = gr.Button("Make 2d hand estimation")
                        submit_detect_file = gr.Button("Detect and track objects")
                    with gr.Column():
                        video_output1 = gr.PlayableVideo(height=512, label="Estimate human 2d poses", show_label=True)
                        video_output2 = gr.PlayableVideo(height=512, label="Estimate human 3d poses", show_label=True)
                        video_output3 = gr.PlayableVideo(height=512, label="Estimate human hand poses", show_label=True)
                        video_output4 = gr.Video(height=512, label="Detection and tracking", show_label=True, format="mp4")
with gr.Tab("Record video with webcam"):
with gr.Column():
with gr.Row():
with gr.Column():
webcam_input = gr.Video(source="webcam", height=612)
web_kpthr = gr.Slider(minimum=0.1, maximum=1, step=1e3, default=0.3, label='Keypoint threshold')
submit_pose_web = gr.Button("Make 2d pose estimation")
submit_pose3d_web = gr.Button("Make 3d pose estimation")
submit_hand_web = gr.Button("Make 2d hand estimation")
submit_detect_web = gr.Button("Detect and track objects")
with gr.Row():
webcam_output1 = gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True)
webcam_output2 = gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)
webcam_output3 = gr.PlayableVideo(height=512, label = "Estimate human hand position", show_label=True)
webcam_output4 = gr.Video(height=512, label = "Detection and tracking", show_label=True, format="mp4")
        # From file
        submit_pose_file.click(fn=pose2d,
                               inputs=[video_input, file_kpthr],
                               outputs=video_output1)
        submit_pose3d_file.click(fn=pose3d,
                                 inputs=video_input,
                                 outputs=video_output2)
        submit_hand_file.click(fn=pose2dhand,
                               inputs=[video_input, file_kpthr],
                               outputs=video_output3)
        submit_detect_file.click(fn=show_tracking,
                                 inputs=video_input,
                                 outputs=video_output4)

        # From webcam
        submit_pose_web.click(fn=pose2d,
                              inputs=[webcam_input, web_kpthr],
                              outputs=webcam_output1)
        submit_pose3d_web.click(fn=pose3d,
                                inputs=webcam_input,
                                outputs=webcam_output2)
        submit_hand_web.click(fn=pose2dhand,
                              inputs=[webcam_input, web_kpthr],
                              outputs=webcam_output3)
        submit_detect_web.click(fn=show_tracking,
                                inputs=webcam_input,
                                outputs=webcam_output4)

    demo.launch(server_name="0.0.0.0", server_port=7860)
if __name__ == "__main__":
    run_UI()
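# Run locally with `python main.py`; the app then serves on 0.0.0.0:7860,
# which is also the default port expected by Hugging Face Spaces.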