Spaces:

NN-BRD
/

MMpose

Build error

App Files Files Community

MMpose / main.py

xmrt

output

02cf03c about 1 year ago

raw

history blame

No virus

9.11 kB



	# Pose inferencing
	import mmpose
	from mmpose.apis import MMPoseInferencer

	# Ultralytics
	from ultralytics import YOLO
	import torch

	# Gradio
	import gradio as gr
	import moviepy.editor as moviepy


	# System and files
	import os
	import glob
	import uuid

	# Image manipulation
	import numpy as np
	import cv2

	print("[INFO]: Imported modules!")

	# Instantiate every model once, at import time, so the UI callbacks
	# defined further down can share them.
	human = MMPoseInferencer("human")
	hand = MMPoseInferencer("hand")
	human3d = MMPoseInferencer(pose3d="human3d")
	track_model = YOLO("yolov8n.pt")  # official Ultralytics detection model

	# Lookup table: task description -> model object that handles it.
	inferencers = {
	    "Estimate human 2d poses": human,
	    "Estimate human 2d hand poses": hand,
	    "Estimate human 3d poses": human3d,
	    "Detect and track": track_model,
	}

	print("[INFO]: Downloaded models!")

	def check_extension(video):
	    """Return a path to an ``.mp4`` version of *video*, converting if needed.

	    Args:
	        video: Path of the input video file.

	    Returns:
	        *video* unchanged when it already has an ``.mp4`` extension (compared
	        case-insensitively); otherwise the path of a newly written file with
	        the same base name and an ``.mp4`` extension.
	    """
	    file_name, file_extension = os.path.splitext(video)

	    # Compare by value, not identity: `is not ".mp4"` is true for any string
	    # produced at runtime, which re-encoded files that were already MP4 onto
	    # their own path.
	    if file_extension.lower() != ".mp4":
	        clip = moviepy.VideoFileClip(video)
	        video = file_name + ".mp4"
	        try:
	            clip.write_videofile(video)
	        finally:
	            # Release the reader held by the clip even if encoding fails.
	            clip.close()

	    return video





	def tracking(video, model, boxes=True):
	    """Call *model* on *video* and return whatever the model returns.

	    Args:
	        video: Input handed to the model as its first positional argument.
	        model: Callable accepting ``(video, boxes=...)``.
	        boxes: Forwarded to the model as the ``boxes`` keyword argument.

	    Returns:
	        The unmodified return value of ``model``.
	    """
	    print("[INFO] Loading model...")
	    print("[INFO] Starting tracking!")
	    # Available prediction arguments: https://docs.ultralytics.com/modes/predict/
	    return model(video, boxes=boxes)

	def show_tracking(video_content):
	    """Track objects in a video and write the annotated frames to ``track.mp4``.

	    Args:
	        video_content: Path of the input video. It is passed to the tracker
	            and also opened with OpenCV to read the source frame rate.

	    Returns:
	        The output path, always ``"track.mp4"`` (relative to the working
	        directory).

	    Raises:
	        ValueError: If the tracker returned no frames.
	    """
	    # Class list of the detector: https://docs.ultralytics.com/datasets/detect/coco/
	    video_track = tracking(video_content, track_model.track)
	    if not video_track:
	        raise ValueError("Tracking produced no frames for: %s" % video_content)

	    # The capture is only needed for the frame rate; release it right away,
	    # even if reading the property fails.
	    video = cv2.VideoCapture(video_content)
	    try:
	        fps = video.get(cv2.CAP_PROP_FPS)
	    finally:
	        video.release()
	    if not fps > 0:
	        # 0 or NaN means the frame rate is unknown; a writer opened with such
	        # a value does not produce a playable file, so fall back to a
	        # conventional rate.
	        fps = 30.0

	    # NOTE(review): fixed output name, so concurrent requests overwrite each
	    # other's result - confirm whether that is acceptable.
	    out_file = "track.mp4"
	    print("[INFO]: TRACK", out_file)

	    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4 video
	    height, width = video_track[0][0].orig_img.shape[:2]
	    out_track = cv2.VideoWriter(out_file, fourcc, fps, (width, height))

	    try:
	        for frame_track in video_track:
	            # plot() renders the predictions onto a BGR numpy array
	            out_track.write(frame_track[0].plot())
	    finally:
	        # Always finalize the file so a failure mid-way does not leak the
	        # writer handle.
	        out_track.release()
	    print("[INFO] Done with frames")

	    # No cv2.destroyAllWindows(): this function never opens a window.
	    return out_file


	def pose3d(video):
	    """Run the 3D human pose model on a video and return the rendered file.

	    Args:
	        video: Path of the input video; converted to ``.mp4`` first when it
	            has another extension.

	    Returns:
	        Path of the ``.mp4`` found in the freshly created output folder, or
	        an empty string when the inferencer wrote none.
	    """
	    video = check_extension(video)

	    # Unique folder next to the input so runs never see each other's output.
	    vis_out_dir = os.path.join(os.path.dirname(video), str(uuid.uuid4()))
	    os.makedirs(vis_out_dir)

	    # NOTE(review): device is hard-coded to "cuda" - confirm the host has a GPU.
	    result_generator = human3d(
	        video,
	        vis_out_dir=vis_out_dir,
	        thickness=2,
	        return_vis=True,
	        rebase_keypoint_height=True,
	        device="cuda",
	    )

	    # Drain the generator so every frame is processed; the per-frame results
	    # are not used here, so do not keep them in memory.
	    for _ in result_generator:
	        pass

	    # "*.mp4" is required: the bare pattern ".mp4" only matches a file that
	    # is literally named ".mp4", so the result was never found.
	    rendered = sorted(glob.glob(os.path.join(vis_out_dir, "*.mp4")))
	    return rendered[0] if rendered else ""


	def pose2d(video, kpt_threshold):
	    """Run the 2D human pose model on a video and return the rendered file.

	    Args:
	        video: Path of the input video; converted to ``.mp4`` first when it
	            has another extension.
	        kpt_threshold: Forwarded to the inferencer as ``kpt_thr``.

	    Returns:
	        Path of the ``.mp4`` found in the freshly created output folder, or
	        an empty string when the inferencer wrote none.
	    """
	    video = check_extension(video)

	    # Unique folder next to the input so runs never see each other's output.
	    vis_out_dir = os.path.join(os.path.dirname(video), str(uuid.uuid4()))
	    os.makedirs(vis_out_dir)

	    # NOTE(review): device is hard-coded to "cuda" - confirm the host has a GPU.
	    result_generator = human(
	        video,
	        vis_out_dir=vis_out_dir,
	        return_vis=True,
	        thickness=2,
	        rebase_keypoint_height=True,
	        kpt_thr=kpt_threshold,
	        device="cuda",
	    )

	    # Drain the generator so every frame is processed; the per-frame results
	    # are not used here, so do not keep them in memory.
	    for _ in result_generator:
	        pass

	    # "*.mp4" is required: the bare pattern ".mp4" only matches a file that
	    # is literally named ".mp4", so the result was never found.
	    rendered = sorted(glob.glob(os.path.join(vis_out_dir, "*.mp4")))
	    return rendered[0] if rendered else ""


	def pose2dhand(video, kpt_threshold):
	    """Run the 2D hand pose model on a video and return the rendered file.

	    Args:
	        video: Path of the input video; converted to ``.mp4`` first when it
	            has another extension.
	        kpt_threshold: Forwarded to the inferencer as ``kpt_thr``.

	    Returns:
	        Path of the ``.mp4`` found in the freshly created output folder, or
	        an empty string when the inferencer wrote none.
	    """
	    video = check_extension(video)

	    # Unique folder next to the input so runs never see each other's output.
	    vis_out_dir = os.path.join(os.path.dirname(video), str(uuid.uuid4()))
	    os.makedirs(vis_out_dir)

	    # NOTE(review): device is hard-coded to "cuda" - confirm the host has a GPU.
	    result_generator = hand(
	        video,
	        vis_out_dir=vis_out_dir,
	        return_vis=True,
	        thickness=2,
	        rebase_keypoint_height=True,
	        kpt_thr=kpt_threshold,
	        device="cuda",
	    )

	    # Drain the generator so every frame is processed; the per-frame results
	    # are not used here, so do not keep them in memory.
	    for _ in result_generator:
	        pass

	    # "*.mp4" is required: the bare pattern ".mp4" only matches a file that
	    # is literally named ".mp4", so the result was never found.
	    rendered = sorted(glob.glob(os.path.join(vis_out_dir, "*.mp4")))
	    return rendered[0] if rendered else ""

	def run_UI():
	    """Build the Gradio demo and serve it on ``0.0.0.0:7860``.

	    The page has two tabs, one for uploaded videos and one for webcam
	    recordings. Each tab offers a video input, a keypoint-threshold slider,
	    four action buttons and four video outputs. The buttons are wired to
	    ``pose2d``, ``pose3d``, ``pose2dhand`` and ``show_tracking``.
	    """
	    with gr.Blocks() as demo:
	        with gr.Column():
	            with gr.Tab("Upload video"):
	                with gr.Row():
	                    with gr.Column():
	                        video_input = gr.Video(source="upload", type="filepath", height=612)
	                        # Forwarded to the pose functions as kpt_thr. The
	                        # initial value must be passed as `value`, and the
	                        # range is kept identical to the webcam tab.
	                        file_kpthr = gr.Slider(minimum=0.1, maximum=1, step=1e-3, value=0.3, label="Keypoint threshold")

	                        submit_pose_file = gr.Button("Make 2d pose estimation")
	                        submit_pose3d_file = gr.Button("Make 3d pose estimation")
	                        submit_hand_file = gr.Button("Make 2d hand estimation")
	                        submit_detect_file = gr.Button("Detect and track objects")
	                    with gr.Column():
	                        video_output1 = gr.PlayableVideo(height=512, label="Estimate human 2d poses", show_label=True)
	                        video_output2 = gr.PlayableVideo(height=512, label="Estimate human 3d poses", show_label=True)
	                        video_output3 = gr.PlayableVideo(height=512, label="Estimate human hand poses", show_label=True)
	                        video_output4 = gr.Video(height=512, label="Detection and tracking", show_label=True, format="mp4")

	            with gr.Tab("Record video with webcam"):
	                # gr.Column must be instantiated: the bare class is not a
	                # context manager and fails as soon as the UI is built.
	                with gr.Column():
	                    with gr.Row():
	                        with gr.Column():
	                            webcam_input = gr.Video(source="webcam", height=612)

	                            # The step has to be smaller than the 0.1-1 range
	                            # for the slider to be usable.
	                            web_kpthr = gr.Slider(minimum=0.1, maximum=1, step=1e-3, value=0.3, label="Keypoint threshold")

	                            submit_pose_web = gr.Button("Make 2d pose estimation")
	                            submit_pose3d_web = gr.Button("Make 3d pose estimation")
	                            submit_hand_web = gr.Button("Make 2d hand estimation")
	                            submit_detect_web = gr.Button("Detect and track objects")
	                        with gr.Row():
	                            webcam_output1 = gr.PlayableVideo(height=512, label="Estimate human 2d poses", show_label=True)
	                            webcam_output2 = gr.PlayableVideo(height=512, label="Estimate human 3d poses", show_label=True)
	                            webcam_output3 = gr.PlayableVideo(height=512, label="Estimate human hand position", show_label=True)
	                            webcam_output4 = gr.Video(height=512, label="Detection and tracking", show_label=True, format="mp4")

	        # Event listeners have to be registered inside the Blocks context.
	        # From file
	        submit_pose_file.click(fn=pose2d,
	                               inputs=[video_input, file_kpthr],
	                               outputs=video_output1)

	        submit_pose3d_file.click(fn=pose3d,
	                                 inputs=video_input,
	                                 outputs=video_output2)

	        submit_hand_file.click(fn=pose2dhand,
	                               inputs=[video_input, file_kpthr],
	                               outputs=video_output3)

	        submit_detect_file.click(fn=show_tracking,
	                                 inputs=video_input,
	                                 outputs=video_output4)

	        # Web
	        submit_pose_web.click(fn=pose2d,
	                              inputs=[webcam_input, web_kpthr],
	                              outputs=webcam_output1)

	        submit_pose3d_web.click(fn=pose3d,
	                                inputs=webcam_input,
	                                outputs=webcam_output2)

	        submit_hand_web.click(fn=pose2dhand,
	                              inputs=[webcam_input, web_kpthr],
	                              outputs=webcam_output3)

	        submit_detect_web.click(fn=show_tracking,
	                                inputs=webcam_input,
	                                outputs=webcam_output4)

	    demo.launch(server_name="0.0.0.0", server_port=7860)

	# Start the web UI only when this file is executed as a script.
	if __name__ == "__main__":
	    run_UI()