|
|
|
from mmpose.apis import MMPoseInferencer |
|
|
|
import torch |
|
|
|
|
|
import gradio as gr |
|
import moviepy.editor as moviepy |
|
|
|
|
|
|
|
import os |
|
import glob |
|
import uuid |
|
import json |
|
|
|
|
|
import numpy as np |
|
import cv2 |
|
|
|
|
|
# Log the installed PyTorch version to the console at start-up.
print(torch.__version__)

# Use the GPU when PyTorch reports CUDA support, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Run the `nvidia-smi` command; its output goes straight to the console.
os.system("nvidia-smi")

print("[INFO]: Imported modules!")

# Module-level inferencers that are reused by every 2D request.
human = MMPoseInferencer("human", device=device)
hand = MMPoseInferencer("hand", device=device)

print("[INFO]: Downloaded models!")
|
|
|
def check_extension(video):
    """Validate an input video and make sure it is available as an mp4 file.

    Args:
        video: Path to the uploaded or recorded video file.

    Returns:
        The path of the video to process: the original path when it already
        has an ``.mp4`` extension (compared case-insensitively), otherwise the
        path of a newly written ``<name>.mp4`` conversion next to the input.

    Raises:
        gr.Error: If the clip is longer than 10 seconds.
    """
    clip = moviepy.VideoFileClip(video)

    try:
        if clip.duration > 10:
            raise gr.Error("Please provide or record a video shorter than 10 seconds...")

        file_name, file_extension = os.path.splitext(video)

        # Compare case-insensitively so that e.g. ".MP4" is not re-encoded.
        if file_extension.lower() != ".mp4":
            print("Converting to mp4")

            video = file_name + ".mp4"
            clip.write_videofile(video, threads=8)
    finally:
        # Always release the underlying reader, also when the duration
        # check above raises.
        clip.close()

    return video
|
|
|
|
|
def _run_pose_inference(inferencer, video, kpt_threshold, radius, thickness):
    """Run an inferencer on a video and collect the files it wrote.

    A fresh, uniquely named directory is created in the current working
    directory and passed to the inferencer as both ``vis_out_dir`` and
    ``pred_out_dir``; afterwards the directory is searched for the resulting
    ``*.mp4`` and ``*.json`` files.

    Args:
        inferencer: Callable inferencer (e.g. an ``MMPoseInferencer``).
        video: Path to the (mp4) video to process.
        kpt_threshold: Value forwarded as ``kpt_thr``.
        radius: Value forwarded as ``radius`` for the visualisation.
        thickness: Value forwarded as ``thickness`` for the visualisation.

    Returns:
        A ``(video_path, json_path)`` tuple. An entry is ``None`` when the
        inferencer did not write a matching file.
    """
    out_dir = str(uuid.uuid4())
    os.makedirs(out_dir)

    result_generator = inferencer(
        video,
        vis_out_dir=out_dir,
        radius=radius,
        thickness=thickness,
        rebase_keypoint_height=True,
        kpt_thr=kpt_threshold,
        pred_out_dir=out_dir,
    )

    # The results are produced lazily, so the generator has to be exhausted
    # for the whole video to be processed. The per-frame results themselves
    # are not needed, so they are not kept in memory.
    for _ in result_generator:
        pass

    out_files = sorted(glob.glob(os.path.join(out_dir, "*.mp4")))
    kpoints = sorted(glob.glob(os.path.join(out_dir, "*.json")))
    print(kpoints)
    print(out_files)

    # Exactly one file of each kind is expected. Pick the first match instead
    # of joining the names, which would yield an invalid path for several
    # matches and an empty string for none.
    video_path = out_files[0] if out_files else None
    json_path = kpoints[0] if kpoints else None
    return video_path, json_path


def pose3d(video, kpt_threshold):
    """Estimate 3D human poses for a video.

    Args:
        video: Path to the input video.
        kpt_threshold: Keypoint threshold forwarded to the inferencer.

    Returns:
        A ``(video_path, json_path)`` tuple with the rendered video and the
        keypoint file; an entry is ``None`` when no such file was produced.

    Raises:
        gr.Error: If the video is longer than ``check_extension`` allows.
    """
    video = check_extension(video)
    print(device)

    # The 3D inferencer is created per request, after the video has been
    # validated, so an invalid video never triggers a model load.
    human3d = MMPoseInferencer(device=device, pose3d="human3d", scope="mmpose")

    return _run_pose_inference(human3d, video, kpt_threshold, radius=8, thickness=5)


def pose2d(video, kpt_threshold):
    """Estimate 2D human poses for a video with the shared ``human`` model.

    Args:
        video: Path to the input video.
        kpt_threshold: Keypoint threshold forwarded to the inferencer.

    Returns:
        A ``(video_path, json_path)`` tuple with the rendered video and the
        keypoint file; an entry is ``None`` when no such file was produced.

    Raises:
        gr.Error: If the video is longer than ``check_extension`` allows.
    """
    video = check_extension(video)

    return _run_pose_inference(human, video, kpt_threshold, radius=5, thickness=4)


def pose2dhand(video, kpt_threshold):
    """Estimate 2D hand poses for a video with the shared ``hand`` model.

    Args:
        video: Path to the input video.
        kpt_threshold: Keypoint threshold forwarded to the inferencer.

    Returns:
        A ``(video_path, json_path)`` tuple with the rendered video and the
        keypoint file; an entry is ``None`` when no such file was produced.

    Raises:
        gr.Error: If the video is longer than ``check_extension`` allows.
    """
    video = check_extension(video)
    print(device)

    return _run_pose_inference(hand, video, kpt_threshold, radius=5, thickness=4)
|
|
|
# Example script displayed in the UI. It shows how to compute the angle of the
# right elbow from a downloaded keypoint file. The indices follow the keypoint
# order listed in the "General information" tab (6 = right shoulder,
# 8 = right elbow, 10 = right wrist).
code_example = """

# Importing packages needed
import json
import numpy as np

# First we load the data
file_path = "keypoints.json"  # Path to the .json file downloaded from the app
with open(file_path, 'r') as json_file:
    data = json.load(json_file)

# Then we define a function for calculating angles
def calculate_angle(a, b, c):
    a = np.array(a) # First point
    b = np.array(b) # Middle point
    c = np.array(c) # End point

    radians = np.arctan2(c[1]-b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
    angle = np.abs(radians*180.0/np.pi)

    if angle >180.0:
        angle = 360-angle

    return angle


# COCO keypoint indices
shoulder_index = 6
elbow_index = 8
wrist_index = 10

# We select the first identified person in the first frame (zero index) as an example
# To calculate the angle of the right elbow we take the point before and after and according to the indices that will be 6 (right shoulder) and 10 (right wrist)
shoulder_point = data[0]['instances'][0]['keypoints'][shoulder_index]
elbow_point = data[0]['instances'][0]['keypoints'][elbow_index]
wrist_point = data[0]['instances'][0]['keypoints'][wrist_index]

angle = calculate_angle(shoulder_point, elbow_point, wrist_point)
print("Angle is: ", angle)

"""
|
|
|
# Shell commands displayed in the UI for preparing an environment in which the
# example script can be run. A raw string keeps the Windows-style backslashes
# literal without relying on invalid escape sequences such as "\S" and "\P".
venv_example = r"""
# Create a virtual environment
python -m venv ".bstad_env"
# Activate the environment
.bstad_env\Scripts\Activate
# Install numpy (json is part of the Python standard library and needs no install)
pip install numpy

# Run the code from the commandline
python \Path\To\Script.py
"""
|
|
|
def _render_keypoint_guide():
    """Render the guide on how to work with the downloaded keypoint file.

    The components are created in whatever Gradio layout context is active
    when the function is called, so it is used once per input tab.
    """
    gr.Markdown(
        "There are multiple ways to interact with these keypoints.\n"
        "\n The example below shows how you can calculate the angle on the elbow for example.\n"
        "\n If you choose to run the code, start by installing numpy (json is part of the"
        " Python standard library). You can do that by running the following commands in"
        " PowerShell or another commandline/terminal."
    )

    # The environment snippet consists of shell commands, not Python code.
    gr.Code(
        value=venv_example,
        language="shell",
        interactive=False,
        show_label=False,
    )

    gr.Markdown(
        "\n Then copy the next code segment into your own preferred interpreter"
        " and experiment with the keypoint file.\n"
    )

    gr.Code(
        value=code_example,
        language="python",
        interactive=False,
        show_label=False,
    )

    gr.Markdown("""The complete overview of the keypoint indices can be seen in the tab 'General information'. """)


# User interface: one tab per input method (webcam / upload) plus an
# information tab. Both input tabs offer the same three estimations.
block = gr.Blocks()

with block:
    with gr.Column():
        with gr.Tab("Capture video with webcam"):
            with gr.Column():
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            video_input_web = gr.Video(source="webcam", include_audio=False, height=256, width=192)

                    with gr.Column():
                        gr.Markdown("Drag the keypoint threshold to filter out lower probability keypoints:")
                        file_kpthr_web = gr.Slider(0, 1, value=0.3, label='Keypoint threshold')
                        with gr.Row():
                            submit_pose_file_web = gr.Button("Make 2d pose estimation")
                            submit_pose3d_file_web = gr.Button("Make 3d pose estimation")
                            submit_hand_file_web = gr.Button("Make 2d hand estimation")

                with gr.Row():
                    video_output1_web = gr.PlayableVideo(label="Estimate human 2d poses", show_label=True, height=256)
                    video_output2_web = gr.PlayableVideo(label="Estimate human 3d poses", show_label=True, height=256)
                    video_output3_web = gr.PlayableVideo(label="Estimate human hand poses", show_label=True, height=256)

                gr.Markdown("Download the .json file that contains the keypoint positions for each frame in the video.")
                jsonoutput_web = gr.File(file_types=[".json"])
                _render_keypoint_guide()

        with gr.Tab("Upload video"):
            with gr.Column():
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            video_input = gr.Video(source="upload", type="filepath", include_audio=False, height=256, width=192)

                    with gr.Column():
                        gr.Markdown("Drag the keypoint threshold to filter out lower probability keypoints:")
                        file_kpthr = gr.Slider(0, 1, value=0.3, label='Keypoint threshold')
                        with gr.Row():
                            submit_pose_file = gr.Button("Make 2d pose estimation")
                            submit_pose3d_file = gr.Button("Make 3d pose estimation")
                            submit_hand_file = gr.Button("Make 2d hand estimation")

                with gr.Row():
                    video_output1 = gr.PlayableVideo(label="Estimate human 2d poses", show_label=True, height=256)
                    video_output2 = gr.PlayableVideo(label="Estimate human 3d poses", show_label=True, height=256)
                    video_output3 = gr.PlayableVideo(label="Estimate human hand poses", show_label=True, height=256)

                gr.Markdown("Download the .json file that contains the keypoint positions for each frame in the video.")
                jsonoutput = gr.File(file_types=[".json"])
                _render_keypoint_guide()

        with gr.Tab("General information"):
            # The closing code fence sits on its own line; a fence that
            # shares a line with content does not terminate the code block.
            gr.Markdown("""
\n # Information about the models

\n ## Pose models:

\n All the pose estimation models come from the library [MMpose](https://github.com/open-mmlab/mmpose). It is a library for human pose estimation that provides pre-trained models for 2D and 3D pose estimation.

\n The 2D pose model is used for estimating the 2D coordinates of human body joints from an image or a video frame. The model uses a convolutional neural network (CNN) to predict the joint locations and their confidence scores.

\n The 2D hand model is a specialized version of the 2D pose model that is designed for hand pose estimation. It uses a similar CNN architecture to the 2D pose model but is trained specifically for detecting the joints in the hand.

\n The 3D pose model is used for estimating the 3D coordinates of human body joints from an image or a video frame. The model uses a combination of 2D pose estimation and depth estimation to infer the 3D joint locations.

\n The keypoints in the 2D pose model has the following order:

\n ```
0: Nose
1: Left Eye
2: Right Eye
3: Left Ear
4: Right Ear
5: Left Shoulder
6: Right Shoulder
7: Left Elbow
8: Right Elbow
9: Left Wrist
10: Right Wrist
11: Left Hip
12: Right Hip
13: Left Knee
14: Right Knee
15: Left Ankle
16: Right Ankle
```

\n Below, you can see a visualization of the poses of the 2d, 3d and hand keypoint locations: """)
            with gr.Row():
                gr.Image("./cocoposes.png", width=160, height=220)
                gr.Image("./cocohand.png", width=160, height=200)

    # Event wiring for the "Upload video" tab. Every handler returns the
    # rendered video and the keypoint file for that tab's download component.
    submit_pose_file.click(
        fn=pose2d,
        inputs=[video_input, file_kpthr],
        outputs=[video_output1, jsonoutput],
        queue=True,
    )

    submit_pose3d_file.click(
        fn=pose3d,
        inputs=[video_input, file_kpthr],
        outputs=[video_output2, jsonoutput],
        queue=True,
    )

    submit_hand_file.click(
        fn=pose2dhand,
        inputs=[video_input, file_kpthr],
        outputs=[video_output3, jsonoutput],
        queue=True,
    )

    # Event wiring for the "Capture video with webcam" tab.
    submit_pose_file_web.click(
        fn=pose2d,
        inputs=[video_input_web, file_kpthr_web],
        outputs=[video_output1_web, jsonoutput_web],
        queue=True,
    )

    submit_pose3d_file_web.click(
        fn=pose3d,
        inputs=[video_input_web, file_kpthr_web],
        outputs=[video_output2_web, jsonoutput_web],
        queue=True,
    )

    submit_hand_file_web.click(
        fn=pose2dhand,
        inputs=[video_input_web, file_kpthr_web],
        outputs=[video_output3_web, jsonoutput_web],
        queue=True,
    )
|
|
|
if __name__ == "__main__":
    # Login credentials can be supplied through the environment so they do not
    # have to live in the source code. The previous hard-coded values remain
    # the defaults to keep existing deployments working.
    # NOTE(review): consider removing the defaults and rotating the password.
    auth_user = os.environ.get("POSE_APP_USERNAME", "novouser")
    auth_password = os.environ.get("POSE_APP_PASSWORD", "bstad2023")

    # Enable the request queue (at most 20 entries, api_open disabled) and
    # start the server on port 7860 with server_name "0.0.0.0".
    block.queue(
        max_size=20,
        api_open=False,
    ).launch(
        server_name="0.0.0.0",
        server_port=7860,
        auth=(auth_user, auth_password),
    )
|
|
|
|
|
|
|
|