xmrt committed on
Commit
3bbef49
•
1 Parent(s): 69d5355
Files changed (3)
  1. Dockerfile +1 -1
  2. main.py +109 -94
  3. main_blocks.py → main_webcamtest.py +94 -109
Dockerfile CHANGED
@@ -42,4 +42,4 @@ WORKDIR $HOME/app
 # Copy the current directory contents into the container at $HOME/app setting the owner to the user
 COPY --chown=user . $HOME/app
 
-CMD ["python", "main_blocks.py"]
+CMD ["python", "main.py"]
main.py CHANGED
@@ -1,3 +1,5 @@
+
+
 # Pose inferencing
 import mmpose
 from mmpose.apis import MMPoseInferencer
@@ -20,14 +22,12 @@ import cv2
 
 print("[INFO]: Imported modules!")
 human = MMPoseInferencer("human")
-hand = MMPoseInferencer("hand") #kpt_thr (float) – The threshold to visualize the keypoints. Defaults to 0.3
+hand = MMPoseInferencer("hand")
 human3d = MMPoseInferencer(pose3d="human3d")
 track_model = YOLO('yolov8n.pt') # Load an official Detect model
 
 # ultraltics
 
-# [INFO] VIDEO INPUT: /tmp/gradio/927601b660ec45919366ce37df1ed004a1fcffab/sample_flip.webm
-
 # Defining inferencer models to lookup in function
 inferencers = {"Estimate human 2d poses":human, "Estimate human 2d hand poses":hand, "Estimate human 3d poses":human3d, "Detect and track":track_model}
 
@@ -44,11 +44,11 @@ def tracking(video, model, boxes=True):
 
     return annotated_frame
 
-def show_tracking(video_content, vis_out_dir, model):
+def show_tracking(video_content):
     video = cv2.VideoCapture(video_content)
 
     # Track
-    video_track = tracking(video_content, model.track)
+    video_track = tracking(video_content, track_model.track)
 
     # Prepare to save video
     #out_file = os.path.join(vis_out_dir, "track.mp4")
@@ -65,11 +65,9 @@ def show_tracking(video_content, vis_out_dir, model):
     # Go through frames and write them
     for frame_track in video_track:
         result_track = frame_track[0].plot() # plot a BGR numpy array of predictions
-        out_track.write(result_track)
-
     print("[INFO] Done with frames")
     #print(type(result_pose)) numpy ndarray
-
+    out_track.write(result_track)
 
     out_track.release()
 
@@ -79,112 +77,129 @@ def show_tracking(video_content, vis_out_dir, model):
     return out_file
 
 
-def poses(inferencer, video, vis_out_dir, kpt_thr):
-    print("[INFO] VIDEO INPUT: ", video)
-    result_generator = inferencer(video,
+def pose3d(video):
+    add_dir = str(uuid.uuid4())
+    #vidname = video.split("/")[-1]
+    vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
+    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
+
+    #full name = os.path.join(vis_out_dir, vidname)
+
+    result_generator = human3d(video,
                              vis_out_dir = vis_out_dir,
-                             return_vis=True,
                              thickness=2,
                              rebase_keypoint_height=True,
-                             #kpt_thr=kpt_thr,
-                             device="cuda"
-                             )
+                             device="cuda")
 
     result = [result for result in result_generator] #next(result_generator)
-
-    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
+    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
+    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
 
     return out_file
 
-def infer(video, check, kpt_thr, webcam=True):
-    print("[INFO] VIDEO INPUT: ", video)
-
-    # Selecting the specific inferencer
-    out_files=[]
-
-    for i in check:
-        # Create out directory
-        vis_out_dir = str(uuid.uuid4())
-        inferencer = inferencers[i] # 'hand', 'human , device='cuda'
-
-        if i == "Detect and track":
-            #continue
-            trackfile = show_tracking(video, vis_out_dir, inferencer)
-
-        else:
-            if webcam==True:
-                print("WEBCAM")
-                add_dir = str(uuid.uuid4())
-                vidname = video.split("/")[-1]
-                vis_out_dir_web = "/".join(["/".join(video.split("/")[:-1]), add_dir])
-                out_file = poses(inferencer, video, vis_out_dir_web, kpt_thr)
-                fullname = os.path.join(vis_out_dir_web, vidname)
-                #if i == "Estimate human 3d poses":
-                #    fullname = fullname[:-4]+"mp4" #Change to .mp4
-                #    out_files.append(fullname)
-                #else:
-                out_files.append(fullname)
-
-            else:
-                out_files.extend(out_file)
-
-    print(out_files)
 
-    return "track.mp4", out_files[0], out_files[1], out_files[2] # out_files[3]
+def pose2d(video):
+    add_dir = str(uuid.uuid4())
+    vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
+    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
+
+
+    result_generator = human(video,
+                             vis_out_dir = vis_out_dir,
+                             thickness=2,
+                             rebase_keypoint_height=True,
+                             device="cuda")
+
+    result = [result for result in result_generator] #next(result_generator)
+
+    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
+    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
+
+    return out_file
+
 
-def run():
-    #https://github.com/open-mmlab/mmpose/blob/main/docs/en/user_guides/inference.md
-    check_web = gr.CheckboxGroup(choices = ["Detect and track", "Estimate human 2d poses", "Estimate human 2d hand poses", "Estimate human 3d poses"], label="Methods", type="value", info="Select the model(s) you want")
-    check_file = gr.CheckboxGroup(choices = ["Detect and track", "Estimate human 2d poses", "Estimate human 2d hand poses", "Estimate human 3d poses"], label="Methods", type="value", info="Select the model(s) you want")
+def pose2dhand(video):
+    add_dir = str(uuid.uuid4())
+    vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
 
-    description = """
-    \n\nHere you can upload videos or record one with your webcam and track objects or detect bodyposes in 2d and 3d.
-    """
+    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
+
+    vis_out_dir = str(uuid.uuid4())
 
-    # Insert slider with kpt_thr
-    web_kpthr = gr.Slider(0, 1, value=0.3)
-    file_kpthr = gr.Slider(0, 1, value=0.3)
+    result_generator = hand(video,
+                            vis_out_dir = vis_out_dir,
+                            thickness=2,
+                            rebase_keypoint_height=True,
+                            device="cuda")
 
-    webcam = gr.Interface(
-        fn=infer,
-        inputs= [gr.Video(source="webcam", height=512), check_web, web_kpthr], # /tmp/gradio/927601b660ec45919366ce37df1ed004a1fcffab/sample_flip.webm
-        outputs = [gr.Video(format='mp4', height=512, label="Detect and track", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d hand poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)],
-        title = 'Tracking and pose estimation',
-        description = description,
-        allow_flagging=False
-    )
+    result = [result for result in result_generator] #next(result_generator)
 
-    file = gr.Interface(
-        infer,
-        inputs = [gr.Video(source="upload", height=512), check_file, file_kpthr],
-        outputs = [gr.Video(format='mp4', height=512, label="Detect and track", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d hand poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)],
-        title = 'Tracking and pose estimation',
-        description = description,
-        allow_flagging=False
-    )
+    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
+    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
 
-    demo = gr.TabbedInterface(
-        interface_list=[file, webcam],
-        tab_names=["From a File", "From your Webcam"]
-    )
+    return out_file
 
-    demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
-if __name__ == "__main__":
-    run()
+with gr.Blocks() as demo:
+    with gr.Column():
+        with gr.Tab("Upload video"):
+            with gr.Row():
+                with gr.Column():
+                    video_input = gr.Video(source="upload", type="filepath", height=512)
 
-# https://github.com/open-mmlab/mmpose/tree/dev-1.x/configs/body_3d_keypoint/pose_lift
-# motionbert_ft_h36m-d80af323_20230531.pth
-# simple3Dbaseline_h36m-f0ad73a4_20210419.pth
-# videopose_h36m_243frames_fullconv_supervised_cpn_ft-88f5abbb_20210527.pth
-# videopose_h36m_81frames_fullconv_supervised-1f2d1104_20210527.pth
-# videopose_h36m_27frames_fullconv_supervised-fe8fbba9_20210527.pth
-# videopose_h36m_1frame_fullconv_supervised_cpn_ft-5c3afaed_20210527.pth
-# https://github.com/open-mmlab/mmpose/blob/main/mmpose/apis/inferencers/pose3d_inferencer.py
+                    submit_pose_file = gr.Button("Make 2d pose estimation")
+                    submit_pose3d_file = gr.Button("Make 3d pose estimation")
+                    submit_hand_file = gr.Button("Make 2d hand estimation")
+                    submit_detect_file = gr.Button("Detect and track objects")
 
+            video_output = gr.Video(height=512)
 
-# 00000.mp4
-# 000000.mp4
+        with gr.Tab("Record video with webcam"):
+            with gr.Row():
+                with gr.Column():
+                    webcam_input = gr.Video(source="webcam", height=512)
+
+                    submit_pose_web = gr.Button("Make 2d pose estimation")
+                    submit_pose3d_web = gr.Button("Make 3d pose estimation")
+                    submit_hand_web = gr.Button("Make 2d hand estimation")
+                    submit_detect_web = gr.Button("Detect and track objects")
 
+            webcam_output = gr.Video(height=512)
+
+
+    # From file
+    submit_pose_file.click(fn=pose2d,
+                           inputs= video_input,
+                           outputs = video_output)
+
+    submit_pose3d_file.click(fn=pose3d,
+                             inputs= video_input,
+                             outputs = video_output)
+
+    submit_hand_file.click(fn=pose2dhand,
+                           inputs= video_input,
+                           outputs = video_output)
+
+    submit_detect_file.click(fn=show_tracking,
+                             inputs= video_input,
+                             outputs = video_output)
+
+    # Web
+    submit_pose_web.click(fn=pose2d,
+                          inputs= video_input,
+                          outputs = video_output)
+
+    submit_pose3d_web.click(fn=pose3d,
+                            inputs= video_input,
+                            outputs = video_output)
+
+    submit_hand_web.click(fn=pose2dhand,
+                          inputs= video_input,
+                          outputs = video_output)
+
+    submit_detect_web.click(fn=show_tracking,
+                            inputs= video_input,
+                            outputs = video_output)
 
+demo.launch()
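The net effect in main.py: the single infer dispatcher, the checkbox groups and gr.TabbedInterface are gone, replaced by one function per task (pose2d, pose3d, pose2dhand, show_tracking), each bound to its own button inside a gr.Blocks layout and all writing to a shared video output. A minimal sketch of that wiring pattern, not the project's code: echo_video is a placeholder callback, and the Gradio 3.x-style source= and height= arguments mirror the diff above.

# Minimal sketch (not this repo's code) of the gr.Blocks wiring pattern that
# main.py now uses: one callback per task, each bound to its own button.
# echo_video is a placeholder standing in for pose2d / pose3d / pose2dhand /
# show_tracking.
import gradio as gr

def echo_video(video_path):
    # A real callback would run an MMPose inferencer or the YOLO tracker here
    # and return the path of the rendered video instead of echoing the input.
    return video_path

with gr.Blocks() as demo:
    with gr.Tab("Upload video"):
        with gr.Row():
            with gr.Column():
                video_input = gr.Video(source="upload", height=512)
                run_button = gr.Button("Run")
            video_output = gr.Video(height=512)

    # Event wiring has to happen inside the Blocks context.
    run_button.click(fn=echo_video, inputs=video_input, outputs=video_output)

if __name__ == "__main__":
    demo.launch()

Each of the eight submit_* buttons in the commit is bound this way; all of them take video_input as input, including the webcam buttons, which do not reference webcam_input.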
main_blocks.py → main_webcamtest.py RENAMED
@@ -1,5 +1,3 @@
-
-
 # Pose inferencing
 import mmpose
 from mmpose.apis import MMPoseInferencer
@@ -22,12 +20,14 @@ import cv2
 
 print("[INFO]: Imported modules!")
 human = MMPoseInferencer("human")
-hand = MMPoseInferencer("hand")
+hand = MMPoseInferencer("hand") #kpt_thr (float) – The threshold to visualize the keypoints. Defaults to 0.3
 human3d = MMPoseInferencer(pose3d="human3d")
 track_model = YOLO('yolov8n.pt') # Load an official Detect model
 
 # ultraltics
 
+# [INFO] VIDEO INPUT: /tmp/gradio/927601b660ec45919366ce37df1ed004a1fcffab/sample_flip.webm
+
 # Defining inferencer models to lookup in function
 inferencers = {"Estimate human 2d poses":human, "Estimate human 2d hand poses":hand, "Estimate human 3d poses":human3d, "Detect and track":track_model}
 
@@ -44,11 +44,11 @@ def tracking(video, model, boxes=True):
 
     return annotated_frame
 
-def show_tracking(video_content):
+def show_tracking(video_content, vis_out_dir, model):
     video = cv2.VideoCapture(video_content)
 
     # Track
-    video_track = tracking(video_content, track_model.track)
+    video_track = tracking(video_content, model.track)
 
     # Prepare to save video
     #out_file = os.path.join(vis_out_dir, "track.mp4")
@@ -65,9 +65,11 @@ def show_tracking(video_content):
     # Go through frames and write them
     for frame_track in video_track:
         result_track = frame_track[0].plot() # plot a BGR numpy array of predictions
+        out_track.write(result_track)
+
     print("[INFO] Done with frames")
     #print(type(result_pose)) numpy ndarray
-    out_track.write(result_track)
+
 
     out_track.release()
 
@@ -77,129 +79,112 @@ def show_tracking(video_content):
     return out_file
 
 
-def pose3d(video):
-    add_dir = str(uuid.uuid4())
-    #vidname = video.split("/")[-1]
-    vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
-    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
-
-    #full name = os.path.join(vis_out_dir, vidname)
-
-    result_generator = human3d(video,
+def poses(inferencer, video, vis_out_dir, kpt_thr):
+    print("[INFO] VIDEO INPUT: ", video)
+    result_generator = inferencer(video,
                              vis_out_dir = vis_out_dir,
+                             return_vis=True,
                              thickness=2,
                              rebase_keypoint_height=True,
-                             device="cuda")
+                             #kpt_thr=kpt_thr,
+                             device="cuda"
+                             )
 
     result = [result for result in result_generator] #next(result_generator)
-    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
-    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
+
+    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))
 
     return out_file
 
+def infer(video, check, kpt_thr, webcam=True):
+    print("[INFO] VIDEO INPUT: ", video)
+
+    # Selecting the specific inferencer
+    out_files=[]
+
+    for i in check:
+        # Create out directory
+        vis_out_dir = str(uuid.uuid4())
+        inferencer = inferencers[i] # 'hand', 'human , device='cuda'
+
+        if i == "Detect and track":
+            #continue
+            trackfile = show_tracking(video, vis_out_dir, inferencer)
+
+        else:
+            if webcam==True:
+                print("WEBCAM")
+                add_dir = str(uuid.uuid4())
+                vidname = video.split("/")[-1]
+                vis_out_dir_web = "/".join(["/".join(video.split("/")[:-1]), add_dir])
+                out_file = poses(inferencer, video, vis_out_dir_web, kpt_thr)
+                fullname = os.path.join(vis_out_dir_web, vidname)
+                #if i == "Estimate human 3d poses":
+                #    fullname = fullname[:-4]+"mp4" #Change to .mp4
+                #    out_files.append(fullname)
+                #else:
+                out_files.append(fullname)
+
+            else:
+                out_files.extend(out_file)
+
+    print(out_files)
 
-def pose2d(video):
-    add_dir = str(uuid.uuid4())
-    vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
-    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
-
-
-    result_generator = human(video,
-                             vis_out_dir = vis_out_dir,
-                             thickness=2,
-                             rebase_keypoint_height=True,
-                             device="cuda")
-
-    result = [result for result in result_generator] #next(result_generator)
-
-    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
-    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
-
-    return out_file
-
+    return "track.mp4", out_files[0], out_files[1], out_files[2] # out_files[3]
 
-def pose2dhand(video):
-    add_dir = str(uuid.uuid4())
-    vis_out_dir = "/".join(["/".join(video.split("/")[:-1]), add_dir])
+def run():
+    #https://github.com/open-mmlab/mmpose/blob/main/docs/en/user_guides/inference.md
+    check_web = gr.CheckboxGroup(choices = ["Detect and track", "Estimate human 2d poses", "Estimate human 2d hand poses", "Estimate human 3d poses"], label="Methods", type="value", info="Select the model(s) you want")
+    check_file = gr.CheckboxGroup(choices = ["Detect and track", "Estimate human 2d poses", "Estimate human 2d hand poses", "Estimate human 3d poses"], label="Methods", type="value", info="Select the model(s) you want")
 
-    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)
-
-    vis_out_dir = str(uuid.uuid4())
+    description = """
    \n\nHere you can upload videos or record one with your webcam and track objects or detect bodyposes in 2d and 3d.
+    """
 
-    result_generator = hand(video,
-                            vis_out_dir = vis_out_dir,
-                            thickness=2,
-                            rebase_keypoint_height=True,
-                            device="cuda")
+    # Insert slider with kpt_thr
+    web_kpthr = gr.Slider(0, 1, value=0.3)
+    file_kpthr = gr.Slider(0, 1, value=0.3)
 
-    result = [result for result in result_generator] #next(result_generator)
+    webcam = gr.Interface(
+        fn=infer,
+        inputs= [gr.Video(source="webcam", height=512), check_web, web_kpthr], # /tmp/gradio/927601b660ec45919366ce37df1ed004a1fcffab/sample_flip.webm
+        outputs = [gr.Video(format='mp4', height=512, label="Detect and track", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d hand poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)],
+        title = 'Tracking and pose estimation',
+        description = description,
+        allow_flagging=False
+    )

-    out_file = glob.glob(os.path.join(vis_out_dir, "*"))
-    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
+    file = gr.Interface(
+        infer,
+        inputs = [gr.Video(source="upload", height=512), check_file, file_kpthr],
+        outputs = [gr.Video(format='mp4', height=512, label="Detect and track", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 2d hand poses", show_label=True), gr.PlayableVideo(height=512, label = "Estimate human 3d poses", show_label=True)],
+        title = 'Tracking and pose estimation',
+        description = description,
+        allow_flagging=False
+    )
 
-    return out_file
+    demo = gr.TabbedInterface(
+        interface_list=[file, webcam],
+        tab_names=["From a File", "From your Webcam"]
+    )
 
+    demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
-with gr.Blocks() as demo:
-    with gr.Column():
-        with gr.Tab("Upload video"):
-            with gr.Row():
-                with gr.Column():
-                    video_input = gr.Video(source="upload", type="filepath", height=512)
+if __name__ == "__main__":
+    run()
 
-                    submit_pose_file = gr.Button("Make 2d pose estimation")
-                    submit_pose3d_file = gr.Button("Make 3d pose estimation")
-                    submit_hand_file = gr.Button("Make 2d hand estimation")
-                    submit_detect_file = gr.Button("Detect and track objects")
+# https://github.com/open-mmlab/mmpose/tree/dev-1.x/configs/body_3d_keypoint/pose_lift
+# motionbert_ft_h36m-d80af323_20230531.pth
+# simple3Dbaseline_h36m-f0ad73a4_20210419.pth
+# videopose_h36m_243frames_fullconv_supervised_cpn_ft-88f5abbb_20210527.pth
+# videopose_h36m_81frames_fullconv_supervised-1f2d1104_20210527.pth
+# videopose_h36m_27frames_fullconv_supervised-fe8fbba9_20210527.pth
+# videopose_h36m_1frame_fullconv_supervised_cpn_ft-5c3afaed_20210527.pth
+# https://github.com/open-mmlab/mmpose/blob/main/mmpose/apis/inferencers/pose3d_inferencer.py
 
-            video_output = gr.Video(height=512)
 
-        with gr.Tab("Record video with webcam"):
-            with gr.Row():
-                with gr.Column():
-                    webcam_input = gr.Video(source="webcam", height=512)
-
-                    submit_pose_web = gr.Button("Make 2d pose estimation")
-                    submit_pose3d_web = gr.Button("Make 3d pose estimation")
-                    submit_hand_web = gr.Button("Make 2d hand estimation")
-                    submit_detect_web = gr.Button("Detect and track objects")
+# 00000.mp4
+# 000000.mp4
 
-            webcam_output = gr.Video(height=512)
-
-
-    # From file
-    submit_pose_file.click(fn=pose2d,
-                           inputs= video_input,
-                           outputs = video_output)
-
-    submit_pose3d_file.click(fn=pose3d,
-                             inputs= video_input,
-                             outputs = video_output)
-
-    submit_hand_file.click(fn=pose2dhand,
-                           inputs= video_input,
-                           outputs = video_output)
-
-    submit_detect_file.click(fn=show_tracking,
-                             inputs= video_input,
-                             outputs = video_output)
-
-    # Web
-    submit_pose_web.click(fn=pose2d,
-                          inputs= video_input,
-                          outputs = video_output)
-
-    submit_pose3d_web.click(fn=pose3d,
-                            inputs= video_input,
-                            outputs = video_output)
-
-    submit_hand_web.click(fn=pose2dhand,
-                          inputs= video_input,
-                          outputs = video_output)
-
-    submit_detect_web.click(fn=show_tracking,
-                            inputs= video_input,
-                            outputs = video_output)
 
-demo.launch()
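Both the old and the new code fetch their rendered output the same way: call an MMPoseInferencer with vis_out_dir, drain the returned generator so inference actually runs, then glob the directory, since MMPose names the rendered file after the input. Below is a runnable sketch of that pattern under stated assumptions (mmpose installed, a CUDA device available, sample.mp4 as a placeholder input); device is passed to the constructor here rather than at call time as in the diff.

# Sketch of the inferencer pattern shared by pose2d, pose3d and pose2dhand:
# render into a fresh per-request directory, consume the generator, then pick
# up whatever file MMPose wrote there. Assumptions: mmpose is installed, a GPU
# is present, and "sample.mp4" is a placeholder input path.
import glob
import os
import uuid

from mmpose.apis import MMPoseInferencer

def run_pose2d(video_path):
    inferencer = MMPoseInferencer("human", device="cuda")
    vis_out_dir = str(uuid.uuid4())  # unique output directory per request, as in the diff

    result_generator = inferencer(video_path,
                                  vis_out_dir=vis_out_dir,
                                  thickness=2)
    results = [result for result in result_generator]  # the generator is lazy until consumed

    # The rendered file is named after the input, so glob instead of guessing the name.
    out_files = glob.glob(os.path.join(vis_out_dir, "*"))
    return out_files[0] if out_files else None

if __name__ == "__main__":
    print(run_pose2d("sample.mp4"))

The functions in the diff return the whole glob list; a single path is returned here, since that is what a single video output component ultimately consumes.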