# Pose inferencing
import mmpose
from mmpose.apis import MMPoseInferencer

# Ultralytics
from ultralytics import YOLO
import torch

# Gradio
import gradio as gr

# System and files
import os
import glob
import uuid

# Image manipulation
import numpy as np
import cv2

print("[INFO]: Imported modules!")

# Every model is instantiated once, at import time, and stored in a
# module-level name so that `infer` can look it up on each call.
human = MMPoseInferencer("human")
# kpt_thr (float) – The threshold to visualize the keypoints. Defaults to 0.3
hand = MMPoseInferencer("hand")
human3d = MMPoseInferencer(pose3d="human3d")
track_model = YOLO('yolov8n.pt')  # Load an official Detect model

# Lookup table from the method label (the same strings offered as checkbox
# choices in `run`) to the model object that serves it.
inferencers = {
    "Estimate human 2d poses": human,
    "Estimate human 2d hand poses": hand,
    "Estimate human 3d poses": human3d,
    "Detect and track": track_model,
}

print("[INFO]: Downloaded models!")

def tracking(video, model, boxes=True):
    """Call ``model`` on ``video`` and return whatever it produces.

    Args:
        video: Input forwarded unchanged as the first positional argument.
        model: Callable invoked as ``model(video, boxes=boxes)``; the caller
            in this file passes the tracker's ``track`` method.
        boxes: Forwarded as the ``boxes`` keyword argument.

    Returns:
        The value returned by ``model``.
    """
    print("[INFO] Loading model...")
    print("[INFO] Starting tracking!")

    # Prediction options: https://docs.ultralytics.com/modes/predict/
    return model(video, boxes=boxes)

def show_tracking(video_content, vis_out_dir, model):
    """Track objects in a video and write the annotated frames to ``track.mp4``.

    Args:
        video_content: Path of the input video; it is opened with OpenCV to
            read the frame rate and is also handed to the tracker.
        vis_out_dir: Currently unused. The output is always written to
            ``track.mp4`` in the working directory.
        model: Object whose ``track`` attribute is the callable passed to
            ``tracking`` (here the Ultralytics YOLO model).

    Returns:
        A one-element list holding the path of the written video, so the
        caller can treat it like the list returned by ``poses``.

    Raises:
        ValueError: If the tracker returned no frames.
    """
    # VideoWriter needs a plain string path; the list form is only what this
    # function returns to its caller.
    out_path = "track.mp4"

    # Track
    video_track = tracking(video_content, model.track)
    if not video_track:
        raise ValueError("Tracking produced no frames for the given video")

    # The source video is opened only to copy its frame rate.
    video = cv2.VideoCapture(video_content)
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
    finally:
        video.release()

    print("[INFO]: TRACK", out_path)

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4 video
    # Each element is the result for one frame. Read the frame size from the
    # result itself: indexing a result selects individual detections, which
    # fails on frames where nothing was detected.
    height, width = video_track[0].orig_img.shape[:2]
    size = (width, height)

    out_track = cv2.VideoWriter(out_path, fourcc, fps, size)
    try:
        # Go through frames and write them
        for frame_track in video_track:
            # plot() renders every detection of the frame as a BGR ndarray.
            out_track.write(frame_track.plot())
    finally:
        # Always finalize the file, even if plotting or writing fails.
        out_track.release()

    print("[INFO] Done with frames")

    return [out_path]


def poses(inferencer, video, vis_out_dir):
    """Run a pose inferencer over a video and list the videos it rendered.

    Args:
        inferencer: Callable invoked as ``inferencer(video, **options)`` that
            returns an iterable of results (here an ``MMPoseInferencer``).
        video: Input video, forwarded unchanged to the inferencer.
        vis_out_dir: Directory passed to the inferencer as ``vis_out_dir`` and
            searched for output files afterwards.

    Returns:
        List of paths matching ``*.mp4`` inside ``vis_out_dir``; empty if the
        inferencer wrote none.
    """
    result_generator = inferencer(
        video,
        vis_out_dir=vis_out_dir,
        return_vis=True,
        thickness=2,
        rebase_keypoint_height=True,
        device="cuda",
    )

    # The results themselves are not used; only the files written to
    # vis_out_dir matter. Drain the iterable so all the work runs, but drop
    # each item immediately instead of keeping every frame in memory.
    for _ in result_generator:
        pass

    return glob.glob(os.path.join(vis_out_dir, "*.mp4"))

# Order of the four output slots returned by `infer`. Each method always
# lands in the same slot, regardless of the order in which it was selected.
_OUTPUT_ORDER = (
    "Detect and track",
    "Estimate human 2d poses",
    "Estimate human 2d hand poses",
    "Estimate human 3d poses",
)


def infer(video, check):
    """Run the selected methods on a video and return one output per slot.

    Args:
        video: Path of the input video, or ``None`` if nothing was provided.
        check: Iterable of selected method labels (keys of ``inferencers``);
            may be empty or ``None``.

    Returns:
        A 4-tuple ordered as ``_OUTPUT_ORDER``. Each entry is the path of the
        video produced by that method, or ``None`` if the method was not
        selected or produced no file.

    Raises:
        KeyError: If ``check`` contains a label missing from ``inferencers``.
    """
    produced = dict.fromkeys(_OUTPUT_ORDER)

    # Nothing to process: return empty slots instead of failing on indexing.
    if video is None or not check:
        return tuple(produced[method] for method in _OUTPUT_ORDER)

    for method in check:
        # Each run gets its own output directory so results never mix.
        vis_out_dir = str(uuid.uuid4())
        inferencer = inferencers[method]

        if method == "Detect and track":
            files = show_tracking(video, vis_out_dir, inferencer)
        else:
            files = poses(inferencer, video, vis_out_dir)

        # Both helpers return a list of paths; one slot shows one video.
        produced[method] = files[0] if files else None
        print(produced)

    return tuple(produced[method] for method in _OUTPUT_ORDER)

def run():
    """Build the two-tab Gradio demo (file upload and webcam) and serve it.

    Both tabs call ``infer`` with a video and the selected method labels and
    show four video outputs. The server listens on 0.0.0.0:7860.
    """
    # https://github.com/open-mmlab/mmpose/blob/main/docs/en/user_guides/inference.md
    methods = [
        "Detect and track",
        "Estimate human 2d poses",
        "Estimate human 2d hand poses",
        "Estimate human 3d poses",
    ]

    def build_interface(source, **interface_options):
        """Create one Interface; each call gets its own component instances."""
        method_picker = gr.CheckboxGroup(
            choices=methods,
            label="Methods",
            type="value",
            info="Select the model(s) you want",
        )
        return gr.Interface(
            fn=infer,
            inputs=[gr.Video(source=source, height=412), method_picker],
            outputs=[
                gr.Video(format='mp4'),
                gr.PlayableVideo(),
                gr.PlayableVideo(),
                gr.PlayableVideo(),
            ],
            # String form of the option; the boolean form is deprecated.
            allow_flagging="never",
            **interface_options,
        )

    # Insert slider with kpt_thr

    webcam = build_interface(
        "webcam",
        title='Pose estimation',
        description='Pose estimation on video',
    )
    file = build_interface("upload")

    demo = gr.TabbedInterface(
        interface_list=[file, webcam],
        tab_names=["From a File", "From your Webcam"],
    )

    demo.launch(server_name="0.0.0.0", server_port=7860)


# Start the demo only when this file is executed as a script.
if __name__ == "__main__":
    run()

# https://github.com/open-mmlab/mmpose/tree/dev-1.x/configs/body_3d_keypoint/pose_lift
# motionbert_ft_h36m-d80af323_20230531.pth
# simple3Dbaseline_h36m-f0ad73a4_20210419.pth
# videopose_h36m_243frames_fullconv_supervised_cpn_ft-88f5abbb_20210527.pth
# videopose_h36m_81frames_fullconv_supervised-1f2d1104_20210527.pth
# videopose_h36m_27frames_fullconv_supervised-fe8fbba9_20210527.pth
# videopose_h36m_1frame_fullconv_supervised_cpn_ft-5c3afaed_20210527.pth
# https://github.com/open-mmlab/mmpose/blob/main/mmpose/apis/inferencers/pose3d_inferencer.py


# 00000.mp4
# 000000.mp4