# Pose inferencing
import mmpose
from mmpose.apis import MMPoseInferencer

# Ultralytics
from ultralytics import YOLO
import torch

# Gradio
import gradio as gr

# System and files
import os
import glob
import uuid

# Image manipulation
import numpy as np
import cv2

print("[INFO]: Imported modules!")

# Build the inferencers once at startup so every request reuses them
human = MMPoseInferencer("human")
hand = MMPoseInferencer("hand")
human3d = MMPoseInferencer(pose3d="human3d")
track_model = YOLO("yolov8n.pt")  # Load an official Ultralytics detection model

# Defining inferencer models to look up in functions
inferencers = {
    "Estimate human 2d poses": human,
    "Estimate human 2d hand poses": hand,
    "Estimate human 3d poses": human3d,
    "Detect and track": track_model,
}

print("[INFO]: Downloaded models!")


def tracking(video, model, boxes=True):
    print("[INFO] Starting tracking!")
    # https://docs.ultralytics.com/modes/predict/
    # `model` is a bound Ultralytics method such as `track_model.track`;
    # for a video path it returns one Results object per frame.
    annotated_frames = model(video, boxes=boxes)
    return annotated_frames


def show_tracking(video_content):
    video = cv2.VideoCapture(video_content)

    # Track
    video_track = tracking(video_content, track_model.track)

    # Prepare to save video
    out_file = "track.mp4"
    print("[INFO]: TRACK", out_file)

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4 video
    fps = video.get(cv2.CAP_PROP_FPS)
    height, width, _ = video_track[0].orig_img.shape
    size = (width, height)

    out_track = cv2.VideoWriter(out_file, fourcc, fps, size)

    # Go through the frames and write them to the output video
    for frame_track in video_track:
        result_track = frame_track.plot()  # BGR numpy array with predictions drawn
        out_track.write(result_track)
    print("[INFO] Done with frames")

    out_track.release()
    video.release()
    cv2.destroyAllWindows()  # Closing window

    return out_file


def pose3d(video):
    # Write visualizations into a unique directory next to the uploaded video
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join(os.path.dirname(video), add_dir)
    os.makedirs(vis_out_dir)
    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)

    result_generator = human3d(
        video,
        vis_out_dir=vis_out_dir,
        thickness=2,
        return_vis=True,
        rebase_keypoint_height=True,
        device="cuda",
    )
    # Consume the generator so the visualization video gets written to disk
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4")) + glob.glob(os.path.join(vis_out_dir, "*.webm"))
    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
    # Return the first rendered video found
    return out_file[0]


def pose2d(video):
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join(os.path.dirname(video), add_dir)
    os.makedirs(vis_out_dir)
    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)

    result_generator = human(
        video,
        vis_out_dir=vis_out_dir,
        return_vis=True,
        thickness=2,
        rebase_keypoint_height=True,
        # kpt_thr=kpt_thr,  # the UI sliders are not wired in yet; see the sketch at the end of the script
        device="cuda",
    )
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4")) + glob.glob(os.path.join(vis_out_dir, "*.webm"))
    # Return the first rendered video found
    return out_file[0]
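
# ---------------------------------------------------------------------------
# pose2d, pose3d, and pose2dhand (below) all repeat the same bookkeeping:
# create a unique output directory, drain the inferencer generator, then glob
# for the rendered video. A minimal refactor sketch using only the calls
# already made above; the helper name `run_pose_inferencer` is ours and is
# not wired into the UI.
# ---------------------------------------------------------------------------
def run_pose_inferencer(inferencer, video, **call_kwargs):
    # Unique directory next to the uploaded video, as in pose2d/pose3d
    vis_out_dir = os.path.join(os.path.dirname(video), str(uuid.uuid4()))
    os.makedirs(vis_out_dir)

    # Drain the generator so MMPose writes the visualization to disk
    results = list(inferencer(video, vis_out_dir=vis_out_dir, **call_kwargs))

    # MMPose writes .mp4 or .webm depending on the available codec
    out_files = glob.glob(os.path.join(vis_out_dir, "*.mp4")) + glob.glob(os.path.join(vis_out_dir, "*.webm"))
    return out_files[0]

# Example call (equivalent to pose2dhand below):
# out = run_pose_inferencer(hand, video, return_vis=True, thickness=2, device="cuda")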

def pose2dhand(video):
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join(os.path.dirname(video), add_dir)
    os.makedirs(vis_out_dir)
    print("[INFO]: CURRENT OUT DIR: ", vis_out_dir)

    result_generator = hand(
        video,
        vis_out_dir=vis_out_dir,
        return_vis=True,
        thickness=2,
        rebase_keypoint_height=True,
        device="cuda",
    )
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4")) + glob.glob(os.path.join(vis_out_dir, "*.webm"))
    print("[INFO]: CURRENT OUT FILE NAME: ", out_file)
    # Return the first rendered video found
    return out_file[0]


if __name__ == "__main__":
    with gr.Blocks() as demo:
        with gr.Column():
            with gr.Tab("Upload video"):
                with gr.Row():
                    with gr.Column():
                        video_input = gr.Video(source="upload", type="filepath", height=512)
                        # Keypoint confidence slider (see the wiring sketch at the end of the script)
                        file_kpthr = gr.Slider(
                            0, 1, value=0.3, step=0.05,
                            label="Keypoint threshold",
                            info="Minimum confidence for a keypoint to be drawn",
                        )
                        submit_pose_file = gr.Button("Make 2d pose estimation")
                        submit_pose3d_file = gr.Button("Make 3d pose estimation")
                        submit_hand_file = gr.Button("Make 2d hand estimation")
                        submit_detect_file = gr.Button("Detect and track objects")
                        video_output1 = gr.PlayableVideo(height=512, label="Estimate human 2d poses", show_label=True)
                        video_output2 = gr.PlayableVideo(height=512, label="Estimate human 3d poses", show_label=True)
                        video_output3 = gr.PlayableVideo(height=512, label="Estimate human hand poses", show_label=True)
                        video_output4 = gr.Video(height=512)

            with gr.Tab("Record video with webcam"):
                with gr.Row():
                    with gr.Column():
                        webcam_input = gr.Video(source="webcam", height=512)
                        web_kpthr = gr.Slider(
                            0, 1, value=0.3, step=0.05,
                            label="Keypoint threshold",
                            info="Minimum confidence for a keypoint to be drawn",
                        )
                        submit_pose_web = gr.Button("Make 2d pose estimation")
                        submit_pose3d_web = gr.Button("Make 3d pose estimation")
                        submit_hand_web = gr.Button("Make 2d hand estimation")
                        submit_detect_web = gr.Button("Detect and track objects")
                        webcam_output1 = gr.PlayableVideo(height=512, label="Estimate human 2d poses", show_label=True)
                        webcam_output2 = gr.PlayableVideo(height=512, label="Estimate human 3d poses", show_label=True)
                        webcam_output3 = gr.PlayableVideo(height=512, label="Estimate human hand poses", show_label=True)
                        webcam_output4 = gr.Video(height=512)

        # From file
        submit_pose_file.click(fn=pose2d, inputs=video_input, outputs=video_output1)
        submit_pose3d_file.click(fn=pose3d, inputs=video_input, outputs=video_output2)
        submit_hand_file.click(fn=pose2dhand, inputs=video_input, outputs=video_output3)
        submit_detect_file.click(fn=show_tracking, inputs=video_input, outputs=video_output4)

        # From webcam
        submit_pose_web.click(fn=pose2d, inputs=webcam_input, outputs=webcam_output1)
        submit_pose3d_web.click(fn=pose3d, inputs=webcam_input, outputs=webcam_output2)
        submit_hand_web.click(fn=pose2dhand, inputs=webcam_input, outputs=webcam_output3)
        submit_detect_web.click(fn=show_tracking, inputs=webcam_input, outputs=webcam_output4)

    demo.launch(server_name="0.0.0.0", server_port=7860)
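
# ---------------------------------------------------------------------------
# Hooking up the keypoint-threshold sliders (sketch).
# The MMPose call in pose2d carries a commented-out `kpt_thr` argument, and
# the sliders (file_kpthr, web_kpthr) currently feed nothing. To honor them,
# each handler would take a second parameter and the .click() wiring would
# pass the slider component alongside the video, e.g. (untested sketch):
#
#   def pose2d(video, kpt_thr):
#       ...
#       result_generator = human(video, vis_out_dir=vis_out_dir,
#                                return_vis=True, thickness=2,
#                                kpt_thr=kpt_thr, device="cuda")
#       ...
#
#   submit_pose_file.click(fn=pose2d,
#                          inputs=[video_input, file_kpthr],
#                          outputs=video_output1)
# ---------------------------------------------------------------------------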