# Pose inferencing
import mmpose
from mmpose.apis import MMPoseInferencer

# Ultralytics
from ultralytics import YOLO
import torch

# Gradio
import gradio as gr
import moviepy.editor as moviepy

# System and files
import os
import glob
import uuid

# Image manipulation
import numpy as np
import cv2

print("[INFO]: Imported modules!")

human = MMPoseInferencer("human")
hand = MMPoseInferencer("hand")
human3d = MMPoseInferencer(pose3d="human3d")
track_model = YOLO("yolov8n.pt")  # Load an official Ultralytics detection model

# Dictionary of the available inferencer models
inferencers = {"Estimate human 2d poses": human,
               "Estimate human 2d hand poses": hand,
               "Estimate human 3d poses": human3d,
               "Detect and track": track_model}

print("[INFO]: Downloaded models!")


def check_extension(video):
    # Extract the file name and extension; re-encode to MP4 when needed
    file_name, file_extension = os.path.splitext(video)

    if file_extension != ".mp4":  # "is not" compared identity, not string equality
        clip = moviepy.VideoFileClip(video)
        video = file_name + ".mp4"
        clip.write_videofile(video)

    return video


def tracking(video, model, boxes=True):
    # Perform tracking with the model
    # https://docs.ultralytics.com/modes/predict/
    print("[INFO] Starting tracking!")
    annotated_frame = model(video, boxes=boxes)

    return annotated_frame


def show_tracking(video_content):
    # https://docs.ultralytics.com/datasets/detect/coco/
    video = cv2.VideoCapture(video_content)

    # Track objects through the video with the YOLO model
    video_track = tracking(video_content, track_model.track)

    # Prepare to save the annotated video
    out_file = "track.mp4"
    print("[INFO]: TRACK", out_file)

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4 video
    fps = video.get(cv2.CAP_PROP_FPS)
    height, width, _ = video_track[0].orig_img.shape  # One Results object per frame
    size = (width, height)

    out_track = cv2.VideoWriter(out_file, fourcc, fps, size)

    # Go through the per-frame results and write out the annotated frames
    for frame_track in video_track:
        result_track = frame_track.plot()  # Plot a BGR numpy array of the predictions
        out_track.write(result_track)

    print("[INFO] Done with frames")

    out_track.release()
    video.release()
    cv2.destroyAllWindows()  # Close any OpenCV windows

    return out_file


def pose3d(video):
    video = check_extension(video)

    # Define a new unique folder for the visualisation output
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join("/".join(video.split("/")[:-1]), add_dir)
    os.makedirs(vis_out_dir)

    result_generator = human3d(video,
                               vis_out_dir=vis_out_dir,
                               thickness=2,
                               return_vis=True,
                               rebase_keypoint_height=True,
                               device="cuda")

    # Consume the generator to run inference over the whole video
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))

    return "".join(out_file)


def pose2d(video, kpt_threshold):
    video = check_extension(video)

    # Define a new unique folder for the visualisation output
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join("/".join(video.split("/")[:-1]), add_dir)
    os.makedirs(vis_out_dir)

    result_generator = human(video,
                             vis_out_dir=vis_out_dir,
                             return_vis=True,
                             thickness=2,
                             rebase_keypoint_height=True,
                             kpt_thr=kpt_threshold,
                             device="cuda")

    # Consume the generator to run inference over the whole video
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))

    return "".join(out_file)
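
# NOTE: pose3d/pose2d above and pose2dhand below repeat the same
# unique-output-folder setup. A helper along these lines could factor that
# out (an illustrative sketch with a hypothetical name, not wired in):
def make_vis_dir(video):
    """Create a unique visualisation folder next to the input video."""
    vis_out_dir = os.path.join(os.path.dirname(video), str(uuid.uuid4()))
    os.makedirs(vis_out_dir)
    return vis_out_dir
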
def pose2dhand(video, kpt_threshold):
    video = check_extension(video)

    # Define a new unique folder for the visualisation output
    add_dir = str(uuid.uuid4())
    vis_out_dir = os.path.join("/".join(video.split("/")[:-1]), add_dir)
    os.makedirs(vis_out_dir)

    result_generator = hand(video,
                            vis_out_dir=vis_out_dir,
                            return_vis=True,
                            thickness=2,
                            rebase_keypoint_height=True,
                            kpt_thr=kpt_threshold,
                            device="cuda")

    # Consume the generator to run inference over the whole video
    result = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))

    return "".join(out_file)


def run_UI():
    with gr.Blocks() as demo:
        with gr.Column():
            with gr.Tab("Upload video"):
                with gr.Row():
                    with gr.Column():
                        video_input = gr.Video(source="upload", height=612)  # gr.Video passes a filepath to the callbacks
                        # Slider for the keypoint confidence threshold (kpt_thr)
                        file_kpthr = gr.Slider(minimum=0.1, maximum=1.0, step=0.01, value=0.3,
                                               label="Keypoint threshold")
                        submit_pose_file = gr.Button("Make 2d pose estimation")
                        submit_pose3d_file = gr.Button("Make 3d pose estimation")
                        submit_hand_file = gr.Button("Make 2d hand estimation")
                        submit_detect_file = gr.Button("Detect and track objects")

                    with gr.Column():
                        video_output1 = gr.PlayableVideo(height=512, label="Estimate human 2d poses", show_label=True)
                        video_output2 = gr.PlayableVideo(height=512, label="Estimate human 3d poses", show_label=True)
                        video_output3 = gr.PlayableVideo(height=512, label="Estimate human hand poses", show_label=True)
                        video_output4 = gr.Video(height=512, label="Detection and tracking", show_label=True, format="mp4")

            with gr.Tab("Record video with webcam"):
                with gr.Column():
                    with gr.Row():
                        with gr.Column():
                            webcam_input = gr.Video(source="webcam", height=612)
                            # Slider for the keypoint confidence threshold (kpt_thr)
                            web_kpthr = gr.Slider(minimum=0.1, maximum=1.0, step=0.01, value=0.3,
                                                  label="Keypoint threshold")
                            submit_pose_web = gr.Button("Make 2d pose estimation")
                            submit_pose3d_web = gr.Button("Make 3d pose estimation")
                            submit_hand_web = gr.Button("Make 2d hand estimation")
                            submit_detect_web = gr.Button("Detect and track objects")

                    with gr.Row():
                        webcam_output1 = gr.PlayableVideo(height=512, label="Estimate human 2d poses", show_label=True)
                        webcam_output2 = gr.PlayableVideo(height=512, label="Estimate human 3d poses", show_label=True)
                        webcam_output3 = gr.PlayableVideo(height=512, label="Estimate human hand poses", show_label=True)
                        webcam_output4 = gr.Video(height=512, label="Detection and tracking", show_label=True, format="mp4")

        # From file
        submit_pose_file.click(fn=pose2d,
                               inputs=[video_input, file_kpthr],
                               outputs=video_output1)
        submit_pose3d_file.click(fn=pose3d,
                                 inputs=video_input,
                                 outputs=video_output2)
        submit_hand_file.click(fn=pose2dhand,
                               inputs=[video_input, file_kpthr],
                               outputs=video_output3)
        submit_detect_file.click(fn=show_tracking,
                                 inputs=video_input,
                                 outputs=video_output4)

        # From webcam
        submit_pose_web.click(fn=pose2d,
                              inputs=[webcam_input, web_kpthr],
                              outputs=webcam_output1)
        submit_pose3d_web.click(fn=pose3d,
                                inputs=webcam_input,
                                outputs=webcam_output2)
        submit_hand_web.click(fn=pose2dhand,
                              inputs=[webcam_input, web_kpthr],
                              outputs=webcam_output3)
        submit_detect_web.click(fn=show_tracking,
                                inputs=webcam_input,
                                outputs=webcam_output4)

    demo.launch(server_name="0.0.0.0", server_port=7860)


if __name__ == "__main__":
    run_UI()
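
# A quick headless sanity check without launching the UI, assuming a short
# test clip is available (the "sample.mp4" path below is hypothetical):
#
#   print(pose2d("sample.mp4", kpt_threshold=0.3))
#   print(show_tracking("sample.mp4"))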