import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import csv
import io
import tempfile
import os

# Load the KoSentence-BERT model used to embed Korean text
model = SentenceTransformer('jhgan/ko-sbert-sts')

# Module-level state shared between the callbacks defined in this file
global_recommendations = None  # recommendation rows of the latest analysis; set by hybrid_rag
global_csv_file = None  # path of the CSV file written by the latest analysis
youtube_columns = None  # auto-matched YouTube column mapping; set by select_youtube_columns

# Write the recommendation rows to a temporary CSV file
def create_csv_file(recommendations):
    """Write the recommendation rows to a temporary CSV file and return its path.

    Args:
        recommendations: Iterable of rows. Each row is a sequence holding the
            employee ID, employee name, recommended programs and recommended
            YouTube content, in that order.

    Returns:
        Path of the created ``.csv`` file. The file is created with
        ``delete=False``, so it stays on disk until the caller removes it.
    """
    # newline='' leaves line endings to the csv module. Without it, platforms
    # that translate '\n' on write (Windows) turn the writer's '\r\n' into
    # '\r\r\n', which shows up as a blank line after every row.
    with tempfile.NamedTemporaryFile(
        mode='w', delete=False, suffix='.csv', encoding='utf-8', newline=''
    ) as temp_file:
        writer = csv.writer(temp_file)
        writer.writerow(["Employee ID", "Employee Name", "Recommended Programs", "Recommended YouTube Content"])
        writer.writerows(recommendations)
    return temp_file.name

# Column auto-matching helper
def auto_match_columns(df, required_cols):
    """Map each required column name to the best-matching column of ``df``.

    Matching ignores case and surrounding whitespace. A column whose name
    equals the required name is preferred; otherwise the first column whose
    name contains the required name as a substring is used.

    Args:
        df: DataFrame whose column labels are searched.
        required_cols: Iterable of required column names.

    Returns:
        Dict with one entry per required name. The value is the matched
        column label exactly as it appears in ``df.columns``, or ``None``
        when no column matches.
    """
    # Column labels are not guaranteed to be strings (e.g. integer labels),
    # so go through str() rather than calling .lower() on the label itself.
    normalised = [(col, str(col).strip().lower()) for col in df.columns]

    matched_cols = {}
    for req_col in required_cols:
        needle = str(req_col).strip().lower()
        exact = [col for col, text in normalised if text == needle]
        # Substring matching is only a fallback, so that e.g. "subtitle"
        # cannot shadow a real "title" column that appears later.
        partial = [col for col, text in normalised if needle in text]
        candidates = exact or partial
        matched_cols[req_col] = candidates[0] if candidates else None
    return matched_cols

# Column validation helper
def validate_and_get_columns(employee_df, program_df):
    """Resolve the required employee and program columns, reporting the first gap.

    Args:
        employee_df: DataFrame holding the employee data.
        program_df: DataFrame holding the training-program data.

    Returns:
        ``(error_message, None, None)`` when a required column could not be
        matched (employee columns are checked before program columns),
        otherwise ``(None, employee_cols, program_cols)`` where both dicts map
        the required name to the matched column of the respective frame.
    """
    employee_cols = auto_match_columns(
        employee_df, ["employee_id", "employee_name", "current_skills"]
    )
    program_cols = auto_match_columns(
        program_df, ["program_name", "skills_acquired", "duration"]
    )

    unmatched_employee = [name for name, column in employee_cols.items() if column is None]
    if unmatched_employee:
        key = unmatched_employee[0]
        return f"직원 데이터에서 '{key}' 열을 선택할 수 없습니다. 올바른 열을 선택하세요.", None, None

    unmatched_program = [name for name, column in program_cols.items() if column is None]
    if unmatched_program:
        key = unmatched_program[0]
        return f"프로그램 데이터에서 '{key}' 열을 선택할 수 없습니다. 올바른 열을 선택하세요.", None, None

    return None, employee_cols, program_cols

# Populate the YouTube column dropdowns
def select_youtube_columns(youtube_file):
    """Build the four YouTube column dropdowns for the uploaded file.

    When a file is given, its CSV is read, the auto-matched column mapping is
    stored in the module-level ``youtube_columns``, and every dropdown offers
    all columns of the file with the matched column preselected.

    Args:
        youtube_file: Uploaded file object exposing ``.name``, or ``None``.

    Returns:
        List of four ``gr.Dropdown`` components, in the order title,
        description, URL, upload date. With no file, each has no choices and
        an empty-string value.
    """
    global youtube_columns
    roles = ["title", "description", "url", "upload_date"]

    if youtube_file is None:
        return [gr.Dropdown(choices=[], value="") for _ in roles]

    frame = pd.read_csv(youtube_file.name)
    youtube_columns = auto_match_columns(frame, roles)

    available = frame.columns.tolist()
    return [
        gr.Dropdown(choices=available, value=youtube_columns.get(role, ""))
        for role in roles
    ]

# Load the YouTube content data and expose it under canonical column names
def load_youtube_content(file_path, title_col, description_col, url_col, upload_date_col):
    """Load the YouTube CSV and return the selected columns under canonical names.

    Args:
        file_path: Path or file-like object accepted by ``pandas.read_csv``.
        title_col: Source column to expose as ``title``.
        description_col: Source column to expose as ``description``.
        url_col: Source column to expose as ``url``.
        upload_date_col: Source column to expose as ``upload_date``.
            For all four, a falsy value (``None`` or ``""``) means the role
            is not selected and the canonical column is omitted.

    Returns:
        A new DataFrame holding the selected roles, in the order title,
        description, url, upload_date. ``upload_date`` is converted with
        ``pandas.to_datetime(errors='coerce')``, so unparseable values
        become ``NaT``.

    Raises:
        KeyError: If a selected column does not exist in the file.
    """
    raw_df = pd.read_csv(file_path)
    roles = (
        ('title', title_col),
        ('description', description_col),
        ('url', url_col),
        ('upload_date', upload_date_col),
    )

    # Build every canonical column from its own source column. Selecting and
    # renaming in place would yield duplicate labels when the same source
    # column is chosen for two roles, and would mutate a selection result.
    youtube_df = pd.DataFrame(
        {canonical: raw_df[source] for canonical, source in roles if source},
        index=raw_df.index,
    )

    if 'upload_date' in youtube_df.columns:
        youtube_df['upload_date'] = pd.to_datetime(youtube_df['upload_date'], errors='coerce')

    return youtube_df

# Match YouTube content against the training programs
def match_youtube_content(program_skills, youtube_df, model):
    """Compute the similarity between program skills and YouTube descriptions.

    Args:
        program_skills: Sequence of skill texts, one entry per program.
        youtube_df: DataFrame of YouTube content; only its ``description``
            column is used.
        model: Object exposing ``encode(list_of_texts)``; its results are
            passed to ``cosine_similarity``.

    Returns:
        The ``cosine_similarity`` result computed from the program embeddings
        (first argument) and the description embeddings (second argument), or
        ``None`` when ``youtube_df`` has no ``description`` column or when
        either side has no entries.
    """
    if 'description' not in youtube_df.columns:
        return None

    # Empty CSV cells are read by pandas as NaN (a float); hand the encoder
    # text only by turning missing values into empty strings.
    descriptions = ['' if pd.isna(text) else str(text) for text in youtube_df['description'].tolist()]
    skills = ['' if pd.isna(skill) else str(skill) for skill in program_skills]

    # Nothing to compare: report "no similarities" the same way a missing
    # description column is reported instead of failing on empty input.
    if not descriptions or not skills:
        return None

    youtube_embeddings = model.encode(descriptions)
    program_embeddings = model.encode(skills)
    return cosine_similarity(program_embeddings, youtube_embeddings)

# Analyse the employee data, recommend training programs and build the result table
def hybrid_rag(employee_file, program_file, youtube_file, title_col, description_col, url_col, upload_date_col):
    """Recommend training programs and YouTube content for every employee.

    Employee skills and program skills are embedded with the module-level
    ``model``; a program is recommended to an employee when their cosine
    similarity exceeds 0.5. For every recommended program the three most
    similar YouTube entries are collected. At most five programs and three
    distinct YouTube entries are kept per employee.

    The resulting rows are stored in ``global_recommendations`` and written
    to a CSV file whose path is stored in ``global_csv_file``.

    Args:
        employee_file: Uploaded employee CSV (object exposing ``.name``).
        program_file: Uploaded training-program CSV (object exposing ``.name``).
        youtube_file: Uploaded YouTube CSV (object exposing ``.name``), or
            ``None`` to skip YouTube recommendations.
        title_col: YouTube column to use as title.
        description_col: YouTube column to use as description.
        url_col: YouTube column to use as URL.
        upload_date_col: YouTube column to use as upload date.

    Returns:
        Tuple ``(result_df, file_component, button_component)``: the result
        table, a visible ``gr.File`` pointing at the CSV, and a visible
        download ``gr.Button``.

    Raises:
        gr.Error: If the employee or program file is missing, or a required
            column cannot be matched. The first output is a table component,
            so the message is raised as an error instead of being returned in
            the table slot.
    """
    global global_recommendations
    global global_csv_file

    similarity_threshold = 0.5  # a program is recommended above this cosine similarity
    max_programs = 5  # programs kept per employee
    max_youtube = 3  # YouTube entries kept per program and per employee

    def as_text(series):
        # Empty CSV cells are read as NaN (a float); pass text only to the encoder.
        return ['' if pd.isna(value) else str(value) for value in series.tolist()]

    if employee_file is None or program_file is None:
        raise gr.Error("직원 데이터와 교육 프로그램 데이터를 모두 업로드해주세요.")

    # Load the employee and program data
    employee_df = pd.read_csv(employee_file.name)
    program_df = pd.read_csv(program_file.name)

    error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
    if error_msg:
        raise gr.Error(error_msg)

    employee_skills = as_text(employee_df[employee_cols["current_skills"]])
    program_skills = as_text(program_df[program_cols["skills_acquired"]])

    # With no employees or no programs the loops below never index the
    # matrix, so the embedding step is skipped entirely.
    similarities = None
    if employee_skills and program_skills:
        similarities = cosine_similarity(model.encode(employee_skills), model.encode(program_skills))

    program_labels = [
        f"{name} ({duration})"
        for name, duration in zip(
            program_df[program_cols['program_name']].tolist(),
            program_df[program_cols['duration']].tolist(),
        )
    ]

    # Top YouTube entries per program. They depend on the program only, so
    # they are computed once here rather than once per employee.
    program_youtube = None
    if youtube_file is not None and program_skills:
        youtube_df = load_youtube_content(youtube_file.name, title_col, description_col, url_col, upload_date_col)
        # Entries are rendered from title and URL; without both there is nothing to show.
        if 'title' in youtube_df.columns and 'url' in youtube_df.columns:
            youtube_similarities = match_youtube_content(program_skills, youtube_df, model)
            if youtube_similarities is not None:
                titles = youtube_df['title'].tolist()
                urls = youtube_df['url'].tolist()
                program_youtube = [
                    [
                        f"{titles[idx]} (URL: {urls[idx]})"
                        for idx in scores.argsort()[-max_youtube:][::-1]  # best first
                    ]
                    for scores in youtube_similarities
                ]

    employee_ids = employee_df[employee_cols['employee_id']].tolist()
    employee_names = employee_df[employee_cols['employee_name']].tolist()

    recommendation_rows = []
    # Positions (not index labels) address the similarity matrix.
    for emp_pos, (employee_id, employee_name) in enumerate(zip(employee_ids, employee_names)):
        recommended_programs = []
        recommended_youtube = []
        for prog_pos, label in enumerate(program_labels):
            if similarities[emp_pos, prog_pos] > similarity_threshold:
                recommended_programs.append(label)
                if program_youtube is not None:
                    for entry in program_youtube[prog_pos]:
                        # Skip repeats so the few slots are not filled with the same video.
                        if entry not in recommended_youtube:
                            recommended_youtube.append(entry)

        # Limit the number of recommended programs and YouTube entries
        recommended_programs = recommended_programs[:max_programs]
        recommended_youtube = recommended_youtube[:max_youtube]

        if recommended_programs:
            recommendation_rows.append([employee_id, employee_name,
                                        ", ".join(recommended_programs), ", ".join(recommended_youtube)])
        else:
            recommendation_rows.append([employee_id, employee_name,
                                        "적합한 프로그램 없음", "추천 콘텐츠 없음"])

    global_recommendations = recommendation_rows

    # Create the CSV file
    global_csv_file = create_csv_file(recommendation_rows)

    # Build the result table
    result_df = pd.DataFrame(recommendation_rows, columns=["Employee ID", "Employee Name", "Recommended Programs", "Recommended YouTube Content"])

    return result_df, gr.File(value=global_csv_file, visible=True), gr.Button(value="CSV 다운로드", visible=True)
    
# Chat response handler
def chat_response(message, history):
    """Return the stored recommendation for the employee named in ``message``.

    The employee names of the latest analysis (``global_recommendations``)
    are searched case-insensitively inside the message. When several names
    occur in the message, the longest one wins, so that a name which is a
    prefix of another (e.g. "김민" / "김민수") does not shadow it.

    Args:
        message: Text entered by the user; ``None`` is treated as empty.
        history: Chat history. Not used; accepted so the function fits a
            two-input chat callback.

    Returns:
        The reply text: the recommendation of the matched employee, a prompt
        to run the analysis first, or a not-found message.
    """
    global global_recommendations
    if global_recommendations is None:
        return "먼저 '분석 시작' 버튼을 눌러 데이터를 분석해주세요."

    query = str(message or "").lower()

    best_match = None
    best_length = 0
    for employee in global_recommendations:
        name = employee[1]
        # Names come from a CSV column and may be missing (NaN) or numeric.
        if pd.isna(name):
            continue
        needle = str(name).strip().lower()
        # An empty name is a substring of every message; never match on it.
        if needle and needle in query and len(needle) > best_length:
            best_match, best_length = employee, len(needle)

    if best_match is not None:
        employee = best_match
        return f"{employee[1]}님에게 추천된 프로그램은 다음과 같습니다: {employee[2]}\n\n추천 유튜브 콘텐츠: {employee[3]}"

    return "죄송합니다. 해당 직원의 정보를 찾을 수 없습니다. 다른 직원 이름을 입력해주세요."

# CSV download handler
def download_csv():
    """Return a visible file component pointing at the CSV of the latest analysis."""
    csv_path = global_csv_file
    return gr.File(visible=True, value=csv_path)

# Gradio Blocks UI: upload widgets, analysis results and the chat area
with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
    gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>💼 HybridRAG 시스템 (유튜브 콘텐츠 포함)</h1>")
    
    with gr.Row():
        with gr.Column(scale=1, min_width=300):
            gr.Markdown("<h3 style='color: #34495e;'>1. 데이터를 업로드하세요</h3>")
            employee_file = gr.File(label="직원 데이터 업로드", interactive=True)
            program_file = gr.File(label="교육 프로그램 데이터 업로드", interactive=True)
            youtube_file = gr.File(label="유튜브 콘텐츠 데이터 업로드", interactive=True)
            
            gr.Markdown("<h4 style='color: #34495e;'>유튜브 데이터 열 선택</h4>")
            title_col = gr.Dropdown(label="제목 열")
            description_col = gr.Dropdown(label="설명 열")
            url_col = gr.Dropdown(label="URL 열")
            upload_date_col = gr.Dropdown(label="업로드 날짜 열")
            
            # Refresh the four column dropdowns whenever the YouTube file changes
            youtube_file.change(select_youtube_columns, inputs=[youtube_file], outputs=[title_col, description_col, url_col, upload_date_col])
            
            analyze_button = gr.Button("분석 시작", elem_classes="gradio-button")
            output_table = gr.DataFrame(label="분석 결과 (테이블)")
            # Hidden until an analysis has produced a CSV file
            csv_download = gr.File(label="추천 결과 다운로드", visible=False)
            download_button = gr.Button("CSV 다운로드", visible=False)

    gr.Markdown("<h3 style='color: #34495e;'>2. 직원별 추천 프로그램 및 유튜브 콘텐츠 확인</h3>")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="직원 이름을 입력하세요")
    clear = gr.Button("대화 내역 지우기")

    # Clicking the analyse button updates the table and the download widgets
    analyze_button.click(
        hybrid_rag, 
        inputs=[employee_file, program_file, youtube_file, title_col, description_col, url_col, upload_date_col], 
        outputs=[output_table, csv_download, download_button]
    )

    # CSV download button
    download_button.click(download_csv, inputs=[], outputs=[csv_download])

    # Chat wiring. chat_response returns a plain reply string, but the chatbot
    # output takes the whole conversation, so this adapter appends the new
    # exchange to the history and clears the textbox.
    # NOTE(review): uses the [user, bot] pair format of gr.Chatbot(); confirm
    # against the installed Gradio version (newer releases also offer a
    # role/content "messages" format).
    def respond(message, history):
        reply = chat_response(message, history)
        updated_history = list(history or [])
        updated_history.append([message, reply])
        return "", updated_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

# Delete the temporary CSV file when the program exits
import atexit

@atexit.register
def cleanup():
    """Remove the CSV file of the latest analysis, if one was created and still exists."""
    csv_path = global_csv_file
    if not csv_path:
        return
    if os.path.exists(csv_path):
        os.remove(csv_path)

# Launch the Gradio interface
demo.launch()