"""HybridRAG demo: recommend training programs to employees.

Two sources are combined:

* VectorRAG - cosine similarity between KoSentence-BERT embeddings of each
  employee's current skills and the skills each program teaches.
* GraphRAG  - Employee -> Program relations stored in a Neo4j database.

The result is shown in a Gradio UI as a table, a relationship graph and a
downloadable CSV file.
"""

import csv
import io
import logging
import os

import gradio as gr
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from neo4j import GraphDatabase
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

logger = logging.getLogger(__name__)

# KoSentence-BERT model used to embed Korean skill descriptions.
model = SentenceTransformer('jhgan/ko-sbert-sts')

# NanumBarunGothic font so Korean labels render in matplotlib.
font_path = "/root/fonts/NanumBarunGothic.ttf"  # font stored under the Hugging Face root
# Register the file with matplotlib; otherwise the family name set below is
# only resolvable when the font happens to be installed system-wide.
fm.fontManager.addfont(font_path)
fontprop = fm.FontProperties(fname=font_path)
plt.rc('font', family=fontprop.get_name())

# A program is recommended when its cosine similarity exceeds this value.
SIMILARITY_THRESHOLD = 0.5

# Cell text used when neither source yields a recommendation.
NO_MATCH_LABEL = "적합한 프로그램 없음"

# Header shared by the result table and the CSV export.
RESULT_COLUMNS = ["Employee ID", "Employee Name", "Recommended Programs"]

# Cypher query that fetches graph-based recommendations.
GRAPH_QUERY = """
    MATCH (e:Employee)-[:HAS_SKILL]->(p:Program)
    RETURN e.name AS employee_name, p.name AS program_name, p.duration AS duration
    """


class Neo4jConnection:
    """Thin wrapper around a Neo4j driver that runs single queries."""

    def __init__(self, uri, user, pwd):
        """Create the driver for ``uri`` authenticated as ``user``/``pwd``."""
        self.driver = GraphDatabase.driver(uri, auth=(user, pwd))

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def query(self, query, parameters=None, db=None):
        """Run ``query`` and return its records as a list.

        Args:
            query: Cypher query text.
            parameters: Optional query parameters.
            db: Optional database name; the driver default is used when falsy.

        Returns:
            A list of records, or ``None`` when the query failed. Failures
            are logged rather than raised (best-effort lookup).
        """
        session = None
        try:
            session = self.driver.session(database=db) if db else self.driver.session()
            return list(session.run(query, parameters))
        except Exception as exc:  # deliberate best-effort boundary
            logger.error("Query failed: %s", exc)
            return None
        finally:
            if session is not None:
                session.close()


# Neo4j connection settings. Environment variables override the defaults so
# real credentials do not have to live in the source file.
conn = Neo4jConnection(
    uri=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    user=os.environ.get("NEO4J_USER", "neo4j"),
    pwd=os.environ.get("NEO4J_PASSWORD", "your_password"),
)


def save_recommendations_to_file(recommendations, file_path="recommendations.csv"):
    """Write the recommendation rows to a CSV file and return its path.

    Args:
        recommendations: Iterable of ``[employee_id, employee_name, programs]``.
        file_path: Destination path; defaults to ``recommendations.csv``.

    Returns:
        The path the file was written to.
    """
    with open(file_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(RESULT_COLUMNS)
        writer.writerows(recommendations)
    return file_path


def auto_match_columns(df, required_cols):
    """Map each required column name to the first matching DataFrame column.

    A column matches when the required name is a substring of the
    lower-cased column label.

    Returns:
        Dict ``{required_name: matched_column_or_None}``.
    """
    return {
        req_col: next(
            # str() so non-string labels (e.g. ints) do not crash .lower().
            (col for col in df.columns if req_col in str(col).lower()),
            None,
        )
        for req_col in required_cols
    }


def validate_and_get_columns(employee_df, program_df):
    """Resolve the columns both data sets must provide.

    Returns:
        ``(error_message, employee_cols, program_cols)``. On success the
        message is ``None``; on failure both mappings are ``None`` and the
        message names the first missing column.
    """
    required_employee_cols = ["employee_id", "employee_name", "current_skills"]
    required_program_cols = ["program_name", "skills_acquired", "duration"]

    employee_cols = auto_match_columns(employee_df, required_employee_cols)
    program_cols = auto_match_columns(program_df, required_program_cols)

    for key, value in employee_cols.items():
        if value is None:
            return f"직원 데이터에서 '{key}' 열을 선택할 수 없습니다. 올바른 열을 선택하세요.", None, None

    for key, value in program_cols.items():
        if value is None:
            return f"프로그램 데이터에서 '{key}' 열을 선택할 수 없습니다. 올바른 열을 선택하세요.", None, None

    return None, employee_cols, program_cols


def _read_uploaded_csv(upload):
    """Load an uploaded CSV given either a path string or a file-like wrapper."""
    # Gradio passes an object with ``.name`` or a plain path, depending on
    # version and component settings; accept both.
    path = getattr(upload, "name", upload)
    try:
        return pd.read_csv(path)
    except UnicodeDecodeError:
        # Korean spreadsheets are frequently exported as CP949 instead of UTF-8.
        return pd.read_csv(path, encoding="cp949")


def _match_programs(employee_skills, program_skills, threshold=SIMILARITY_THRESHOLD):
    """Return, per employee, the positions of programs scoring above ``threshold``."""
    if not employee_skills or not program_skills:
        # cosine_similarity rejects empty inputs; nothing can match anyway.
        return [[] for _ in employee_skills]

    similarities = cosine_similarity(
        model.encode(employee_skills), model.encode(program_skills)
    )
    return [
        [j for j, score in enumerate(row) if score > threshold]
        for row in similarities
    ]


def _fetch_graph_recommendations():
    """Return ``{employee_name: [program_name, ...]}`` from Neo4j.

    An empty dict is returned when the query fails, so an unreachable
    database degrades to vector-only recommendations instead of crashing.
    """
    by_employee = {}
    for record in conn.query(GRAPH_QUERY) or []:
        by_employee.setdefault(record['employee_name'], []).append(record['program_name'])
    return by_employee


def _build_relationship_figure(employee_names, program_names, matches):
    """Draw the employee/program graph and return the matplotlib figure."""
    graph = nx.Graph()
    graph.add_nodes_from(employee_names, type='employee')
    graph.add_nodes_from(program_names, type='program')
    for employee_name, matched in zip(employee_names, matches):
        for j in matched:
            graph.add_edge(employee_name, program_names[j])

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        pos = nx.spring_layout(graph)
        # networkx label drawing takes ``font_family``; it has no
        # ``fontproperties`` keyword and rejects unknown keywords.
        nx.draw(graph, pos, ax=ax, with_labels=True, node_color='lightblue',
                node_size=3000, font_size=10, font_weight='bold',
                edge_color='gray', font_family=fontprop.get_name())
        ax.set_title("직원과 프로그램 간의 관계", fontsize=14, fontweight='bold',
                     fontproperties=fontprop)
        fig.tight_layout()
    finally:
        # Deregister from pyplot so figures do not accumulate across
        # requests; the Figure object itself stays renderable.
        plt.close(fig)
    return fig


def hybrid_rag(employee_file, program_file):
    """Analyse the uploads and recommend training programs per employee.

    Args:
        employee_file: Uploaded employee CSV (path or object with ``.name``).
        program_file: Uploaded program CSV (path or object with ``.name``).

    Returns:
        ``(result_df, figure, csv_path)`` - the recommendation table, the
        relationship graph and the path of the exported CSV.

    Raises:
        gr.Error: When a file is missing or a required column cannot be
            matched; Gradio shows the message to the user.
    """
    if employee_file is None or program_file is None:
        raise gr.Error("직원 데이터와 교육 프로그램 데이터를 모두 업로드하세요.")

    employee_df = _read_uploaded_csv(employee_file)
    program_df = _read_uploaded_csv(program_file)

    error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
    if error_msg:
        # The click handler has three outputs; raising surfaces the message
        # instead of returning a tuple of the wrong length.
        raise gr.Error(error_msg)

    employee_ids = employee_df[employee_cols["employee_id"]].tolist()
    employee_names = employee_df[employee_cols["employee_name"]].tolist()
    program_names = program_df[program_cols["program_name"]].tolist()
    program_durations = program_df[program_cols["duration"]].tolist()

    # 1. VectorRAG: similarity between KoSentence-BERT embeddings.
    #    Missing cells become empty strings so the encoder only sees text.
    employee_skills = employee_df[employee_cols["current_skills"]].fillna("").astype(str).tolist()
    program_skills = program_df[program_cols["skills_acquired"]].fillna("").astype(str).tolist()
    matches = _match_programs(employee_skills, program_skills)

    # 2. GraphRAG: program recommendations stored in Neo4j.
    graph_programs = _fetch_graph_recommendations()

    recommendation_rows = []  # rows for both the table and the CSV
    for employee_id, employee_name, matched in zip(employee_ids, employee_names, matches):
        programs = [f"{program_names[j]} ({program_durations[j]})" for j in matched]
        programs.extend(
            f"{program_name} (GraphRAG)"
            for program_name in graph_programs.get(employee_name, [])
        )
        # Use the placeholder only when both sources came back empty.
        recommendation_rows.append(
            [employee_id, employee_name, ", ".join(programs) or NO_MATCH_LABEL]
        )

    figure = _build_relationship_figure(employee_names, program_names, matches)

    # Export the recommendations as CSV and build the result table.
    csv_output = save_recommendations_to_file(recommendation_rows)
    result_df = pd.DataFrame(recommendation_rows, columns=RESULT_COLUMNS)

    return result_df, figure, csv_output


# Gradio UI.
# NOTE(review): the Markdown texts are plain paragraphs padded with blank
# lines; heading markup may have been lost upstream - confirm.
with gr.Blocks(css=".gradio-button {background-color: #007bff; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
    gr.Markdown("\n\n💼 HybridRAG 시스템\n\n")

    with gr.Row():
        with gr.Column(scale=1, min_width=300):
            gr.Markdown("\n\n1. 직원 및 프로그램 데이터를 업로드하세요\n\n")
            employee_file = gr.File(label="직원 데이터 업로드", interactive=True)
            program_file = gr.File(label="교육 프로그램 데이터 업로드", interactive=True)
            analyze_button = gr.Button("분석 시작", elem_classes="gradio-button")
            output_table = gr.DataFrame(label="분석 결과 (테이블)")
            csv_download = gr.File(label="추천 결과 다운로드")

        with gr.Column(scale=2, min_width=500):
            gr.Markdown("\n\n2. 분석 결과 및 시각화\n\n")
            chart_output = gr.Plot(label="시각화 차트")

    # Clicking the button refreshes the table, the chart and the download.
    analyze_button.click(hybrid_rag, inputs=[employee_file, program_file],
                         outputs=[output_table, chart_output, csv_download])

# Start the Gradio interface.
demo.launch()