soojeongcrystal committed on
Commit
65f9910
•
1 Parent(s): 5ad04e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -21
app.py CHANGED
@@ -5,7 +5,6 @@ from sklearn.metrics.pairwise import cosine_similarity
5
  import networkx as nx
6
  import matplotlib.pyplot as plt
7
  import csv
8
- import datetime
9
  import io
10
 
11
  # Sentence-BERT 모델 로드
@@ -24,51 +23,95 @@ def save_recommendations_to_csv(recommendations):
24
  output.seek(0)
25
  return output
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  # 직원 데이터를 분석하여 교육 프로그램을 추천하고 그래프를 그리는 함수
28
  def analyze_data(employee_file, program_file):
29
- # 직원 데이터와 ꡐ윑 ν”„λ‘œκ·Έλž¨ 데이터 뢈러였기
30
  employee_df = pd.read_csv(employee_file.name)
31
  program_df = pd.read_csv(program_file.name)
32
 
33
- # 직원 μ—­λŸ‰κ³Ό ν”„λ‘œκ·Έλž¨ ν•™μŠ΅ λͺ©ν‘œλ₯Ό 벑터화
34
- employee_skills = employee_df['current_skills'].tolist()
35
- program_skills = program_df['skills_acquired'].tolist()
 
 
 
 
 
36
  employee_embeddings = model.encode(employee_skills)
37
  program_embeddings = model.encode(program_skills)
38
 
39
  # μœ μ‚¬λ„ 계산
40
  similarities = cosine_similarity(employee_embeddings, program_embeddings)
41
 
42
- # 직원별 μΆ”μ²œ ν”„λ‘œκ·Έλž¨ 리슀트
43
  recommendations = []
44
- recommendation_rows = [] # CSV νŒŒμΌμ— μ €μž₯ν•  데이터λ₯Ό μœ„ν•œ 리슀트
45
  for i, employee in employee_df.iterrows():
46
  recommended_programs = []
47
  for j, program in program_df.iterrows():
48
- if similarities[i][j] > 0.5: # μœ μ‚¬λ„ μž„κ³„κ°’ κΈ°μ€€
49
- recommended_programs.append(f"{program['program_name']} ({program['duration']})")
50
 
51
  if recommended_programs:
52
- recommendation = f"직원 {employee['employee_name']}의 μΆ”μ²œ ν”„λ‘œκ·Έλž¨: {', '.join(recommended_programs)}"
53
- recommendation_rows.append([employee['employee_id'], employee['employee_name'], ", ".join(recommended_programs)])
54
  else:
55
- recommendation = f"직원 {employee['employee_name']}μ—κ²Œ μ ν•©ν•œ ν”„λ‘œκ·Έλž¨μ΄ μ—†μŠ΅λ‹ˆλ‹€."
56
- recommendation_rows.append([employee['employee_id'], employee['employee_name'], "μ ν•©ν•œ ν”„λ‘œκ·Έλž¨ μ—†μŒ"])
57
 
58
  recommendations.append(recommendation)
59
 
60
  # λ„€νŠΈμ›Œν¬ κ·Έλž˜ν”„ 생성
61
  G = nx.Graph()
62
- for employee in employee_df['employee_name']:
63
  G.add_node(employee, type='employee')
64
 
65
- for program in program_df['program_name']:
66
  G.add_node(program, type='program')
67
 
68
  for i, employee in employee_df.iterrows():
69
  for j, program in program_df.iterrows():
70
- if similarities[i][j] > 0.5: # μœ μ‚¬λ„ μž„κ³„κ°’
71
- G.add_edge(employee['employee_name'], program['program_name'])
72
 
73
  # κ·Έλž˜ν”„ μ‹œκ°ν™”
74
  plt.figure(figsize=(10, 8))
@@ -84,18 +127,18 @@ def analyze_data(employee_file, program_file):
84
 
85
  # Gradio 블둝
86
  with gr.Blocks(css=".gradio-button {background-color: #6c757d; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
87
- gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>πŸ’Ό HybridRAG μ‹œμŠ€ν…œ</h1>", unsafe_allow_html=True)
88
 
89
  with gr.Row():
90
- with gr.Column(scale=1):
91
  gr.Markdown("<h3 style='color: #34495e;'>1. 직원 및 ν”„λ‘œκ·Έλž¨ 데이터λ₯Ό μ—…λ‘œλ“œν•˜μ„Έμš”</h3>")
92
  employee_file = gr.File(label="직원 데이터 μ—…λ‘œλ“œ", interactive=True)
93
  program_file = gr.File(label="ꡐ윑 ν”„λ‘œκ·Έλž¨ 데이터 μ—…λ‘œλ“œ", interactive=True)
94
  analyze_button = gr.Button("뢄석 μ‹œμž‘", elem_classes="gradio-button")
95
  output_text = gr.Textbox(label="뢄석 κ²°κ³Ό", interactive=False, elem_classes="gradio-textbox")
96
 
97
- with gr.Column(scale=2):
98
- gr.Markdown("<h3 style='color: #34495e;'>2. 뢄석 κ²°κ³Ό</h3>")
99
  chart_output = gr.Plot(label="μ‹œκ°ν™” 차트")
100
  csv_download = gr.File(label="μΆ”μ²œ κ²°κ³Ό λ‹€μš΄λ‘œλ“œ")
101
 
 
5
  import networkx as nx
6
  import matplotlib.pyplot as plt
7
  import csv
 
8
  import io
9
 
10
  # Sentence-BERT 모델 로드
 
23
  output.seek(0)
24
  return output
25
 
26
+ # 자동으로 열을 매칭하는 함수
27
def auto_match_columns(df, required_cols):
    """Map each required column name to the best-matching column of *df*.

    Matching is case-insensitive and ignores surrounding whitespace in
    both the required name and the actual column label. For every
    required name an exact match is preferred; only when no column
    matches exactly does the first column whose label *contains* the
    required name win.

    Args:
        df: Any object exposing an iterable ``columns`` attribute
            (the caller passes a pandas DataFrame).
        required_cols: Iterable of logical column names to look for.

    Returns:
        dict mapping every entry of *required_cols* to the original
        column label that matched it, or to ``None`` when nothing matched.
    """
    # Normalise every label once. str() keeps non-string labels (e.g. the
    # integer labels of a header-less table) from raising on .lower().
    normalised = [(col, str(col).strip().lower()) for col in df.columns]

    matched_cols = {}
    for req_col in required_cols:
        wanted = str(req_col).strip().lower()

        # An exact name beats a substring hit, so "duration" is not
        # shadowed by an earlier "duration_unit" column.
        exact = [col for col, name in normalised if name == wanted]
        if exact:
            matched_cols[req_col] = exact[0]
            continue

        partial = [col for col, name in normalised if wanted in name]
        matched_cols[req_col] = partial[0] if partial else None
    return matched_cols
41
+
42
+ # 직원 및 프로그램 데이터의 열을 자동으로 매칭하거나, 선택하게 하는 함수
43
def validate_and_get_columns(employee_df, program_df):
    """Resolve the columns the analysis needs in both uploaded tables.

    Each table is passed to ``auto_match_columns`` together with its list
    of required logical column names. The employee table is validated
    first, so a missing employee column is reported before any missing
    program column.

    Args:
        employee_df: Employee table; must provide columns matching
            ``employee_id``, ``employee_name`` and ``current_skills``.
        program_df: Training-program table; must provide columns matching
            ``program_name``, ``skills_acquired`` and ``duration``.

    Returns:
        A 3-tuple ``(error_msg, employee_cols, program_cols)``.
        On success ``error_msg`` is ``None`` and the two dicts map each
        required name to the matched column label. On the first required
        column that could not be matched, the tuple is
        ``(message, None, None)`` where ``message`` is a Korean,
        user-facing explanation naming the missing column.
    """
    required_employee_cols = ["employee_id", "employee_name", "current_skills"]
    required_program_cols = ["program_name", "skills_acquired", "duration"]

    employee_cols = auto_match_columns(employee_df, required_employee_cols)
    program_cols = auto_match_columns(program_df, required_program_cols)

    # (table label shown to the user, resolved mapping); order fixes
    # which table's problem is reported first.
    checks = (
        ("직원", employee_cols),
        ("프로그램", program_cols),
    )
    for label, mapping in checks:
        for key, value in mapping.items():
            if value is None:
                message = (
                    f"{label} 데이터에서 '{key}' 열을 선택할 수 없습니다. "
                    "올바른 열을 선택하세요."
                )
                return message, None, None

    return None, employee_cols, program_cols
64
+
65
  # 직원 데이터를 분석하여 교육 프로그램을 추천하고 그래프를 그리는 함수
66
  def analyze_data(employee_file, program_file):
67
+ # 직원 데이터와 ν”„λ‘œκ·Έλž¨ 데이터 읽기
68
  employee_df = pd.read_csv(employee_file.name)
69
  program_df = pd.read_csv(program_file.name)
70
 
71
+ # μ—΄ μžλ™ 맀칭 μ‹œλ„
72
+ error_msg, employee_cols, program_cols = validate_and_get_columns(employee_df, program_df)
73
+ if error_msg:
74
+ return error_msg, None, None
75
+
76
+ # μ§μ›μ˜ μ—­λŸ‰κ³Ό ν”„λ‘œκ·Έλž¨ λͺ©ν‘œλ₯Ό 벑터화
77
+ employee_skills = employee_df[employee_cols["current_skills"]].tolist()
78
+ program_skills = program_df[program_cols["skills_acquired"]].tolist()
79
  employee_embeddings = model.encode(employee_skills)
80
  program_embeddings = model.encode(program_skills)
81
 
82
  # μœ μ‚¬λ„ 계산
83
  similarities = cosine_similarity(employee_embeddings, program_embeddings)
84
 
85
+ # 직원별 μΆ”μ²œ ν”„λ‘œκ·Έλž¨ 생성
86
  recommendations = []
87
+ recommendation_rows = [] # CSV둜 μ €μž₯ν•  데이터
88
  for i, employee in employee_df.iterrows():
89
  recommended_programs = []
90
  for j, program in program_df.iterrows():
91
+ if similarities[i][j] > 0.5: # μœ μ‚¬λ„ μž„κ³„κ°’
92
+ recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
93
 
94
  if recommended_programs:
95
+ recommendation = f"직원 {employee[employee_cols['employee_name']]}의 μΆ”μ²œ ν”„λ‘œκ·Έλž¨: {', '.join(recommended_programs)}"
96
+ recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']], ", ".join(recommended_programs)])
97
  else:
98
+ recommendation = f"직원 {employee[employee_cols['employee_name']]}μ—κ²Œ μ ν•©ν•œ ν”„λ‘œκ·Έλž¨μ΄ μ—†μŠ΅λ‹ˆλ‹€."
99
+ recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']], "μ ν•©ν•œ ν”„λ‘œκ·Έλž¨ μ—†μŒ"])
100
 
101
  recommendations.append(recommendation)
102
 
103
  # λ„€νŠΈμ›Œν¬ κ·Έλž˜ν”„ 생성
104
  G = nx.Graph()
105
+ for employee in employee_df[employee_cols['employee_name']]:
106
  G.add_node(employee, type='employee')
107
 
108
+ for program in program_df[program_cols['program_name']]:
109
  G.add_node(program, type='program')
110
 
111
  for i, employee in employee_df.iterrows():
112
  for j, program in program_df.iterrows():
113
+ if similarities[i][j] > 0.5:
114
+ G.add_edge(employee[employee_cols['employee_name']], program[program_cols['program_name']])
115
 
116
  # κ·Έλž˜ν”„ μ‹œκ°ν™”
117
  plt.figure(figsize=(10, 8))
 
127
 
128
  # Gradio 블둝
129
  with gr.Blocks(css=".gradio-button {background-color: #6c757d; color: white;} .gradio-textbox {border-color: #6c757d;}") as demo:
130
+ gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>πŸ’Ό HybridRAG μ‹œμŠ€ν…œ</h1>")
131
 
132
  with gr.Row():
133
+ with gr.Column(scale=1, min_width=300):
134
  gr.Markdown("<h3 style='color: #34495e;'>1. 직원 및 ν”„λ‘œκ·Έλž¨ 데이터λ₯Ό μ—…λ‘œλ“œν•˜μ„Έμš”</h3>")
135
  employee_file = gr.File(label="직원 데이터 μ—…λ‘œλ“œ", interactive=True)
136
  program_file = gr.File(label="ꡐ윑 ν”„λ‘œκ·Έλž¨ 데이터 μ—…λ‘œλ“œ", interactive=True)
137
  analyze_button = gr.Button("뢄석 μ‹œμž‘", elem_classes="gradio-button")
138
  output_text = gr.Textbox(label="뢄석 κ²°κ³Ό", interactive=False, elem_classes="gradio-textbox")
139
 
140
+ with gr.Column(scale=2, min_width=500):
141
+ gr.Markdown("<h3 style='color: #34495e;'>2. 뢄석 κ²°κ³Ό 및 μ‹œκ°ν™”</h3>")
142
  chart_output = gr.Plot(label="μ‹œκ°ν™” 차트")
143
  csv_download = gr.File(label="μΆ”μ²œ κ²°κ³Ό λ‹€μš΄λ‘œλ“œ")
144