soojeongcrystal committed on
Commit
1fe42c9
•
1 Parent(s): 0352e69

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -12
app.py CHANGED
@@ -49,7 +49,7 @@ def auto_match_columns(df, required_cols):
49
  for req_col in required_cols:
50
  matched_col = None
51
  for col in df.columns:
52
- if req_col in col.lower():
53
  matched_col = col
54
  break
55
  matched_cols[req_col] = matched_col
@@ -81,21 +81,32 @@ def select_youtube_columns(youtube_file):
81
  youtube_columns = auto_match_columns(youtube_df, required_youtube_cols)
82
 
83
  column_options = {col: youtube_df.columns.tolist() for col in required_youtube_cols}
84
- return gr.Dropdown.update(choices=youtube_df.columns.tolist(), value=youtube_columns.get("title")), \
85
- gr.Dropdown.update(choices=youtube_df.columns.tolist(), value=youtube_columns.get("description")), \
86
- gr.Dropdown.update(choices=youtube_df.columns.tolist(), value=youtube_columns.get("url")), \
87
- gr.Dropdown.update(choices=youtube_df.columns.tolist(), value=youtube_columns.get("upload_date"))
88
 
89
  # 유튜브 콘텐츠 데이터 로드 및 처리 함수
90
  def load_youtube_content(file_path, title_col, description_col, url_col, upload_date_col):
91
  youtube_df = pd.read_csv(file_path)
92
- youtube_df = youtube_df[[title_col, description_col, url_col, upload_date_col]]
93
- youtube_df.columns = ['title', 'description', 'url', 'upload_date']
94
- youtube_df['upload_date'] = pd.to_datetime(youtube_df['upload_date'])
 
 
 
 
 
 
 
 
 
 
 
 
95
  return youtube_df
96
 
97
  # 유튜브 콘텐츠와 교육 프로그램 매칭 함수
98
  def match_youtube_content(program_skills, youtube_df, model):
 
 
99
  youtube_embeddings = model.encode(youtube_df['description'].tolist())
100
  program_embeddings = model.encode(program_skills)
101
  similarities = cosine_similarity(program_embeddings, youtube_embeddings)
@@ -137,13 +148,15 @@ def hybrid_rag(employee_file, program_file, youtube_file, title_col, description
137
  recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
138
 
139
  # 해당 프로그램과 가장 유사한 유튜브 콘텐츠 찾기
140
- top_youtube_indices = youtube_similarities[j].argsort()[-3:][::-1] # 상위 3개
141
- for idx in top_youtube_indices:
142
- recommended_youtube.append(f"{youtube_df.iloc[idx]['title']} (URL: {youtube_df.iloc[idx]['url']})")
 
 
143
 
144
  if recommended_programs:
145
  recommendation = f"์ง์› {employee[employee_cols['employee_name']]}์˜ ์ถ”์ฒœ ํ”„๋กœ๊ทธ๋žจ: {', '.join(recommended_programs)}"
146
- youtube_recommendation = f"์ถ”์ฒœ ์œ ํŠœ๋ธŒ ์ฝ˜ํ…์ธ : {', '.join(recommended_youtube)}"
147
  recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']],
148
  ", ".join(recommended_programs), ", ".join(recommended_youtube)])
149
  else:
 
49
  for req_col in required_cols:
50
  matched_col = None
51
  for col in df.columns:
52
+ if req_col.lower() in col.lower():
53
  matched_col = col
54
  break
55
  matched_cols[req_col] = matched_col
 
81
  youtube_columns = auto_match_columns(youtube_df, required_youtube_cols)
82
 
83
  column_options = {col: youtube_df.columns.tolist() for col in required_youtube_cols}
84
+ return [gr.Dropdown.update(choices=youtube_df.columns.tolist(), value=youtube_columns.get(col, "")) for col in required_youtube_cols]
 
 
 
85
 
86
  # 유튜브 콘텐츠 데이터 로드 및 처리 함수
87
  def load_youtube_content(file_path, title_col, description_col, url_col, upload_date_col):
88
  youtube_df = pd.read_csv(file_path)
89
+ selected_columns = [col for col in [title_col, description_col, url_col, upload_date_col] if col]
90
+ youtube_df = youtube_df[selected_columns]
91
+
92
+ # 선택된 열 이름을 필요한 열 이름으로 매핑
93
+ column_mapping = {
94
+ title_col: 'title',
95
+ description_col: 'description',
96
+ url_col: 'url',
97
+ upload_date_col: 'upload_date'
98
+ }
99
+ youtube_df.rename(columns=column_mapping, inplace=True)
100
+
101
+ if 'upload_date' in youtube_df.columns:
102
+ youtube_df['upload_date'] = pd.to_datetime(youtube_df['upload_date'], errors='coerce')
103
+
104
  return youtube_df
105
 
106
  # 유튜브 콘텐츠와 교육 프로그램 매칭 함수
107
  def match_youtube_content(program_skills, youtube_df, model):
108
+ if 'description' not in youtube_df.columns:
109
+ return None
110
  youtube_embeddings = model.encode(youtube_df['description'].tolist())
111
  program_embeddings = model.encode(program_skills)
112
  similarities = cosine_similarity(program_embeddings, youtube_embeddings)
 
148
  recommended_programs.append(f"{program[program_cols['program_name']]} ({program[program_cols['duration']]})")
149
 
150
  # 해당 프로그램과 가장 유사한 유튜브 콘텐츠 찾기
151
+ if youtube_similarities is not None:
152
+ top_youtube_indices = youtube_similarities[j].argsort()[-3:][::-1] # 상위 3개
153
+ for idx in top_youtube_indices:
154
+ if 'title' in youtube_df.columns and 'url' in youtube_df.columns:
155
+ recommended_youtube.append(f"{youtube_df.iloc[idx]['title']} (URL: {youtube_df.iloc[idx]['url']})")
156
 
157
  if recommended_programs:
158
  recommendation = f"์ง์› {employee[employee_cols['employee_name']]}์˜ ์ถ”์ฒœ ํ”„๋กœ๊ทธ๋žจ: {', '.join(recommended_programs)}"
159
+ youtube_recommendation = f"์ถ”์ฒœ ์œ ํŠœ๋ธŒ ์ฝ˜ํ…์ธ : {', '.join(recommended_youtube)}" if recommended_youtube else "์ถ”์ฒœํ•  ์œ ํŠœ๋ธŒ ์ฝ˜ํ…์ธ ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค."
160
  recommendation_rows.append([employee[employee_cols['employee_id']], employee[employee_cols['employee_name']],
161
  ", ".join(recommended_programs), ", ".join(recommended_youtube)])
162
  else: