Emil25 committed on
Commit
03f7867
1 Parent(s): ebd9035

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +155 -0
main.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import streamlit as st
4
+ import googleapiclient.discovery
5
+ import pandas as pd
6
+ from transformers import pipeline
7
+ import matplotlib.pyplot as plt
8
+ import seaborn as sns
9
+
10
# Heading rendered at the top of the Streamlit page.
APP_TITLE = 'Анализатор комментариев :red[YouTube] :sunglasses:'
st.title(APP_TITLE)
11
+
12
+
13
# Hugging Face model used to analyse the sentiment of comment text.
# Cached as a resource so the model is loaded once for all sessions.
@st.cache_resource
def load_model():
    """
    Build the sentiment-analysis pipeline backed by the
    'blanchefort/rubert-base-cased-sentiment' checkpoint from Hugging Face.

    The function is wrapped in ``st.cache_resource``, so repeated calls
    reuse the pipeline that was created first instead of loading it again.
    """
    return pipeline(
        task="sentiment-analysis",
        model="blanchefort/rubert-base-cased-sentiment",
    )
25
+
26
+
27
def extract_video_id(url: str) -> str:
    """
    Extract the video ID from a YouTube video URL.

    Recognised forms:
      * a ``v`` query parameter (``.../watch?v=<id>``, ``...&v=<id>``);
      * short links (``youtu.be/<id>``);
      * ``/embed/<id>``, ``/shorts/<id>`` and ``/live/<id>`` paths on
        youtube.com / youtube-nocookie.com.

    Args:
        url (str): The YouTube video URL.

    Returns:
        str: The extracted video ID, or an empty string if none is found.
    """
    if not url:
        return ""

    patterns = (
        # ``v`` has to be a complete parameter name, so parameters that
        # merely end in "v" (e.g. ``rev=``) are not mistaken for the ID.
        r"(?:^|[?&#])v=([\w-]+)",
        r"youtu\.be/([\w-]+)",
        r"youtube(?:-nocookie)?\.com/(?:embed|shorts|live)/([\w-]+)",
    )
    # Pasted URLs often carry stray surrounding whitespace.
    candidate = url.strip()
    for pattern in patterns:
        match = re.search(pattern, candidate)
        if match:
            return match.group(1)
    return ""
40
+
41
+
42
def download_comments(video_id: str) -> pd.DataFrame:
    """
    Download comments for a YouTube video and return them as a DataFrame.

    A single ``commentThreads().list`` request is issued with
    ``maxResults=100``, so at most the first page of comment threads is
    returned; only the top-level comment of each thread is read.
    The API key is taken from the ``API_KEY_YOUTUBE`` environment variable.

    Args:
        video_id (str): The video ID of the YouTube video.

    Returns:
        DataFrame: One row per comment with the columns 'author',
        'published_at', 'updated_at', 'like_count' and 'text'
        (empty when the response carries no items).
    """
    api_key = os.getenv('API_KEY_YOUTUBE')
    # The service object owns an HTTP connection. The context manager
    # closes it once the request is done, so connections do not pile up
    # when the Streamlit script is re-run.
    with googleapiclient.discovery.build("youtube",
                                         "v3",
                                         developerKey=api_key) as youtube:
        response = youtube.commentThreads().list(part="snippet",
                                                 videoId=video_id,
                                                 maxResults=100).execute()

    # API field name -> DataFrame column name, in output order.
    field_to_column = {
        'authorDisplayName': 'author',
        'publishedAt': 'published_at',
        'updatedAt': 'updated_at',
        'likeCount': 'like_count',
        # NOTE(review): 'textDisplay' may contain HTML markup under the
        # API's default text format - confirm whether plain text is wanted.
        'textDisplay': 'text',
    }
    snippets = (item['snippet']['topLevelComment']['snippet']
                for item in response.get('items', []))
    rows = [[snippet[field] for field in field_to_column]
            for snippet in snippets]
    return pd.DataFrame(rows, columns=list(field_to_column.values()))
71
+
72
+
73
def analyze_emotions_in_comments(df: pd.DataFrame) -> tuple:
    """
    Run the sentiment model over every comment in the DataFrame.

    Args:
        df (pandas.DataFrame): Comments to analyze; must contain the
            columns 'text', 'author' and 'published_at'.

    Returns:
        tuple: ``(full_df, count)`` where ``full_df`` holds the model
        output columns followed by 'text', 'author' and 'published_at',
        and ``count`` is the number of processed comments.
    """
    selected_columns = ['text', 'author', 'published_at']
    # A fresh 0..n-1 index keeps the rows aligned with the model output,
    # which pd.concat(axis=1) matches by index label.
    comments = df[selected_columns].reset_index(drop=True)

    if comments.empty:
        # Nothing to score: skip the model and return an empty frame.
        # 'label' and 'score' are the keys the pipeline is expected to
        # produce per comment - TODO confirm against the model output.
        return (pd.DataFrame(columns=['label', 'score', *selected_columns]), 0)

    model = load_model()
    # truncation=True lets the tokenizer cut comments that exceed the
    # model's maximum input length instead of failing on them.
    res_list = model(comments['text'].to_list(), truncation=True)
    full_df = pd.concat([pd.DataFrame(res_list), comments], axis=1)
    return (full_df, len(res_list))
88
+
89
+
90
def plot_heatmap_from_dataframe(df: pd.DataFrame) -> plt:
    """
    Draw a heatmap of the number of comments per hour of day and date.

    The input DataFrame is not modified; the date and hour columns are
    derived on a copy.

    Args:
        df (DataFrame): Comments with at least the columns 'published_at'
            (anything ``pd.to_datetime`` can parse) and 'text'.

    Returns:
        plt: The ``matplotlib.pyplot`` module, whose current figure is the
        heatmap that was just drawn.
    """
    published = pd.to_datetime(df['published_at'])
    # assign() returns a new frame, so the caller's DataFrame keeps its
    # original columns and dtypes.
    # NOTE(review): hours are taken as-is from 'published_at'; confirm the
    # timezone of the timestamps if local time is expected.
    timeline = df.assign(Date=published.dt.date, Hour=published.dt.hour)
    pivot_table = timeline.pivot_table(index='Hour',
                                       columns='Date',
                                       values='text',
                                       aggfunc='count')
    plt.figure(figsize=(10, 6))
    sns.heatmap(pivot_table, cmap='YlGnBu')
    plt.title('Количество комментариев по часам и датам')
    plt.xlabel('Дата')
    plt.ylabel('Час')
    return plt
110
+
111
+
112
def visualize_data(df: pd.DataFrame):
    """
    Render a section header and build a pie chart of sentiment labels.

    Writes a Streamlit header to the page as a side effect.

    Args:
        df (DataFrame): Analysed comments; must contain a 'label' column.

    Returns:
        fig: A matplotlib figure with one wedge per distinct label, sized
        by how many comments carry that label.
    """
    st.header('Эмоциональная окраска комментариев на YouTube')
    counts = df['label'].value_counts()
    fig, ax = plt.subplots()
    ax.set_title("Эмоциональная окраска комментариев на YouTube")
    # Labels are taken from the index of the counted Series so every wedge
    # is paired with its own label. value_counts() orders by frequency,
    # which need not match the order in which labels first appear.
    ax.pie(counts, labels=list(counts.index), autopct='%1.1f%%')
    return fig
125
+
126
+
127
def change_url():
    """Reset the 'start' flag in the Streamlit session state to False."""
    st.session_state["start"] = False
129
+
130
+
131
# Make sure the "start" flag exists before it is read further down.
if "start" not in st.session_state:
    st.session_state.start = False

# Extract the video id from the URL so it can be sent with the API request.
# Editing the URL triggers change_url via on_change.
url = st.text_input(label="Enter URL from YouTube", on_change=change_url)
video_id = extract_video_id(url)
if video_id != "":
    if st.button('Загрузить комментарии'):
        st.session_state.start = True

if st.session_state.start:
    comments_df = download_comments(video_id)
    if comments_df.empty:
        # Without comments there is nothing to analyse or plot.
        st.warning('Комментарии к этому видео не найдены.')
    else:
        # Show the results table on the page.
        with st.spinner('Analyzing comments...'):
            full_df, num_comments = analyze_emotions_in_comments(comments_df)
        st.success(f'Готово! Обработано {num_comments} комментариев.')
        st.write(full_df)
        st.markdown('***')

        # Heatmap of comments by hour and date. The helper returns the
        # pyplot module, so the figure it drew is fetched explicitly and
        # closed after rendering; otherwise pyplot keeps every figure
        # alive across script re-runs.
        heatmap_plt = plot_heatmap_from_dataframe(full_df)
        heatmap_fig = heatmap_plt.gcf()
        st.pyplot(heatmap_fig)
        heatmap_plt.close(heatmap_fig)
        st.markdown('***')

        # Pie chart of sentiment labels; closed after rendering as well.
        pie_fig = visualize_data(full_df)
        st.pyplot(pie_fig)
        plt.close(pie_fig)