Reggie committed on
Commit
0768472
1 Parent(s): bb59f5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +242 -53
app.py CHANGED
@@ -1,60 +1,249 @@
 
 
 
 
 
1
  import os
2
- import requests
 
 
 
 
 
 
 
 
3
  import json
4
- from io import BytesIO
5
 
6
- from flask import Flask, jsonify, render_template, request, send_file
7
-
8
- from modules.inference import infer_t5
9
- from modules.dataset import query_emotion
10
-
11
- # https://huggingface.co/settings/tokens
12
- # https://huggingface.co/spaces/{username}/{space}/settings
13
- API_TOKEN = os.getenv("BIG_GAN_TOKEN")
14
-
15
- app = Flask(__name__)
16
-
17
-
18
- @app.route("/")
19
- def index():
20
- return render_template("index.html")
21
-
22
-
23
- @app.route("/infer_biggan")
24
- def biggan():
25
- input = request.args.get("input")
26
-
27
- output = requests.request(
28
- "POST",
29
- "https://api-inference.huggingface.co/models/osanseviero/BigGAN-deep-128",
30
- headers={"Authorization": f"Bearer {API_TOKEN}"},
31
- data=json.dumps(input),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  )
33
 
34
- return send_file(BytesIO(output.content), mimetype="image/png")
35
-
36
-
37
- @app.route("/infer_t5")
38
- def t5():
39
- input = request.args.get("input")
40
-
41
- output = infer_t5(input)
42
-
43
- return jsonify({"output": output})
44
-
45
-
46
- @app.route("/query_emotion")
47
- def emotion():
48
- start = request.args.get("start")
49
- end = request.args.get("end")
50
-
51
- print(start)
52
- print(end)
53
-
54
- output = query_emotion(int(start), int(end))
55
-
56
- return jsonify({"output": output})
57
-
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  if __name__ == "__main__":
60
- app.run(host="0.0.0.0", port=7860)
 
1
+ from authlib.integrations.flask_client import OAuth
2
+ from authlib.common.security import generate_token
3
+ import ffmpeg
4
+ from flask import Flask, render_template, request, jsonify, url_for, redirect, session
5
+ from functools import wraps
6
  import os
7
+ import streamlink
8
+ import threading
9
+ import time
10
+ from faster_whisper import WhisperModel
11
+ import subprocess
12
+ from datetime import datetime as dt
13
+ from datetime import timedelta, timezone
14
+ from apiclient import discovery
15
+ from google.oauth2 import service_account
16
  import json
 
17
 
18
# Secrets are supplied through environment variables.
client_secret = os.environ.get("client_secret")
gdoc_id = os.environ.get("gdoc_id")
GOOGLE_CLIENT_SECRET = os.environ.get("GOOGLE_CLIENT_SECRET")
GOOGLE_CLIENT_ID = os.environ.get("GOOGLE_CLIENT_ID")
# NOTE(review): allowed_users is read here but no code in this file consults it.
allowed_users = os.environ.get("allowed_users")

# Faster Whisper setup
model_size = 'small'
beamsize = 2
wmodel = WhisperModel(model_size, device="cpu", compute_type="int8")

# Start every run with an empty transcription_files folder
if not os.path.exists('transcription_files'):
    os.makedirs('transcription_files')
for f in os.listdir('transcription_files/'):
    os.remove(os.path.join('transcription_files/', f))

# The environment variable holds the service-account key as JSON *text*.
# Decode it before dumping: dumping the raw string would write a quoted JSON
# string literal instead of the JSON object the credentials loader reads.
with open("client_secret.json", "w") as json_file:
    json.dump(json.loads(client_secret), json_file, indent=4)
scopes = ["https://www.googleapis.com/auth/documents", "https://www.googleapis.com/auth/drive.file"]
credentials = service_account.Credentials.from_service_account_file('client_secret.json', scopes=scopes)
service = discovery.build('docs', 'v1', credentials=credentials)

local_tz = 5.5  # Hours added to UTC when building transcript timestamps
local_transcript = 'transcription_files/tr.txt'
pid_file = 'transcription_files/pid.txt'

# Make sure the mp3 folder exists and is empty
if not os.path.exists('mp3'):
    os.makedirs('mp3')
for f in os.listdir('mp3/'):
    os.remove(os.path.join('mp3/', f))

app = Flask(__name__, static_url_path='/static')
# Random per-process key: sessions do not survive a restart.
app.secret_key = os.urandom(12)

oauth = OAuth(app)

# Shared state for the recording worker
stream_process = None           # threading.Thread running save_audio, or None
recording = False               # True while the worker loop should keep going
mp3_extraction_process = None   # most recently spawned ffmpeg process, or None
57
+
58
def update_gdoc(text, gdoc_id):
    """Append ``text`` (prefixed with a space) to the Google Doc ``gdoc_id``.

    The document is fetched to find the ``endIndex`` of its last paragraph
    element; the text is inserted one position before that index. Errors
    raised while building or sending the insert request are printed, not
    propagated. Errors raised while fetching the document do propagate.
    """
    print('Updating Google Doc', gdoc_id)
    document = service.documents().get(documentId=gdoc_id).execute()
    paragraph_ends = [
        element['endIndex']
        for element in document['body']['content']
        if 'paragraph' in element
    ]
    last_end = paragraph_ends[-1]

    try:
        insert_request = {
            'insertText': {
                'location': {'index': last_end - 1},
                'text': ' ' + text,
            }
        }
        response = (
            service.documents()
            .batchUpdate(documentId=gdoc_id, body={'requests': [insert_request]})
            .execute()
        )
        print(response)
    except Exception as exc:
        print(exc)
70
+
71
def process_complete_callback(retcode, **kwargs):
    """Print whether an FFmpeg run succeeded, based on its return code.

    A ``retcode`` of 0 is reported as success; anything else as an error.
    Extra keyword arguments are accepted and ignored.
    """
    message = (
        "FFmpeg process completed successfully!"
        if retcode == 0
        else "FFmpeg process encountered an error."
    )
    print(message)
76
+
77
def transcribe_audio(latest_file, time_counter):
    """Transcribe one audio file and return ``(text, time_counter + 1)``.

    Newlines in the transcription are replaced with spaces and double spaces
    are collapsed. On every fifth call (``time_counter`` divisible by 5) a
    ``Time HH:MM:SS`` line is inserted after the first sentence, using the
    current UTC time shifted by ``local_tz`` hours.
    """
    print('transcribing ', latest_file)
    segments, _info = wmodel.transcribe(str(latest_file), beam_size=beamsize)
    # join() avoids rebuilding the string once per segment.
    text = ''.join(segment.text for segment in segments)
    transcribed = text.replace('\n', ' ').replace('  ', ' ')

    if time_counter % 5 == 0:
        # Split at the first sentence break so the time code lands between
        # sentences. partition() keeps the separator, so the first sentence
        # keeps its full stop instead of losing it to the split.
        first_sentence, separator, remainder = transcribed.partition('. ')
        stamp = (dt.now(timezone.utc) + timedelta(hours=local_tz)).strftime('%H:%M:%S')
        transcribed = first_sentence + separator.strip() + '\nTime ' + stamp + '\n' + remainder

    return transcribed, time_counter + 1
91
+
92
def save_audio(youtube_url):
    """Record ``youtube_url`` in 30-second mp3 chunks and transcribe them.

    Runs until the module-level ``recording`` flag becomes false. Each pass
    starts an ffmpeg process writing a new ``mp3/audio_<epoch>.mp3`` chunk and
    records its pid in ``pid_file``. If an earlier chunk is present it is
    transcribed, deleted, appended to the Google Doc and to the local
    transcript, and the loop then sleeps out the rest of the 30 seconds.

    Any exception clears ``recording``, is printed, and its message is
    returned. When run as a thread target that return value is discarded.
    """
    global stream_process, recording, mp3_extraction_process
    try:
        streams = streamlink.streams(youtube_url)
        # A missing "144p" stream raises KeyError, handled by the except below.
        stream_url = streams["144p"].url
        time_counter = 0
        while recording:
            # Save audio only into mp3 files
            saved_mp3 = f"mp3/audio_{int(time.time())}.mp3"
            mp3_extraction_process = (
                ffmpeg
                .input(stream_url, t=30)
                .audio
                # TODO - change destination url to relevant url
                .output(saved_mp3)
                .overwrite_output()
                .global_args('-loglevel', 'panic')
                .run_async()
            )

            print('pid', mp3_extraction_process.pid)
            # Record the pid so stop_process can kill a lingering ffmpeg.
            with open(pid_file, 'w') as f:
                f.write(str(mp3_extraction_process.pid))

            # With more than one mp3 in the folder, transcribe one that is
            # not currently being written to.
            mp3files = [name for name in os.listdir('mp3') if name.endswith('.mp3')]
            if len(mp3files) < 2:
                print('Sleeping for 30s as only one mp3 file in folder')
                time.sleep(30)
            else:
                starttime = time.time()
                current_chunk = os.path.basename(saved_mp3)
                # os.listdir order is arbitrary. File names embed the epoch
                # second, so the smallest name is the oldest chunk; taking it
                # keeps the transcript in chronological order.
                file_to_transcribe = min(name for name in mp3files if name != current_chunk)
                print('Working on ', file_to_transcribe)
                transcribed, time_counter = transcribe_audio(f'mp3/{file_to_transcribe}', time_counter)
                os.remove(f'mp3/{file_to_transcribe}')

                update_gdoc(transcribed, gdoc_id)
                with open(local_transcript, 'a', encoding='utf-8', errors='ignore') as f:
                    f.write(transcribed)

                elapsed_time = time.time() - starttime
                print('Time to transcribe:', elapsed_time, 'seconds')
                if elapsed_time < 30:
                    remaining = 30 - elapsed_time
                    print(f'Sleeping for {remaining} as there are more than one mp3 files in folder')
                    time.sleep(remaining)

    except Exception as e:
        recording = False
        print('exception', str(e))
        return str(e)
146
+
147
@app.route("/start_process", methods=["POST"])
def start_process():
    """Start recording the YouTube URL posted in the ``url`` form field.

    Returns a JSON message with status 200 once the worker thread is started,
    or status 400 when a recording is already running or no URL was given.
    The existence of ``local_transcript`` is the marker that prevents more
    than one recording at a time.
    """
    # NOTE(review): this route is not wrapped in login_required.
    global recording, stream_process

    if os.path.isfile(local_transcript):
        return jsonify({"message": "Recording is already in progress."}), 400

    youtube_url = request.form.get("url")
    if not youtube_url:
        return jsonify({"message": "Please provide a valid YouTube URL."}), 400

    if recording:
        return jsonify({"message": "A recording is already in progress."}), 400

    # Create the marker only after the request is validated. Creating it
    # first left a stale marker behind a rejected request, which blocked
    # every later start.
    with open(local_transcript, 'a', encoding='utf-8', errors='ignore') as f:
        f.write('')

    print('In start process')
    recording = True
    stream_process = threading.Thread(target=save_audio, args=(youtube_url,))
    stream_process.start()

    return jsonify({"message": "Recording started."}), 200
168
+
169
+
170
@app.route("/stop_process", methods=["POST"])
def stop_process():
    """Stop the running recording and clean up its files.

    Clears the ``recording`` flag, waits for the worker thread to finish,
    terminates the last ffmpeg process, empties the mp3 folder, removes the
    local transcript marker, and kills the pid stored in ``pid_file``.
    Returns a JSON message with status 200, or 400 when nothing is recording.
    """
    global recording, stream_process, mp3_extraction_process

    if not recording:
        return jsonify({"message": "No recording is currently in progress."}), 400
    print('In stop process')
    recording = False

    # join() blocks until the worker leaves its current sleep/transcribe step.
    if stream_process is not None:
        stream_process.join()
        stream_process = None

    # The worker may exit before it ever spawns ffmpeg, leaving this None.
    if mp3_extraction_process is not None:
        mp3_extraction_process.terminate()
        mp3_extraction_process = None

    for f in os.listdir('mp3/'):
        os.remove(os.path.join('mp3/', f))
    if os.path.isfile(local_transcript):
        os.remove(local_transcript)

    # Fallback: kill the pid recorded by the worker in case it is still alive.
    if os.path.isfile(pid_file):
        with open(pid_file, 'r') as f:
            pid = int(f.read())
        try:
            os.kill(pid, 9)  # For linux
        except OSError:
            # Raised when the process is already gone or cannot be signalled.
            try:
                process = subprocess.Popen(
                    ["taskkill", "/F", "/PID", str(pid)],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                )  # For Windows
                process.communicate()
                print("Process terminated successfully.")
            except Exception as e:
                print("Error:", e)
        os.remove(pid_file)

    return jsonify({"message": "Recording stopped."}), 200
198
+
199
@app.route('/google/')
def google():
    """Register the Google OAuth client and redirect to Google's sign-in.

    A fresh nonce is stored in the session and passed along with the
    redirect; the callback target is the ``google_auth`` route.
    """
    google_settings = {
        'name': 'google',
        'client_id': GOOGLE_CLIENT_ID,
        'client_secret': GOOGLE_CLIENT_SECRET,
        'server_metadata_url': 'https://accounts.google.com/.well-known/openid-configuration',
        'client_kwargs': {"scope": "openid email profile"},
    }
    oauth.register(**google_settings)

    # Google sends the user back to the google_auth view.
    callback_url = url_for('google_auth', _external=True)
    nonce = generate_token()
    session['nonce'] = nonce
    return oauth.google.authorize_redirect(callback_url, nonce=nonce)
214
+
215
@app.route('/google/auth/')
def google_auth():
    """OAuth callback: store the signed-in user in the session and go to /home.

    The ID token is parsed against the nonce saved in the session by the
    ``google`` route.
    """
    access_token = oauth.google.authorize_access_token()
    profile = oauth.google.parse_id_token(access_token, nonce=session['nonce'])
    session['user'] = profile
    print('USER', profile)
    # Login succeeded; continue to the home page.
    return redirect('/home')
223
+
224
def is_not_logged_in():
    """Return True unless the session holds both a ``user`` and a ``nonce``."""
    has_user = session.get('user') is not None
    # The nonce is only looked up when a user is present.
    return not (has_user and session.get('nonce') is not None)
226
+
227
+ # decorator to check if user is logged in, used for protected URLs
228
def login_required(f):
    """Decorator for protected views: redirect to /login when not logged in.

    When a user is logged in, the wrapped view ``f`` is called with the
    original arguments and its result is returned.
    """
    @wraps(f)
    def guarded_view(*args, **kwargs):
        return redirect('/login') if is_not_logged_in() else f(*args, **kwargs)

    return guarded_view
235
+
236
@app.route("/home")
@login_required
def home():
    """Render the home page for a logged-in user."""
    page = render_template("home.html")
    return page
240
+
241
@app.route("/", methods=["GET"])
@app.route("/login", methods=["GET"])
def login():
    """Show the login page, or redirect to /home when already logged in."""
    if is_not_logged_in():
        return render_template("login.html")
    return redirect("/home")
247
 
248
if __name__ == "__main__":
    # The app listens on every interface, so the interactive debugger must not
    # be on by default: it allows arbitrary code execution from the browser,
    # and the reloader re-runs the module-level setup in a second process.
    # Set FLASK_DEBUG=1 to turn it on for local work.
    app.run(host="0.0.0.0", debug=os.environ.get("FLASK_DEBUG") == "1", port=8081)