latuan committed on
Commit
c4c4b24
1 Parent(s): 36a27a4
Files changed (1) hide show
  1. app.py +40 -7
app.py CHANGED
@@ -175,7 +175,7 @@ def time_to_seconds(time_str):
175
  def closest_speedup_factor(factor, allowed_factors):
176
  return min(allowed_factors, key=lambda x: abs(x - factor)) + 0.1
177
 
178
- def generate_audio_with_pause(srt_file_path):
179
  subtitles = read_srt(srt_file_path)
180
  audio_clips = []
181
  # allowed_factors = [1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0]
@@ -210,7 +210,18 @@ def generate_audio_with_pause(srt_file_path):
210
  audio_data = audio_data / np.max(np.abs(audio_data))
211
  audio_data = audio_data * 1.2
212
 
213
- if current_duration < desired_duration:
 
 
 
 
 
 
 
 
 
 
 
214
  padding = int((desired_duration - current_duration) * 16000)
215
  audio_data = np.concatenate([np.zeros(padding), audio_data])
216
 
@@ -242,7 +253,7 @@ def check_input_files(srt_files):
242
  if invalid_files:
243
  raise gr.Warning(f"Invalid SRT files: {', '.join(invalid_files)}")
244
 
245
- def srt_to_audio_multi(srt_files):
246
  output_paths = []
247
  invalid_files = []
248
 
@@ -250,7 +261,7 @@ def srt_to_audio_multi(srt_files):
250
  if not is_valid_srt(srt_file.name):
251
  invalid_files.append(srt_file.name)
252
  return None
253
- audio_data = generate_audio_with_pause(srt_file.name)
254
  output_path = os.path.join(cache_dir, f'output_{os.path.basename(srt_file.name)}.wav')
255
  torchaudio.save(output_path, torch.tensor(audio_data).unsqueeze(0), 16000)
256
  return output_path
@@ -288,6 +299,7 @@ model = Model(
288
  css = '''
289
  #title{text-align: center}
290
  #container{display: flex; justify-content: space-between; align-items: center;}
 
291
  '''
292
 
293
  with gr.Blocks(css=css) as demo:
@@ -295,8 +307,25 @@ with gr.Blocks(css=css) as demo:
295
  """<h1>SRT to Audio Tool</h1>""",
296
  elem_id="title",
297
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
  with gr.Row(elem_id="container"):
299
- inp = gr.File(
300
  label="Upload SRT files",
301
  file_count="multiple",
302
  type="filepath",
@@ -319,8 +348,12 @@ with gr.Blocks(css=css) as demo:
319
  height=100
320
  )
321
 
322
- inp.change(check_input_files, inputs=inp)
323
- btn.click(fn=srt_to_audio_multi, inputs=inp, outputs=out)
 
 
 
 
324
  download_btn.click(fn=download_all, inputs=out, outputs=download_out)
325
 
326
  if __name__ == "__main__":
 
175
  def closest_speedup_factor(factor, allowed_factors):
176
  return min(allowed_factors, key=lambda x: abs(x - factor)) + 0.1
177
 
178
+ def generate_audio_with_pause(srt_file_path, speaker_id, speed_of_non_edit_speech):
179
  subtitles = read_srt(srt_file_path)
180
  audio_clips = []
181
  # allowed_factors = [1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0]
 
210
  audio_data = audio_data / np.max(np.abs(audio_data))
211
  audio_data = audio_data * 1.2
212
 
213
+ if current_duration < desired_duration:
214
+ if speed_of_non_edit_speech != 1:
215
+ audio_data = librosa.effects.time_stretch(
216
+ y=audio_data,
217
+ rate=speed_of_non_edit_speech,
218
+ n_fft=1024,
219
+ hop_length=256
220
+ )
221
+ audio_data = audio_data / np.max(np.abs(audio_data))
222
+ audio_data = audio_data * 1.2
223
+
224
+ current_duration = len(audio_data) / 16000
225
  padding = int((desired_duration - current_duration) * 16000)
226
  audio_data = np.concatenate([np.zeros(padding), audio_data])
227
 
 
253
  if invalid_files:
254
  raise gr.Warning(f"Invalid SRT files: {', '.join(invalid_files)}")
255
 
256
+ def srt_to_audio_multi(srt_files, speaker_id, speed_of_non_edit_speech):
257
  output_paths = []
258
  invalid_files = []
259
 
 
261
  if not is_valid_srt(srt_file.name):
262
  invalid_files.append(srt_file.name)
263
  return None
264
+ audio_data = generate_audio_with_pause(srt_file.name, speaker_id, speed_of_non_edit_speech)
265
  output_path = os.path.join(cache_dir, f'output_{os.path.basename(srt_file.name)}.wav')
266
  torchaudio.save(output_path, torch.tensor(audio_data).unsqueeze(0), 16000)
267
  return output_path
 
299
  css = '''
300
  #title{text-align: center}
301
  #container{display: flex; justify-content: space-between; align-items: center;}
302
+ #setting-heading{margin-bottom: 10px; text-align: center;}
303
  '''
304
 
305
  with gr.Blocks(css=css) as demo:
 
307
  """<h1>SRT to Audio Tool</h1>""",
308
  elem_id="title",
309
  )
310
+ with gr.Column(elem_id="setting-box"):
311
+ heading = gr.HTML("<h2>Settings</h2>", elem_id="setting-heading")
312
+ with gr.Row():
313
+ speaker_id = gr.Dropdown(
314
+ label="Speaker ID",
315
+ choices=list(dataset_dict.keys()),
316
+ default=speaker_id
317
+ )
318
+ speed_of_non_edit_speech = gr.Slider(
319
+ label="Speed of non-edit speech",
320
+ minimum=1,
321
+ maximum=2.0,
322
+ step=0.1,
323
+ value=1.2
324
+ )
325
+
326
+
327
  with gr.Row(elem_id="container"):
328
+ inp_srt = gr.File(
329
  label="Upload SRT files",
330
  file_count="multiple",
331
  type="filepath",
 
348
  height=100
349
  )
350
 
351
+ inp_srt.change(check_input_files, inputs=inp_srt)
352
+ btn.click(
353
+ fn=srt_to_audio_multi,
354
+ inputs=[inp_srt, speaker_id, speed_of_non_edit_speech],
355
+ outputs=out
356
+ )
357
  download_btn.click(fn=download_all, inputs=out, outputs=download_out)
358
 
359
  if __name__ == "__main__":