Spaces:
Running
Running
ver 2.0.0
Browse files
app.py
CHANGED
@@ -175,7 +175,7 @@ def time_to_seconds(time_str):
|
|
175 |
def closest_speedup_factor(factor, allowed_factors):
    """Return the value in *allowed_factors* nearest to *factor*.

    Fix: the previous version added 0.1 to the result, which contradicts
    the function's name and guarantees the returned factor is NOT one of
    the allowed factors (e.g. closest to 1.15 in [1.1, 1.2] became 1.3).
    """
    return min(allowed_factors, key=lambda x: abs(x - factor))
|
177 |
|
178 |
-
def generate_audio_with_pause(srt_file_path):
|
179 |
subtitles = read_srt(srt_file_path)
|
180 |
audio_clips = []
|
181 |
# allowed_factors = [1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0]
|
@@ -210,7 +210,18 @@ def generate_audio_with_pause(srt_file_path):
|
|
210 |
audio_data = audio_data / np.max(np.abs(audio_data))
|
211 |
audio_data = audio_data * 1.2
|
212 |
|
213 |
-
if current_duration < desired_duration:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
padding = int((desired_duration - current_duration) * 16000)
|
215 |
audio_data = np.concatenate([np.zeros(padding), audio_data])
|
216 |
|
@@ -242,7 +253,7 @@ def check_input_files(srt_files):
|
|
242 |
if invalid_files:
|
243 |
raise gr.Warning(f"Invalid SRT files: {', '.join(invalid_files)}")
|
244 |
|
245 |
-
def srt_to_audio_multi(srt_files):
|
246 |
output_paths = []
|
247 |
invalid_files = []
|
248 |
|
@@ -250,7 +261,7 @@ def srt_to_audio_multi(srt_files):
|
|
250 |
if not is_valid_srt(srt_file.name):
|
251 |
invalid_files.append(srt_file.name)
|
252 |
return None
|
253 |
-
audio_data = generate_audio_with_pause(srt_file.name)
|
254 |
output_path = os.path.join(cache_dir, f'output_{os.path.basename(srt_file.name)}.wav')
|
255 |
torchaudio.save(output_path, torch.tensor(audio_data).unsqueeze(0), 16000)
|
256 |
return output_path
|
@@ -288,6 +299,7 @@ model = Model(
|
|
288 |
css = '''
|
289 |
#title{text-align: center}
|
290 |
#container{display: flex; justify-content: space-between; align-items: center;}
|
|
|
291 |
'''
|
292 |
|
293 |
with gr.Blocks(css=css) as demo:
|
@@ -295,8 +307,25 @@ with gr.Blocks(css=css) as demo:
|
|
295 |
"""<h1>SRT to Audio Tool</h1>""",
|
296 |
elem_id="title",
|
297 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
with gr.Row(elem_id="container"):
|
299 |
-
|
300 |
label="Upload SRT files",
|
301 |
file_count="multiple",
|
302 |
type="filepath",
|
@@ -319,8 +348,12 @@ with gr.Blocks(css=css) as demo:
|
|
319 |
height=100
|
320 |
)
|
321 |
|
322 |
-
|
323 |
-
btn.click(
|
|
|
|
|
|
|
|
|
324 |
download_btn.click(fn=download_all, inputs=out, outputs=download_out)
|
325 |
|
326 |
if __name__ == "__main__":
|
|
|
175 |
def closest_speedup_factor(factor, allowed_factors):
    """Return the value in *allowed_factors* nearest to *factor*.

    Fix: the previous version added 0.1 to the result, which contradicts
    the function's name and guarantees the returned factor is NOT one of
    the allowed factors (e.g. closest to 1.15 in [1.1, 1.2] became 1.3).
    """
    return min(allowed_factors, key=lambda x: abs(x - factor))
|
177 |
|
178 |
+
def generate_audio_with_pause(srt_file_path, speaker_id, speed_of_non_edit_speech):
|
179 |
subtitles = read_srt(srt_file_path)
|
180 |
audio_clips = []
|
181 |
# allowed_factors = [1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0]
|
|
|
210 |
audio_data = audio_data / np.max(np.abs(audio_data))
|
211 |
audio_data = audio_data * 1.2
|
212 |
|
213 |
+
if current_duration < desired_duration:
|
214 |
+
if speed_of_non_edit_speech != 1:
|
215 |
+
audio_data = librosa.effects.time_stretch(
|
216 |
+
y=audio_data,
|
217 |
+
rate=speed_of_non_edit_speech,
|
218 |
+
n_fft=1024,
|
219 |
+
hop_length=256
|
220 |
+
)
|
221 |
+
audio_data = audio_data / np.max(np.abs(audio_data))
|
222 |
+
audio_data = audio_data * 1.2
|
223 |
+
|
224 |
+
current_duration = len(audio_data) / 16000
|
225 |
padding = int((desired_duration - current_duration) * 16000)
|
226 |
audio_data = np.concatenate([np.zeros(padding), audio_data])
|
227 |
|
|
|
253 |
if invalid_files:
|
254 |
raise gr.Warning(f"Invalid SRT files: {', '.join(invalid_files)}")
|
255 |
|
256 |
+
def srt_to_audio_multi(srt_files, speaker_id, speed_of_non_edit_speech):
|
257 |
output_paths = []
|
258 |
invalid_files = []
|
259 |
|
|
|
261 |
if not is_valid_srt(srt_file.name):
|
262 |
invalid_files.append(srt_file.name)
|
263 |
return None
|
264 |
+
audio_data = generate_audio_with_pause(srt_file.name, speaker_id, speed_of_non_edit_speech)
|
265 |
output_path = os.path.join(cache_dir, f'output_{os.path.basename(srt_file.name)}.wav')
|
266 |
torchaudio.save(output_path, torch.tensor(audio_data).unsqueeze(0), 16000)
|
267 |
return output_path
|
|
|
299 |
css = '''
|
300 |
#title{text-align: center}
|
301 |
#container{display: flex; justify-content: space-between; align-items: center;}
|
302 |
+
#setting-heading{margin-bottom: 10px; text-align: center;}
|
303 |
'''
|
304 |
|
305 |
with gr.Blocks(css=css) as demo:
|
|
|
307 |
"""<h1>SRT to Audio Tool</h1>""",
|
308 |
elem_id="title",
|
309 |
)
|
310 |
+
# Settings panel: speaker selection plus playback-speed control that are
# passed into srt_to_audio_multi() when the convert button is clicked.
with gr.Column(elem_id="setting-box"):
    heading = gr.HTML("<h2>Settings</h2>", elem_id="setting-heading")
    with gr.Row():
        # Fix: gr.Dropdown takes `value=`, not `default=` (unexpected-kwarg
        # TypeError), and `speaker_id` was referenced before assignment —
        # it is the target of this very statement. Preselect the first
        # available speaker instead (None when dataset_dict is empty).
        speaker_id = gr.Dropdown(
            label="Speaker ID",
            choices=list(dataset_dict.keys()),
            value=next(iter(dataset_dict), None),
        )
        speed_of_non_edit_speech = gr.Slider(
            label="Speed of non-edit speech",
            minimum=1,
            maximum=2.0,
            step=0.1,
            value=1.2,
        )
|
325 |
+
|
326 |
+
|
327 |
with gr.Row(elem_id="container"):
|
328 |
+
inp_srt = gr.File(
|
329 |
label="Upload SRT files",
|
330 |
file_count="multiple",
|
331 |
type="filepath",
|
|
|
348 |
height=100
|
349 |
)
|
350 |
|
351 |
+
inp_srt.change(check_input_files, inputs=inp_srt)
|
352 |
+
btn.click(
|
353 |
+
fn=srt_to_audio_multi,
|
354 |
+
inputs=[inp_srt, speaker_id, speed_of_non_edit_speech],
|
355 |
+
outputs=out
|
356 |
+
)
|
357 |
download_btn.click(fn=download_all, inputs=out, outputs=download_out)
|
358 |
|
359 |
if __name__ == "__main__":
|