Spaces:

latuan
/

SRT-to-Audio

Running

latuan committed on Aug 21

Commit

2327ea1

•

1 Parent(s): 9b42c11

ver 1.9.6

Files changed (1) hide show

app.py CHANGED Viewed

@@ -172,18 +172,6 @@ def time_to_seconds(time_str):
     seconds = int(h) * 3600 + int(m) * 60 + float(s.replace(',', '.'))
     return seconds
-def numpy_to_audiosegment(numpy_array):
-    audio_segment = AudioSegment(
-        numpy_array.tobytes(),
-        frame_rate=16000,
-        sample_width=numpy_array.dtype.itemsize,
-        channels=1
-    )
-    return audio_segment
-def audiosegment_to_numpy(audio_segment):
-    return np.array(audio_segment.get_array_of_samples())
 def closest_speedup_factor(factor, allowed_factors):
     return min(allowed_factors, key=lambda x: abs(x - factor))
@@ -202,14 +190,17 @@ def generate_audio_with_pause(srt_file_path):
         current_duration = len(audio_data) / 16000
         # Adjust audio speed by speedup
-        audio_segment = numpy_to_audiosegment(audio_data)
         if current_duration > desired_duration:
             raw_speedup_factor = current_duration / desired_duration
             speedup_factor = closest_speedup_factor(raw_speedup_factor, allowed_factors)
-            audio_segment = audio_segment.speedup(playback_speed=speedup_factor)
-        # Convert back to numpy array
-        audio_data = audiosegment_to_numpy(audio_segment)
         audio_clips.append(audio_data)
         # Add pause

     seconds = int(h) * 3600 + int(m) * 60 + float(s.replace(',', '.'))
     return seconds
 def closest_speedup_factor(factor, allowed_factors):
     return min(allowed_factors, key=lambda x: abs(x - factor))
         current_duration = len(audio_data) / 16000
         # Adjust audio speed by speedup
         if current_duration > desired_duration:
             raw_speedup_factor = current_duration / desired_duration
             speedup_factor = closest_speedup_factor(raw_speedup_factor, allowed_factors)
+            audio_data = librosa.effects.time_stretch(
+                y=audio_data,
+                rate=speedup_factor,
+                n_fft=1024,
+                hop_length=256
+            )
+            audio_data = audio_data / np.max(np.abs(audio_data))
         audio_clips.append(audio_data)
         # Add pause