latuan committed on
Commit
2327ea1
1 Parent(s): 9b42c11
Files changed (1) hide show
  1. app.py +8 -17
app.py CHANGED
@@ -172,18 +172,6 @@ def time_to_seconds(time_str):
172
  seconds = int(h) * 3600 + int(m) * 60 + float(s.replace(',', '.'))
173
  return seconds
174
 
175
- def numpy_to_audiosegment(numpy_array):
176
- audio_segment = AudioSegment(
177
- numpy_array.tobytes(),
178
- frame_rate=16000,
179
- sample_width=numpy_array.dtype.itemsize,
180
- channels=1
181
- )
182
- return audio_segment
183
-
184
- def audiosegment_to_numpy(audio_segment):
185
- return np.array(audio_segment.get_array_of_samples())
186
-
187
  def closest_speedup_factor(factor, allowed_factors):
188
  return min(allowed_factors, key=lambda x: abs(x - factor))
189
 
@@ -202,14 +190,17 @@ def generate_audio_with_pause(srt_file_path):
202
  current_duration = len(audio_data) / 16000
203
 
204
  # Adjust audio speed by speedup
205
- audio_segment = numpy_to_audiosegment(audio_data)
206
  if current_duration > desired_duration:
207
  raw_speedup_factor = current_duration / desired_duration
208
  speedup_factor = closest_speedup_factor(raw_speedup_factor, allowed_factors)
209
- audio_segment = audio_segment.speedup(playback_speed=speedup_factor)
210
-
211
- # Convert back to numpy array
212
- audio_data = audiosegment_to_numpy(audio_segment)
 
 
 
 
213
  audio_clips.append(audio_data)
214
 
215
  # Add pause
 
172
  seconds = int(h) * 3600 + int(m) * 60 + float(s.replace(',', '.'))
173
  return seconds
174
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  def closest_speedup_factor(factor, allowed_factors):
176
  return min(allowed_factors, key=lambda x: abs(x - factor))
177
 
 
190
  current_duration = len(audio_data) / 16000
191
 
192
  # Adjust audio speed by speedup
 
193
  if current_duration > desired_duration:
194
  raw_speedup_factor = current_duration / desired_duration
195
  speedup_factor = closest_speedup_factor(raw_speedup_factor, allowed_factors)
196
+ audio_data = librosa.effects.time_stretch(
197
+ y=audio_data,
198
+ rate=speedup_factor,
199
+ n_fft=1024,
200
+ hop_length=256
201
+ )
202
+ audio_data = audio_data / np.max(np.abs(audio_data))
203
+
204
  audio_clips.append(audio_data)
205
 
206
  # Add pause