agorlanov
committed on
Commit
•
d6b32ee
1
Parent(s):
361c473
add_filter
Browse files- main_pipeline.py +10 -1
main_pipeline.py
CHANGED
@@ -12,11 +12,20 @@ import pandas as pd
|
|
12 |
import soundfile as sf
|
13 |
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def save_speaker_audios(segments, denoised_audio_path, out_folder='out', out_f=48000):
|
16 |
signal, sr = librosa.load(denoised_audio_path, sr=out_f, mono=True)
|
17 |
os.makedirs(out_folder, exist_ok=True)
|
18 |
out_wav_paths = []
|
|
|
19 |
segments = pd.DataFrame(segments)
|
|
|
|
|
20 |
|
21 |
for label in set(segments.label):
|
22 |
temp_df = segments[segments.label == label]
|
@@ -45,7 +54,7 @@ def main_pipeline(audio_path):
|
|
45 |
|
46 |
if __name__ == '__main__':
|
47 |
parser = argparse.ArgumentParser()
|
48 |
-
parser.add_argument('--audio-path', default='
|
49 |
opt = parser.parse_args()
|
50 |
|
51 |
for _ in tqdm(range(10)):
|
|
|
12 |
import soundfile as sf
|
13 |
|
14 |
|
15 |
+
def filter_small_speech(segments, min_share=0.015):
    """Drop segments of labels whose share of total speech is too small.

    Args:
        segments: DataFrame with at least a ``label`` column and a
            ``duration`` column; other columns are carried through untouched.
        min_share: A label is kept only when its summed ``duration`` divided
            by the summed ``duration`` of all labels is strictly greater than
            this fraction. Defaults to ``0.015``.

    Returns:
        The rows of ``segments`` whose label passed the threshold. If the
        total duration is zero, no label passes and the result is empty.
    """
    # Aggregate only the ``duration`` column: summing the whole frame would
    # also try to add up unrelated (possibly non-numeric) columns.
    per_label = segments.groupby('label')['duration'].sum()
    total = per_label.sum()

    # 0 / 0 yields NaN, and NaN > min_share is False, so an all-zero input
    # keeps nothing rather than raising.
    share = per_label / total
    kept_labels = share[share > min_share].index

    return segments[segments['label'].isin(kept_labels)]
|
19 |
+
|
20 |
+
|
21 |
def save_speaker_audios(segments, denoised_audio_path, out_folder='out', out_f=48000):
|
22 |
signal, sr = librosa.load(denoised_audio_path, sr=out_f, mono=True)
|
23 |
os.makedirs(out_folder, exist_ok=True)
|
24 |
out_wav_paths = []
|
25 |
+
|
26 |
segments = pd.DataFrame(segments)
|
27 |
+
segments['duration'] = segments.end - segments.start
|
28 |
+
segments = filter_small_speech(segments)
|
29 |
|
30 |
for label in set(segments.label):
|
31 |
temp_df = segments[segments.label == label]
|
|
|
54 |
|
55 |
if __name__ == '__main__':
|
56 |
parser = argparse.ArgumentParser()
|
57 |
+
parser.add_argument('--audio-path', default='podkast.mp3', help='Path to audio')
|
58 |
opt = parser.parse_args()
|
59 |
|
60 |
for _ in tqdm(range(10)):
|