File size: 2,874 Bytes
d29da97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05819d1
 
 
 
d29da97
 
 
05819d1
 
 
d29da97
 
 
 
05819d1
 
d29da97
05819d1
 
 
 
d29da97
 
05819d1
 
 
 
 
 
 
 
 
 
 
 
 
 
d29da97
 
 
 
 
 
 
 
 
05819d1
 
d29da97
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import yt_dlp
import os
import subprocess
import shutil
from pydub import AudioSegment
from pydub.playback import play
import os
import tempfile
from pytube  import  Playlist
def download_and_convert_audio(url, path, idx, sample_rate=24000):
    """Download the audio of ``url`` and store it as a resampled WAV file.

    yt-dlp downloads the best available audio stream and its
    ``FFmpegExtractAudio`` post-processor converts it to WAV, named after the
    video title (relative output template). That intermediate file is then
    resampled with ``ffmpeg`` into ``<path><first 30 chars of its name>_/<idx>.wav``
    and removed.

    Args:
        url: Address of the video to download.
        path: Directory prefix under which the per-video folder is created.
        idx: Value used as the base name of the resulting WAV file.
        sample_rate: Target sample rate in Hz passed to ffmpeg's ``-ar``.

    Returns:
        The path of the resampled WAV file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
            The intermediate WAV is kept in that case so the conversion can be
            retried without downloading again.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }],
        'outtmpl': '%(title)s.%(ext)s',
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        filename = ydl.prepare_filename(info)
        # prepare_filename() reports the pre-conversion extension; the
        # post-processor leaves a .wav with the same stem.
        original_filename = os.path.splitext(filename)[0] + '.wav'
        print("original_filename", original_filename)

    # Per-video folder: the first 30 characters of the intermediate file name
    # (this can include ".wav" for short titles) followed by "_".
    # os.path.join keeps the layout identical when `path` ends with a
    # separator and also works when it does not.
    direc = os.path.join(path, original_filename[:30] + "_")
    os.makedirs(direc, exist_ok=True)

    output_filename = direc + "/" + str(idx) + ".wav"

    # Convert sample rate using FFmpeg. check=True stops here on failure so
    # the downloaded audio is not deleted after an unsuccessful conversion.
    subprocess.run(
        [
            'ffmpeg', '-i', original_filename,
            '-ar', str(sample_rate),
            output_filename,
        ],
        check=True,
    )

    os.remove(original_filename)
    print(f"Audio downloaded and converted: {output_filename}")
    return output_filename

from pydub import AudioSegment

def cut_audio(root, output_dir="audio_cut/", max_files=7,
              start_ms=30 * 1000, length_ms=6 * 60 * 1000):
    """Trim one WAV file per sub-folder of ``root`` and save it to ``output_dir``.

    For every sub-folder of ``root`` the first file (in sorted order) is
    loaded, the window ``[start_ms, start_ms + length_ms)`` is kept, and the
    result is exported under the same file name into ``output_dir``. Files
    whose name was already present in ``output_dir`` when the function started
    are skipped.

    Args:
        root: Directory containing one sub-folder per downloaded recording.
        output_dir: Destination directory; created if it does not exist.
        max_files: Stop after exporting this many files; ``None`` means no
            limit.
        start_ms: Offset of the kept window, in milliseconds.
        length_ms: Length of the kept window, in milliseconds.
    """
    os.makedirs(output_dir, exist_ok=True)

    lst = os.listdir(output_dir)
    print(lst)
    already_cut = set(lst)

    dem = 0  # number of files exported during this call
    for folder in os.listdir(root):
        if max_files is not None and dem >= max_files:
            break

        path = os.path.join(root, folder)
        if not os.path.isdir(path):
            # Stray files directly under root are not recordings folders.
            continue

        # os.listdir() order is arbitrary; sort so the pick is deterministic.
        candidates = sorted(os.listdir(path))
        if not candidates:
            continue
        file = candidates[0]

        if file in already_cut:
            print(file)
            continue

        audio = AudioSegment.from_wav(os.path.join(path, file))
        audio = audio[start_ms:start_ms + length_ms]
        audio.export(os.path.join(output_dir, file), format="wav")
        dem += 1
        print(dem)

def total_duration(root):
    """Print and return the combined length, in hours, of the WAV files in ``root``.

    Only regular files whose name ends in ``.wav`` (case-insensitive) are
    measured; sub-directories and other files are ignored instead of being
    handed to the WAV loader.

    Args:
        root: Directory to scan (not recursive). A trailing separator is
            optional.

    Returns:
        The summed duration in hours as a float (``0.0`` if nothing matched).
    """
    total_ms = 0
    for file in os.listdir(root):
        path = os.path.join(root, file)
        if not (os.path.isfile(path) and file.lower().endswith(".wav")):
            continue
        # len() of a pydub AudioSegment is its duration in milliseconds.
        total_ms += len(AudioSegment.from_wav(path))

    hours = total_ms / (1000 * 60 * 60)
    print(hours)
    return hours

def delete_file(root, json_path):
    """Delete files in ``root`` that have no matching entry in ``json_path``.

    Each entry of ``json_path`` contributes the part of its name before the
    first ``_`` as a key. A file in ``root`` is kept when the part of its name
    before the first ``.wav`` equals one of those keys; every other regular
    file in ``root`` is removed. Sub-directories of ``root`` are left alone.

    Args:
        root: Directory whose files are pruned. A trailing separator is
            optional.
        json_path: Directory whose entry names define which files to keep.
    """
    keep = {name.split("_")[0] for name in os.listdir(json_path)}
    for file in os.listdir(root):
        target = os.path.join(root, file)
        if not os.path.isfile(target):
            # os.remove() cannot delete directories; skip them.
            continue
        if file.split(".wav")[0] not in keep:
            os.remove(target)

if __name__ == '__main__':
    # Source playlist; not used by the steps below.
    playlist_url = 'https://www.youtube.com/playlist?list=PLd7oGuDX6k1CD0EaggVT3kV6MjGqbVV9k'

    # Directory that holds the audio clips (cut_audio writes its output here).
    save_path = 'audio_cut/'

    # Create the directory if it does not exist yet; cut_audio lists its
    # contents before writing and would fail on a missing directory.
    os.makedirs(save_path, exist_ok=True)

    cut_audio("downloaded_audio/")
    total_duration(save_path)
    # delete_file("audio_cut/","json_result/")