"""Download YouTube audio as WAV, resample it, and cut fixed-length clips."""

import os
import shutil
import subprocess
import tempfile

import yt_dlp
from pydub import AudioSegment
from pydub.playback import play
from pytube import Playlist


def download_and_convert_audio(url: str, path: str, idx, sample_rate: int = 24000) -> str:
    """Download the audio of *url* and resample it to *sample_rate* Hz.

    The audio is fetched with yt-dlp using the ``FFmpegExtractAudio``
    post-processor (preferred codec ``wav``), then re-encoded with ``ffmpeg``
    into ``<path>/<first 30 chars of the downloaded file name>_/<idx>.wav``.
    The intermediate full-rate WAV is deleted once the conversion succeeds.

    Args:
        url: URL handed to ``yt_dlp.YoutubeDL.extract_info``.
        path: Directory under which the per-video folder is created.
        idx: Value used as the output file stem (converted with ``str``).
        sample_rate: Target sample rate passed to ffmpeg's ``-ar`` option.

    Returns:
        The path of the resampled WAV file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
            The downloaded WAV is left in place in that case so the
            conversion can be retried without downloading again.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }],
        'outtmpl': '%(title)s.%(ext)s',
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        filename = ydl.prepare_filename(info)

    # prepare_filename() is based on the download template; this code assumes
    # the post-processed file has the same stem with a ".wav" extension.
    original_filename = os.path.splitext(filename)[0] + '.wav'
    print("original_filename", original_filename)

    # Convert sample rate using FFmpeg.
    direc = os.path.join(path, original_filename[:30] + "_")
    os.makedirs(direc, exist_ok=True)
    output_filename = os.path.join(direc, str(idx) + ".wav")

    # check=True: never delete the download below if the conversion failed.
    subprocess.run(
        [
            'ffmpeg',
            '-i', original_filename,
            '-ar', str(sample_rate),
            output_filename,
        ],
        check=True,
    )

    os.remove(original_filename)
    print(f"Audio downloaded and converted: {output_filename}")
    return output_filename


def cut_audio(root: str, *, out_dir: str = "audio_cut/", max_files: int = 7,
              start_ms: int = 30 * 1000, length_ms: int = 6 * 60 * 1000) -> int:
    """Cut one clip from the first file of each sub-folder of *root*.

    For every sub-folder of *root* (visited in sorted order) the first entry
    is loaded as WAV, the window ``[start_ms, start_ms + length_ms)`` is
    sliced out and exported to *out_dir* under the same file name. Files whose
    name was already present in *out_dir* when the function started are
    skipped. Processing stops once *max_files* clips have been exported.

    Args:
        root: Directory containing one sub-folder per downloaded video.
        out_dir: Directory receiving the clips; created if missing.
        max_files: Maximum number of clips to export in this call (>= 1).
        start_ms: Offset of the clip start, in milliseconds.
        length_ms: Clip length, in milliseconds.

    Returns:
        Total duration of the clips exported by this call, in milliseconds.
    """
    os.makedirs(out_dir, exist_ok=True)
    lst = os.listdir(out_dir)
    print(lst)
    already_cut = set(lst)

    exported = 0
    total_ms = 0
    for folder in sorted(os.listdir(root)):
        folder_path = os.path.join(root, folder)
        if not os.path.isdir(folder_path):
            # Stray files next to the per-video folders are not inputs.
            continue

        entries = sorted(os.listdir(folder_path))
        if not entries:
            # Nothing was downloaded into this folder.
            continue

        file = entries[0]
        if file in already_cut:
            print(file)
            continue

        audio = AudioSegment.from_wav(os.path.join(folder_path, file))
        audio = audio[start_ms:start_ms + length_ms]
        audio.export(os.path.join(out_dir, file), format="wav")

        exported += 1
        print(exported)
        total_ms += len(audio)  # len() of an AudioSegment is in milliseconds
        if exported >= max_files:
            break

    return total_ms


def total_duration(root: str) -> float:
    """Print and return the combined duration, in hours, of the WAVs in *root*.

    Entries whose name does not end with ``.wav`` are ignored.

    Args:
        root: Directory containing the WAV files to measure.

    Returns:
        The summed duration in hours (the same value that is printed).
    """
    total_ms = 0
    for file in os.listdir(root):
        if not file.lower().endswith(".wav"):
            continue
        audio = AudioSegment.from_wav(os.path.join(root, file))
        total_ms += len(audio)

    hours = total_ms / (1000 * 60 * 60)
    print(hours)
    return hours


def delete_file(root: str, json_path: str) -> None:
    """Delete files in *root* that have no matching entry in *json_path*.

    A file is kept when its name up to the first ``".wav"`` equals the part
    before the first ``"_"`` of some entry name in *json_path*. Sub-directories
    of *root* are left untouched.

    Args:
        root: Directory whose files are candidates for deletion.
        json_path: Directory whose entry names define which files to keep.
    """
    keep = {name.split("_")[0] for name in os.listdir(json_path)}
    for file in os.listdir(root):
        target = os.path.join(root, file)
        if not os.path.isfile(target):
            continue
        if file.split(".wav")[0] not in keep:
            os.remove(target)


if __name__ == '__main__':
    # NOTE(review): playlist_url is not used by the steps below; kept for reference.
    playlist_url = 'https://www.youtube.com/playlist?list=PLd7oGuDX6k1CD0EaggVT3kV6MjGqbVV9k'

    # Directory where the downloaded audio files are stored.
    save_path = 'audio_cut/'

    # Create the directory if it does not exist.
    os.makedirs(save_path, exist_ok=True)

    cut_audio("downloaded_audio/", out_dir=save_path)
    total_duration(save_path)
    # Optional clean-up step:
    # delete_file("audio_cut/","json_result/")