# Generate_Audio / download_audio.py
# namkuner's picture
# Upload folder using huggingface_hub
# 05819d1 verified
import yt_dlp
import os
import subprocess
import shutil
from pydub import AudioSegment
from pydub.playback import play
import os
import tempfile
from pytube import Playlist
def download_and_convert_audio(url, path, idx, sample_rate=24000):
    """Download the audio of ``url`` and resample it into a WAV file.

    yt-dlp downloads the best available audio stream and extracts it to a
    WAV file named after the media title. ffmpeg then re-encodes that file
    at ``sample_rate`` into
    ``<path><first 30 chars of the WAV file name>_/<idx>.wav`` and the
    intermediate WAV is removed.

    Args:
        url: Address of the media to download.
        path: Prefix of the output directory. It is concatenated verbatim,
            so it should end with a path separator.
        idx: Value used as the base name of the output file.
        sample_rate: Target sample rate in Hz, passed to ffmpeg ``-ar``.

    Returns:
        Path of the resampled WAV file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status. The intermediate WAV is left in place in that case so
            the conversion can be retried.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }],
        'outtmpl': '%(title)s.%(ext)s',
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        filename = ydl.prepare_filename(info)

    # prepare_filename() reports the pre-extraction name; the audio
    # post-processor leaves a file with the same stem and a .wav extension.
    original_filename = os.path.splitext(filename)[0] + '.wav'
    print("original_filename", original_filename)

    direc = path + original_filename[:30] + "_"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(direc, exist_ok=True)
    output_filename = direc + "/" + str(idx) + ".wav"

    # -y: overwrite an existing output instead of blocking on ffmpeg's
    # interactive prompt. check=True: a failed conversion must not fall
    # through to deleting the source and returning a path that was never
    # written.
    subprocess.run(
        [
            'ffmpeg', '-y', '-i', original_filename,
            '-ar', str(sample_rate),
            output_filename,
        ],
        check=True,
    )
    os.remove(original_filename)
    print(f"Audio downloaded and converted: {output_filename}")
    return output_filename
from pydub import AudioSegment
def cut_audio(root, out_dir="audio_cut/", start_ms=30 * 1000,
              end_ms=6 * 60 * 1000 + 30 * 1000, max_files=7):
    """Export a fixed time window from the first WAV of each sub-folder.

    For every sub-directory of ``root`` (visited in sorted order) the first
    file (by name) is loaded, sliced to ``[start_ms, end_ms)`` and written
    to ``out_dir`` under the same file name. A file name that already
    exists in ``out_dir``, or that was exported earlier in this call, is
    skipped so an existing clip is never overwritten.

    Args:
        root: Directory whose sub-directories each hold a WAV file.
        out_dir: Destination directory; created if it does not exist.
        start_ms: Start of the kept window, in milliseconds.
        end_ms: End of the kept window, in milliseconds.
        max_files: Stop after exporting this many clips; ``None`` removes
            the limit.

    Returns:
        None. Progress is reported with ``print``.
    """
    # The original listing call failed on a fresh checkout where the
    # output directory had not been created yet.
    os.makedirs(out_dir, exist_ok=True)
    already_cut = set(os.listdir(out_dir))
    print(sorted(already_cut))

    exported = 0
    for folder in sorted(os.listdir(root)):
        if max_files is not None and exported >= max_files:
            break

        folder_path = os.path.join(root, folder)
        if not os.path.isdir(folder_path):
            # Stray files next to the per-video folders are not inputs.
            continue

        entries = sorted(os.listdir(folder_path))
        if not entries:
            # Nothing was downloaded into this folder.
            continue

        file = entries[0]
        if file in already_cut:
            print(file)
            continue

        audio = AudioSegment.from_wav(os.path.join(folder_path, file))
        clip = audio[start_ms:end_ms]
        clip.export(os.path.join(out_dir, file), format="wav")

        # Remember the name so a later folder with an identically named
        # file does not overwrite the clip that was just written.
        already_cut.add(file)
        exported += 1
        print(exported)
def total_duration(root):
    """Print and return the combined length, in hours, of the WAVs in ``root``.

    Only entries whose name ends in ``.wav`` (case-insensitive) are loaded;
    anything else in the directory is ignored rather than handed to the
    decoder.

    Args:
        root: Directory containing the WAV files. A trailing separator is
            optional.

    Returns:
        The summed duration in hours, the same value that is printed.
    """
    total_ms = 0
    for name in os.listdir(root):
        if not name.lower().endswith(".wav"):
            continue
        audio = AudioSegment.from_wav(os.path.join(root, name))
        # len() of a pydub segment is its duration in milliseconds.
        total_ms += len(audio)

    hours = total_ms / (1000 * 60 * 60)
    print(hours)
    return hours
def delete_file(root, json_path):
    """Delete files in ``root`` that have no matching entry in ``json_path``.

    An entry of ``json_path`` contributes the part of its name before the
    first ``_`` as an identifier. A file in ``root`` is kept when the part
    of its name before ``.wav`` equals one of those identifiers; every
    other regular file is removed. Sub-directories are left untouched.

    Args:
        root: Directory whose files are pruned. A trailing separator is
            optional.
        json_path: Directory whose entry names define the identifiers to
            keep.

    Returns:
        None.
    """
    # A set gives O(1) membership tests and drops duplicate identifiers.
    kept_ids = {name.split("_")[0] for name in os.listdir(json_path)}

    for name in os.listdir(root):
        target = os.path.join(root, name)
        if not os.path.isfile(target):
            # os.remove() cannot delete directories; skip them.
            continue
        if name.split(".wav")[0] not in kept_ids:
            os.remove(target)
if __name__ == '__main__':
    # Source playlist. Kept for reference; nothing below downloads from it.
    playlist_url = 'https://www.youtube.com/playlist?list=PLd7oGuDX6k1CD0EaggVT3kV6MjGqbVV9k'
    # Directory that stores the trimmed audio files.
    save_path = 'audio_cut/'
    # Create the directory if it does not exist yet, so the listing calls
    # below do not fail on a first run.
    os.makedirs(save_path, exist_ok=True)
    cut_audio("downloaded_audio/")
    total_duration(save_path)
    # Optional pruning step, disabled by default:
    # delete_file("audio_cut/","json_result/")