"""Gradio app that answers questions about the audio of a YouTube video.

Pipeline: download the audio track with pytube, transcribe it through the
Gladia HTTP API, build a llama_index list index over the transcript, persist
the index to disk, and answer user queries against that persisted index.
"""
import json
import os

import gradio as gr
import openai
import requests
from llama_index import Document, GPTListIndex, GPTSimpleVectorIndex
from pytube import YouTube

# File the index is saved to by createindex() and loaded from by videoques().
INDEX_FILENAME = "index.json"

GLADIA_TRANSCRIPTION_URL = 'https://api.gladia.io/audio/text/audio-transcription/'

# NOTE(review): the fallback key below is committed to source control and
# should be rotated. Set the GLADIA_API_KEY environment variable to override
# it; the fallback is kept only so existing deployments keep working.
GLADIA_API_KEY = os.environ.get(
    'GLADIA_API_KEY', '70ad5f6e-31e6-4acf-8a15-89c166c4cc9f')


def download_yt_video(ytlink):
    """Download the first audio-only stream of a YouTube video.

    The downloaded file is renamed so that it carries an ``.mp3`` extension.
    The rename only changes the file name; the audio data is not transcoded.

    Args:
        ytlink: URL of the YouTube video.

    Returns:
        Path of the renamed audio file.

    Raises:
        ValueError: if the video exposes no audio-only stream.
        Exception: any error raised by pytube or the file system is
            propagated so the caller can report the real cause.
    """
    yt = YouTube(ytlink)
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError("No audio-only stream is available for this video.")
    out_file = stream.download(output_path="./")
    base, _ = os.path.splitext(out_file)
    new_file = base + '.mp3'
    # os.replace overwrites a leftover file from a previous run on every
    # platform, whereas os.rename fails on Windows if the target exists.
    os.replace(out_file, new_file)
    return new_file


def get_transcript(filename):
    """Transcribe an audio file with the Gladia API.

    The whitespace-normalised transcript is also written next to the audio
    file, using the same base name with a ``.txt`` extension.

    Args:
        filename: Path of the audio file to upload.

    Returns:
        The transcript as a single string with runs of whitespace collapsed.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        ValueError: if the response has no ``prediction`` list.
    """
    headers = {
        'accept': 'application/json',
        'x-gladia-key': GLADIA_API_KEY,
        # Do not set Content-Type here: requests only generates the
        # multipart boundary itself when the header is left unset.
    }
    # Keep the upload handle inside a context manager so it is closed before
    # the caller deletes the file.
    with open(filename, 'rb') as audio_file:
        files = {
            'audio': (filename, audio_file, 'audio/mpeg'),
            # NOTE(review): this sample URL is sent together with the
            # uploaded file; it looks like a leftover from the API example.
            # Confirm which of the two inputs the service uses.
            'audio_url': (None, 'http://files.gladia.io/example/audio-transcription/split_infinity.wav'),
            # NOTE(review): the UI advertises Portuguese videos but the
            # language is pinned to english -- confirm this is intended.
            'language': (None, 'english'),
            'language_behaviour': (None, 'manual'),
            'output_format': (None, 'json'),
        }
        response = requests.post(
            GLADIA_TRANSCRIPTION_URL, headers=headers, files=files)

    response.raise_for_status()
    data = json.loads(response.text)
    predictions = data.get('prediction') if isinstance(data, dict) else None
    if predictions is None:
        raise ValueError(
            "Transcription response does not contain a 'prediction' field.")

    # Join the segments, then collapse every run of whitespace to one space.
    joined = " ".join(segment['transcription'] for segment in predictions)
    result = ' '.join(joined.split())

    transcript_path = os.path.splitext(filename)[0] + ".txt"
    with open(transcript_path, "w", encoding="utf-8") as f:
        f.write(result)
    return result


def createindex(url, openaikey):
    """Download, transcribe and index a YouTube video.

    Args:
        url: YouTube video link entered by the user.
        openaikey: OpenAI API key; exported as ``OPENAI_API_KEY``.

    Returns:
        A status message for the UI: a success notice, a validation
        message, or a description of the error that occurred.
    """
    # Basic checks
    if not url:
        return "Please enter Youtube Video link."
    if not openaikey:
        return "Please enter openaikey."

    try:
        filename = download_yt_video(url)
        try:
            transcript = get_transcript(filename)
        finally:
            # Remove the downloaded audio even when transcription fails.
            os.remove(filename)

        # Store openai key in environment
        os.environ['OPENAI_API_KEY'] = openaikey

        # Create index and persist it for later queries
        index = GPTListIndex([Document(transcript)], chunk_size_limit=2500)
        index.save_to_disk(INDEX_FILENAME)

        return "Video processed. Now you can start querying."
    except Exception as e:
        # Top-level UI boundary: report the failure as text instead of
        # handing an exception object to the output component.
        return f"Error while processing video: {e}"


def videoques(query, openaikey):
    """Answer a query against the index saved by createindex().

    Args:
        query: Question entered by the user.
        openaikey: OpenAI API key; exported as ``OPENAI_API_KEY``.

    Returns:
        The answer text, a validation message, or an error description.
    """
    # Basic checks
    if not query:
        return "Please enter your query."
    if not openaikey:
        return "Please enter openaikey."
    if not os.path.exists(INDEX_FILENAME):
        return "Please process a video before querying."

    # Store openai key in environment
    os.environ['OPENAI_API_KEY'] = openaikey

    try:
        index = GPTListIndex.load_from_disk(INDEX_FILENAME)
        # Query based on index
        response = index.query(query, mode="embedding", similarity_top_k=4)
    except Exception as e:
        # Top-level UI boundary: surface the failure in the answer box.
        return f"Error while querying: {e}"
    return str(response)


def cleartext(query, output):
    """Return two empty strings to reset a pair of text components.

    Both arguments are ignored; they exist because the click handlers pass
    the current values of the two components being cleared.
    """
    return ["", ""]


with gr.Blocks() as demo:
    gr.Markdown("\n\nPortuguese VideoQues\n\n")
    gr.Markdown(
        """ Portuguese VideoQues answers your queries on any Portuguese video. """)
    with gr.Row():
        with gr.Column():
            url = gr.Textbox(lines=1, label="Enter Youtube Video link.")
            openaikey = gr.Textbox(lines=1, label="Enter Your OpenAI key.")
            submit1_button = gr.Button("Submit")
            ans1_output = gr.Textbox(label="Status.")
            clear1_button = gr.Button("Clear")
        with gr.Column():
            query = gr.Textbox(lines=2, label="Enter Your Query.")
            submit2_button = gr.Button("Submit")
            ans2_output = gr.Textbox(label="Answer.")
            clear2_button = gr.Button("Clear")

    # Submit button for processing the video and building the index.
    submit1_button.click(createindex, inputs=[
                         url, openaikey], outputs=[ans1_output])

    # Submit button for submitting query.
    submit2_button.click(videoques, inputs=[
                         query, openaikey], outputs=[ans2_output])

    # Clear button for clearing the video link and status.
    clear1_button.click(cleartext, inputs=[
                        url, ans1_output], outputs=[url, ans1_output])

    # Clear button for clearing query and answer.
    clear2_button.click(cleartext, inputs=[query, ans2_output], outputs=[
                        query, ans2_output])

demo.launch(debug=True)