import streamlit as st
from transformers import pipeline
import whisper
from gtts import gTTS
import tempfile
import os
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Load Hugging Face model for text generation (instead of Google Cloud)
@st.cache_resource
def load_hf_model():
    # Load a model for heart health-related questions
    return pipeline("text-generation", model="gpt2")


# Load Whisper model for transcription
@st.cache_resource
def load_whisper_model():
    return whisper.load_model("base")


# Function to generate a response using the Hugging Face model
def generate_hf_response(model, prompt):
    result = model(prompt, max_length=100, num_return_sequences=1)
    return result[0]["generated_text"]


# Function to process audio input using Whisper and Hugging Face
def process_audio(audio_file, hf_model, whisper_model):
    try:
        # Whisper expects a file path, so write the uploaded audio to a temporary file first
        suffix = os.path.splitext(audio_file.name)[1] or ".wav"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_in:
            tmp_in.write(audio_file.read())
            input_path = tmp_in.name

        # Transcribe audio using Whisper
        result = whisper_model.transcribe(input_path)
        user_text = result["text"]
        logger.info(f"Transcription successful: {user_text}")
    except Exception as e:
        logger.error(f"Error in transcribing audio: {e}")
        return "Error in transcribing audio.", None

    try:
        # Generate a response using the Hugging Face model
        response_text = generate_hf_response(hf_model, user_text)
        logger.info(f"Generated response: {response_text}")
    except Exception as e:
        logger.error(f"Error in generating response: {e}")
        return "Error in generating response.", None

    try:
        # Convert the response text to speech and save it as an MP3 file
        tts = gTTS(text=response_text, lang="en")
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_out:
            output_path = tmp_out.name
        tts.save(output_path)
        logger.info("Text-to-speech conversion successful.")
    except Exception as e:
        logger.error(f"Error in text-to-speech conversion: {e}")
        return "Error in text-to-speech conversion.", None

    return response_text, output_path


# Main application layout
def main():
    st.title("Heart Health & Audio Processing App 🫀🎙️ (Hugging Face Edition)")

    # Load models (cached so they are only loaded once per session)
    hf_model = load_hf_model()
    whisper_model = load_whisper_model()

    # Two tabs: one for the chatbot and one for audio processing
    tab1, tab2 = st.tabs(["Heart Health Chatbot", "Audio Processing"])

    # Tab 1: Heart Health Chatbot
    with tab1:
        st.header("Chat with Heart Health Specialist")

        if "history" not in st.session_state:
            st.session_state.history = []

        user_input = st.text_input("Ask about heart health:", placeholder="Type here...")

        if st.button("Send") and user_input:
            bot_response = generate_hf_response(hf_model, user_input)
            st.session_state.history.append({"role": "user", "content": user_input})
            st.session_state.history.append({"role": "bot", "content": bot_response})

        for chat in st.session_state.history:
            if chat["role"] == "user":
                st.write(f"**You:** {chat['content']}")
            else:
                st.write(f"**Bot:** {chat['content']}")

    # Tab 2: Audio Processing
    with tab2:
        st.header("Audio Processing with Whisper and Hugging Face")

        uploaded_audio = st.file_uploader(
            "Upload an audio file for transcription and response",
            type=["mp3", "wav", "ogg"],
        )

        if uploaded_audio:
            with st.spinner("Processing audio..."):
                response_text, audio_file_path = process_audio(uploaded_audio, hf_model, whisper_model)

            st.write(f"**Response:** {response_text}")
            if audio_file_path:
                st.audio(audio_file_path)


# Run the app
if __name__ == "__main__":
    main()
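
# ---------------------------------------------------------------------------
# Usage note (a minimal sketch; the file name and package list are assumptions
# inferred from the imports above, not pinned requirements from the original
# project):
#
#   pip install streamlit transformers torch openai-whisper gTTS
#   streamlit run app.py
#
# Whisper additionally needs the ffmpeg binary on PATH to decode uploaded
# audio files, and gTTS requires an internet connection to synthesize speech.
# ---------------------------------------------------------------------------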