import os
import tempfile

from flask import Flask, request, jsonify, send_file
from gradio_client import Client

from openai_chat_module import OpenaiChatModule
from text2speech import VITSApiTTS

app = Flask(__name__)


@app.route('/process_speech', methods=['POST'])
def upload_and_return_temp_file():
    try:
        if 'audio' not in request.files:
            return jsonify({"error": "No audio file provided"}), 400

        file = request.files['audio']
        filename = request.form.get('filename')
        modelid = request.form.get('modelid', default=2, type=int)

        if not filename:
            return jsonify({"error": "No filename parameter provided"}), 400

        # Save the uploaded audio into a temporary directory under the
        # requested filename with a .wav extension.
        temp_dir = tempfile.mkdtemp()
        destination_path = os.path.join(temp_dir, f"{filename}.wav")
        file.save(destination_path)
        print(destination_path)

        # Transcribe the recording via the Gradio speech-to-text endpoint.
        # gradio_client.Client requires the server URL (or Space name); the
        # local address below is an assumption -- point it at your ASR server.
        client = Client("http://127.0.0.1:7860/")
        result = client.predict(
            destination_path,  # str (filepath or URL to file) in 'Input' Audio component
            api_name="/predict"
        )

        # Generate a chat reply from the transcription. The API key is read
        # from the environment rather than being hard-coded in the source.
        openai_chat_module = OpenaiChatModule(os.environ["OPENAI_API_KEY"])
        text = openai_chat_module.chat_with_origin_model(result)
        print(text)

        # Synthesize the reply with the selected VITS voice model.
        vits_tts = VITSApiTTS(modelid)
        audio_data = vits_tts.text_to_speech_and_play(text)

        # Write the synthesized audio next to the uploaded file.
        audio_file_path = os.path.join(temp_dir, f"{filename}.mp3")
        with open(audio_file_path, "wb") as f:
            f.write(audio_data)
        print(audio_file_path)

        # Return the audio file as a response with the correct content type.
        # send_file opens the file immediately, so on POSIX systems the files
        # below can be unlinked before the response is returned.
        response = send_file(audio_file_path, mimetype="audio/mpeg")

        # Cleanup: delete temporary files and directory.
        os.remove(destination_path)
        os.remove(audio_file_path)
        os.rmdir(temp_dir)

        return response
    except Exception as e:
        return jsonify({"error": str(e)}), 500


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=True)
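# ---------------------------------------------------------------------------
# Example client (a sketch, not executed by this module): how a caller might
# POST a WAV recording to /process_speech and save the synthesized MP3 reply.
# The host/port match app.run() above; the form fields 'audio', 'filename'
# and 'modelid' are the ones this endpoint reads. The local file names
# "question.wav" and "reply.mp3" are placeholders for illustration only.
#
#   import requests
#
#   with open("question.wav", "rb") as f:
#       resp = requests.post(
#           "http://127.0.0.1:5000/process_speech",
#           files={"audio": f},
#           data={"filename": "question", "modelid": "2"},
#       )
#   resp.raise_for_status()
#   with open("reply.mp3", "wb") as out:
#       out.write(resp.content)
# ---------------------------------------------------------------------------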