# text-to-speech / app.py
# Author: Daryl Fung
# Last commit: "changed port" (cf64ca3)
# (repository page residue: raw / history / blame / contribute / delete)
# No virus
# Size: 1.86 kB
import io

import numpy as np
import soundfile as sf
import uvicorn
from bark import SAMPLE_RATE
from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from pymilvus import Collection
from sentence_transformers import SentenceTransformer

from db.db_connect import connect, disconnect
from db.query_db import query
# Sentence-embedding model shared by the whole module; constructed once at
# import time and used for every `model.encode(...)` call below.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# FastAPI application object; routes are registered on it via `@app` decorators.
app = FastAPI()
def get_wait_responses():
    """Embed the canned "wait" responses stored in ``db/wait_responses.txt``.

    The file is split on blank lines; every non-blank chunk is treated as
    one response and all of them are encoded with the module-level ``model``.

    Returns:
        The result of ``model.encode`` for the list of responses.
    """
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open('db/wait_responses.txt', 'r', encoding='utf-8') as file:
        content = file.read()

    # Trailing blank lines would otherwise yield whitespace-only chunks that
    # get embedded (and later queried) as if they were real responses.
    contents = [chunk for chunk in content.split('\n\n') if chunk.strip()]
    return model.encode(contents)
# Embeddings of the wait responses, computed once at import time; the /tts
# handler uses them as the fallback query when no audio matches the request.
WAIT_RESPONSES_EMBEDDINGS = get_wait_responses()
#TODO raise exception
def insert_response_to_generate_for_audio(text, embeddings):
    """Store ``text`` and its embeddings in the "Response" collection.

    Called for texts that have no matching audio yet, so that the text is
    recorded as a response to be used to generate audio.

    Args:
        text: The text that still needs audio.
        embeddings: Embeddings of ``text``; the caller passes the result of
            ``model.encode([text])``.
    """
    connect()
    try:
        collection = Collection("Response")
        # Column-wise payload: first the text column, then the embeddings.
        # NOTE(review): assumes the collection schema has exactly these two
        # insertable fields in this order -- confirm against the schema.
        data = [
            [text],
            embeddings,
        ]
        collection.insert(data)
        collection.flush()
    finally:
        # Always release the connection, even when the insert or flush fails.
        disconnect()
@app.post('/tts')
async def transcribe(text: str):
    """Return WAV audio for ``text``, or a "wait" clip if none exists yet.

    The text is embedded and looked up with ``query``. When nothing matches,
    the text is stored so audio can be generated for it, and the lookup is
    repeated with the pre-computed wait-response embeddings instead.

    Args:
        text: The text to look up audio for.

    Returns:
        A ``Response`` with media type ``audio/wav`` containing the audio
        re-encoded as WAV.

    Raises:
        HTTPException: With status 503 when neither lookup returns audio.
    """
    embeddings = model.encode([text])
    audio = await query(embeddings, threshold=0.8)

    if audio is None:
        # Nothing stored for this text yet: record it as a response to be
        # used to generate audio, and answer with a wait response meanwhile.
        insert_response_to_generate_for_audio(text, embeddings)
        audio = await query(WAIT_RESPONSES_EMBEDDINGS, threshold=0.8)

    if audio is None:
        # Previously this fell through and crashed with a TypeError.
        raise HTTPException(
            status_code=503,
            detail="No audio is available for this request yet.",
        )

    # Hand the stored bytes to soundfile as an in-memory file. They are passed
    # through unchanged: viewing them as int16 first added nothing and raised
    # ValueError for payloads with an odd number of bytes.
    samples, sample_rate = sf.read(io.BytesIO(audio))

    # Re-encode as WAV so the payload matches the declared media type.
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, samples, sample_rate, format='wav')

    return Response(
        content=wav_buffer.getvalue(),
        media_type="audio/wav",  # Same as the Content-Type header
    )
# Start the server only when this file is executed directly.
if __name__ == '__main__':
    uvicorn.run(
        'app:app',  # import string: the `app` object inside module `app`
        host='0.0.0.0',
        port=7860,
    )