File size: 512 Bytes
d691b8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import install
from reazonspeech.nemo.asr import audio_from_path, load_model, transcribe

# Cached ASR model shared by all calls; stays None until speech_to_text()
# loads it on first use.
model = None

def speech_to_text(audio_file, _model_size=None):
    """Transcribe an audio file and return its text plus per-segment timings.

    The model is created with ``load_model()`` on the first call and stored in
    the module-level ``model`` global, so subsequent calls reuse it.

    Args:
        audio_file: Passed unchanged to ``audio_from_path``.
        _model_size: Ignored by this function; retained so callers that pass
            it positionally or by keyword keep working.

    Returns:
        A ``(text, text_with_timestamps)`` tuple. ``text`` is the ``text``
        attribute of the ``transcribe`` result. ``text_with_timestamps`` holds
        one newline-terminated line per segment with three tab-separated
        fields: start seconds and end seconds (both formatted to two decimal
        places) followed by the segment text. It is an empty string when the
        result has no segments.
    """
    global model

    # Identity check: only a missing model triggers a load, independent of
    # how the model object behaves in a boolean context.
    if model is None:
        model = load_model()

    audio = audio_from_path(audio_file)
    ret = transcribe(model, audio)

    # Build the listing in one pass instead of repeated string concatenation.
    text_with_timestamps = "".join(
        f"{segment.start_seconds:.2f}\t{segment.end_seconds:.2f}\t{segment.text}\n"
        for segment in ret.segments
    )

    return ret.text, text_with_timestamps