Spaces:
Running
Running
NeuroSenko
committed on
Commit
•
2f796d3
1
Parent(s):
8810ba5
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pprint import pprint
|
2 |
+
|
3 |
+
import gradio as gr
|
4 |
+
import torch
|
5 |
+
|
6 |
+
# from IPython.display import Audio, display
|
7 |
+
from omegaconf import OmegaConf
|
8 |
+
|
9 |
+
# Fetch the upstream silero model catalogue; the dropdowns below are
# populated from its `tts_models` section.
torch.hub.download_url_to_file(
    "https://raw.githubusercontent.com/snakers4/silero-models/master/models.yml",
    "latest_silero_models.yml",
    progress=False,
)

all_models = OmegaConf.load("latest_silero_models.yml")

# Defaults for the initial model load and the initial model listing.
# `language` must be bound before the torch.hub.load call that reads it.
language = "ru"
model_id = "v3_1_ru"
device = torch.device("cpu")

# Load the default TTS model exactly once; torch.hub.load is expensive and
# a repeated call would only rebind the same names.
model, example_text = torch.hub.load(
    repo_or_dir="snakers4/silero-models",
    model="silero_tts",
    language=language,
    speaker=model_id,
)
model.to(device)  # gpu or cpu

# Synthesis settings read by generate_audio_by_text.
sample_rate = 48000
speaker = "aidar"  # NOTE(review): not referenced by the visible UI code
put_accent = True
put_yo = True
# Overrides the sample text returned by torch.hub.load above.
example_text = "В недрах тундры выдры в г+етрах т+ырят в вёдра ядра к+едров."

# Model names offered for the default language.
models = list(all_models.tts_models.get(language).keys())
|
42 |
+
|
43 |
+
def change_language(language):
    """Refresh the model dropdown after the language selection changes.

    Args:
        language: Key into ``all_models.tts_models`` chosen in the language
            dropdown. May be empty/None if the dropdown is cleared.

    Returns:
        A Gradio update for the model dropdown carrying the model names
        listed for ``language``. The selected value is moved to the first
        listed model (or cleared when nothing is listed) so the dropdown
        never keeps a model name that belongs to the previous language.
    """
    # `.get` yields None for a language missing from the catalogue; treat
    # that (and a cleared dropdown) as "no models" instead of crashing.
    available = all_models.tts_models.get(language) if language else None
    models = list(available.keys()) if available is not None else []
    return gr.update(choices=models, value=models[0] if models else None)
|
46 |
+
|
47 |
+
def change_model(language, model_name):
    """Load the selected silero TTS model and refresh the speaker dropdown.

    The loaded model replaces the module-level ``model`` so that
    ``generate_audio_by_text`` synthesizes with the model the user picked
    rather than with the one loaded at start-up.

    Args:
        language: Language key passed to ``torch.hub.load``.
        model_name: Model identifier passed to ``torch.hub.load`` as
            ``speaker``.

    Returns:
        A Gradio update for the speaker dropdown listing the new model's
        speakers, with the selection moved to the first of them.
    """
    global model

    loaded, _ = torch.hub.load(
        repo_or_dir='snakers4/silero-models',
        model='silero_tts',
        language=language,
        speaker=model_name,
    )
    loaded.to(device)
    # Rebind only after a successful load so a failed download leaves the
    # previously working model in place.
    model = loaded

    speakers = list(loaded.speakers)
    return gr.update(choices=speakers, value=speakers[0] if speakers else None)
|
56 |
+
|
57 |
+
|
58 |
+
def generate_audio_by_text(text, text_type, speaker):
    """Synthesize ``text`` with the currently loaded model.

    Args:
        text: The input to synthesize.
        text_type: ``'SSML'`` to pass ``text`` as SSML markup; any other
            value passes it as plain text.
        speaker: Speaker name forwarded to the model.

    Returns:
        Whatever ``model.save_wav`` returns; the UI wires it to the audio
        output component.
    """
    # The two modes differ only in which keyword carries the input text.
    text_keyword = "ssml_text" if text_type == 'SSML' else "text"
    return model.save_wav(
        speaker=speaker,
        sample_rate=sample_rate,
        put_accent=put_accent,
        put_yo=put_yo,
        **{text_keyword: text},
    )
|
75 |
+
|
76 |
+
# Gradio front end: input controls in the left column, audio output and the
# generate button in the right column, help text underneath.
# NOTE(review): the nesting below is reconstructed from a flattened listing;
# confirm the column/row placement against the deployed layout.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            language_input = gr.Dropdown(
                label="Language",
                choices=list(all_models.tts_models.keys()),
                value="ru",
                interactive=True,
            )

            model_input = gr.Dropdown(
                label="Model (based on selected language)",
                choices=models,
                value="v3_1_ru",
                interactive=True,
            )

            speaker_input = gr.Dropdown(
                label="Speaker (based on selected model)",
                choices=model.speakers,
                value="kseniya",
                interactive=True,
            )

            text_input = gr.Textbox(
                label="Text for generating",
                value="В недрах тундры выдры в г+етрах т+ырят в вёдра +ядра к+едров.",
                lines=5,
                interactive=True,
            )

            text_type_input = gr.Radio(
                label="Text type",
                choices=["Common", "SSML"],
                value="Common",
                interactive=True,
            )

        with gr.Column():
            audio_output = gr.Audio(label="Output audio")
            generate_btn = gr.Button(value="Generate", variant="primary")

    gr.Markdown(
        "This is a simple frontend for [silero](https://github.com/snakers4/silero-models) project (Text-To-Speech part only)."
    )
    gr.Markdown(
        "You can check [official docs](https://github.com/snakers4/silero-models/wiki/SSML) to find information about SSML syntax."
    )

    # Event wiring: the dropdowns cascade language -> model -> speaker, and
    # the button runs the synthesis.
    language_input.change(
        change_language,
        inputs=language_input,
        outputs=model_input,
    )
    model_input.change(
        change_model,
        inputs=[language_input, model_input],
        outputs=speaker_input,
    )
    generate_btn.click(
        generate_audio_by_text,
        inputs=[text_input, text_type_input, speaker_input],
        outputs=audio_output,
    )

demo.launch()
|