NeuroSenko committed on
Commit
2f796d3
1 Parent(s): 8810ba5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -0
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pprint import pprint
2
+
3
+ import gradio as gr
4
+ import torch
5
+
6
+ # from IPython.display import Audio, display
7
+ from omegaconf import OmegaConf
8
+
9
# Fetch the current Silero model catalogue. The UI reads the available
# languages and model ids from its `tts_models` section.
torch.hub.download_url_to_file(
    "https://raw.githubusercontent.com/snakers4/silero-models/master/models.yml",
    "latest_silero_models.yml",
    progress=False,
)

all_models = OmegaConf.load("latest_silero_models.yml")

# Defaults for the initial model load. `language` must be defined before it
# is used below; it was previously referenced without ever being assigned,
# which raised NameError at start-up.
language = "ru"
model_id = "v3_1_ru"
device = torch.device("cpu")

# Synthesis settings applied to every request.
sample_rate = 48000
speaker = "aidar"
put_accent = True
put_yo = True

# Model ids the catalogue lists for the default language.
models = list(all_models.tts_models.get(language).keys())

# Load the default model exactly once (it used to be loaded twice, the
# second load discarding the instance that had been moved to `device`).
# `example_text` is the second value returned by the hub entry point.
model, example_text = torch.hub.load(
    repo_or_dir="snakers4/silero-models",
    model="silero_tts",
    language=language,
    speaker=model_id,
)
model.to(device)  # gpu or cpu
42
+
43
def change_language(language):
    """Refresh the model dropdown after the language selection changes.

    Args:
        language: Language key chosen in the UI; looked up in
            ``all_models.tts_models``.

    Returns:
        A Gradio update for the model dropdown carrying the model ids of
        ``language`` as choices and the first of them as the selected value
        (``None`` when the language lists no models).
    """
    models = list(all_models.tts_models.get(language).keys())
    # Select a model explicitly: leaving the old value in place would keep
    # the dropdown pointing at a model id from the previous language.
    # NOTE(review): the value change is expected to fire the model
    # dropdown's `change` listener and thereby reload the model — confirm
    # against the installed Gradio version.
    selected = models[0] if models else None
    return gr.update(choices=models, value=selected)
46
+
47
def change_model(language, model_name):
    """Load the selected model and refresh the speaker dropdown.

    Args:
        language: Language key currently selected in the UI.
        model_name: Model id to load for that language.

    Returns:
        A Gradio update for the speaker dropdown carrying the speakers of
        the newly loaded model as choices and the first of them as the
        selected value (``None`` when the model reports no speakers).
    """
    # Rebind the module-level model: generate_audio_by_text() synthesizes
    # with it, so assigning to a local would leave the previously loaded
    # model in use no matter what is selected here.
    global model
    model, _ = torch.hub.load(
        repo_or_dir='snakers4/silero-models',
        model='silero_tts',
        language=language,
        speaker=model_name,
    )
    model.to(device)

    speakers = list(model.speakers)
    # Reset the selection too: the speaker chosen for the previous model is
    # not guaranteed to exist in this one.
    selected = speakers[0] if speakers else None
    return gr.update(choices=speakers, value=selected)
56
+
57
+
58
def generate_audio_by_text(text, text_type, speaker):
    """Synthesize speech for ``text`` with the currently loaded model.

    Args:
        text: The input to voice, either plain text or SSML markup.
        text_type: ``'SSML'`` to pass ``text`` as SSML; any other value
            passes it as plain text.
        speaker: Name of the voice to synthesize with.

    Returns:
        Whatever ``model.save_wav`` returns; the UI feeds it to the audio
        output (presumably a path to the written WAV file — confirm against
        the Silero API).
    """
    # The only difference between the two modes is the keyword under which
    # the input is handed to the model.
    text_keyword = 'ssml_text' if text_type == 'SSML' else 'text'
    synthesis_options = {
        text_keyword: text,
        'speaker': speaker,
        'sample_rate': sample_rate,
        'put_accent': put_accent,
        'put_yo': put_yo,
    }
    return model.save_wav(**synthesis_options)
75
+
76
# Page definition: one row holding the synthesis controls (first column) and
# the result (second column), followed by two informational notes. Event
# handlers are wired at the end, once every component exists.
with gr.Blocks() as demo:
    with gr.Row():
        # First column: what to synthesize and with which voice.
        with gr.Column():
            language_input = gr.Dropdown(
                choices=list(all_models.tts_models.keys()),
                value="ru",
                label="Language",
                interactive=True,
            )

            model_input = gr.Dropdown(
                choices=models,
                value="v3_1_ru",
                label="Model (based on selected language)",
                interactive=True,
            )

            speaker_input = gr.Dropdown(
                choices=model.speakers,
                value="kseniya",
                label="Speaker (based on selected model)",
                interactive=True,
            )

            text_input = gr.Textbox(
                value="В недрах тундры выдры в г+етрах т+ырят в вёдра +ядра к+едров.",
                label="Text for generating",
                lines=5,
                interactive=True,
            )

            text_type_input = gr.Radio(
                choices=["Common", "SSML"],
                value="Common",
                label="Text type",
                interactive=True,
            )

        # Second column: the generated audio and the button that produces it.
        with gr.Column():
            audio_output = gr.Audio(label="Output audio")
            generate_btn = gr.Button(value="Generate", variant="primary")

    gr.Markdown(
        "This is a simple frontend for [silero](https://github.com/snakers4/silero-models) project (Text-To-Speech part only)."
    )
    gr.Markdown(
        "You can check [official docs](https://github.com/snakers4/silero-models/wiki/SSML) to find information about SSML syntax."
    )

    # Cascading dropdowns: a new language repopulates the model list, a new
    # model repopulates the speaker list.
    language_input.change(
        change_language,
        inputs=language_input,
        outputs=model_input,
    )
    model_input.change(
        change_model,
        inputs=[language_input, model_input],
        outputs=speaker_input,
    )

    # Synthesis itself runs only on an explicit button press.
    generate_btn.click(
        generate_audio_by_text,
        inputs=[text_input, text_type_input, speaker_input],
        outputs=audio_output,
    )

demo.launch()