ntt123 committed on
Commit
e89bb30
1 Parent(s): 9a2eb2d
Files changed (1) hide show
  1. app.py +19 -19
app.py CHANGED
@@ -1,45 +1,45 @@
1
- from pathlib import Path
2
  from vietTTS.hifigan.mel2wave import mel2wave
3
  from vietTTS.nat.text2mel import text2mel
4
  from vietTTS import nat_normalize_text
5
  import numpy as np
6
  import gradio as gr
 
7
 
8
 
9
- import os
10
# Fetch the model checkpoints and text resources the demo reads at request
# time. Each command is an argument list (no shell involved) and a non-zero
# exit status raises subprocess.CalledProcessError, so a failed download stops
# start-up instead of surfacing later as a missing-file error.
import subprocess

for _command in (
    ["gdown", "--id", "16UhN8QBxG1YYwUh8smdEeVnKo9qZhvZj", "-O", "duration_latest_ckpt.pickle"],
    ["gdown", "--id", "1-8Ig65S3irNHSzcskT37SLgeyuUhjKdj", "-O", "acoustic_latest_ckpt.pickle"],
    ["gdown", "--id", "19cRNDC6IrHFAAE4U9I7K0mzLMgPsi5zb", "-O", "hk_hifi.pickle"],
    # An explicit -O makes wget overwrite the target; without it a re-run
    # writes "config.json.1" / "lexicon.txt.1" and the stale file stays in use.
    [
        "wget",
        "-O",
        "config.json",
        "https://raw.githubusercontent.com/NTT123/vietTTS/master/assets/hifigan/config.json",
    ],
    [
        "wget",
        "-O",
        "lexicon.txt",
        "https://raw.githubusercontent.com/NTT123/vietTTS/master/assets/infore/lexicon.txt",
    ],
):
    subprocess.run(_command, check=True)
15
 
16
  def text_to_speech(text):
17
  text = nat_normalize_text(text)
18
  mel = text2mel(
19
- text,
20
- "lexicon.txt",
21
- 0.2,
22
- Path("acoustic_latest_ckpt.pickle"),
23
- Path("duration_latest_ckpt.pickle")
24
  )
25
- wave = mel2wave(mel, Path("config.json"), Path("hk_hifi.pickle"))
26
  return (wave * (2**15)).astype(np.int16)
27
- import gradio as gr
28
 
29
 
30
  def speak(text):
31
  y = text_to_speech(text)
32
  return 16_000, y
33
 
 
 
 
34
  title = "vietTTS"
35
  description = "A vietnamese text-to-speech demo."
36
 
37
- iface = gr.Interface(
38
  fn=speak,
39
  inputs="text",
40
  outputs="audio",
41
  title = title,
42
- description=description
43
- )
44
-
45
- iface.launch()
 
 
1
  from vietTTS.hifigan.mel2wave import mel2wave
2
  from vietTTS.nat.text2mel import text2mel
3
  from vietTTS import nat_normalize_text
4
  import numpy as np
5
  import gradio as gr
6
+ import os
7
 
8
 
9
def download_assets():
    """Download the model checkpoints and text resources the demo needs.

    Three checkpoint pickles are fetched from Google Drive with the ``gdown``
    command-line tool, and ``config.json`` / ``lexicon.txt`` are fetched from
    the vietTTS GitHub repository with ``wget``. Every file is written to the
    current working directory under the name ``text_to_speech`` reads.

    Raises:
        FileNotFoundError: If ``gdown`` or ``wget`` is not installed.
        subprocess.CalledProcessError: If a download exits with a non-zero
            status.
    """
    # Imported locally so the function carries its own dependency.
    import subprocess

    drive_files = (
        ("16UhN8QBxG1YYwUh8smdEeVnKo9qZhvZj", "duration_latest_ckpt.pickle"),
        ("1-8Ig65S3irNHSzcskT37SLgeyuUhjKdj", "acoustic_latest_ckpt.pickle"),
        ("19cRNDC6IrHFAAE4U9I7K0mzLMgPsi5zb", "hk_hifi.pickle"),
    )
    repo_files = (
        (
            "https://raw.githubusercontent.com/NTT123/vietTTS/master/assets/hifigan/config.json",
            "config.json",
        ),
        (
            "https://raw.githubusercontent.com/NTT123/vietTTS/master/assets/infore/lexicon.txt",
            "lexicon.txt",
        ),
    )

    commands = [
        ["gdown", "--id", file_id, "-O", filename]
        for file_id, filename in drive_files
    ]
    # An explicit -O makes wget overwrite the target; without it a re-run
    # writes "config.json.1" / "lexicon.txt.1" and the stale file stays in use.
    commands += [["wget", "-O", filename, url] for url, filename in repo_files]

    for command in commands:
        # Argument lists avoid the shell; check=True turns a failed download
        # into an error instead of silently continuing without the asset.
        subprocess.run(command, check=True)
15
 
16
  def text_to_speech(text):
17
  text = nat_normalize_text(text)
18
  mel = text2mel(
19
+ text,
20
+ "lexicon.txt",
21
+ 0.2,
22
+ "acoustic_latest_ckpt.pickle",
23
+ "duration_latest_ckpt.pickle"
24
  )
25
+ wave = mel2wave(mel, "config.json", "hk_hifi.pickle")
26
  return (wave * (2**15)).astype(np.int16)
 
27
 
28
 
29
  def speak(text):
30
  y = text_to_speech(text)
31
  return 16_000, y
32
 
33
+
34
# Fetch the checkpoints and resource files before the interface is built.
download_assets()

# Text shown on the demo page.
title = "vietTTS"
description = "A vietnamese text-to-speech demo."

# Text-in / audio-out interface around ``speak``; launched immediately with
# debug mode enabled.
gr.Interface(
    fn=speak, inputs="text", outputs="audio", title=title, description=description
).launch(debug=True)