Spaces:

ntt123
/

vietTTS

Running

ntt123 committed on Jan 3, 2022

Commit

e89bb30

•

1 Parent(s): 9a2eb2d

Clean up

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,45 +1,45 @@
-from pathlib import Path
 from vietTTS.hifigan.mel2wave import mel2wave
 from vietTTS.nat.text2mel import text2mel
 from vietTTS import nat_normalize_text
 import numpy as np
 import gradio as gr
-import os
-os.system("gdown --id 16UhN8QBxG1YYwUh8smdEeVnKo9qZhvZj -O duration_latest_ckpt.pickle")
-os.system("gdown --id 1-8Ig65S3irNHSzcskT37SLgeyuUhjKdj -O acoustic_latest_ckpt.pickle")
-os.system("gdown --id 19cRNDC6IrHFAAE4U9I7K0mzLMgPsi5zb -O hk_hifi.pickle")
-os.system("wget https://raw.githubusercontent.com/NTT123/vietTTS/master/assets/hifigan/config.json")
-os.system("wget https://raw.githubusercontent.com/NTT123/vietTTS/master/assets/infore/lexicon.txt")
 def text_to_speech(text):
     text = nat_normalize_text(text)
     mel = text2mel(
-      text,
-      "lexicon.txt",
-      0.2,
-      Path("acoustic_latest_ckpt.pickle"),
-      Path("duration_latest_ckpt.pickle")
     )
-    wave = mel2wave(mel, Path("config.json"), Path("hk_hifi.pickle"))
     return (wave * (2**15)).astype(np.int16)
-    import gradio as gr
 def speak(text):
     y = text_to_speech(text)
     return 16_000, y
 title = "vietTTS"
 description = "A vietnamese text-to-speech demo."
-iface = gr.Interface(
     fn=speak,
     inputs="text",
     outputs="audio",
     title = title,
-    description=description
-)
-iface.launch()

 from vietTTS.hifigan.mel2wave import mel2wave
 from vietTTS.nat.text2mel import text2mel
 from vietTTS import nat_normalize_text
 import numpy as np
 import gradio as gr
+import os
+def download_assets():
+    os.system("gdown --id 16UhN8QBxG1YYwUh8smdEeVnKo9qZhvZj -O duration_latest_ckpt.pickle")
+    os.system("gdown --id 1-8Ig65S3irNHSzcskT37SLgeyuUhjKdj -O acoustic_latest_ckpt.pickle")
+    os.system("gdown --id 19cRNDC6IrHFAAE4U9I7K0mzLMgPsi5zb -O hk_hifi.pickle")
+    os.system("wget https://raw.githubusercontent.com/NTT123/vietTTS/master/assets/hifigan/config.json")
+    os.system("wget https://raw.githubusercontent.com/NTT123/vietTTS/master/assets/infore/lexicon.txt")
 def text_to_speech(text):
     text = nat_normalize_text(text)
     mel = text2mel(
+        text,
+        "lexicon.txt",
+        0.2,
+        "acoustic_latest_ckpt.pickle",
+        "duration_latest_ckpt.pickle"
     )
+    wave = mel2wave(mel, "config.json", "hk_hifi.pickle")
     return (wave * (2**15)).astype(np.int16)
 def speak(text):
     y = text_to_speech(text)
     return 16_000, y
+download_assets()
 title = "vietTTS"
 description = "A vietnamese text-to-speech demo."
+gr.Interface(
     fn=speak,
     inputs="text",
     outputs="audio",
     title = title,
+    description=description,
+).launch(debug=True)