mrfakename committed on
Commit
e6f3c81
1 Parent(s): 9fe7d60

Allow custom steps on beta synthesis

Browse files
Files changed (1) hide show
  1. app.py +8 -3
app.py CHANGED
@@ -28,15 +28,19 @@ def synthesize(text, voice):
28
  raise gr.Error("Text must be under 300 characters")
29
  v = voice.lower()
30
  return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
31
- def longsynthesize(text, voice, password, progress=gr.Progress()):
32
  if password == os.environ['ACCESS_CODE']:
33
  if text.strip() == "":
34
  raise gr.Error("You must enter some text")
 
 
 
 
35
  texts = split_and_recombine_text(text)
36
  v = voice.lower()
37
  audios = []
38
  for t in progress.tqdm(texts):
39
- audios.append(styletts2importable.inference(t, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
40
  return (24000, np.concatenate(audios))
41
  else:
42
  raise gr.Error('Wrong access code')
@@ -81,11 +85,12 @@ with gr.Blocks() as longText:
81
  with gr.Column(scale=1):
82
  lnginp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
83
  lngvoice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-1', interactive=True)
 
84
  lngpwd = gr.Textbox(label="Access code", info="This feature is in beta. You need an access code to use it as it uses more resources and we would like to prevent abuse")
85
  with gr.Column(scale=1):
86
  lngbtn = gr.Button("Synthesize", variant="primary")
87
  lngaudio = gr.Audio(interactive=False, label="Synthesized Audio")
88
- lngbtn.click(longsynthesize, inputs=[lnginp, lngvoice, lngpwd], outputs=[lngaudio], concurrency_limit=4)
89
  with gr.Blocks() as lj:
90
  with gr.Row():
91
  with gr.Column(scale=1):
 
28
  raise gr.Error("Text must be under 300 characters")
29
  v = voice.lower()
30
  return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
31
+ def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
32
  if password == os.environ['ACCESS_CODE']:
33
  if text.strip() == "":
34
  raise gr.Error("You must enter some text")
35
+ if lngsteps > 25:
36
+ raise gr.Error("Max 25 steps")
37
+ if lngsteps < 5:
38
+ raise gr.Error("Min 5 steps")
39
  texts = split_and_recombine_text(text)
40
  v = voice.lower()
41
  audios = []
42
  for t in progress.tqdm(texts):
43
+ audios.append(styletts2importable.inference(t, voices[v], alpha=0.3, beta=0.7, diffusion_steps=lngsteps, embedding_scale=1))
44
  return (24000, np.concatenate(audios))
45
  else:
46
  raise gr.Error('Wrong access code')
 
85
  with gr.Column(scale=1):
86
  lnginp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
87
  lngvoice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-1', interactive=True)
88
+ lngsteps = gr.Slider(minimum=5, maximum=25, value=10, step=1, label="Diffusion Steps", info="Higher = better quality, but longer", interactive=True)
89
  lngpwd = gr.Textbox(label="Access code", info="This feature is in beta. You need an access code to use it as it uses more resources and we would like to prevent abuse")
90
  with gr.Column(scale=1):
91
  lngbtn = gr.Button("Synthesize", variant="primary")
92
  lngaudio = gr.Audio(interactive=False, label="Synthesized Audio")
93
+ lngbtn.click(longsynthesize, inputs=[lnginp, lngvoice, lngsteps, lngpwd], outputs=[lngaudio], concurrency_limit=4)
94
  with gr.Blocks() as lj:
95
  with gr.Row():
96
  with gr.Column(scale=1):