wetdog committed on
Commit
158b03a
1 Parent(s): bbb1375

add 10 timesteps model

Browse files
infer_onnx.py CHANGED
@@ -9,6 +9,8 @@ import soundfile as sf
9
  import tempfile
10
  import yaml
11
 
 
 
12
  def intersperse(lst, item):
13
  result = [item] * (len(lst) * 2 + 1)
14
  result[1::2] = lst
@@ -27,7 +29,7 @@ def process_text(i: int, text: str, device: torch.device):
27
  print(x_phones)
28
  return x.numpy(), x_lengths.numpy()
29
 
30
- MODEL_PATH_MATCHA_MEL="matcha_multispeaker_cat_opset_15.onnx"
31
  MODEL_PATH_MATCHA="matcha_hifigan_multispeaker_cat.onnx"
32
  MODEL_PATH_VOCOS="mel_spec_22khz.onnx"
33
  CONFIG_PATH="config_22khz.yaml"
@@ -101,10 +103,17 @@ def tts(text:str, spk_id:int):
101
  "scales": np.array([0.667, 1.0], dtype=np.float32),
102
  "spks": sid
103
  }
104
-
 
105
  mel, mel_lengths = model_matcha_mel.run(None, inputs)
 
 
 
 
106
  # vocos inference
107
  wavs_vocos = vocos_inference(mel)
 
 
108
 
109
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp_matcha_vocos:
110
  sf.write(fp_matcha_vocos.name, wavs_vocos.squeeze(0), 22050, "PCM_24")
@@ -117,8 +126,12 @@ def tts(text:str, spk_id:int):
117
  "scales": np.array([0.667, 1.0], dtype=np.float32),
118
  "spks": sid
119
  }
 
 
120
  wavs, wav_lengths = model_matcha.run(None, inputs)
121
-
 
 
122
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp_matcha:
123
  sf.write(fp_matcha.name, wavs.squeeze(0), 22050, "PCM_24")
124
 
@@ -165,7 +178,7 @@ vits2_inference = gr.Interface(
165
  ),
166
  ],
167
  outputs=[gr.Audio(label="Matcha vocos", interactive=False, type="filepath"),
168
- gr.Audio(label="Matcha", interactive=False, type="filepath")]
169
  )
170
 
171
  demo = gr.Blocks()
 
9
  import tempfile
10
  import yaml
11
 
12
+ from time import perf_counter
13
+
14
  def intersperse(lst, item):
15
  result = [item] * (len(lst) * 2 + 1)
16
  result[1::2] = lst
 
29
  print(x_phones)
30
  return x.numpy(), x_lengths.numpy()
31
 
32
+ MODEL_PATH_MATCHA_MEL="matcha_multispeaker_cat_opset_15_10_steps.onnx"
33
  MODEL_PATH_MATCHA="matcha_hifigan_multispeaker_cat.onnx"
34
  MODEL_PATH_VOCOS="mel_spec_22khz.onnx"
35
  CONFIG_PATH="config_22khz.yaml"
 
103
  "scales": np.array([0.667, 1.0], dtype=np.float32),
104
  "spks": sid
105
  }
106
+ mel_t0 = perf_counter()
107
+ # matcha mel inference
108
  mel, mel_lengths = model_matcha_mel.run(None, inputs)
109
+ mel_infer_secs = perf_counter() - mel_t0
110
+ print("Matcha Mel inference time", mel_infer_secs)
111
+
112
+ vocos_t0 = perf_counter()
113
  # vocos inference
114
  wavs_vocos = vocos_inference(mel)
115
+ vocos_infer_secs = perf_counter() - vocos_t0
116
+ print("Vocos inference time", vocos_infer_secs)
117
 
118
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp_matcha_vocos:
119
  sf.write(fp_matcha_vocos.name, wavs_vocos.squeeze(0), 22050, "PCM_24")
 
126
  "scales": np.array([0.667, 1.0], dtype=np.float32),
127
  "spks": sid
128
  }
129
+ hifigan_t0 = perf_counter()
130
+ # matcha hifigan inference
131
  wavs, wav_lengths = model_matcha.run(None, inputs)
132
+ hifigan_infer_secs = perf_counter() - hifigan_t0
133
+ print("Matcha + Hifigan",hifigan_infer_secs)
134
+
135
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp_matcha:
136
  sf.write(fp_matcha.name, wavs.squeeze(0), 22050, "PCM_24")
137
 
 
178
  ),
179
  ],
180
  outputs=[gr.Audio(label="Matcha vocos", interactive=False, type="filepath"),
181
+ gr.Audio(label="Matcha hifigan", interactive=False, type="filepath")]
182
  )
183
 
184
  demo = gr.Blocks()
matcha_multispeaker_cat_opset_15_10_steps.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7ab5fb2e8d590d5cb610912d3c4e6480b32322cc4fa4bedf94eb0f8b8ce7570
3
+ size 86049399