Text-to-Speech
English
ButterCream committed on
Commit
9a85254
1 Parent(s): 233db79

Add model checkpoints and config

Browse files
Model/Previous 3/epoch_2nd_00009.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f478a56a3a0da974bc77ca4e9c71d970f411ab9dc00212c2b2a8931a6098c213
3
+ size 2040174838
Model/Previous 3/epoch_2nd_00010.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4a382523b356c123a03bab7f0c42faac45d243f346e870c011ba89653603036
3
+ size 2040174838
Model/Previous 3/epoch_2nd_00011.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:616fa861043aedfa49373409f8ddba6c8f0e1bd448f07d3028a5d539fb096eef
3
+ size 2040174838
Model/config.yml ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ASR_config: Utils/ASR/config.yml
2
+ ASR_path: Utils/ASR/epoch_00080.pth
3
+ F0_path: Utils/JDC/bst.t7
4
+ PLBERT_dir: Utils/PLBERT/
5
+ batch_size: 2
6
+ data_params:
7
+ OOD_data: Data/OOD_texts.txt
8
+ logger: wandb
9
+ min_length: 50
10
+ root_path: Data
11
+ train_data: Data/Train_list.txt
12
+ val_data: Data/Val_list.txt
13
+ device: cuda
14
+ epochs: 50
15
+ load_only_params: true
16
+ log_dir: Models/LJSpeech
17
+ log_interval: 10
18
+ loss_params:
19
+ diff_epoch: 10
20
+ joint_epoch: 30
21
+ lambda_F0: 1
22
+ lambda_ce: 20
23
+ lambda_diff: 1
24
+ lambda_dur: 1
25
+ lambda_gen: 1
26
+ lambda_mel: 5
27
+ lambda_mono: 1
28
+ lambda_norm: 1
29
+ lambda_s2s: 1
30
+ lambda_slm: 1
31
+ lambda_sty: 1
32
+ max_len: 200
33
+ model_params:
34
+ decoder:
35
+ resblock_dilation_sizes:
36
+ - - 1
37
+ - 3
38
+ - 5
39
+ - - 1
40
+ - 3
41
+ - 5
42
+ - - 1
43
+ - 3
44
+ - 5
45
+ resblock_kernel_sizes:
46
+ - 3
47
+ - 7
48
+ - 11
49
+ type: hifigan
50
+ upsample_initial_channel: 512
51
+ upsample_kernel_sizes:
52
+ - 20
53
+ - 10
54
+ - 6
55
+ - 4
56
+ upsample_rates:
57
+ - 10
58
+ - 5
59
+ - 3
60
+ - 2
61
+ diffusion:
62
+ dist:
63
+ estimate_sigma_data: true
64
+ mean: -3
65
+ sigma_data: .nan
66
+ std: 1
67
+ embedding_mask_proba: 0.1
68
+ transformer:
69
+ head_features: 64
70
+ multiplier: 2
71
+ num_heads: 8
72
+ num_layers: 3
73
+ dim_in: 64
74
+ dropout: 0.2
75
+ hidden_dim: 512
76
+ max_conv_dim: 512
77
+ max_dur: 50
78
+ multispeaker: true
79
+ n_layer: 3
80
+ n_mels: 80
81
+ n_token: 178
82
+ slm:
83
+ hidden: 768
84
+ initial_channel: 64
85
+ model: microsoft/wavlm-base-plus
86
+ nlayers: 13
87
+ sr: 16000
88
+ style_dim: 128
89
+ optimizer_params:
90
+ bert_lr: 0.00001
91
+ ft_lr: 0.0001
92
+ lr: 0.0001
93
+ preprocess_params:
94
+ spect_params:
95
+ hop_length: 300
96
+ n_fft: 2048
97
+ win_length: 1200
98
+ sr: 24000
99
+ pretrained_model: Models/LibriTTS/epochs_2nd_00020.pth
100
+ save_freq: 1
101
+ second_stage_load_pretrained: true
102
+ slmadv_params:
103
+ batch_percentage: 0.5
104
+ iter: 10
105
+ max_len: 500
106
+ min_len: 400
107
+ scale: 0.01
108
+ sig: 1.5
109
+ thresh: 5
Model/epoch_2nd_00012.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1971e879335b5715986ff71b168696d41c8985788a5dc1cd34b14bb842e4ccc3
3
+ size 2040174838