alysa committed on
Commit
86ea48f
1 Parent(s): adf1e25

Upload 5 files

Browse files
bert/ProsodyModel.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+
6
+ from transformers import BertModel, BertConfig, BertTokenizer
7
+
8
+
9
class CharEmbedding(nn.Module):
    """BERT encoder followed by a linear projection to 256-dim token features.

    The tokenizer and the BERT configuration are read from ``model_dir``.
    The encoder is built from that configuration with ``BertModel(config)``
    rather than ``from_pretrained``, so its weights are whatever the caller
    loads into this module afterwards.
    """

    def __init__(self, model_dir):
        """Create the tokenizer, encoder and projection layers.

        Args:
            model_dir: Directory passed to ``from_pretrained`` for both the
                tokenizer and the BERT configuration.
        """
        super().__init__()
        self.tokenizer = BertTokenizer.from_pretrained(model_dir)
        config = BertConfig.from_pretrained(model_dir)
        self.bert_config = config
        self.hidden_size = config.hidden_size
        # Sub-modules are registered in the order bert -> proj -> linear.
        self.bert = BertModel(config)
        self.proj = nn.Linear(self.hidden_size, 256)
        # Not used by forward(); kept so the parameter set stays the same.
        self.linear = nn.Linear(256, 3)

    def text2Token(self, text):
        """Tokenize ``text`` and return the matching list of vocabulary ids."""
        pieces = self.tokenizer.tokenize(text)
        return self.tokenizer.convert_tokens_to_ids(pieces)

    def forward(self, inputs_ids, inputs_masks, tokens_type_ids):
        """Encode a batch with BERT and project the sequence output.

        Args:
            inputs_ids: Token id tensor forwarded as ``input_ids``.
            inputs_masks: Tensor forwarded as ``attention_mask``.
            tokens_type_ids: Tensor forwarded as ``token_type_ids``.

        Returns:
            ``self.proj`` applied to element 0 of the BERT output
            (presumably the last hidden state -- confirm against the
            installed transformers version).
        """
        encoded = self.bert(
            input_ids=inputs_ids,
            attention_mask=inputs_masks,
            token_type_ids=tokens_type_ids,
        )
        return self.proj(encoded[0])
30
+
31
+
32
class TTSProsody(object):
    """Inference wrapper that turns text into per-token prosody embeddings.

    Builds a ``CharEmbedding`` model from ``path``, loads
    ``prosody_model.pt`` from the same directory into it, and offers helpers
    to embed a text and to repeat each token embedding once per phone.
    """

    def __init__(self, path, device):
        """Load the model and move it to ``device`` in eval mode.

        Args:
            path: Directory holding the tokenizer/config files and the
                ``prosody_model.pt`` checkpoint.
            device: Device the model and its inputs are placed on.
        """
        self.device = device
        self.char_model = CharEmbedding(path)
        # SECURITY NOTE: torch.load unpickles the checkpoint, which can run
        # arbitrary code. Only load checkpoints from a trusted source.
        state_dict = torch.load(
            os.path.join(path, 'prosody_model.pt'),
            map_location="cpu"
        )
        # strict=False tolerates missing/unexpected keys; any parameter that
        # is missing from the checkpoint keeps its freshly initialised value.
        self.char_model.load_state_dict(state_dict, strict=False)
        self.char_model.eval()
        self.char_model.to(self.device)

    def get_char_embeds(self, text):
        """Return the projected embedding of every token of ``text``.

        Args:
            text: Input string; it is tokenized by the model's tokenizer
                without adding any special tokens.

        Returns:
            A CPU tensor with one row per token (the batch dimension of the
            single-sentence batch is squeezed away).

        Raises:
            ValueError: If the text has more tokens than the model's
                ``max_position_embeddings``.
        """
        token_ids = self.char_model.text2Token(text)

        # Fail early with a readable message instead of an indexing error
        # deep inside the encoder (a device-side assert on CUDA).
        config = getattr(self.char_model, "bert_config", None)
        max_len = getattr(config, "max_position_embeddings", None)
        if max_len is not None and len(token_ids) > max_len:
            raise ValueError(
                "text has %d tokens, but the model supports at most %d"
                % (len(token_ids), max_len)
            )

        input_ids = torch.tensor(
            [token_ids], dtype=torch.long, device=self.device)
        input_masks = torch.ones_like(input_ids)   # attend to every token
        type_ids = torch.zeros_like(input_ids)     # single segment

        with torch.no_grad():
            char_embeds = self.char_model(input_ids, input_masks, type_ids)
        return char_embeds.squeeze(0).cpu()

    def expand_for_phone(self, char_embeds, length):  # length of phones for char
        """Repeat each row of ``char_embeds`` according to ``length``.

        Args:
            char_embeds: 2-D tensor with one row per token.
            length: Sequence of non-negative repeat counts, one per row
                (the number of phones belonging to that token).

        Returns:
            A NumPy array with ``sum(length)`` rows, where row ``i`` of the
            input appears ``length[i]`` times in order.

        Raises:
            ValueError: If ``length`` does not have one entry per row.
        """
        # Explicit check instead of ``assert``: asserts vanish under ``-O``
        # and ``zip`` below would then silently truncate.
        if char_embeds.size(0) != len(length):
            raise ValueError(
                "got %d embeddings but %d lengths"
                % (char_embeds.size(0), len(length))
            )
        if len(length) == 0:
            # torch.cat rejects an empty list; the input is already empty.
            return char_embeds.detach().cpu().numpy()

        expand_vecs = [
            vec.expand(leng, -1) for vec, leng in zip(char_embeds, length)
        ]
        expand_embeds = torch.cat(expand_vecs, 0)
        assert expand_embeds.size(0) == sum(length)  # internal invariant
        return expand_embeds.detach().cpu().numpy()
68
+
69
+
70
if __name__ == "__main__":
    # Interactive smoke test: read a line, embed it, and show the result
    # shape. The model directory is expected at ./bert/ relative to the
    # current working directory.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    prosody = TTSProsody('./bert/', device)
    while True:
        try:
            text = input("请输入文本:")
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C: leave the loop without a traceback.
            break
        char_embeds = prosody.get_char_embeds(text)
        # Print something observable; the embeddings were previously dropped.
        print(tuple(char_embeds.shape))
bert/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .ProsodyModel import TTSProsody
bert/config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_probs_dropout_prob": 0.1,
3
+ "directionality": "bidi",
4
+ "hidden_act": "gelu",
5
+ "hidden_dropout_prob": 0.1,
6
+ "hidden_size": 768,
7
+ "initializer_range": 0.02,
8
+ "intermediate_size": 3072,
9
+ "max_position_embeddings": 512,
10
+ "num_attention_heads": 12,
11
+ "num_hidden_layers": 12,
12
+ "pooler_fc_size": 768,
13
+ "pooler_num_attention_heads": 12,
14
+ "pooler_num_fc_layers": 3,
15
+ "pooler_size_per_head": 128,
16
+ "pooler_type": "first_token_transform",
17
+ "type_vocab_size": 2,
18
+ "vocab_size": 21128
19
+ }
bert/prosody_tool.py ADDED
@@ -0,0 +1,426 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def is_chinese(uchar):
    """Return True if ``uchar`` lies between U+4E00 and U+9FA5 inclusive.

    The check is a plain string comparison, so it is meant for a single
    character; longer strings are compared lexicographically as a whole.
    """
    return u'\u4e00' <= uchar <= u'\u9fa5'
6
+
7
+
8
# Lookup table from a toneless pinyin syllable to an (initial, final) pair.
# "^" is used as the initial of syllables that have no consonant initial
# (e.g. "a", "wu", "yi"). Finals look normalised (e.g. "ui" -> "uei",
# "iu" -> "iou", "v" where pinyin writes u-umlaut) -- presumably to match
# the phone set used downstream; confirm against the consumer of this table.
# Entries are grouped by leading letter; insertion order is significant.
pinyin_dict = {
    # a
    "a": ("^", "a"), "ai": ("^", "ai"), "an": ("^", "an"),
    "ang": ("^", "ang"), "ao": ("^", "ao"),
    # b
    "ba": ("b", "a"), "bai": ("b", "ai"), "ban": ("b", "an"),
    "bang": ("b", "ang"), "bao": ("b", "ao"), "be": ("b", "e"),
    "bei": ("b", "ei"), "ben": ("b", "en"), "beng": ("b", "eng"),
    "bi": ("b", "i"), "bian": ("b", "ian"), "biao": ("b", "iao"),
    "bie": ("b", "ie"), "bin": ("b", "in"), "bing": ("b", "ing"),
    "bo": ("b", "o"), "bu": ("b", "u"),
    # c / ch
    "ca": ("c", "a"), "cai": ("c", "ai"), "can": ("c", "an"),
    "cang": ("c", "ang"), "cao": ("c", "ao"), "ce": ("c", "e"),
    "cen": ("c", "en"), "ceng": ("c", "eng"),
    "cha": ("ch", "a"), "chai": ("ch", "ai"), "chan": ("ch", "an"),
    "chang": ("ch", "ang"), "chao": ("ch", "ao"), "che": ("ch", "e"),
    "chen": ("ch", "en"), "cheng": ("ch", "eng"), "chi": ("ch", "iii"),
    "chong": ("ch", "ong"), "chou": ("ch", "ou"), "chu": ("ch", "u"),
    "chua": ("ch", "ua"), "chuai": ("ch", "uai"), "chuan": ("ch", "uan"),
    "chuang": ("ch", "uang"), "chui": ("ch", "uei"), "chun": ("ch", "uen"),
    "chuo": ("ch", "uo"),
    "ci": ("c", "ii"), "cong": ("c", "ong"), "cou": ("c", "ou"),
    "cu": ("c", "u"), "cuan": ("c", "uan"), "cui": ("c", "uei"),
    "cun": ("c", "uen"), "cuo": ("c", "uo"),
    # d
    "da": ("d", "a"), "dai": ("d", "ai"), "dan": ("d", "an"),
    "dang": ("d", "ang"), "dao": ("d", "ao"), "de": ("d", "e"),
    "dei": ("d", "ei"), "den": ("d", "en"), "deng": ("d", "eng"),
    "di": ("d", "i"), "dia": ("d", "ia"), "dian": ("d", "ian"),
    "diao": ("d", "iao"), "die": ("d", "ie"), "ding": ("d", "ing"),
    "diu": ("d", "iou"), "dong": ("d", "ong"), "dou": ("d", "ou"),
    "du": ("d", "u"), "duan": ("d", "uan"), "dui": ("d", "uei"),
    "dun": ("d", "uen"), "duo": ("d", "uo"),
    # e (plus the syllabic "ng")
    "e": ("^", "e"), "ei": ("^", "ei"), "en": ("^", "en"),
    "ng": ("^", "en"), "eng": ("^", "eng"), "er": ("^", "er"),
    # f
    "fa": ("f", "a"), "fan": ("f", "an"), "fang": ("f", "ang"),
    "fei": ("f", "ei"), "fen": ("f", "en"), "feng": ("f", "eng"),
    "fo": ("f", "o"), "fou": ("f", "ou"), "fu": ("f", "u"),
    # g
    "ga": ("g", "a"), "gai": ("g", "ai"), "gan": ("g", "an"),
    "gang": ("g", "ang"), "gao": ("g", "ao"), "ge": ("g", "e"),
    "gei": ("g", "ei"), "gen": ("g", "en"), "geng": ("g", "eng"),
    "gong": ("g", "ong"), "gou": ("g", "ou"), "gu": ("g", "u"),
    "gua": ("g", "ua"), "guai": ("g", "uai"), "guan": ("g", "uan"),
    "guang": ("g", "uang"), "gui": ("g", "uei"), "gun": ("g", "uen"),
    "guo": ("g", "uo"),
    # h
    "ha": ("h", "a"), "hai": ("h", "ai"), "han": ("h", "an"),
    "hang": ("h", "ang"), "hao": ("h", "ao"), "he": ("h", "e"),
    "hei": ("h", "ei"), "hen": ("h", "en"), "heng": ("h", "eng"),
    "hong": ("h", "ong"), "hou": ("h", "ou"), "hu": ("h", "u"),
    "hua": ("h", "ua"), "huai": ("h", "uai"), "huan": ("h", "uan"),
    "huang": ("h", "uang"), "hui": ("h", "uei"), "hun": ("h", "uen"),
    "huo": ("h", "uo"),
    # j
    "ji": ("j", "i"), "jia": ("j", "ia"), "jian": ("j", "ian"),
    "jiang": ("j", "iang"), "jiao": ("j", "iao"), "jie": ("j", "ie"),
    "jin": ("j", "in"), "jing": ("j", "ing"), "jiong": ("j", "iong"),
    "jiu": ("j", "iou"), "ju": ("j", "v"), "juan": ("j", "van"),
    "jue": ("j", "ve"), "jun": ("j", "vn"),
    # k
    "ka": ("k", "a"), "kai": ("k", "ai"), "kan": ("k", "an"),
    "kang": ("k", "ang"), "kao": ("k", "ao"), "ke": ("k", "e"),
    "kei": ("k", "ei"), "ken": ("k", "en"), "keng": ("k", "eng"),
    "kong": ("k", "ong"), "kou": ("k", "ou"), "ku": ("k", "u"),
    "kua": ("k", "ua"), "kuai": ("k", "uai"), "kuan": ("k", "uan"),
    "kuang": ("k", "uang"), "kui": ("k", "uei"), "kun": ("k", "uen"),
    "kuo": ("k", "uo"),
    # l
    "la": ("l", "a"), "lai": ("l", "ai"), "lan": ("l", "an"),
    "lang": ("l", "ang"), "lao": ("l", "ao"), "le": ("l", "e"),
    "lei": ("l", "ei"), "leng": ("l", "eng"), "li": ("l", "i"),
    "lia": ("l", "ia"), "lian": ("l", "ian"), "liang": ("l", "iang"),
    "liao": ("l", "iao"), "lie": ("l", "ie"), "lin": ("l", "in"),
    "ling": ("l", "ing"), "liu": ("l", "iou"), "lo": ("l", "o"),
    "long": ("l", "ong"), "lou": ("l", "ou"), "lu": ("l", "u"),
    "lv": ("l", "v"), "luan": ("l", "uan"), "lve": ("l", "ve"),
    "lue": ("l", "ve"), "lun": ("l", "uen"), "luo": ("l", "uo"),
    # m
    "ma": ("m", "a"), "mai": ("m", "ai"), "man": ("m", "an"),
    "mang": ("m", "ang"), "mao": ("m", "ao"), "me": ("m", "e"),
    "mei": ("m", "ei"), "men": ("m", "en"), "meng": ("m", "eng"),
    "mi": ("m", "i"), "mian": ("m", "ian"), "miao": ("m", "iao"),
    "mie": ("m", "ie"), "min": ("m", "in"), "ming": ("m", "ing"),
    "miu": ("m", "iou"), "mo": ("m", "o"), "mou": ("m", "ou"),
    "mu": ("m", "u"),
    # n
    "na": ("n", "a"), "nai": ("n", "ai"), "nan": ("n", "an"),
    "nang": ("n", "ang"), "nao": ("n", "ao"), "ne": ("n", "e"),
    "nei": ("n", "ei"), "nen": ("n", "en"), "neng": ("n", "eng"),
    "ni": ("n", "i"), "nia": ("n", "ia"), "nian": ("n", "ian"),
    "niang": ("n", "iang"), "niao": ("n", "iao"), "nie": ("n", "ie"),
    "nin": ("n", "in"), "ning": ("n", "ing"), "niu": ("n", "iou"),
    "nong": ("n", "ong"), "nou": ("n", "ou"), "nu": ("n", "u"),
    "nv": ("n", "v"), "nuan": ("n", "uan"), "nve": ("n", "ve"),
    "nue": ("n", "ve"), "nuo": ("n", "uo"),
    # o
    "o": ("^", "o"), "ou": ("^", "ou"),
    # p
    "pa": ("p", "a"), "pai": ("p", "ai"), "pan": ("p", "an"),
    "pang": ("p", "ang"), "pao": ("p", "ao"), "pe": ("p", "e"),
    "pei": ("p", "ei"), "pen": ("p", "en"), "peng": ("p", "eng"),
    "pi": ("p", "i"), "pian": ("p", "ian"), "piao": ("p", "iao"),
    "pie": ("p", "ie"), "pin": ("p", "in"), "ping": ("p", "ing"),
    "po": ("p", "o"), "pou": ("p", "ou"), "pu": ("p", "u"),
    # q
    "qi": ("q", "i"), "qia": ("q", "ia"), "qian": ("q", "ian"),
    "qiang": ("q", "iang"), "qiao": ("q", "iao"), "qie": ("q", "ie"),
    "qin": ("q", "in"), "qing": ("q", "ing"), "qiong": ("q", "iong"),
    "qiu": ("q", "iou"), "qu": ("q", "v"), "quan": ("q", "van"),
    "que": ("q", "ve"), "qun": ("q", "vn"),
    # r
    "ran": ("r", "an"), "rang": ("r", "ang"), "rao": ("r", "ao"),
    "re": ("r", "e"), "ren": ("r", "en"), "reng": ("r", "eng"),
    "ri": ("r", "iii"), "rong": ("r", "ong"), "rou": ("r", "ou"),
    "ru": ("r", "u"), "rua": ("r", "ua"), "ruan": ("r", "uan"),
    "rui": ("r", "uei"), "run": ("r", "uen"), "ruo": ("r", "uo"),
    # s / sh
    "sa": ("s", "a"), "sai": ("s", "ai"), "san": ("s", "an"),
    "sang": ("s", "ang"), "sao": ("s", "ao"), "se": ("s", "e"),
    "sen": ("s", "en"), "seng": ("s", "eng"),
    "sha": ("sh", "a"), "shai": ("sh", "ai"), "shan": ("sh", "an"),
    "shang": ("sh", "ang"), "shao": ("sh", "ao"), "she": ("sh", "e"),
    "shei": ("sh", "ei"), "shen": ("sh", "en"), "sheng": ("sh", "eng"),
    "shi": ("sh", "iii"), "shou": ("sh", "ou"), "shu": ("sh", "u"),
    "shua": ("sh", "ua"), "shuai": ("sh", "uai"), "shuan": ("sh", "uan"),
    "shuang": ("sh", "uang"), "shui": ("sh", "uei"), "shun": ("sh", "uen"),
    "shuo": ("sh", "uo"),
    "si": ("s", "ii"), "song": ("s", "ong"), "sou": ("s", "ou"),
    "su": ("s", "u"), "suan": ("s", "uan"), "sui": ("s", "uei"),
    "sun": ("s", "uen"), "suo": ("s", "uo"),
    # t
    "ta": ("t", "a"), "tai": ("t", "ai"), "tan": ("t", "an"),
    "tang": ("t", "ang"), "tao": ("t", "ao"), "te": ("t", "e"),
    "tei": ("t", "ei"), "teng": ("t", "eng"), "ti": ("t", "i"),
    "tian": ("t", "ian"), "tiao": ("t", "iao"), "tie": ("t", "ie"),
    "ting": ("t", "ing"), "tong": ("t", "ong"), "tou": ("t", "ou"),
    "tu": ("t", "u"), "tuan": ("t", "uan"), "tui": ("t", "uei"),
    "tun": ("t", "uen"), "tuo": ("t", "uo"),
    # w
    "wa": ("^", "ua"), "wai": ("^", "uai"), "wan": ("^", "uan"),
    "wang": ("^", "uang"), "wei": ("^", "uei"), "wen": ("^", "uen"),
    "weng": ("^", "ueng"), "wo": ("^", "uo"), "wu": ("^", "u"),
    # x
    "xi": ("x", "i"), "xia": ("x", "ia"), "xian": ("x", "ian"),
    "xiang": ("x", "iang"), "xiao": ("x", "iao"), "xie": ("x", "ie"),
    "xin": ("x", "in"), "xing": ("x", "ing"), "xiong": ("x", "iong"),
    "xiu": ("x", "iou"), "xu": ("x", "v"), "xuan": ("x", "van"),
    "xue": ("x", "ve"), "xun": ("x", "vn"),
    # y
    "ya": ("^", "ia"), "yan": ("^", "ian"), "yang": ("^", "iang"),
    "yao": ("^", "iao"), "ye": ("^", "ie"), "yi": ("^", "i"),
    "yin": ("^", "in"), "ying": ("^", "ing"), "yo": ("^", "iou"),
    "yong": ("^", "iong"), "you": ("^", "iou"), "yu": ("^", "v"),
    "yuan": ("^", "van"), "yue": ("^", "ve"), "yun": ("^", "vn"),
    # z / zh
    "za": ("z", "a"), "zai": ("z", "ai"), "zan": ("z", "an"),
    "zang": ("z", "ang"), "zao": ("z", "ao"), "ze": ("z", "e"),
    "zei": ("z", "ei"), "zen": ("z", "en"), "zeng": ("z", "eng"),
    "zha": ("zh", "a"), "zhai": ("zh", "ai"), "zhan": ("zh", "an"),
    "zhang": ("zh", "ang"), "zhao": ("zh", "ao"), "zhe": ("zh", "e"),
    "zhei": ("zh", "ei"), "zhen": ("zh", "en"), "zheng": ("zh", "eng"),
    "zhi": ("zh", "iii"), "zhong": ("zh", "ong"), "zhou": ("zh", "ou"),
    "zhu": ("zh", "u"), "zhua": ("zh", "ua"), "zhuai": ("zh", "uai"),
    "zhuan": ("zh", "uan"), "zhuang": ("zh", "uang"), "zhui": ("zh", "uei"),
    "zhun": ("zh", "uen"), "zhuo": ("zh", "uo"),
    "zi": ("z", "ii"), "zong": ("z", "ong"), "zou": ("z", "ou"),
    "zu": ("z", "u"), "zuan": ("z", "uan"), "zui": ("z", "uei"),
    "zun": ("z", "uen"), "zuo": ("z", "uo"),
}
bert/vocab.txt ADDED
The diff for this file is too large to render. See raw diff