ViXuan committed on
Commit
3e0f6bf
1 Parent(s): 8eb21eb

Add application files

.gitignore ADDED
@@ -0,0 +1,2 @@
+ venv
+ .vscode
app.py ADDED
@@ -0,0 +1,812 @@
+ import psutil
+ from transformers import (
+     AutoConfig,
+     MT5Config,  # needed by OnnxT5 below when wrapping an mT5 checkpoint
+     T5ForConditionalGeneration,
+     MT5ForConditionalGeneration,
+ )
+ import torch
+ import time
+ import gradio as gr
+ from transformers import AutoTokenizer
+ import onnxruntime as ort
+ from transformers.modeling_outputs import (
+     Seq2SeqLMOutput,
+     BaseModelOutput,
+ )
+ import os
+ from pathlib import Path
+ from progress.bar import Bar
+ import operator
+ import functools
+ from onnxruntime import (
+     GraphOptimizationLevel,
+     InferenceSession,
+     SessionOptions,
+     ExecutionMode,
+ )
+ _auth_token = None
+
+
+ def set_auth_token(token):
+     """Set the token which allows the user to authenticate to huggingface.co for downloading private models
+
+     Args:
+         token (Union[str, bool]): The token value to store. One of:
+             - an API key (from https://huggingface.co/organizations/ORGNAME/settings/token),
+             - a login token obtained by running `$ transformers-cli login`
+             - `True`, which tells transformers to use the login token stored in ~/.huggingface/token
+
+     Returns:
+         None
+     """
+     global _auth_token
+     _auth_token = token
+
+
+ def get_auth_token():
+     """Get the user-configurable auth token, which defaults to None
+
+     Returns:
+         auth_token (Optional[Union[str, bool]]) for authenticating with huggingface.co
+     """
+     global _auth_token
+     return _auth_token
+
+
+ os.environ["OMP_NUM_THREADS"] = str(psutil.cpu_count(logical=True))
+ os.environ["OMP_WAIT_POLICY"] = "ACTIVE"
+
+
+ def get_onnx_runtime_sessions(
+     model_paths,
+     default: bool = True,
+     opt_level: int = 99,
+     parallel_exe_mode: bool = True,
+     n_threads: int = 0,
+     provider=[
+         "CPUExecutionProvider",
+     ],
+ ):
+     """
+     Creates the ONNX Runtime InferenceSessions for the encoder, decoder and initial decoder.
+
+     Args:
+         model_paths (List or Tuple of str) : the path to, in order:
+             path_to_encoder (str) : the path of input onnx encoder model.
+             path_to_decoder (str) : the path of input onnx decoder model.
+             path_to_initial_decoder (str) : the path of input initial onnx decoder model.
+         default : if True, ort will choose the best settings for your hardware
+                   (you can test out different settings for better results).
+         opt_level (int) : sess_options.GraphOptimizationLevel; 1 uses 'ORT_ENABLE_BASIC',
+                           2 'ORT_ENABLE_EXTENDED' and 99 'ORT_ENABLE_ALL';
+                           default value is 99.
+         parallel_exe_mode (bool) : Sets the execution mode. Default is True (parallel).
+         n_threads (int) : Sets the number of threads used to parallelize the execution within nodes. Default is 0 to let onnxruntime choose.
+         provider : execution providers list.
+
+     Returns:
+         encoder_session : encoder onnx InferenceSession
+         decoder_session : decoder onnx InferenceSession
+         decoder_sess_init : initial decoder onnx InferenceSession
+
+     """
+     path_to_encoder, path_to_decoder, path_to_initial_decoder = model_paths
+
+     if default:
+
+         encoder_sess = InferenceSession(str(path_to_encoder))
+
+         decoder_sess = InferenceSession(str(path_to_decoder))
+
+         decoder_sess_init = InferenceSession(str(path_to_initial_decoder))
+
+     else:
+
+         # Few properties that might have an impact on performance
+         options = SessionOptions()
+
+         if opt_level == 1:
+             options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_BASIC
+         elif opt_level == 2:
+             options.graph_optimization_level = (
+                 GraphOptimizationLevel.ORT_ENABLE_EXTENDED
+             )
+         else:
+             assert opt_level == 99
+             options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
+
+         # set this true for better performance
+         if parallel_exe_mode:
+             options.execution_mode = ExecutionMode.ORT_PARALLEL
+         else:
+             options.execution_mode = ExecutionMode.ORT_SEQUENTIAL
+
+         options.intra_op_num_threads = n_threads
+         # options.inter_op_num_threads = 10
+
+         # options.enable_profiling = True
+
+         encoder_sess = InferenceSession(
+             str(path_to_encoder), options, providers=provider
+         )
+
+         decoder_sess = InferenceSession(
+             str(path_to_decoder), options, providers=provider
+         )
+
+         decoder_sess_init = InferenceSession(
+             str(path_to_initial_decoder), options, providers=provider
+         )
+
+     return encoder_sess, decoder_sess, decoder_sess_init
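# Illustrative use of the helper above (a sketch; the paths are placeholders, not files from this repo):
#
#     sessions = get_onnx_runtime_sessions(
#         ("models/x-encoder.onnx", "models/x-decoder.onnx", "models/x-init-decoder.onnx"),
#         default=False,           # take the SessionOptions branch of the function above
#         opt_level=99,            # GraphOptimizationLevel.ORT_ENABLE_ALL
#         parallel_exe_mode=True,  # ExecutionMode.ORT_PARALLEL
#         n_threads=0,             # let onnxruntime choose
#     )
#     encoder_sess, decoder_sess, decoder_sess_init = sessions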
+
+
+ class DecoderWithLMhead(torch.nn.Module):
+     """ Creation of a class to combine the decoder and the lm head """
+
+     def __init__(self, decoder, lm_head, config):
+         super().__init__()
+         self.decoder = decoder
+         self.lm_head = lm_head
+         self.config = config
+
+     def forward(self, *inputs):
+
+         input_ids, attention_mask, encoder_hidden_states = inputs[:3]
+
+         list_pkv = inputs[3:]
+         past_key_values = tuple(list_pkv[i: i + 4]
+                                 for i in range(0, len(list_pkv), 4))
+
+         decoder_output = self.decoder(
+             input_ids=input_ids,  # decoder_input_ids
+             encoder_attention_mask=attention_mask,
+             encoder_hidden_states=encoder_hidden_states,
+             past_key_values=past_key_values,
+         )
+
+         lm_head_out = self.lm_head(
+             decoder_output[0] * (self.config.d_model ** -0.5))
+
+         return lm_head_out, decoder_output[1]
+
+
+ class T5EncoderWrapper(torch.nn.Module):
+     """ Wraps the PyTorch encoder so it outputs only the last hidden state (used during ONNX export) """
+
+     def __init__(self, encoder):
+         super().__init__()
+         self.encoder = encoder
+
+     def forward(self, *inputs, **kwargs):
+         return self.encoder(*inputs, **kwargs)[0]
+
+
+ class DecoderWithLMheadInitial(torch.nn.Module):
+     """ Combines the decoder and the lm head for the first decoding step (no past key values yet) """
+
+     def __init__(self, decoder, lm_head, config):
+         super().__init__()
+         self.decoder = decoder
+         self.lm_head = lm_head
+         self.config = config
+
+     def forward(self, input_ids, attention_mask, encoder_hidden_states):
+         decoder_output = self.decoder(
+             input_ids=input_ids,
+             encoder_attention_mask=attention_mask,
+             encoder_hidden_states=encoder_hidden_states,
+         )
+
+         return (
+             self.lm_head(decoder_output[0] * (self.config.d_model ** -0.5)),
+             decoder_output[1],
+         )
+
+
+ _folder = Path.cwd()
+ saved_models_path = _folder.joinpath("models")
+
+ Bar.check_tty = False
+
+
+ def create_t5_encoder_decoder(pretrained_version="t5-base"):
+     """Generates an encoder and a decoder model with a language model head from a pretrained huggingface model
+
+     Args:
+         pretrained_version (str): Name of a pretrained model, or path to a pretrained / finetuned version of T5
+
+     Returns:
+         simplified_encoder: pytorch t5 encoder with a wrapper to output only the hidden states
+         decoder_with_lm_head: pytorch t5 decoder with a language modeling head
+         decoder_with_lm_head_init: the same decoder and language modeling head, for the first step (no past key values)
+     """
+
+     if 'mt5' in pretrained_version:
+         model = MT5ForConditionalGeneration.from_pretrained(
+             pretrained_version, use_auth_token=get_auth_token())
+     else:
+         model = T5ForConditionalGeneration.from_pretrained(
+             pretrained_version, use_auth_token=get_auth_token())
+
+     return turn_model_into_encoder_decoder(model)
+
+
+ def turn_model_into_encoder_decoder(model):
+     encoder = model.encoder
+     decoder = model.decoder
+     lm_head = model.lm_head
+
+     decoder_with_lm_head = DecoderWithLMhead(decoder, lm_head, model.config)
+     simplified_encoder = T5EncoderWrapper(encoder)
+     decoder_with_lm_head_init = DecoderWithLMheadInitial(
+         decoder, lm_head, model.config)
+
+     return simplified_encoder, decoder_with_lm_head, decoder_with_lm_head_init
+
+
+ def generate_onnx_representation(
+     pretrained_version=None,
+     model=None,
+     output_path=None,
+     input_sequence_length=256,
+     onnx_opset_version=12,  # no other opset versions are tested, change at your own risk
+ ):
+     """Exports a given huggingface pretrained model, or a model already in memory, to ONNX
+
+     Args:
+         pretrained_version (str): Name of a pretrained model, or path to a pretrained / finetuned version of T5
+         output_path (Optional[str]): if missing then use ./models
+         input_sequence_length (Optional[int]): typical input sequence length, for use by ORT for possible optimization
+         onnx_opset_version (Optional[int]): ONNX Operator Set Version, default 12 is the only tested version
+     """
+     if (pretrained_version is None) and model is None:
+         print(
+             "You need to specify pretrained_version (the pretrained model you wish to export). Alternatively you can export a model you have in memory."
+         )
+         return
+
+     if model is not None:
+         (
+             simplified_encoder,
+             decoder_with_lm_head,
+             decoder_with_lm_head_init,
+         ) = turn_model_into_encoder_decoder(model)
+         if pretrained_version is None:
+             # fall back to the in-memory model's own name so output file names and config lookups work
+             pretrained_version = model.config.name_or_path
+     else:
+         (
+             simplified_encoder,
+             decoder_with_lm_head,
+             decoder_with_lm_head_init,
+         ) = create_t5_encoder_decoder(pretrained_version)
+
+     # model paths for enc, dec and dec_init
+     output_path = saved_models_path if output_path is None else Path(
+         output_path)
+     encoder_path, decoder_path, init_decoder_path = get_model_paths(
+         pretrained_version, output_path, quantized=False
+     )
+
+     model_config = AutoConfig.from_pretrained(
+         pretrained_version, use_auth_token=get_auth_token())
+
+     # Though these are dummy inputs, ORT optimizations do reference these values,
+     # so it is worth using values as close to production as possible
+     batch_size = 1  # not configurable since only CPU
+     enc_seq_length = input_sequence_length
+     # a decoder sequence length is always one because it's just the last generated token
+     dec_seq_length = 1
+     input_ids = torch.ones(batch_size, enc_seq_length, dtype=torch.int64)
+     attention_mask = torch.ones(batch_size, enc_seq_length, dtype=torch.int64)
+
+     n_heads = model_config.num_heads
+     d_kv = model_config.d_kv
+
+     input_ids_dec = torch.ones(batch_size, dec_seq_length, dtype=torch.int64)
+     attention_mask_dec = torch.ones(
+         batch_size, dec_seq_length, dtype=torch.int64)
+     enc_out = torch.ones(
+         (batch_size, enc_seq_length, model_config.d_model), dtype=torch.float32
+     )
+
+     # self_attention_past_key_values = torch.ones(
+     #     (model_config.num_decoder_layers, 2, batch_size, n_heads, seq_length_a, d_kv), dtype=torch.float32)
+     # cross_attention_past_key_values = torch.ones(
+     #     (model_config.num_decoder_layers, 2, batch_size, n_heads, seq_length_b, d_kv), dtype=torch.float32)
+
+     sa = torch.ones(
+         (batch_size, n_heads, dec_seq_length, d_kv), dtype=torch.float32
+     )  # (batch_size, n_heads, 1, d_kv)
+     ca = torch.ones(
+         (batch_size, n_heads, enc_seq_length, d_kv), dtype=torch.float32
+     )  # (batch_size, n_heads, enc_seq_length, d_kv)
+     t5_block = (sa, sa, ca, ca)
+     past_key_values = (t5_block,) * model_config.num_decoder_layers
+
+     flat_past_key_values = functools.reduce(
+         operator.iconcat, past_key_values, [])
+
+     decoder_all_inputs = tuple(
+         [input_ids_dec, attention_mask_dec, enc_out] + flat_past_key_values
+     )
+
+     # for progress bars
+     bar = Bar("Exporting to onnx...", max=3)
+
+     import warnings
+
+     # ignores all the warnings during conversion
+     warnings.filterwarnings("ignore")
+
+     # Exports to ONNX
+     with torch.no_grad():
+
+         decoder_inputs = [
+             "input_ids",
+             "encoder_attention_mask",
+             "encoder_hidden_states",
+         ]
+
+         pkv_input_names = ["pkv_{}".format(
+             i) for i in range(len(flat_past_key_values))]
+
+         decoder_input_names = decoder_inputs + pkv_input_names
+
+         decoder_output_names = ["logits", "output_past_key_values"]
+
+         dyn_axis_general = {0: "batch", 1: "sequence"}
+         dyn_axis_pkv = {0: "batch", 2: "seq_length"}
+
+         dyn_axis = {
+             "input_ids": dyn_axis_general,
+             "encoder_attention_mask": dyn_axis_general,
+             "encoder_hidden_states": dyn_axis_general,
+             "logits": dyn_axis_general,
+             "output_past_key_values": dyn_axis_general,
+         }
+
+         dyn_pkv = {
+             "pkv_{}".format(i): dyn_axis_pkv
+             for i in range(len(flat_past_key_values))
+         }
+
+         dyn_axis_params = {**dyn_axis, **dyn_pkv}
+
+         # decoder to utilize past key values:
+         torch.onnx.export(
+             decoder_with_lm_head,
+             decoder_all_inputs,
+             decoder_path.as_posix(),
+             export_params=True,
+             do_constant_folding=True,
+             opset_version=onnx_opset_version,
+             input_names=decoder_input_names,
+             output_names=decoder_output_names,
+             dynamic_axes=dyn_axis_params,
+         )
+         bar.next()
+
+         torch.onnx.export(
+             simplified_encoder,
+             args=(input_ids, attention_mask),
+             f=encoder_path.as_posix(),
+             export_params=True,
+             opset_version=onnx_opset_version,
+             do_constant_folding=True,
+             input_names=["input_ids", "attention_mask"],
+             output_names=["hidden_states"],
+             dynamic_axes={
+                 "input_ids": dyn_axis_general,
+                 "attention_mask": dyn_axis_general,
+                 "hidden_states": dyn_axis_general,
+             },
+         )
+         bar.next()
+         # initial decoder to produce past key values
+         torch.onnx.export(
+             decoder_with_lm_head_init,
+             (input_ids_dec, attention_mask_dec, enc_out),
+             init_decoder_path.as_posix(),
+             export_params=True,
+             opset_version=onnx_opset_version,
+             input_names=[
+                 "input_ids",
+                 "encoder_attention_mask",
+                 "encoder_hidden_states",
+             ],
+             output_names=["logits", "past_key_values"],
+             dynamic_axes={
+                 # batch_size, seq_length = input_shape
+                 "input_ids": dyn_axis_general,
+                 "encoder_attention_mask": dyn_axis_general,
+                 "encoder_hidden_states": dyn_axis_general,
+                 "logits": dyn_axis_general,
+                 "past_key_values": dyn_axis_general,
+             },
+         )
+         bar.next()
+         bar.finish()
+
+     return encoder_path, decoder_path, init_decoder_path
+
+
+ def get_model_paths(pretrained_model, model_path, quantized):
+
+     model_path.mkdir(parents=True, exist_ok=True)
+
+     # gets only the filename
+     pretrained_model_name = Path(pretrained_model).stem
+
+     if not quantized:
+         encoder_path = model_path.joinpath(
+             f"{pretrained_model_name}-encoder.onnx")
+         decoder_path = model_path.joinpath(
+             f"{pretrained_model_name}-decoder.onnx")
+         init_decoder_path = model_path.joinpath(
+             f"{pretrained_model_name}-init-decoder.onnx"
+         )
+     else:
+         encoder_path = model_path.joinpath(
+             f"{pretrained_model_name}-encoder-quantized.onnx"
+         )
+         decoder_path = model_path.joinpath(
+             f"{pretrained_model_name}-decoder-quantized.onnx"
+         )
+         init_decoder_path = model_path.joinpath(
+             f"{pretrained_model_name}-init-decoder-quantized.onnx"
+         )
+
+     return encoder_path, decoder_path, init_decoder_path
+
+
+ def quantize(models_name_or_path):
+     """
+     Quantize the weights of the model from float32 to int8 to allow very efficient inference on modern CPU
+
+     Uses unsigned ints for activation values, signed ints for weights, per
+     https://onnxruntime.ai/docs/performance/quantization.html#data-type-selection
+     it is faster on most CPU architectures
+     Args:
+         models_name_or_path: list/tuple of paths to the exported ONNX models
+     Returns:
+         tuple of paths to the quantized models
+     """
+     from onnxruntime.quantization import quantize_dynamic, QuantType
+
+     bar = Bar("Quantizing...", max=3)
+
+     quant_model_paths = []
+     for model in models_name_or_path:
+         model_name = model.as_posix()
+         output_model_name = f"{model_name[:-5]}-quantized.onnx"
+         quantize_dynamic(
+             model_input=model_name,
+             model_output=output_model_name,
+             per_channel=True,
+             reduce_range=True,  # should be the same as per_channel
+             activation_type=QuantType.QUInt8,
+             weight_type=QuantType.QInt8,  # per docs, signed is faster on most CPUs
+             optimize_model=False,
+         )  # op_types_to_quantize=['MatMul', 'Relu', 'Add', 'Mul' ],
+         quant_model_paths.append(output_model_name)
+         bar.next()
+
+     bar.finish()
+
+     return tuple(quant_model_paths)
+
+
+ class T5Encoder(torch.nn.Module):
+     """Wraps the encoder ONNX InferenceSession so it can be called like a Hugging Face encoder module."""
+
+     def __init__(self, encoder_sess):
+         super().__init__()
+         self.encoder = encoder_sess
+         self.main_input_name = "input_ids"
+
+     def forward(
+         self,
+         input_ids,
+         attention_mask,
+         inputs_embeds=None,
+         head_mask=None,
+         output_attentions=None,
+         output_hidden_states=None,
+         return_dict=None,
+     ):
+
+         encoder_hidden_state = torch.from_numpy(
+             self.encoder.run(
+                 None,
+                 {
+                     "input_ids": input_ids.cpu().numpy(),
+                     "attention_mask": attention_mask.cpu().numpy(),
+                 },
+             )[0]
+         )
+
+         return BaseModelOutput(encoder_hidden_state)
+
+
+ class T5DecoderInit(torch.nn.Module):
+     """Runs the initial-decoder ONNX session (first decoding step, no past key values yet)."""
+
+     def __init__(self, decoder_sess):
+         super().__init__()
+         self.decoder = decoder_sess
+
+     def forward(self, input_ids, encoder_attention_mask, encoder_hidden_states):
+
+         decoder_outputs = self.decoder.run(
+             None,
+             {
+                 "input_ids": input_ids.cpu().numpy(),
+                 "encoder_attention_mask": encoder_attention_mask.cpu().numpy(),
+                 "encoder_hidden_states": encoder_hidden_states.cpu().numpy(),
+             },
+         )
+
+         list_pkv = tuple(torch.from_numpy(x) for x in decoder_outputs[1:])
+
+         out_past_key_values = tuple(
+             list_pkv[i: i + 4] for i in range(0, len(list_pkv), 4)
+         )
+
+         return torch.from_numpy(decoder_outputs[0]), out_past_key_values
+
+
+ class T5Decoder(torch.nn.Module):
+     def __init__(self, decoder_sess):
+         super().__init__()
+         self.decoder = decoder_sess
+
+     def forward(self, input_ids, attention_mask, encoder_output, past_key_values):
+
+         decoder_inputs = {
+             "input_ids": input_ids.cpu().numpy(),
+             "encoder_attention_mask": attention_mask.cpu().numpy(),
+             "encoder_hidden_states": encoder_output.cpu().numpy(),
+         }
+
+         flat_past_key_values = functools.reduce(
+             operator.iconcat, past_key_values, [])
+
+         past_key_values = {
+             f"pkv_{i}": pkv.cpu().numpy() for i, pkv in enumerate(flat_past_key_values)
+         }
+
+         decoder_outputs = self.decoder.run(
+             None, {**decoder_inputs, **past_key_values})
+         # converts each value of the list to tensor from numpy
+         list_pkv = tuple(torch.from_numpy(x) for x in decoder_outputs[1:])
+
+         # creates a tuple of tuples of shape (num_decoder_layers x 4) from the above tuple
+         out_past_key_values = tuple(
+             list_pkv[i: i + 4] for i in range(0, len(list_pkv), 4)
+         )
+
+         return torch.from_numpy(decoder_outputs[0]), out_past_key_values
+
+
+ class OnnxT5(T5ForConditionalGeneration):
+     """Creates a T5 model using onnx sessions (encoder, decoder & init_decoder)"""
+
+     def __init__(self, model_or_model_path, onnx_model_sessions):
+         config = AutoConfig.from_pretrained(
+             model_or_model_path, use_auth_token=get_auth_token()
+         )
+         super().__init__(config)
+
+         # monkeypatch to work for MT5
+         if (
+             isinstance(model_or_model_path, str)
+             and "mt5" in model_or_model_path.lower()
+         ) or (
+             hasattr(model_or_model_path, "name_or_path")
+             and "mt5" in model_or_model_path.name_or_path
+         ):
+             self.model_type = "mt5"
+             self.config_class = MT5Config
+             self._keys_to_ignore_on_load_missing = [
+                 r"encoder\.embed_tokens\.weight",
+             ]
+             self._keys_to_ignore_on_save = [
+                 r"encoder\.embed_tokens\.weight",
+             ]
+
+         assert len(onnx_model_sessions) == 3, "all three models should be given"
+
+         encoder_sess, decoder_sess, decoder_sess_init = onnx_model_sessions
+
+         self.encoder = T5Encoder(encoder_sess)
+         self.decoder = T5Decoder(decoder_sess)
+         self.decoder_init = T5DecoderInit(decoder_sess_init)
+
+     def forward(
+         self,
+         input_ids=None,
+         attention_mask=None,
+         decoder_input_ids=None,
+         decoder_attention_mask=None,
+         head_mask=None,
+         decoder_head_mask=None,
+         cross_attn_head_mask=None,
+         encoder_outputs=None,
+         past_key_values=None,
+         inputs_embeds=None,
+         decoder_inputs_embeds=None,
+         labels=None,
+         use_cache=None,
+         output_attentions=None,
+         output_hidden_states=None,
+         return_dict=None,
+     ):
+
+         if encoder_outputs is None:
+             # run the ONNX encoder once and reuse its hidden states
+             encoder_outputs = self.encoder(
+                 input_ids=input_ids, attention_mask=attention_mask
+             )
+
+         encoder_hidden_states = encoder_outputs[0]
+
+         if past_key_values is not None:
+             if decoder_input_ids is not None:
+                 decoder_input_ids = decoder_input_ids[:, -1:]
+             if decoder_inputs_embeds is not None:
+                 decoder_inputs_embeds = decoder_inputs_embeds[:, -1:]
+
+         if past_key_values is None:
+
+             # runs only for the first time:
+             init_onnx_outputs = self.decoder_init(
+                 decoder_input_ids, attention_mask, encoder_hidden_states
+             )
+
+             logits, past_key_values = init_onnx_outputs
+
+         else:
+
+             onnx_outputs = self.decoder(
+                 decoder_input_ids,
+                 attention_mask,
+                 encoder_hidden_states,
+                 past_key_values,
+             )
+
+             logits, past_key_values = onnx_outputs
+
+         return Seq2SeqLMOutput(logits=logits, past_key_values=past_key_values)
+
+
+ def export_and_get_onnx_model(
+     model_or_model_path, custom_output_path=saved_models_path, quantized=True
+ ):
+     """
+     Runs the whole pipeline:
+     converts the PyTorch model to ONNX --> quantizes it --> sets up ONNX Runtime sessions
+     --> wraps everything in an OnnxT5 model.
+
+     """
+
+     # Step 1. convert huggingface's T5 model to ONNX
+     onnx_model_paths = generate_onnx_representation(
+         model_or_model_path, output_path=custom_output_path
+     )
+
+     if quantized:
+         # Step 2. (recommended) quantize the converted model for fast inference and to reduce model size.
+         quant_model_paths = quantize(onnx_model_paths)
+
+         # Step 3. setup onnx runtime
+         print("Setting up onnx model...")
+         model_sessions = get_onnx_runtime_sessions(quant_model_paths)
+     else:
+         print("Setting up onnx model...")
+         model_sessions = get_onnx_runtime_sessions(onnx_model_paths)
+
+     # Step 4. get the onnx model
+     model = OnnxT5(model_or_model_path, model_sessions)
+     print("Done!")
+
+     return model
+
+
+ def get_onnx_model(model_name, onnx_models_path=saved_models_path, quantized=True):
+     """
+     Loads an OnnxT5 model from already-converted ONNX files.
+     Example:
+     >> get_onnx_model(model_name="t5-finetuned", onnx_models_path="../models/onnx/quantized/")
+
+     """
+
+     encoder_path, decoder_path, init_decoder_path = get_model_paths(
+         model_name, Path(onnx_models_path), quantized
+     )
+
+     if quantized:
+         assert (
+             encoder_path.exists()
+             and decoder_path.exists()
+             and init_decoder_path.exists()
+         ), "quantized models don't exist in the model folder, quantize the model first!"
+     else:
+         assert (
+             encoder_path.exists()
+             and decoder_path.exists()
+             and init_decoder_path.exists()
+         ), "some models are missing from the model folder, convert the model first!"
+
+     model_paths = encoder_path, decoder_path, init_decoder_path
+
+     model_sessions = get_onnx_runtime_sessions(model_paths)
+
+     model = OnnxT5(model_name, model_sessions)
+
+     return model
+
+
+ trained_model_path = './t5_squad_v1/'
+
+ pretrained_model_name = Path(trained_model_path).stem
+
+ # load the pre-quantized ONNX sessions shipped with this repo and wrap them in an OnnxT5 model
+ encoder_path = os.path.join(
+     trained_model_path, f"{pretrained_model_name}-encoder_quantized.onnx")
+ decoder_path = os.path.join(
+     trained_model_path, f"{pretrained_model_name}-decoder_quantized.onnx")
+ init_decoder_path = os.path.join(
+     trained_model_path, f"{pretrained_model_name}-init-decoder_quantized.onnx")
+
+ model_paths = encoder_path, decoder_path, init_decoder_path
+ model_sessions = get_onnx_runtime_sessions(model_paths)
+ model = OnnxT5(trained_model_path, model_sessions)
+
+ tokenizer = AutoTokenizer.from_pretrained(trained_model_path)
+
+
+ def get_question(sentence, answer, mdl, tknizer):
+     text = "context: {} answer: {}".format(sentence, answer)
+     print(text)
+     max_len = 256
+     encoding = tknizer.encode_plus(
+         text, max_length=max_len, pad_to_max_length=False, truncation=True, return_tensors="pt")
+     input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]
+     outs = mdl.generate(input_ids=input_ids,
+                         attention_mask=attention_mask,
+                         early_stopping=True,
+                         num_beams=5,
+                         num_return_sequences=1,
+                         no_repeat_ngram_size=2,
+                         max_length=300)
+
+     dec = [tknizer.decode(ids, skip_special_tokens=True) for ids in outs]
+
+     Question = dec[0].replace("question:", "")
+     Question = Question.strip()
+     return Question
+
+
+ # context = "Ramsri loves to watch cricket during his free time"
+ # answer = "cricket"
+ context = "Donald Trump is an American media personality and businessman who served as the 45th president of the United States."
+ answer = "Donald Trump"
+ ques = get_question(context, answer, model, tokenizer)
+ print("question: ", ques)
+
+
+ context = gr.components.Textbox(
+     lines=5, placeholder="Enter paragraph/context here...")
+ answer = gr.components.Textbox(
+     lines=3, placeholder="Enter answer/keyword here...")
+ question = gr.components.Textbox(type="text", label="Question")
+
+
+ def generate_question(context, answer):
+     start_time = time.time()  # Record the start time
+     result = get_question(context, answer, model, tokenizer)
+     end_time = time.time()  # Record the end time
+     latency = end_time - start_time  # Calculate latency
+     print(f"Latency: {latency} seconds")
+     return result
+
+
+ iface = gr.Interface(
+     fn=generate_question,
+     inputs=[context, answer],
+     outputs=question
+ )
+
+ iface.launch(share=True)
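A note on how these pieces fit together: the export and quantization helpers above are the fastT5-style pipeline, while the app itself only exercises the loading path, since the three quantized .onnx files are committed under t5_squad_v1/. A minimal sketch of regenerating them end to end, assuming the helper functions are kept in their own module (here called fastt5_utils, an illustrative name) and a fine-tuned PyTorch checkpoint is available locally:

    # hypothetical one-off export script -- not part of this commit
    from fastt5_utils import export_and_get_onnx_model   # the helpers defined in app.py above
    from transformers import AutoTokenizer

    checkpoint = "./t5_squad_v1/"          # assumption: PyTorch weights of the fine-tuned model live here
    model = export_and_get_onnx_model(checkpoint, custom_output_path="./models", quantized=True)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    # smoke test: OnnxT5 subclasses T5ForConditionalGeneration, so .generate() works as usual
    text = "context: ONNX Runtime executes the exported graph on CPU. answer: ONNX Runtime"
    enc = tokenizer(text, return_tensors="pt", truncation=True, max_length=256)
    out = model.generate(input_ids=enc["input_ids"], attention_mask=enc["attention_mask"],
                         num_beams=5, early_stopping=True, max_length=50)
    print(tokenizer.decode(out[0], skip_special_tokens=True))

Note that quantize() writes files named *-encoder-quantized.onnx (hyphen), while the loading code above expects *-encoder_quantized.onnx (underscore), so the generated files would need to be renamed, or get_model_paths adjusted, before this app can pick them up.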
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ gradio
+ onnx
+ onnxruntime
+ torch
+ transformers
+ sentencepiece
+ progress
+ psutil
t5_squad_v1/config.json ADDED
@@ -0,0 +1,59 @@
+ {
+   "_name_or_path": "models",
+   "architectures": [
+     "T5ForConditionalGeneration"
+   ],
+   "d_ff": 3072,
+   "d_kv": 64,
+   "d_model": 768,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "relu",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "relu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": false,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "t5",
+   "n_positions": 512,
+   "num_decoder_layers": 12,
+   "num_heads": 12,
+   "num_layers": 12,
+   "output_past": true,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "task_specific_params": {
+     "summarization": {
+       "early_stopping": true,
+       "length_penalty": 2.0,
+       "max_length": 200,
+       "min_length": 30,
+       "no_repeat_ngram_size": 3,
+       "num_beams": 4,
+       "prefix": "summarize: "
+     },
+     "translation_en_to_de": {
+       "early_stopping": true,
+       "max_length": 300,
+       "num_beams": 4,
+       "prefix": "translate English to German: "
+     },
+     "translation_en_to_fr": {
+       "early_stopping": true,
+       "max_length": 300,
+       "num_beams": 4,
+       "prefix": "translate English to French: "
+     },
+     "translation_en_to_ro": {
+       "early_stopping": true,
+       "max_length": 300,
+       "num_beams": 4,
+       "prefix": "translate English to Romanian: "
+     }
+   },
+   "transformers_version": "4.28.1",
+   "use_cache": true,
+   "vocab_size": 32128
+ }
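These are exactly the values generate_onnx_representation reads when it builds the dummy decoder inputs for export; a quick sanity check of the resulting past-key-value shapes, using only numbers from this config (a sketch, not part of the commit):

    batch_size, enc_seq_length, dec_seq_length = 1, 256, 1        # defaults used in app.py
    num_heads, d_kv, num_decoder_layers = 12, 64, 12              # from this config.json

    sa_shape = (batch_size, num_heads, dec_seq_length, d_kv)      # self-attention cache: (1, 12, 1, 64)
    ca_shape = (batch_size, num_heads, enc_seq_length, d_kv)      # cross-attention cache: (1, 12, 256, 64)
    n_pkv_inputs = num_decoder_layers * 4                         # 48 pkv_* inputs on the decoder graph
    print(sa_shape, ca_shape, n_pkv_inputs)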
t5_squad_v1/ort_config.json ADDED
@@ -0,0 +1,35 @@
+ {
+   "one_external_file": true,
+   "opset": null,
+   "optimization": {},
+   "optimum_version": "1.12.0",
+   "quantization": {
+     "activations_dtype": "QUInt8",
+     "activations_symmetric": false,
+     "format": "QOperator",
+     "is_static": false,
+     "mode": "IntegerOps",
+     "nodes_to_exclude": [],
+     "nodes_to_quantize": [],
+     "operators_to_quantize": [
+       "Conv",
+       "MatMul",
+       "Attention",
+       "LSTM",
+       "Gather",
+       "Transpose",
+       "EmbedLayerNormalization"
+     ],
+     "per_channel": false,
+     "qdq_add_pair_to_weight": false,
+     "qdq_dedicated_pair": false,
+     "qdq_op_type_per_channel_support_to_axis": {
+       "MatMul": 1
+     },
+     "reduce_range": false,
+     "weights_dtype": "QInt8",
+     "weights_symmetric": true
+   },
+   "transformers_version": "4.28.1",
+   "use_external_data_format": false
+ }
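This file records dynamic-quantization settings evidently produced with Optimum 1.12.0 for the quantized .onnx files shipped alongside it: QOperator format, QUInt8 activations, QInt8 weights, with per_channel and reduce_range disabled. That differs from the quantize() helper in app.py, which passes per_channel=True and reduce_range=True. A roughly equivalent call through onnxruntime's own API (a sketch; the file names are placeholders):

    from onnxruntime.quantization import quantize_dynamic, QuantType

    quantize_dynamic(
        model_input="t5_squad_v1-decoder.onnx",               # hypothetical un-quantized export
        model_output="t5_squad_v1-decoder_quantized.onnx",
        per_channel=False,
        reduce_range=False,
        weight_type=QuantType.QInt8,                          # matches "weights_dtype": "QInt8"
    )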
t5_squad_v1/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
+ {
+   "additional_special_tokens": [
+     "<extra_id_0>",
+     "<extra_id_1>",
+     "<extra_id_2>",
+     "<extra_id_3>",
+     "<extra_id_4>",
+     "<extra_id_5>",
+     "<extra_id_6>",
+     "<extra_id_7>",
+     "<extra_id_8>",
+     "<extra_id_9>",
+     "<extra_id_10>",
+     "<extra_id_11>",
+     "<extra_id_12>",
+     "<extra_id_13>",
+     "<extra_id_14>",
+     "<extra_id_15>",
+     "<extra_id_16>",
+     "<extra_id_17>",
+     "<extra_id_18>",
+     "<extra_id_19>",
+     "<extra_id_20>",
+     "<extra_id_21>",
+     "<extra_id_22>",
+     "<extra_id_23>",
+     "<extra_id_24>",
+     "<extra_id_25>",
+     "<extra_id_26>",
+     "<extra_id_27>",
+     "<extra_id_28>",
+     "<extra_id_29>",
+     "<extra_id_30>",
+     "<extra_id_31>",
+     "<extra_id_32>",
+     "<extra_id_33>",
+     "<extra_id_34>",
+     "<extra_id_35>",
+     "<extra_id_36>",
+     "<extra_id_37>",
+     "<extra_id_38>",
+     "<extra_id_39>",
+     "<extra_id_40>",
+     "<extra_id_41>",
+     "<extra_id_42>",
+     "<extra_id_43>",
+     "<extra_id_44>",
+     "<extra_id_45>",
+     "<extra_id_46>",
+     "<extra_id_47>",
+     "<extra_id_48>",
+     "<extra_id_49>",
+     "<extra_id_50>",
+     "<extra_id_51>",
+     "<extra_id_52>",
+     "<extra_id_53>",
+     "<extra_id_54>",
+     "<extra_id_55>",
+     "<extra_id_56>",
+     "<extra_id_57>",
+     "<extra_id_58>",
+     "<extra_id_59>",
+     "<extra_id_60>",
+     "<extra_id_61>",
+     "<extra_id_62>",
+     "<extra_id_63>",
+     "<extra_id_64>",
+     "<extra_id_65>",
+     "<extra_id_66>",
+     "<extra_id_67>",
+     "<extra_id_68>",
+     "<extra_id_69>",
+     "<extra_id_70>",
+     "<extra_id_71>",
+     "<extra_id_72>",
+     "<extra_id_73>",
+     "<extra_id_74>",
+     "<extra_id_75>",
+     "<extra_id_76>",
+     "<extra_id_77>",
+     "<extra_id_78>",
+     "<extra_id_79>",
+     "<extra_id_80>",
+     "<extra_id_81>",
+     "<extra_id_82>",
+     "<extra_id_83>",
+     "<extra_id_84>",
+     "<extra_id_85>",
+     "<extra_id_86>",
+     "<extra_id_87>",
+     "<extra_id_88>",
+     "<extra_id_89>",
+     "<extra_id_90>",
+     "<extra_id_91>",
+     "<extra_id_92>",
+     "<extra_id_93>",
+     "<extra_id_94>",
+     "<extra_id_95>",
+     "<extra_id_96>",
+     "<extra_id_97>",
+     "<extra_id_98>",
+     "<extra_id_99>"
+   ],
+   "eos_token": "</s>",
+   "pad_token": "<pad>",
+   "unk_token": "<unk>"
+ }
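The 100 <extra_id_*> sentinel tokens are standard for every T5 tokenizer (extra_ids=100 in tokenizer_config.json below); the question-generation app never emits them at inference time, but they sit at the top of the vocabulary. A small check against the committed tokenizer (a sketch):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("./t5_squad_v1/")
    print(tok.convert_tokens_to_ids("<extra_id_0>"))   # sentinels occupy the highest regular ids in the vocab
    print(tok.eos_token, tok.pad_token, tok.unk_token) # </s> <pad> <unk>, as declared above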
t5_squad_v1/spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+ size 791656
t5_squad_v1/t5_squad_v1-decoder_quantized.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9fd0f8a3a4f7865ca2d31d1e6d1078c9a17c2f27e969ba6c137d5457694506b9
+ size 149128510
t5_squad_v1/t5_squad_v1-encoder_quantized.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:93835d3fc5cd7e6e0e9582409b86184be4a2df6e0db3d3d75bcbb7cf2b5ba696
+ size 110045668
t5_squad_v1/t5_squad_v1-init-decoder_quantized.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8afab51caddafca6a74103d9fd233abd03cc43c979ae9c7e1066858b6a5dc26d
+ size 163346037
t5_squad_v1/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
t5_squad_v1/tokenizer_config.json ADDED
@@ -0,0 +1,112 @@
+ {
+   "additional_special_tokens": [
+     "<extra_id_0>",
+     "<extra_id_1>",
+     "<extra_id_2>",
+     "<extra_id_3>",
+     "<extra_id_4>",
+     "<extra_id_5>",
+     "<extra_id_6>",
+     "<extra_id_7>",
+     "<extra_id_8>",
+     "<extra_id_9>",
+     "<extra_id_10>",
+     "<extra_id_11>",
+     "<extra_id_12>",
+     "<extra_id_13>",
+     "<extra_id_14>",
+     "<extra_id_15>",
+     "<extra_id_16>",
+     "<extra_id_17>",
+     "<extra_id_18>",
+     "<extra_id_19>",
+     "<extra_id_20>",
+     "<extra_id_21>",
+     "<extra_id_22>",
+     "<extra_id_23>",
+     "<extra_id_24>",
+     "<extra_id_25>",
+     "<extra_id_26>",
+     "<extra_id_27>",
+     "<extra_id_28>",
+     "<extra_id_29>",
+     "<extra_id_30>",
+     "<extra_id_31>",
+     "<extra_id_32>",
+     "<extra_id_33>",
+     "<extra_id_34>",
+     "<extra_id_35>",
+     "<extra_id_36>",
+     "<extra_id_37>",
+     "<extra_id_38>",
+     "<extra_id_39>",
+     "<extra_id_40>",
+     "<extra_id_41>",
+     "<extra_id_42>",
+     "<extra_id_43>",
+     "<extra_id_44>",
+     "<extra_id_45>",
+     "<extra_id_46>",
+     "<extra_id_47>",
+     "<extra_id_48>",
+     "<extra_id_49>",
+     "<extra_id_50>",
+     "<extra_id_51>",
+     "<extra_id_52>",
+     "<extra_id_53>",
+     "<extra_id_54>",
+     "<extra_id_55>",
+     "<extra_id_56>",
+     "<extra_id_57>",
+     "<extra_id_58>",
+     "<extra_id_59>",
+     "<extra_id_60>",
+     "<extra_id_61>",
+     "<extra_id_62>",
+     "<extra_id_63>",
+     "<extra_id_64>",
+     "<extra_id_65>",
+     "<extra_id_66>",
+     "<extra_id_67>",
+     "<extra_id_68>",
+     "<extra_id_69>",
+     "<extra_id_70>",
+     "<extra_id_71>",
+     "<extra_id_72>",
+     "<extra_id_73>",
+     "<extra_id_74>",
+     "<extra_id_75>",
+     "<extra_id_76>",
+     "<extra_id_77>",
+     "<extra_id_78>",
+     "<extra_id_79>",
+     "<extra_id_80>",
+     "<extra_id_81>",
+     "<extra_id_82>",
+     "<extra_id_83>",
+     "<extra_id_84>",
+     "<extra_id_85>",
+     "<extra_id_86>",
+     "<extra_id_87>",
+     "<extra_id_88>",
+     "<extra_id_89>",
+     "<extra_id_90>",
+     "<extra_id_91>",
+     "<extra_id_92>",
+     "<extra_id_93>",
+     "<extra_id_94>",
+     "<extra_id_95>",
+     "<extra_id_96>",
+     "<extra_id_97>",
+     "<extra_id_98>",
+     "<extra_id_99>"
+   ],
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "</s>",
+   "extra_ids": 100,
+   "model_max_length": 512,
+   "pad_token": "<pad>",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "T5Tokenizer",
+   "unk_token": "<unk>"
+ }