davda54 committed on
Commit
256bb06
1 Parent(s): 53f2bdc

Updated tokenizer with special tokens for a chat template

Browse files
Files changed (1) hide show
  1. tokenizer.json +4 -4
tokenizer.json CHANGED
@@ -32,7 +32,7 @@
32
  },
33
  {
34
  "id": 3,
35
- "content": "<extra_id_0>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
@@ -41,7 +41,7 @@
41
  },
42
  {
43
  "id": 4,
44
- "content": "<extra_id_1>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
@@ -265,8 +265,8 @@
265
  "<unk>": 0,
266
  "<s>": 1,
267
  "</s>": 2,
268
- "<extra_id_0>": 3,
269
- "<extra_id_1>": 4,
270
  "<extra_id_2>": 5,
271
  "<extra_id_3>": 6,
272
  "<extra_id_4>": 7,
 
32
  },
33
  {
34
  "id": 3,
35
+ "content": "<|im_start|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
 
41
  },
42
  {
43
  "id": 4,
44
+ "content": "<|im_end|>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
 
265
  "<unk>": 0,
266
  "<s>": 1,
267
  "</s>": 2,
268
+ "<|im_start|>": 3,
269
+ "<|im_end|>": 4,
270
  "<extra_id_2>": 5,
271
  "<extra_id_3>": 6,
272
  "<extra_id_4>": 7,