base / vocab.json
lynguyenminh's picture
Upload tokenizer
21dc37b
raw
history blame
No virus
1.3 kB
{
"/": 17,
"0": 66,
"1": 40,
"2": 28,
"3": 68,
"4": 74,
"5": 84,
"6": 26,
"7": 33,
"8": 45,
"9": 57,
"[PAD]": 106,
"[UNK]": 105,
"a": 43,
"b": 52,
"c": 58,
"d": 104,
"e": 71,
"f": 39,
"g": 3,
"h": 11,
"i": 59,
"k": 70,
"l": 41,
"m": 24,
"n": 100,
"o": 80,
"p": 14,
"q": 61,
"r": 18,
"s": 81,
"t": 15,
"u": 6,
"v": 90,
"w": 12,
"x": 31,
"y": 46,
"z": 88,
"|": 47,
"à": 32,
"á": 1,
"â": 93,
"ã": 76,
"è": 25,
"é": 72,
"ê": 77,
"ì": 9,
"í": 85,
"ò": 10,
"ó": 42,
"ô": 13,
"õ": 92,
"ù": 82,
"ú": 38,
"ý": 63,
"ă": 36,
"đ": 19,
"ĩ": 99,
"ũ": 34,
"ơ": 16,
"ư": 51,
"̀": 49,
"́": 21,
"̣": 48,
"ạ": 30,
"ả": 73,
"ấ": 78,
"ầ": 75,
"ẩ": 67,
"ẫ": 79,
"ậ": 101,
"ắ": 60,
"ằ": 23,
"ẳ": 0,
"ẵ": 65,
"ặ": 89,
"ẹ": 37,
"ẻ": 97,
"ẽ": 69,
"ế": 64,
"ề": 98,
"ể": 29,
"ễ": 96,
"ệ": 7,
"ỉ": 5,
"ị": 62,
"ọ": 8,
"ỏ": 20,
"ố": 4,
"ồ": 2,
"ổ": 83,
"ỗ": 56,
"ộ": 27,
"ớ": 35,
"ờ": 22,
"ở": 103,
"ỡ": 95,
"ợ": 53,
"ụ": 44,
"ủ": 102,
"ứ": 50,
"ừ": 94,
"ử": 54,
"ữ": 91,
"ự": 86,
"ỳ": 87,
"ỷ": 55
}