{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Sequence", "pretokenizers": [ { "type": "Split", "pattern": { "Regex": "(\\[[^\\]]+]|<[^>]+>|Br?|Cl?|N|O|S|P|F|H|I|b|c|n|o|s|p|\\(|\\)|\\.|=|#|-|\\+|\\\\\\\\|\\u005C|/|:|~|@|\\?|\\*|\\$|\\%[0-9]{2}|[0-9])" }, "behavior": "Isolated", "invert": false }, { "type": "Split", "pattern": { "Regex": "\\[|\\]|0|1|2|3|4|5|6|7|8|9|-|\\+|H|@" }, "behavior": "Isolated", "invert": false } ] }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "": { "id": "", "ids": [ 2 ], "tokens": [ "" ] }, "": { "id": "", "ids": [ 1 ], "tokens": [ "" ] }, "": { "id": "", "ids": [ 0 ], "tokens": [ "" ] } } }, "decoder": { "type": "BPEDecoder", "suffix": "" }, "model": { "type": "WordLevel", "vocab": { "": 0, "": 1, "": 2, "": 3, "": 4, "C": 5, "=": 6, "(": 7, ")": 8, "O": 9, "N": 10, "1": 11, "2": 12, "3": 13, "4": 14, "F": 15, "S": 16, "": 17, "": 18, "": 19, "5": 20, "Cl": 21, "[": 22, "]": 23, ".": 24, "6": 25, "7": 26, "-": 27, "+": 28, "#": 29, "Br": 30, "8": 31, "9": 32, "P": 33, "H": 34, "I": 35, "Si": 36, "B": 37, "Na": 38, "Y": 39, "Ir": 40, "Pt": 41, "K": 42, "Se": 43, "Li": 44, "W": 45, "Sn": 46, "V": 47, "Zr": 48, "Zn": 49, "Cu": 50, "Fe": 51, "%": 52, "Ti": 53, "Pd": 54, "Co": 55, "Mg": 56, "Al": 57, "Ni": 58, "Ge": 59, "Ru": 60, "Ca": 61, "U": 62, "Mn": 63, "Cr": 64, "Au": 65, "Ag": 66, "As": 67, "Te": 68, "Mo": 69, "Ac": 70, "0": 71, "Tb": 72, "f": 73, "Rh": 74, "g": 75, "Cs": 76, "Rf": 77, "Ar": 78, "Sb": 79, "Rb": 80, "Ba": 81, "Os": 82, "Re": 83, "Gd": 84, "Cd": 85, "Bi": 86, "Pb": 87, "In": 88, "Ga": 89, "Ce": 90, "La": 91, "Eu": 92, "Tl": 93, "Tc": 94, "Nb": 95, "Sr": 96, "Ta": 97, "Nd": 98, "Pr": 99, "Yb": 100, "Sm": 101, "Be": 102, "Sc": 103, "Dy": 104, "Lu": 105, "Fm": 106, "Er": 107, "Th": 108, "o": 109, "Tm": 110, "At": 111, "No": 112, "Po": 113, "Cm": 114, "Sg": 115, "Xe": 116, "Np": 117, "Lr": 118, "Pu": 119, "Pm": 120, "Cf": 121, "e": 122, "Am": 123, "Es": 124, "Pa": 125, "Ne": 126, "Bk": 127, "Db": 128, "Mt": 129, "Kr": 130, "Rn": 131, "s": 132, "Bh": 133, "Md": 134, "Ra": 135, "Fr": 136, "@": 137, "\u005C": 138, "/": 139 }, "unk_token": "" } }