Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
Use a workaround method to solve the missing-space issue for llama-based tokenizers
utils.py
CHANGED
@@ -16,7 +16,22 @@ def get_res(model_name, input_sentence, single_print=True):
|
|
16 |
out = tokenizer.encode(input_sentence, add_special_tokens=False)
|
17 |
token_num = len(out)
|
18 |
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
res = ''.join(w)
|
21 |
if single_print:
|
22 |
print(res + str(token_num))
|
|
|
16 |
out = tokenizer.encode(input_sentence, add_special_tokens=False)
|
17 |
token_num = len(out)
|
18 |
|
19 |
+
work_around = False
|
20 |
+
if work_around:
|
21 |
+
w = []
|
22 |
+
pre = ""
|
23 |
+
for i in range(len(out)):
|
24 |
+
res = tokenizer.decode(out[:i+1])
|
25 |
+
if w == []:
|
26 |
+
w.append(res)
|
27 |
+
else:
|
28 |
+
pre_len = len(pre) #0
|
29 |
+
w.append(res[pre_len:])
|
30 |
+
pre = res
|
31 |
+
|
32 |
+
w = [ f'<span style="font-size:1.25em;background-color:{next(color_iterator)}">{x}</span>' for x in out ]
|
33 |
+
else:
|
34 |
+
w = [ f'<span style="font-size:1.25em;background-color:{next(color_iterator)}">{tokenizer.decode(x)}</span>' for x in out ]
|
35 |
res = ''.join(w)
|
36 |
if single_print:
|
37 |
print(res + str(token_num))
|