m-ric's picture
m-ric HF staff
Create app.py
bc46ee1 verified
raw
history blame
No virus
2.95 kB
import gradio as gr
from transformers import AutoTokenizer
# Tokenizer loaded once at import time from the 'openai-community/gpt2' checkpoint.
# NOTE(review): the variable is named `bert_tokenizer` although the checkpoint
# is GPT-2, and nothing in the visible code uses it — confirm the intended
# model and name.
bert_tokenizer = AutoTokenizer.from_pretrained('openai-community/gpt2')
def display_next_step_tokens(sentence, step):
    """Return visibility updates for a Textbox and a Radio component.

    Both updates set ``visible`` to whether ``split_selection`` equals
    ``LABEL_RECURSIVE``.

    Args:
        sentence: Not used by the current body.
        step: Not used by the current body.

    Returns:
        A 2-tuple ``(textbox_update, radio_update)``.

    NOTE(review): ``split_selection`` is not a parameter here, so it is looked
    up in the enclosing/module scope, and ``LABEL_RECURSIVE`` is not defined in
    the visible part of this file. The body looks like it belongs to a
    ``change_split_selection(split_selection)`` handler rather than to this
    function — confirm the intended behaviour.
    NOTE(review): component-level ``.update()`` classmethods are presumably
    unavailable on Gradio 4.x — confirm against the installed version.
    """
    # Evaluated in the same order as the returned tuple: Textbox first, Radio second.
    textbox_update = gr.Textbox.update(visible=(split_selection == LABEL_RECURSIVE))
    radio_update = gr.Radio.update(visible=(split_selection == LABEL_RECURSIVE))
    return textbox_update, radio_update
# Build the Gradio UI: a prompt box, two rows of chunking controls, a
# highlighted-text output, and the listeners that connect them. The app is
# launched once the layout has been declared.
# NOTE(review): LABEL_TEXTSPLITTER, LABEL_RECURSIVE, change_split_selection,
# change_preset_separators and chunk are not defined in the visible part of
# this file — confirm they exist before running.
# NOTE(review): the original indentation was lost; the nesting below (two rows
# of three controls each, output and listeners directly under Blocks) is a
# reconstruction — confirm the intended layout.
with gr.Blocks(
    theme=gr.themes.Soft(
        text_size='lg',
        font=["monospace"],
        primary_hue=gr.themes.colors.green,
    )
) as demo:
    text = gr.Textbox(label="Your prompt to start decoding", value="Ok, I")

    # Row 1: splitting method and its separator options.
    with gr.Row():
        split_selection = gr.Dropdown(
            choices=[
                LABEL_TEXTSPLITTER,
                LABEL_RECURSIVE,
            ],
            value=LABEL_RECURSIVE,
            label="Method to split chunks 🍞",
        )
        separators_selection = gr.Textbox(
            elem_id="textbox_id",
            value=["\n\n", "\n", " ", ""],
            info="Separators used in RecursiveCharacterTextSplitter",
            show_label=False,  # or set label to an empty string if you want to keep its space
            visible=True,
        )
        separator_preset_selection = gr.Radio(
            ['Default', 'Python', 'Markdown'],
            label="Choose a preset",
            info="This will apply a specific set of separators to RecursiveCharacterTextSplitter.",
            visible=True,
        )

    # Row 2: how chunk length is measured, the target length, and the overlap.
    with gr.Row():
        length_unit_selection = gr.Dropdown(
            choices=[
                "Character count",
                "Token count (BERT tokens)",
            ],
            value="Character count",
            label="Length function",
            info="How should we measure our chunk lengths?",
        )
        slider_count = gr.Slider(
            50, 500, value=200, step=1, label="Chunk length 📏", info="In the chosen unit."
        )
        chunk_overlap = gr.Slider(
            0, 50, value=10, step=1, label="Overlap between chunks", info="In the chosen unit."
        )

    out = gr.HighlightedText(
        label="Output",
        show_legend=True,
        show_label=False,
        color_map={'Overlap': '#DADADA'},
    )

    # Changing the split method updates the two separator widgets.
    split_selection.change(
        fn=change_split_selection,
        inputs=split_selection,
        outputs=[separators_selection, separator_preset_selection],
    )
    # Picking a preset rewrites the separators textbox.
    separator_preset_selection.change(
        fn=change_preset_separators,
        inputs=separator_preset_selection,
        outputs=separators_selection,
    )

    # Inputs passed to `chunk`, shared by the change listener and the initial
    # page load so the two calls cannot drift apart.
    chunk_inputs = [
        text,
        slider_count,
        split_selection,
        separators_selection,
        length_unit_selection,
        chunk_overlap,
    ]

    # Re-run `chunk` whenever any control that affects the output changes.
    gr.on(
        [
            text.change,
            length_unit_selection.change,
            separators_selection.change,
            split_selection.change,
            slider_count.change,
            chunk_overlap.change,
        ],
        chunk,
        chunk_inputs,
        outputs=out,
    )
    # Run `chunk` once when the page loads so the output is populated.
    demo.load(chunk, inputs=chunk_inputs, outputs=out)

demo.launch()