File size: 1,251 Bytes
0c07570
 
87c5008
3bed7b4
0c07570
 
4ebac2c
0c07570
 
 
1136738
 
 
 
 
 
 
 
87c5008
 
b246d57
23b27f7
b246d57
 
0c07570
 
 
d118cf4
b246d57
0e41473
b246d57
0c07570
 
23b27f7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import gradio as gr
from transformers import AutoModel, AutoTokenizer
import numpy as np
import json

# Checkpoint used to turn text into vectors; per the original note it is a
# small model intended to run on CPU.
model_name = "Supabase/gte-small"

# The tokenizer and the model are both loaded from the same checkpoint name so
# that they stay paired with each other.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

def text_to_vector(texts_json: str) -> list:
    """Embed a JSON-encoded batch of sentences and return one flat float list.

    Args:
        texts_json: A JSON document whose top-level value is an array of
            strings, e.g. ``'["first sentence", "second sentence"]'``.

    Returns:
        The model's ``pooler_output`` for the whole batch, flattened to a
        one-dimensional Python list of floats. An empty input array yields
        an empty list without invoking the model.

    Raises:
        ValueError: If ``texts_json`` is not valid JSON, or if the decoded
            value is not an array whose elements are all strings.
    """
    # Keep the try body down to the one call that can raise JSONDecodeError,
    # and chain the cause so the decoder's position info is not lost.
    try:
        texts = json.loads(texts_json)
    except json.JSONDecodeError as err:
        raise ValueError("Invalid JSON format.") from err

    # Reject non-arrays and arrays with non-string members up front, so the
    # caller sees this message rather than an error from inside the tokenizer.
    if not isinstance(texts, list) or not all(isinstance(t, str) for t in texts):
        raise ValueError("Input must be a JSON array of strings.")

    # Nothing to embed: return early instead of handing the tokenizer an
    # empty batch.
    if not texts:
        return []

    # Imported here because the file's import block does not import torch;
    # the model already produces PyTorch tensors (return_tensors="pt").
    import torch

    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    # NOTE(review): the sentence vector is taken from `pooler_output`; confirm
    # that this is the intended pooling for the configured checkpoint.
    vectors = outputs.pooler_output.detach().numpy()  # NumPy array

    # Flatten to 1-D; presumably the rows are per-sentence vectors laid out
    # back to back in input order -- verify against the model's output shape.
    return vectors.reshape(-1).tolist()

# Input widget: a free-text box where the user pastes the JSON array.
json_input = gr.Textbox(
    label="Enter JSON array",
    placeholder="Enter an array of sentences as a JSON string",
)

# Output widget: a ten-line text box that displays the returned float list.
vector_output = gr.Textbox(
    label="Text Vectors (flattened float array)",
    lines=10,
)

# Wire text_to_vector between the two widgets.
demo = gr.Interface(
    fn=text_to_vector,
    inputs=json_input,
    outputs=vector_output,
    title="Batch Text to Vector",
    description="This demo converts an array of sentences to vectors and returns them as a flattened float array.",
)

# Start the app as soon as this module is executed.
demo.launch()