0xalfroz committed on
Commit
87c5008
1 Parent(s): 0e41473

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -36
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  from transformers import AutoModel, AutoTokenizer
3
- import torch
4
 
5
  # Load a small CPU model for text to vector processing
6
  model_name = "sentence-transformers/all-mpnet-base-v2"
@@ -8,42 +8,18 @@ model = AutoModel.from_pretrained(model_name)
8
  tokenizer = AutoTokenizer.from_pretrained(model_name)
9
 
10
  def text_to_vector(texts):
11
- results = []
12
-
13
- # Process each sentence individually to catch errors
14
- for sentence in texts:
15
- try:
16
- # Tokenize the sentence
17
- inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True)
18
-
19
- # Check if tokenization results in valid tokens
20
- if inputs['input_ids'].shape[1] == 0:
21
- raise ValueError(f"Tokenization failed for sentence: '{sentence}'")
22
-
23
- # Pass through the model
24
- with torch.no_grad():
25
- outputs = model(**inputs)
26
-
27
- # Get the vector from pooler_output or handle errors
28
- if outputs.pooler_output is None:
29
- raise ValueError(f"No vector generated for sentence: '{sentence}'")
30
-
31
- # Convert the vector to a list of floats
32
- vector = outputs.pooler_output.squeeze().numpy().tolist()
33
-
34
- # Append result as sentence and vector pair
35
- results.append({
36
- "sentence": sentence,
37
- "vector": vector
38
- })
39
- except Exception as e:
40
- # Handle any errors for individual sentences
41
- results.append({
42
- "sentence": sentence,
43
- "vector": f"Error: {str(e)}"
44
- })
45
 
46
- return results
47
 
48
  demo = gr.Interface(
49
  fn=text_to_vector,
 
1
  import gradio as gr
2
  from transformers import AutoModel, AutoTokenizer
3
+ import numpy as np
4
 
5
  # Load a small CPU model for text to vector processing
6
  model_name = "sentence-transformers/all-mpnet-base-v2"
 
8
  tokenizer = AutoTokenizer.from_pretrained(model_name)
9
 
10
  def text_to_vector(texts):
11
+ # Tokenize the input array of sentences
12
+ inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
13
+ outputs = model(**inputs)
14
+ vectors = outputs.pooler_output.detach().numpy()
15
+
16
+ # Convert each vector to a string representation and create an object
17
+ result = [
18
+ {"sentence": sentence, "vector": ", ".join(map(str, vector))}
19
+ for sentence, vector in zip(texts, vectors)
20
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
+ return result
23
 
24
  demo = gr.Interface(
25
  fn=text_to_vector,