jingwora committed on
Commit
8f5911a
1 Parent(s): b8ba80c

Add application file

Browse files
Files changed (2) hide show
  1. app.py +56 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from transformers import pipeline
3
+ from torch import Tensor
4
+ from transformers import AutoTokenizer, AutoModel
5
+ from torch.nn.functional import cosine_similarity
6
+ import gradio as gr
7
+
8
def average_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Mean-pool token embeddings over dim 1, ignoring masked-out positions.

    Args:
        last_hidden_states: Per-token hidden states; presumably shaped
            (batch, seq_len, hidden), as produced by a transformer encoder.
        attention_mask: Mask with one entry per token (presumably shaped
            (batch, seq_len)); non-zero marks a real token, zero marks padding.

    Returns:
        One pooled vector per batch row: the sum of the unmasked token
        vectors divided by the number of unmasked tokens. A row whose mask
        is entirely zero yields a zero vector instead of NaN.
    """
    keep = attention_mask[..., None].bool()
    # Zero out padded positions so they do not contribute to the sum.
    masked_hidden = last_hidden_states.masked_fill(~keep, 0.0)
    # Clamp the token count to at least 1 so an all-padding row does not
    # trigger a 0/0 division (which would propagate NaN downstream).
    token_counts = attention_mask.sum(dim=1)[..., None].clamp(min=1)
    return masked_hidden.sum(dim=1) / token_counts
11
+
12
def get_similarity(sentence1, sentence2):
    """Return the cosine similarity between the embeddings of two sentences.

    Both sentences are tokenized together (truncated to 512 tokens), encoded
    with the module-level ``model``, mean-pooled with ``average_pool`` and
    compared with cosine similarity.

    Args:
        sentence1: First input text.
        sentence2: Second input text.

    Returns:
        The cosine similarity as a Python float rounded to 4 decimal places.
    """
    # Function-scope import: the file only imports individual names from
    # torch, and the no_grad context manager lives on the package itself.
    import torch

    # E5 checkpoints are trained with a role prefix on every input; the model
    # card prescribes "query: " on both sides for symmetric tasks such as
    # semantic similarity. Omitting it degrades embedding quality.
    input_texts = [f"query: {sentence1}", f"query: {sentence2}"]

    # Tokenize and compute embeddings
    batch_dict = tokenizer(
        input_texts,
        max_length=512,
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**batch_dict)

    embeddings = average_pool(outputs.last_hidden_state, batch_dict["attention_mask"])
    similarity = cosine_similarity(embeddings[0].unsqueeze(0), embeddings[1].unsqueeze(0))
    return round(similarity.item(), 4)
21
+
22
# Hugging Face Hub checkpoint used for both the tokenizer and the encoder.
checkpoint = "intfloat/multilingual-e5-large"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

demo = gr.Blocks(theme="freddyaboulton/dracula_revamped")

with demo:
    gr.Markdown("# Sentence Similarity")
    gr.Markdown("Task: Sentence Similarity (Multilingual)")
    gr.Markdown("Model: https://huggingface.co/intfloat/multilingual-e5-large ")

    p_txt1 = gr.Textbox(placeholder="Enter passage 1", label="Passage 1")
    p_txt2 = gr.Textbox(placeholder="Enter passage 2", label="Passage 2")
    submit = gr.Button("Submit")
    # get_similarity returns a rounded cosine similarity, which lies in
    # [-1, 1]; the label states that range rather than a 0-10 scale.
    o_txt = gr.Textbox(
        placeholder="Similarity score",
        lines=1,
        interactive=False,
        label="Similarity score (-1 to 1)",
    )

    # Clickable sample pairs: English, Japanese and one cross-lingual pair.
    gr.Examples(
        [
            ["A big bus is running on the road in the city.", "There is a big bus running on the road."],
            ["A big bus is running on the road in the city.", "Two children in costumes are standing on the bed."],
            ["街中の道路を大きなバスが走っています。", "道路を大きなバスが走っています。"],
            ["街中の道路を大きなバスが走っています。", "ベッドの上で衣装を着た二人の子供が立っています。"],
            ["A big bus is running on the road in the city.", "道路を大きなバスが走っています。"],
        ],
        inputs=[p_txt1, p_txt2],
    )

    # Wire the button: both passages in, similarity score out.
    submit.click(
        get_similarity,
        [p_txt1, p_txt2],
        o_txt,
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==3.36.1
2
+ transformers==4.30.2
3
+ torch==2.0.1