Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch, PIL
|
2 |
+
import gradio as gr
|
3 |
+
|
4 |
+
# Static text shown at the top of the demo page.
title = "OctoBERT"
description = """Interactive Demo for OctoBERT. This base model is trained only on Flickr-30k."""

# Pre-filled demo inputs. Each row is [image URL, caption]; every caption
# contains one or more literal "<mask>" placeholders.
examples = [
    ['https://i.imgur.com/ASXtqVc.jpg', 'The woman stands outdoors, next to a child in a <mask>.'],
    ['https://i.imgur.com/UspUc7t.jpg', 'A woman in blue shorts and white shirt holds a tennis racket on a blue <mask> court.'],
    ['https://i.imgur.com/IRUoMUg.jpg', 'The smiling <mask> is celebrating her <mask> party with friends, surrounded by balloons and a <mask> with candles.'],
    ['https://i.imgur.com/23BZbaH.jpg', 'A person in a rainbow colored snowsuit is snowboarding down a <mask> slope.'],
    ['https://i.imgur.com/hoQCg1h.jpg', 'A man with <mask> plays with a little girl while walking down the street, while an Asian woman walks ahead of them.'],
    ['https://i.imgur.com/dFeN6bs.jpg', 'A black dog stands on a <mask>, green fields behind him.'],
]
|
14 |
+
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model bundle through the hubconf found in the current directory.
# Alternative remote source, kept for reference:
#   torch.hub.load('Jiayi-Pan/RefCloze_Pub', 'flickr_base_model')
model, img_transform, tokenizer, post_processor, plot_results = torch.hub.load(
    '.', 'flickr_base_model', source='local'
)

# This script only runs inference, so make sure layers such as dropout are in
# evaluation mode. NOTE(review): the hub entry point may already do this;
# calling eval() again is harmless.
model = model.to(device).eval()
|
18 |
+
|
19 |
+
def plot_inference(img, caption):
    """Run the model on one image/caption pair and return the results to display.

    Args:
        img: Input image. The Gradio input component in this file is
            configured with ``type="pil"``, so a PIL image is expected.
        caption: Caption text passed to the tokenizer. The bundled examples
            use ``<mask>`` placeholders for the words to fill in.

    Returns:
        A ``(vis, words)`` tuple. ``vis`` is whatever ``plot_results`` returns
        when called with ``save_path="numpy_array"`` (presumably an image
        array, since it feeds a ``type="numpy"`` image output -- confirm
        against the hub code). ``words`` is ``processed_outputs['cap']``.

    Raises:
        ValueError: If ``img`` is ``None``, e.g. the form was submitted
            without an image.
    """
    # Fail early with a clear message instead of an obscure error from
    # inside the image transform.
    if img is None:
        raise ValueError("Please provide an input image.")

    # unsqueeze(0) adds a leading (batch) dimension for the single image.
    imgs_tensor = img_transform(img).to(device).unsqueeze(0)
    tokens_tensor = tokenizer(caption, return_tensors="pt").to(device)

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(imgs_tensor, tokens_tensor, one_pass=True)

    processed_outputs = post_processor(outputs, img, tokenizer)
    vis = plot_results(img, processed_outputs, save_path="numpy_array")
    return vis, processed_outputs['cap']
|
27 |
+
|
28 |
+
|
29 |
+
# Build the Gradio UI: an image plus a caption in, an image plus the
# predicted words out. The top-level gr.Image / gr.Textbox components replace
# the deprecated gr.inputs.* / gr.outputs.* namespaces (removed in Gradio 4).
demo = gr.Interface(
    plot_inference,
    [gr.Image(type="pil", label="Input"), gr.Textbox(label="input text")],
    [gr.Image(type="numpy", label="Output"), gr.Textbox(label="Predicted Words")],
    title=title,
    description=description,
    examples=examples,
    # Ask Gradio to cache the outputs for the bundled examples.
    cache_examples=True,
)

# Start the web server as soon as the script is executed.
demo.launch()
|