Jiayi-Pan committed on
Commit
22d22a4
1 Parent(s): 9150377

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch, PIL
2
+ import gradio as gr
3
+
4
# Text shown at the top of the Gradio demo page.
title = "OctoBERT"
description = """Interactive Demo for OctoBERT. This base model is trained only on Flickr-30k."""

# Clickable examples for the demo; each entry is [image URL, caption].
# The captions mark the words to be predicted with "<mask>".
examples = [
    ['https://i.imgur.com/ASXtqVc.jpg', 'The woman stands outdoors, next to a child in a <mask>.'],
    ['https://i.imgur.com/UspUc7t.jpg', 'A woman in blue shorts and white shirt holds a tennis racket on a blue <mask> court.'],
    ['https://i.imgur.com/IRUoMUg.jpg', 'The smiling <mask> is celebrating her <mask> party with friends, surrounded by balloons and a <mask> with candles.'],
    ['https://i.imgur.com/23BZbaH.jpg', 'A person in a rainbow colored snowsuit is snowboarding down a <mask> slope.'],
    ['https://i.imgur.com/hoQCg1h.jpg', 'A man with <mask> plays with a little girl while walking down the street, while an Asian woman walks ahead of them.'],
    ['https://i.imgur.com/dFeN6bs.jpg', 'A black dog stands on a <mask>, green fields behind him.'],
]

# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and its helper callables from the hubconf in the current
# directory. Alternative kept from the original source, loading from the hub:
#   torch.hub.load('Jiayi-Pan/RefCloze_Pub', 'flickr_base_model')
model, img_transform, tokenizer, post_processor, plot_results = torch.hub.load(
    '.', 'flickr_base_model', source='local'
)

# Move the model to the chosen device and switch it to evaluation mode so
# layers such as dropout behave deterministically at inference time.
# NOTE(review): the hub entry point may already call eval(); doing it again
# is harmless because eval() is idempotent.
model = model.to(device).eval()
18
+
19
def plot_inference(img, caption):
    """Run the model on one image/caption pair and build the demo outputs.

    Args:
        img: Input image. The Gradio interface in this file passes a PIL
            image; it is fed to ``img_transform`` and also handed unchanged
            to ``post_processor`` and ``plot_results``.
        caption: Caption text to tokenize. The bundled examples mark the
            words to predict with ``<mask>``.

    Returns:
        A ``(vis, cap)`` tuple: the value returned by ``plot_results`` and
        the ``'cap'`` entry of the post-processed outputs.
        NOTE(review): ``vis`` is presumably a numpy image, since it is
        requested with ``save_path="numpy_array"`` -- confirm against
        ``plot_results``.
    """
    # Add a leading batch dimension of size 1, then place on the model device.
    batch = img_transform(img).unsqueeze(0).to(device)
    encoded = tokenizer(caption, return_tensors="pt").to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        raw = model(batch, encoded, one_pass=True)

    result = post_processor(raw, img, tokenizer)
    rendered = plot_results(img, result, save_path="numpy_array")
    return rendered, result['cap']
27
+
28
+
29
# Build the web demo around plot_inference and start serving it.
gr.Interface(
    fn=plot_inference,
    # Inputs: the image (delivered to the function as a PIL image) and the caption.
    inputs=[
        gr.inputs.Image(type="pil", label="Input"),
        gr.inputs.Textbox(label="input text"),
    ],
    # Outputs: the rendered visualisation and the predicted words.
    outputs=[
        gr.outputs.Image(type="numpy", label="Output"),
        gr.outputs.Textbox(label="Predicted Words"),
    ],
    title=title,
    description=description,
    examples=examples,
    # Cache the outputs for the bundled examples.
    cache_examples=True,
).launch()