EnriqueVega1995 committed on
Commit
07c0ff9
1 Parent(s): 9c172f6

Added image captioning project

Browse files
Files changed (2) hide show
  1. app.py +37 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import gradio as gr

# Single source of truth for the checkpoint name: the processor and the model
# must come from the same checkpoint, so the id is defined exactly once.
MODEL_ID = "Salesforce/blip-image-captioning-large"

# Initialization of the BLIP processor and model.
# NOTE: from_pretrained downloads the weights on first run, so module import
# can take a while and requires network access the first time.
processor = BlipProcessor.from_pretrained(MODEL_ID)
model = BlipForConditionalGeneration.from_pretrained(MODEL_ID)
9
def generate_captions(image, text=""):
    """Generate a caption for *image* with BLIP.

    Parameters:
        image: numpy array as delivered by the Gradio ``Image`` component,
            or ``None`` when the user submitted without uploading an image.
        text: optional conditioning prefix; when non-empty, the caption is
            generated as a continuation of this text.

    Returns:
        The generated caption string, or a short instruction string when no
        image was provided.
    """
    # Gradio passes None when the form is submitted with no image uploaded;
    # Image.fromarray(None) would raise, so fail gracefully instead.
    if image is None:
        return "Please upload an image first."

    # Convert the uploaded image to PIL Image (BLIP expects an RGB image).
    raw_image = Image.fromarray(image).convert('RGB')

    if text:  # Conditional image captioning
        inputs = processor(raw_image, text, return_tensors="pt")
    else:  # Unconditional image captioning
        inputs = processor(raw_image, return_tensors="pt")

    # Generate token ids for the caption, then decode them to plain text.
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)

    return caption
23
+
24
# Gradio UI wiring: two inputs (image plus optional conditioning text),
# one text output showing the generated caption.
_image_input = gr.Image(label="Upload/Drag Image")  # Removed the 'tool' argument
_text_input = gr.Textbox(
    label="Conditional Text (optional)",
    placeholder="Enter conditional text (optional)...",
)
_caption_output = gr.Textbox(label="Generated Caption")

iface = gr.Interface(
    fn=generate_captions,
    inputs=[_image_input, _text_input],
    outputs=_caption_output,
    title="BLIP Image Caption Generator",
    description="This app generates captions for uploaded images. You can also provide conditional text to guide the caption generation.",
)

# Start the web server only when run as a script (not on import).
if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ diffusers
2
+ gradio
3
+ torch
4
+ transformers
5
+ Pillow