EnriqueVega1995 committed on
Commit
8d502d9
1 Parent(s): 470582a

Add image caption

Browse files
Files changed (2) hide show
  1. app.py +29 -13
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,19 +1,35 @@
 
 
1
  import gradio as gr
2
- from diffusers import DDPMPipeline
3
- from transformers import pipeline
4
- import torch
5
 
6
- pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
 
 
7
 
8
- def predict(input_img):
9
- predictions = pipeline(input_img)
10
- return input_img, {p["label"]: p["score"] for p in predictions}
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- gradio_app = gr.Interface(
13
- predict,
14
- inputs=gr.Image(label="Select hot dog candidate", sources=['upload', 'webcam'], type="pil"),
15
- outputs=[gr.Image(label="Processed Image"), gr.Label(label="Result", num_top_classes=2)],
16
- title="Hot Dog? Or Not?",
 
 
 
17
  )
 
18
  if __name__ == "__main__":
19
- gradio_app.launch()
 
1
+ from PIL import Image
2
+ from transformers import BlipProcessor, BlipForConditionalGeneration
3
  import gradio as gr
 
 
 
4
 
5
+ # Initialize the BLIP processor and model
6
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
7
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
8
 
9
+ def generate_captions(image, text=""):
10
+ # Convert the uploaded image to a PIL Image
11
+ raw_image = Image.fromarray(image).convert('RGB')
12
+
13
+ if text: # Conditional image captioning
14
+ inputs = processor(raw_image, text, return_tensors="pt")
15
+ else: # Unconditional image captioning
16
+ inputs = processor(raw_image, return_tensors="pt")
17
+
18
+ # Generate captions for the image
19
+ out = model.generate(**inputs)
20
+ caption = processor.decode(out[0], skip_special_tokens=True)
21
+
22
+ return caption
23
 
24
+ # Gradio interface
25
+ iface = gr.Interface(
26
+ fn=generate_captions,
27
+ inputs=[gr.Image(shape=(512, 512), label="Cargar/Arrastrar Imagen", image_mode='RGB', tool="editor"),
28
+ gr.Textbox(label="Texto Condicional (opcional)", placeholder="Introduce un texto condicional (opcional)...")],
29
+ outputs=gr.Textbox(label="Subtítulo Generado"),
30
+ title="Generador de Subtítulos de Imágenes BLIP",
31
+ description="Esta aplicación genera subtítulos para imágenes cargadas. También puedes proporcionar un texto condicional para guiar la generación del subtítulo."
32
  )
33
+
34
  if __name__ == "__main__":
35
+ iface.launch()
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  diffusers
2
  gradio
3
  torch
4
- transformers
 
 
1
  diffusers
2
  gradio
3
  torch
4
+ transformers
5
+ Pillow