Spaces:
Running
Running
Upload 7 files
Browse files- app.py +66 -0
- bird.jpg +0 -0
- cat.jpg +0 -0
- deeplabv3.tflite +3 -0
- dog.jpg +0 -0
- requirements.txt +9 -0
- style.css +53 -0
app.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from PIL import Image
|
3 |
+
import gradio as gr
|
4 |
+
from transformers import ViTImageProcessor, ViTForImageClassification
|
5 |
+
import torch
|
6 |
+
|
7 |
+
# Load the pretrained ViT checkpoint once at import time so the Gradio
# callbacks can reuse the same processor/model for every request.
_CHECKPOINT = 'google/vit-base-patch16-224'
processor = ViTImageProcessor.from_pretrained(_CHECKPOINT)
model = ViTForImageClassification.from_pretrained(_CHECKPOINT)
|
10 |
+
|
11 |
+
# Function to classify an image and return top predictions with probabilities
def classify_image(image, top_k=3):
    """Run the ViT classifier on *image* and return its top predictions.

    Args:
        image: A PIL image (or anything the ViT processor accepts).
        top_k: Number of predictions to return. Defaults to 3, matching
            the original behaviour.

    Returns:
        dict: Mapping of class label -> softmax probability for the top-k
        classes, or ``{"Error": message}`` if classification fails.
    """
    try:
        inputs = processor(images=image, return_tensors="pt")
        # Inference only: no_grad skips autograd bookkeeping, saving time
        # and memory on every request.
        with torch.no_grad():
            outputs = model(**inputs)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)[0]
        top_prob, top_catid = torch.topk(probabilities, top_k)
        return {
            model.config.id2label[catid.item()]: prob.item()
            for prob, catid in zip(top_prob, top_catid)
        }
    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing
        # the Gradio callback.
        return {"Error": str(e)}
|
26 |
+
|
27 |
+
# Landing copy shown at the top of the app. NOTE: google/vit-base-patch16-224
# is a supervised ImageNet-1k classifier, not a zero-shot model, so the
# heading must not advertise "zero shot".
INTRO_TEXT = """
# 🎨 Image Classification with ViT! 🖼️

<span style="font-size: 20px;">
Upload an image and let this model predict what it is! Who knows, maybe it will surprise you! 🕵️♂️
</span>

Here are some links you might find interesting:
- [GitHub](https://github.com/google-research/vision_transformer)
- [Hugging Face Model](https://huggingface.co/google/vit-base-patch16-224)
- [Google Research Paper](https://arxiv.org/abs/2010.11929)
"""
|
39 |
+
|
40 |
+
def create_app():
    """Build the Gradio Blocks UI.

    Layout: intro markdown, a row with an image input and a label output,
    and clickable example images. Classification runs automatically
    whenever the input image changes.

    Returns:
        gr.Blocks: The assembled (not yet launched) demo.
    """
    with gr.Blocks() as demo:
        gr.Markdown(INTRO_TEXT)

        with gr.Row():
            image_input = gr.Image(type="pil", label="Upload your image here!")
            label_output = gr.Label(label="Top Predictions")

        # Wire the classifier directly: the previous process_image wrapper
        # only forwarded its argument and added nothing.
        image_input.change(classify_image, inputs=image_input, outputs=label_output)

        gr.Markdown("## Example Images")
        # NOTE(review): examples only populate the input; the .change event
        # above then triggers classification. Paths are relative to the app
        # root — confirm the jpg files ship alongside app.py.
        gr.Examples(
            examples=[
                ["cat.jpg"],
                ["dog.jpg"],
                ["bird.jpg"],
            ],
            inputs=image_input,
        )

    return demo
|
64 |
+
|
65 |
+
if __name__ == "__main__":
    # Build the UI, then serve it. share=True also requests a temporary
    # public gradio.live link (NOTE(review): may be ignored on managed
    # hosts such as HF Spaces — confirm).
    demo = create_app()
    demo.launch(share=True)
|
bird.jpg
ADDED
cat.jpg
ADDED
deeplabv3.tflite
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff36e24d40547fe9e645e2f4e8745d1876d6e38b332d39a82f0bf0f5d1d561b3
|
3 |
+
size 2780176
|
dog.jpg
ADDED
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accelerate==0.28.0
bitsandbytes==0.43.0
gradio==4.28.2
Pillow==10.3.0
requests==2.31.0
scipy==1.12.0
sentencepiece==0.2.0
spaces==0.26.2
torch==2.1.1
transformers==4.40.1
tokenizers==0.19.1
|
style.css
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* Global page defaults. */
body {
    font-family: 'Arial', sans-serif;
    background-color: #f9f9f9;
    color: #333;
    margin: 0;
    padding: 0;
}

/* Horizontal breathing room for all text elements. */
h1, h2, h3, p {
    padding: 0 20px;
}

h1 {
    text-align: center;
    color: #4a54f1; /* Soft blue for headers */
    font-size: 24px;
    margin-top: 20px;
    margin-bottom: 10px;
}

button {
    background-color: #4a54f1; /* Matching the header */
    border: none;
    color: white;
    padding: 10px 20px;
    text-align: center;
    font-size: 16px;
    border-radius: 5px;
    cursor: pointer;
    transition: background-color 0.3s ease;
}

/* Darken on hover for affordance. */
button:hover {
    background-color: #363b8c;
}

/* Main content card. */
.contain {
    max-width: 900px;
    margin: auto;
    padding: 1.5rem;
    background-color: #ffffff;
    box-shadow: 0 4px 8px rgba(0,0,0,0.1);
    border-radius: 8px;
}

/* NOTE(review): targets a chat widget, but this app has no chat
   component — confirm whether this rule is still needed. */
.gradio-chat {
    border-radius: 8px;
    box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}

input[type="range"]:hover {
    opacity: 1;
}
|