broadwell committed
Commit e547128
Parent: b56a0e7

Upload 6 files

Files changed (5)
  1. app.py +173 -0
  2. image_features.npy +3 -0
  3. images_list.txt +0 -0
  4. metadata.csv +0 -0
  5. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,173 @@
+ #!/usr/bin/env python
+ # coding: utf-8
+
+ # Norsk (Multilingual) Image Search
+ #
+ # Based on [Unsplash Image Search](https://github.com/haltakov/natural-language-image-search)
+ # by [Vladimir Haltakov](https://twitter.com/haltakov).
+
+ # In[ ]:
+
+
+ import clip
+ import gradio as gr
+ from multilingual_clip import pt_multilingual_clip, legacy_multilingual_clip
+ import numpy as np
+ import os
+ import pandas as pd
+ from PIL import Image
+ import requests
+ import torch
+ from transformers import AutoTokenizer
+
+
+ # In[ ]:
+
+
+ # Load the multilingual CLIP text model and its tokenizer
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ model_name = "M-CLIP/XLM-Roberta-Large-Vit-B-16Plus"
+
+ model = pt_multilingual_clip.MultilingualCLIP.from_pretrained(model_name)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+
+ # In[ ]:
+
+
+ # Load the image metadata and the image IDs
+ images_info = pd.read_csv("./metadata.csv")
+ image_ids = list(
+     open("./images_list.txt", "r", encoding="utf-8").read().strip().split("\n")
+ )
+
+ # Load the precomputed image feature vectors
+ image_features = np.load("./image_features.npy")
+
+ # Convert features to tensors: float32 on CPU and float16 on GPU
+ if device == "cpu":
+     image_features = torch.from_numpy(image_features).float().to(device)
+ else:
+     image_features = torch.from_numpy(image_features).to(device)
+
+ image_features = image_features / image_features.norm(dim=-1, keepdim=True)
+
+ # ## Define Functions
+ #
+ # Some important functions for processing the data are defined here.
+ #
+ #
+
+ # The `encode_search_query` function takes a text description and encodes it into a feature vector using the multilingual CLIP text encoder.
+
+ # In[ ]:
+
+
+ def encode_search_query(search_query):
+     with torch.no_grad():
+         # Encode and normalize the search query using the multilingual model
+         text_encoded = model.forward(search_query, tokenizer)
+         text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
+
+     # Retrieve the feature vector
+     return text_encoded
+
+
+ # The `find_best_matches` function compares the text feature vector to the feature vectors of all images and finds the best matches. The function returns the IDs of the best matching images.
+
+ # In[ ]:
+
+
+ def find_best_matches(text_features, image_features, image_ids, results_count=3):
+     # Compute the cosine similarity between the search query and each image
+     similarities = (image_features @ text_features.T).squeeze(1)
+
+     # Sort the images by their similarity score
+     best_image_idx = (-similarities).argsort()
+
+     # Return the image IDs of the best matches
+     return [
+         [image_ids[i], similarities[i].item()] for i in best_image_idx[:results_count]
+     ]
+
+
+ # In[ ]:
+
+
+ def clip_search(search_query):
+     if len(search_query) >= 3:
+         text_features = encode_search_query(search_query)
+
+         # Compute the cosine similarity between the description and each photo
+         # similarities = list((text_features @ photo_features.T).squeeze(0))
+
+         # Sort the photos by their similarity score
+         matches = find_best_matches(
+             text_features, image_features, image_ids, results_count=15
+         )
+
+         images = []
+         for i in range(15):
+             # Retrieve the photo ID
+             image_id = matches[i][0]
+             image_url = images_info[images_info["filename"] == image_id][
+                 "image_url"
+             ].values[0]
+
+             # response = requests.get(image_url)
+             # img = PIL.open(response.raw)
+
+             images.append(
+                 [
+                     (image_url),
+                     images_info[images_info["filename"] == image_id][
+                         "permalink"
+                     ].values[0],
+                 ]
+             )
+
+         # print(images)
+         return images
+
+
+ css = (
+     "footer {display: none !important;} .gradio-container {min-height: 0px !important;}"
+ )
+ with gr.Blocks(css=css) as gr_app:
+     with gr.Column(variant="panel"):
+         with gr.Row(variant="compact"):
+             search_string = gr.Textbox(
+                 label="Evocative Search",
+                 show_label=True,
+                 max_lines=1,
+                 placeholder="Type something, or click a suggested search below.",
+             ).style(
+                 container=False,
+             )
+             btn = gr.Button("Search", variant="primary").style(full_width=False)
+         with gr.Row(variant="compact"):
+             suggest1 = gr.Button(
+                 "två hundar som leker i snön", variant="secondary"
+             ).style(size="sm")
+             suggest2 = gr.Button(
+                 "en fisker til sjøs i en båt", variant="secondary"
+             ).style(size="sm")
+             suggest3 = gr.Button(
+                 "cold dark alone on the street", variant="secondary"
+             ).style(size="sm")
+             suggest4 = gr.Button("도로 위의 자동차들", variant="secondary").style(size="sm")
+     gallery = gr.Gallery(label=False, show_label=False, elem_id="gallery").style(
+         grid=[6],
+         height="100%",
+     )
+
+     suggest1.click(clip_search, inputs=suggest1, outputs=gallery)
+     suggest2.click(clip_search, inputs=suggest2, outputs=gallery)
+     suggest3.click(clip_search, inputs=suggest3, outputs=gallery)
+     suggest4.click(clip_search, inputs=suggest4, outputs=gallery)
+     btn.click(clip_search, inputs=search_string, outputs=gallery)
+     search_string.submit(clip_search, search_string, gallery)
+
+ if __name__ == "__main__":
+     gr_app.launch(share=True)
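
The suggested-search buttons are deliberately multilingual, matching the app's premise: "två hundar som leker i snön" is Swedish for "two dogs playing in the snow", "en fisker til sjøs i en båt" is Norwegian for "a fisherman at sea in a boat", and "도로 위의 자동차들" is Korean for "cars on the road"; the third suggestion is already in English.
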
image_features.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:374756fa5293ce32e31d9b9681c53b374d3024adbfb1c6aaa4791aa3937fec40
+ size 51210368
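
image_features.npy is stored through Git LFS, so only the pointer shows up in the diff, and the script that produced it is not part of this commit. Judging from how app.py consumes it (one row per entry in images_list.txt, kept as float16 on GPU and normalized after loading), the 51 MB size is consistent with roughly 40,000 640-dimensional float16 vectors, most likely from the image tower that M-CLIP documents as the counterpart of XLM-Roberta-Large-Vit-B-16Plus: OpenCLIP's ViT-B-16-plus-240 (laion400m_e32). A minimal preprocessing sketch under those assumptions (the ./images/ directory and the open_clip dependency are illustrative and not part of this repo):

# Hypothetical preprocessing sketch; not part of this commit.
# Assumes the raw images sit in ./images/ under the filenames listed in images_list.txt.
import numpy as np
import open_clip
import torch
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"

# Image tower that M-CLIP pairs with the XLM-Roberta-Large-Vit-B-16Plus text encoder
model, _, preprocess = open_clip.create_model_and_transforms(
    "ViT-B-16-plus-240", pretrained="laion400m_e32"
)
model = model.to(device).eval()

image_ids = open("./images_list.txt", encoding="utf-8").read().strip().split("\n")

features = []
with torch.no_grad():
    for image_id in image_ids:
        image = preprocess(Image.open(f"./images/{image_id}")).unsqueeze(0).to(device)
        features.append(model.encode_image(image).half().cpu())

# app.py normalizes the vectors after loading, so they are saved un-normalized here
np.save("./image_features.npy", torch.cat(features).numpy())
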
images_list.txt ADDED
The diff for this file is too large to render. See raw diff
 
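Based on how app.py reads it, images_list.txt is a plain newline-separated list of image filenames (one per line, e.g. something like image_0001.jpg) whose order matches the rows of image_features.npy and whose entries are looked up in the filename column of metadata.csv.
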
metadata.csv ADDED
The diff for this file is too large to render. See raw diff
 
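metadata.csv is likewise only available as a raw diff, but app.py depends on three of its columns: filename (matched against the entries of images_list.txt), image_url (the image shown in the gallery), and permalink (used as that gallery item's caption). A hypothetical row, with illustrative values only:

filename,image_url,permalink
image_0001.jpg,https://example.org/full/image_0001.jpg,https://example.org/items/0001
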
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ clip==1.0
+ gradio==3.27.0
+ multilingual_clip==1.0.10
+ numpy==1.23.5
+ pandas==2.0.1
+ Pillow==9.5.0
+ Requests==2.28.2
+ torch==1.13.1
+ transformers==4.19.2
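
The gradio==3.27.0 pin matters here: app.py relies on the Gradio 3.x-era .style() calls and the grid= option on the Gallery, both of which later Gradio releases removed. With these pins installed (pip install -r requirements.txt), python app.py runs the __main__ block at the bottom of app.py, and gr_app.launch(share=True) serves the interface locally while also printing a temporary public share link.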