broadwell committed on
Commit 62d2147
1 Parent(s): 5954f19

Reorder functions

Files changed (1)
1. app.py +66 -66
app.py CHANGED
@@ -24,6 +24,72 @@ MAX_IMG_HEIGHT = 800
 st.set_page_config(layout="wide")
 
 
+# The `find_best_matches` function compares the text feature vector to the feature vectors of all images and finds the best matches. The function returns the IDs of the best matching images.
+def find_best_matches(text_features, image_features, image_ids):
+    # Compute the similarity between the search query and each image using the Cosine similarity
+    similarities = (image_features @ text_features.T).squeeze(1)
+
+    # Sort the images by their similarity score
+    best_image_idx = (-similarities).argsort()
+
+    # Return the image IDs of the best matches
+    return [[image_ids[i], similarities[i].item()] for i in best_image_idx]
+
+
+# The `encode_search_query` function takes a text description and encodes it into a feature vector using the CLIP model.
+def encode_search_query(search_query, model_type):
+    with torch.no_grad():
+        # Encode and normalize the search query using the multilingual model
+        if model_type == "M-CLIP (multiple languages)":
+            text_encoded = st.session_state.ml_model.forward(
+                search_query, st.session_state.ml_tokenizer
+            )
+            text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
+        else:  # model_type == "J-CLIP (日本語 only)"
+            t_text = st.session_state.ja_tokenizer(
+                search_query, padding=True, return_tensors="pt"
+            )
+            text_encoded = st.session_state.ja_model.get_text_features(**t_text)
+            text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
+
+    # Retrieve the feature vector
+    return text_encoded
+
+
+def clip_search(search_query):
+    if st.session_state.search_field_value != search_query:
+        st.session_state.search_field_value = search_query
+
+    model_type = st.session_state.active_model
+
+    if len(search_query) >= 1:
+        text_features = encode_search_query(search_query, model_type)
+
+        # Compute the similarity between the descrption and each photo using the Cosine similarity
+        # similarities = list((text_features @ photo_features.T).squeeze(0))
+
+        # Sort the photos by their similarity score
+        if model_type == "M-CLIP (multiple languages)":
+            matches = find_best_matches(
+                text_features,
+                st.session_state.ml_image_features,
+                st.session_state.image_ids,
+            )
+        else:  # model_type == "J-CLIP (日本語 only)"
+            matches = find_best_matches(
+                text_features,
+                st.session_state.ja_image_features,
+                st.session_state.image_ids,
+            )
+
+        st.session_state.search_image_ids = [match[0] for match in matches]
+        st.session_state.search_image_scores = {match[0]: match[1] for match in matches}
+
+
+def string_search():
+    clip_search(st.session_state.search_field_value)
+
+
 def load_image_features():
     # Load the image feature vectors
     if st.session_state.vision_mode == "tiled":
@@ -112,72 +178,6 @@ if "images_info" not in st.session_state:
 init()
 
 
-# The `encode_search_query` function takes a text description and encodes it into a feature vector using the CLIP model.
-def encode_search_query(search_query, model_type):
-    with torch.no_grad():
-        # Encode and normalize the search query using the multilingual model
-        if model_type == "M-CLIP (multiple languages)":
-            text_encoded = st.session_state.ml_model.forward(
-                search_query, st.session_state.ml_tokenizer
-            )
-            text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
-        else:  # model_type == "J-CLIP (日本語 only)"
-            t_text = st.session_state.ja_tokenizer(
-                search_query, padding=True, return_tensors="pt"
-            )
-            text_encoded = st.session_state.ja_model.get_text_features(**t_text)
-            text_encoded /= text_encoded.norm(dim=-1, keepdim=True)
-
-    # Retrieve the feature vector
-    return text_encoded
-
-
-# The `find_best_matches` function compares the text feature vector to the feature vectors of all images and finds the best matches. The function returns the IDs of the best matching images.
-def find_best_matches(text_features, image_features, image_ids):
-    # Compute the similarity between the search query and each image using the Cosine similarity
-    similarities = (image_features @ text_features.T).squeeze(1)
-
-    # Sort the images by their similarity score
-    best_image_idx = (-similarities).argsort()
-
-    # Return the image IDs of the best matches
-    return [[image_ids[i], similarities[i].item()] for i in best_image_idx]
-
-
-def clip_search(search_query):
-    if st.session_state.search_field_value != search_query:
-        st.session_state.search_field_value = search_query
-
-    model_type = st.session_state.active_model
-
-    if len(search_query) >= 1:
-        text_features = encode_search_query(search_query, model_type)
-
-        # Compute the similarity between the descrption and each photo using the Cosine similarity
-        # similarities = list((text_features @ photo_features.T).squeeze(0))
-
-        # Sort the photos by their similarity score
-        if model_type == "M-CLIP (multiple languages)":
-            matches = find_best_matches(
-                text_features,
-                st.session_state.ml_image_features,
-                st.session_state.image_ids,
-            )
-        else:  # model_type == "J-CLIP (日本語 only)"
-            matches = find_best_matches(
-                text_features,
-                st.session_state.ja_image_features,
-                st.session_state.image_ids,
-            )
-
-        st.session_state.search_image_ids = [match[0] for match in matches]
-        st.session_state.search_image_scores = {match[0]: match[1] for match in matches}
-
-
-def string_search():
-    clip_search(st.session_state.search_field_value)
-
-
 def visualize_gradcam(viz_image_id):
     if not st.session_state.search_field_value:
         return
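
The moved blocks are byte-identical on both sides of the diff; the commit simply hoists the search helpers (`find_best_matches`, `encode_search_query`, `clip_search`, `string_search`) above `load_image_features`, ahead of their first use. As a quick reference for what those helpers compute, here is a minimal, self-contained sketch of the ranking step, assuming (as the comments in app.py indicate) that image and text features are L2-normalized so the matrix product equals cosine similarity. The shapes, dummy data, and the `rank_images` name are illustrative assumptions, not part of the app.

import torch

# Sketch of the ranking performed by find_best_matches, assuming
# image_features is (N, D) and text_features is (1, D), both L2-normalized.
def rank_images(text_features, image_features, image_ids):
    # Dot product of unit vectors == cosine similarity; shape (N,)
    similarities = (image_features @ text_features.T).squeeze(1)
    # Negate so argsort yields highest-similarity indices first
    order = (-similarities).argsort()
    return [(image_ids[i], similarities[i].item()) for i in order]

# Illustrative dummy data (shapes and IDs assumed, not from app.py)
D = 512
image_features = torch.nn.functional.normalize(torch.randn(8, D), dim=-1)
text_features = torch.nn.functional.normalize(torch.randn(1, D), dim=-1)
image_ids = [f"img_{i}" for i in range(8)]

for image_id, score in rank_images(text_features, image_features, image_ids)[:3]:
    print(image_id, round(score, 3))

Because the vectors are already normalized at encoding time, sorting the raw dot products descending is equivalent to sorting by cosine similarity, which is why `find_best_matches` needs no explicit norm computation.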