gmastrapas committed on
Commit
6507fd6
1 Parent(s): 5417f59

docs: update usage snippet

Browse files
Files changed (1) hide show
  1. README.md +10 -2
README.md CHANGED
@@ -51,6 +51,7 @@ You can use Jina CLIP directly from transformers package.
51
  !pip install transformers einops timm
52
  from transformers import AutoModel, AutoTokenizer, AutoImageProcessor
53
  from numpy.linalg import norm
 
54
 
55
  cos_sim = lambda a,b: (a @ b.T) / (norm(a)*norm(b))
56
 
@@ -58,8 +59,15 @@ tokenizer = AutoTokenizer.from_pretrained('jinaai/jina-clip-v1', trust_remote_co
58
  image_processor = AutoImageProcessor.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)
59
  model = AutoModel.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)
60
 
61
- text_embeddings = model.encode_text(['How is the weather today?', 'What is the current weather like today?'])
62
- image_embeddings = model.encode_image(['raindrop.png'])
 
 
 
 
 
 
 
63
 
64
  print(cos_sim(text_embeddings[0], text_embeddings[1])) # text embedding similarity
65
  print(cos_sim(text_embeddings[0], image_embeddings[0])) # text-image cross-modal similarity
 
51
  !pip install transformers einops timm
52
  from transformers import AutoModel, AutoTokenizer, AutoImageProcessor
53
  from numpy.linalg import norm
54
+ from PIL import Image
55
 
56
  cos_sim = lambda a,b: (a @ b.T) / (norm(a)*norm(b))
57
 
 
59
  image_processor = AutoImageProcessor.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)
60
  model = AutoModel.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)
61
 
62
+ text_inputs = tokenizer(
63
+ ['How is the weather today?', 'What is the current weather like today?'],
64
+ return_tensors='pt',
65
+ padding='longest',
66
+ )
67
+ image_inputs = image_processor([Image.open('raindrop.png').load()])
68
+
69
+ text_embeddings = model.get_text_features(text_inputs)
70
+ image_embeddings = model.get_image_features(image_inputs)
71
 
72
  print(cos_sim(text_embeddings[0], text_embeddings[1])) # text embedding similarity
73
  print(cos_sim(text_embeddings[0], image_embeddings[0])) # text-image cross-modal similarity