-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclip.py
36 lines (28 loc) · 1.23 KB
/
clip.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import torch
import clip
from PIL import Image
# Zero-shot tiger check: score one image against two text prompts with CLIP
# and report which prompt matches better.
#
# NOTE(review): the scraped page header names this file clip.py. If it is
# saved under that name, `import clip` resolves to this script instead of the
# installed package and `clip.load` fails - confirm the file name and rename.

# Load the model, preferring the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# Load and preprocess the image.
image_path = "path_to_your_image.jpg"  # Change this to the path of your image
# The context manager releases the file handle as soon as the pixels have been
# converted; unsqueeze(0) adds a leading dimension of size 1 (presumably the
# batch axis expected by the encoder).
with Image.open(image_path) as raw_image:
    image = preprocess(raw_image).unsqueeze(0).to(device)

# Prepare text descriptions. Index 0 is the "tiger" prompt and index 1 the
# contrast prompt; the reporting code below relies on this order.
text_descriptions = ["a photo of a tiger", "a photo of a random object"]
text_tokens = clip.tokenize(text_descriptions).to(device)

# Compute image and text features. Inference only, so gradient tracking is
# switched off for the two encoder calls.
with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text_tokens)

# Compare embeddings: scale each feature vector to unit length so the matrix
# product below is a cosine similarity, then softmax across the prompts.
image_features /= image_features.norm(dim=-1, keepdim=True)
text_features /= text_features.norm(dim=-1, keepdim=True)
similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
probabilities = similarity[0].cpu().numpy()

# Determine if there is a tiger in the image.
# The softmax runs over the two prompts only, so these values sum to 1 and
# express which prompt fits better, not an absolute likelihood of a tiger.
print(f"Probability of being a tiger: {probabilities[0]:.2f}")
print(f"Probability of being a random object: {probabilities[1]:.2f}")
if probabilities[0] > probabilities[1]:
    print("The image likely contains a tiger.")
else:
    print("The image likely does not contain a tiger.")