justinkay committed · Commit d442533 · 1 Parent(s): 8a229fe

Remove hf zeroshot bioclip inference

Files changed:
- hf_zeroshot.py (+55 -50)
- iwildcam_demo.pt (+2 -2)
hf_zeroshot.py
CHANGED

@@ -48,8 +48,11 @@ MODELS = [
     "google/siglip2-large-patch16-384",
     "google/siglip2-large-patch16-512",
     "google/siglip2-so400m-patch16-naflex",
-    "imageomics/bioclip",
-    "imageomics/bioclip-2",
+
+    # using bioclip codebase instead
+    # "imageomics/bioclip",
+    # "imageomics/bioclip-2",
+
     "facebook/PE-Core-L14-336",
     "laion/CLIP-ViT-L-14-laion2B-s32B-b82K"
 ]

@@ -73,67 +76,67 @@ def load_demo_annotations():
 
     return image_metadata
 
-def run_bioclip_inference(model_name, image_paths, class_names):
-    """Run zero-shot inference using BioCLIP via OpenCLIP."""
-    if not OPEN_CLIP_AVAILABLE:
-        print("open_clip is not available. Please install it with: pip install open_clip_torch")
-        return None
+# def run_bioclip_inference(model_name, image_paths, class_names):
+#     """Run zero-shot inference using BioCLIP via OpenCLIP."""
+#     if not OPEN_CLIP_AVAILABLE:
+#         print("open_clip is not available. Please install it with: pip install open_clip_torch")
+#         return None
 
-    print(f"Loading BioCLIP model: {model_name}")
-    try:
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+#     print(f"Loading BioCLIP model: {model_name}")
+#     try:
+#         device = "cuda" if torch.cuda.is_available() else "cpu"
 
-        # Load model using OpenCLIP with hf-hub prefix
-        model, _, preprocess = open_clip.create_model_and_transforms(f'hf-hub:{model_name}')
-        model = model.to(device)
-        model.eval()
-        tokenizer = open_clip.get_tokenizer(f'hf-hub:{model_name}')
+#         # Load model using OpenCLIP with hf-hub prefix
+#         model, _, preprocess = open_clip.create_model_and_transforms(f'hf-hub:{model_name}')
+#         model = model.to(device)
+#         model.eval()
+#         tokenizer = open_clip.get_tokenizer(f'hf-hub:{model_name}')
 
-        # Prepare text prompts
-        prompts = [f"a photo of a {class_name.lower()}" for class_name in class_names]
-        text_tokens = tokenizer(prompts).to(device)
+#         # Prepare text prompts
+#         prompts = [f"a photo of a {class_name.lower()}" for class_name in class_names]
+#         text_tokens = tokenizer(prompts).to(device)
 
-        results = {}
+#         results = {}
 
-        with torch.no_grad():
-            # Encode text once
-            text_features = model.encode_text(text_tokens)
-            text_features /= text_features.norm(dim=-1, keepdim=True)
+#         with torch.no_grad():
+#             # Encode text once
+#             text_features = model.encode_text(text_tokens)
+#             text_features /= text_features.norm(dim=-1, keepdim=True)
 
-            for i, image_path in enumerate(image_paths):
-                if i % 10 == 0:
-                    print(f"Processing image {i+1}/{len(image_paths)}: {os.path.basename(image_path)}")
+#             for i, image_path in enumerate(image_paths):
+#                 if i % 10 == 0:
+#                     print(f"Processing image {i+1}/{len(image_paths)}: {os.path.basename(image_path)}")
 
-                try:
-                    image = Image.open(image_path).convert("RGB")
-                    image_tensor = preprocess(image).unsqueeze(0).to(device)
+#                 try:
+#                     image = Image.open(image_path).convert("RGB")
+#                     image_tensor = preprocess(image).unsqueeze(0).to(device)
 
-                    # Encode image
-                    image_features = model.encode_image(image_tensor)
-                    image_features /= image_features.norm(dim=-1, keepdim=True)
+#                     # Encode image
+#                     image_features = model.encode_image(image_tensor)
+#                     image_features /= image_features.norm(dim=-1, keepdim=True)
 
-                    # Calculate similarity and convert to probabilities
-                    similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
-                    probabilities = similarity.squeeze(0).cpu().numpy()
+#                     # Calculate similarity and convert to probabilities
+#                     similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
+#                     probabilities = similarity.squeeze(0).cpu().numpy()
 
-                    scores = {}
-                    for j, class_name in enumerate(class_names):
-                        scores[class_name] = float(probabilities[j])
+#                     scores = {}
+#                     for j, class_name in enumerate(class_names):
+#                         scores[class_name] = float(probabilities[j])
 
-                    results[os.path.basename(image_path)] = scores
+#                     results[os.path.basename(image_path)] = scores
 
-                except Exception as e:
-                    print(f"Error processing {image_path}: {e}")
-                    uniform_prob = 1.0 / len(class_names)
-                    results[os.path.basename(image_path)] = {class_name: uniform_prob for class_name in class_names}
+#                 except Exception as e:
+#                     print(f"Error processing {image_path}: {e}")
+#                     uniform_prob = 1.0 / len(class_names)
+#                     results[os.path.basename(image_path)] = {class_name: uniform_prob for class_name in class_names}
 
-        return results
+#         return results
 
-    except Exception as e:
-        print(f"Error loading BioCLIP: {e}")
-        import traceback
-        traceback.print_exc()
-        return None
+#     except Exception as e:
+#         print(f"Error loading BioCLIP: {e}")
+#         import traceback
+#         traceback.print_exc()
+#         return None
 
 def run_openclip_inference(model_name, image_paths, class_names):
     """Run zero-shot inference using OpenCLIP models."""

@@ -333,7 +336,9 @@ def main():
 
     # Handle different models with appropriate methods
     if model_name in ["imageomics/bioclip", "imageomics/bioclip-2"]:
-        results = run_bioclip_inference(model_name, image_paths, CLASS_NAMES)
+        # results = run_bioclip_inference(model_name, image_paths, CLASS_NAMES)
+        print("Use pybioclip!")
+        return
     elif model_name.startswith("google/siglip"):
         results = run_siglip_inference(model_name, image_paths, CLASS_NAMES)
     elif model_name in ["facebook/PE-Core-L14-336", "laion/CLIP-ViT-L-14-laion2B-s32B-b82K"]:
iwildcam_demo.pt
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:8a9449ef5b30e49bdecca0101e45992795f1650e0bee183cd2bf03dcd0ecfaa5
+size 127187
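
Note: this commit routes the BioCLIP models to the external bioclip codebase ("Use pybioclip!") instead of the removed in-repo OpenCLIP path. A minimal sketch of what that replacement call might look like, assuming pybioclip's CustomLabelsClassifier API (class and method names are taken from the pybioclip README and should be verified; the label list and image path below are placeholders, not values from this repo):

# Sketch only: zero-shot scoring with pybioclip (pip install pybioclip).
# CustomLabelsClassifier / predict are assumed from the pybioclip docs.
from bioclip import CustomLabelsClassifier

class_names = ["deer", "wild boar", "empty"]  # placeholder; use the demo's CLASS_NAMES
classifier = CustomLabelsClassifier(class_names)

# predict() is expected to return one record per label with a softmax score
predictions = classifier.predict("example_image.jpg")  # placeholder path
for p in predictions:
    print(p["classification"], p["score"])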