#!/usr/bin/env python3 """ Script to clean up references to deleted images from: - iwildcam_demo_annotations.json - iwildcam_demo.pt - iwildcam_demo_labels.pt - images.txt """ import json import os import torch # Get list of existing images image_dir = "iwildcam_demo_images" existing_images = set(os.listdir(image_dir)) print(f"Found {len(existing_images)} existing images") # Read images.txt to get current order with open("images.txt", "r") as f: current_images = [line.strip() for line in f] print(f"Found {len(current_images)} images in images.txt") # Identify which images still exist and their new indices valid_images = [] valid_indices = [] for idx, img in enumerate(current_images): if img in existing_images: valid_images.append(img) valid_indices.append(idx) print(f"Keeping {len(valid_images)} images") print(f"Removing {len(current_images) - len(valid_images)} images") # Update images.txt with open("images.txt", "w") as f: for img in valid_images: f.write(f"{img}\n") print("Updated images.txt") # Load and filter .pt files demo_tensors = torch.load("iwildcam_demo.pt") demo_labels = torch.load("iwildcam_demo_labels.pt") print(f"Original iwildcam_demo.pt shape: {demo_tensors.shape}") print(f"Original iwildcam_demo_labels.pt shape: {demo_labels.shape}") # Filter tensors to only keep valid indices # demo_tensors has shape [3, N, 5] where N is number of images # We need to filter along dimension 1 filtered_demo = demo_tensors[:, valid_indices, :] filtered_labels = demo_labels[valid_indices] # Save filtered tensors torch.save(filtered_demo, "iwildcam_demo.pt") torch.save(filtered_labels, "iwildcam_demo_labels.pt") print(f"Updated iwildcam_demo.pt: {demo_tensors.shape} -> {filtered_demo.shape}") print(f"Updated iwildcam_demo_labels.pt: {demo_labels.shape} -> {filtered_labels.shape}") # Load and filter JSON annotations with open("iwildcam_demo_annotations.json", "r") as f: annotations = json.load(f) # Filter images in JSON if "images" in annotations: original_count = len(annotations["images"]) annotations["images"] = [ img for img in annotations["images"] if img["file_name"] in existing_images ] print(f"Updated JSON images: {original_count} -> {len(annotations['images'])}") # Filter annotations in JSON (if they reference image_id) if "annotations" in annotations: # Build mapping of file_name to image_id for existing images valid_image_ids = {img["id"] for img in annotations["images"]} original_count = len(annotations["annotations"]) annotations["annotations"] = [ ann for ann in annotations["annotations"] if ann["image_id"] in valid_image_ids ] print(f"Updated JSON annotations: {original_count} -> {len(annotations['annotations'])}") # Save updated JSON with open("iwildcam_demo_annotations.json", "w") as f: json.dump(annotations, f, indent=2) print("Updated iwildcam_demo_annotations.json") print("\nCleanup complete!")