#!/usr/bin/env python3
"""
Script to clean up references to deleted images from:
- iwildcam_demo_annotations.json
- iwildcam_demo.pt
- iwildcam_demo_labels.pt
- images.txt
"""

import json
import os
import torch

# Overall flow: read and validate EVERY input first, compute all filtered
# results in memory, and only then write anything back. images.txt defines the
# row order of the .pt files, so rewriting it before the tensors have been
# filtered successfully would leave the files misaligned if a later step fails
# (and a re-run would then compute indices against the wrong list).

# ---------------------------------------------------------------------------
# Phase 1: read inputs
# ---------------------------------------------------------------------------

# File names that are still present in the image directory.
image_dir = "iwildcam_demo_images"
existing_images = set(os.listdir(image_dir))
print(f"Found {len(existing_images)} existing images")

# images.txt lists one image per line; the line position is the row index
# used to select entries from the .pt files below.
with open("images.txt", "r") as f:
    current_images = [line.strip() for line in f]
# Trailing blank lines are not image entries. Dropping them does not shift the
# index of any real entry, so alignment with the tensors is preserved.
while current_images and not current_images[-1]:
    current_images.pop()
print(f"Found {len(current_images)} images in images.txt")

demo_tensors = torch.load("iwildcam_demo.pt")
demo_labels = torch.load("iwildcam_demo_labels.pt")
print(f"Original iwildcam_demo.pt shape: {demo_tensors.shape}")
print(f"Original iwildcam_demo_labels.pt shape: {demo_labels.shape}")

with open("iwildcam_demo_annotations.json", "r") as f:
    annotations = json.load(f)

# ---------------------------------------------------------------------------
# Phase 2: validate and filter in memory (nothing is written yet)
# ---------------------------------------------------------------------------

# Positions (in images.txt order) of the images that still exist on disk.
valid_indices = [
    idx for idx, img in enumerate(current_images) if img in existing_images
]
valid_images = [current_images[idx] for idx in valid_indices]

print(f"Keeping {len(valid_images)} images")
print(f"Removing {len(current_images) - len(valid_images)} images")

# The index-based filtering is only meaningful if the tensors have exactly one
# entry per line of images.txt. demo_tensors is indexed along dimension 1
# (expected shape [3, N, 5]) and demo_labels along dimension 0.
expected_count = len(current_images)
if demo_tensors.shape[1] != expected_count or demo_labels.shape[0] != expected_count:
    raise SystemExit(
        "Refusing to modify files: images.txt lists "
        f"{expected_count} images but iwildcam_demo.pt has shape "
        f"{tuple(demo_tensors.shape)} and iwildcam_demo_labels.pt has shape "
        f"{tuple(demo_labels.shape)}; the files are out of sync."
    )

# Use an explicit int64 index tensor so that an empty selection (every image
# deleted) is well-defined instead of relying on how an empty list is coerced.
index_tensor = torch.tensor(valid_indices, dtype=torch.long)
filtered_demo = demo_tensors[:, index_tensor, :]
filtered_labels = demo_labels[index_tensor]

# Drop JSON image records whose file no longer exists.
if "images" in annotations:
    original_count = len(annotations["images"])
    annotations["images"] = [
        img for img in annotations["images"]
        if img["file_name"] in existing_images
    ]
    print(f"Updated JSON images: {original_count} -> {len(annotations['images'])}")

# Drop JSON annotation records that point at a removed image. This needs the
# "images" list to know which ids survive; without it there is nothing to
# filter against, so the annotations are left untouched rather than crashing.
if "annotations" in annotations and "images" in annotations:
    # Set of ids belonging to the image records that were kept above.
    valid_image_ids = {img["id"] for img in annotations["images"]}
    original_count = len(annotations["annotations"])
    annotations["annotations"] = [
        ann for ann in annotations["annotations"]
        if ann["image_id"] in valid_image_ids
    ]
    print(f"Updated JSON annotations: {original_count} -> {len(annotations['annotations'])}")

# ---------------------------------------------------------------------------
# Phase 3: write results (only reached when every step above succeeded)
# ---------------------------------------------------------------------------

torch.save(filtered_demo, "iwildcam_demo.pt")
torch.save(filtered_labels, "iwildcam_demo_labels.pt")
print(f"Updated iwildcam_demo.pt: {demo_tensors.shape} -> {filtered_demo.shape}")
print(f"Updated iwildcam_demo_labels.pt: {demo_labels.shape} -> {filtered_labels.shape}")

with open("images.txt", "w") as f:
    f.writelines(f"{img}\n" for img in valid_images)
print("Updated images.txt")

with open("iwildcam_demo_annotations.json", "w") as f:
    json.dump(annotations, f, indent=2)
print("Updated iwildcam_demo_annotations.json")

print("\nCleanup complete!")