import time
from typing import Dict, Optional, Tuple

import cv2
import moviepy.editor as mp
import numpy as np
import torch
from PIL import Image
from transformers import pipeline


class VideoProcessor:
    def __init__(self):
        self.depth_estimator = None
        self._load_models()

    def _load_models(self):
        """Load a depth estimation model, falling back to a smaller one."""
        try:
            self.depth_estimator = pipeline(
                "depth-estimation",
                model="Intel/dpt-large",
                device=0 if torch.cuda.is_available() else -1,
            )
        except Exception as e:
            print(f"Warning: Could not load depth estimation model: {e}")
            # Try a smaller fallback model
            try:
                self.depth_estimator = pipeline(
                    "depth-estimation",
                    model="Intel/dpt-hybrid-midas",
                    device=0 if torch.cuda.is_available() else -1,
                )
            except Exception as e2:
                print(f"Warning: Could not load fallback model either: {e2}")
                self.depth_estimator = None

    def _gradient_depth(self, image: np.ndarray) -> np.ndarray:
        """Approximate a depth map from image gradients when no model is available."""
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        depth = np.abs(cv2.Laplacian(gray, cv2.CV_64F))
        depth = cv2.GaussianBlur(depth, (5, 5), 0)
        # Normalize to [0, 1]; the epsilon guards against flat frames
        return (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)

    def estimate_depth(self, image: np.ndarray) -> np.ndarray:
        """Estimate a normalized depth map for a single RGB frame."""
        if self.depth_estimator is None:
            return self._gradient_depth(image)

        try:
            # Frames from moviepy are already RGB, so no channel swap is needed
            pil_image = Image.fromarray(image)
            result = self.depth_estimator(pil_image)
            depth = np.array(result["depth"], dtype=np.float64)
            return (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)
        except Exception as e:
            print(f"Error in depth estimation: {e}")
            # Fall back to gradient-based depth
            return self._gradient_depth(image)

    def create_stereo_pair(self, image: np.ndarray, depth: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Create left- and right-eye views for VR180 by shifting pixels along the disparity map."""
        height, width = image.shape[:2]

        # Disparity is the inverse of depth: near pixels shift more than far ones
        disparity = ((1.0 - depth) * 30).astype(np.int32)  # scale to a max shift of 30 px

        left_view = image.copy()
        right_view = image.copy()

        # Apply the horizontal shift one row at a time (vectorized per row);
        # pixels whose source would fall outside the frame keep their original value
        xs = np.arange(width)
        for y in range(height):
            shift = disparity[y]
            left_src = xs + shift   # left eye samples from the right
            right_src = xs - shift  # right eye samples from the left
            left_view[y] = image[y, np.where(left_src < width, left_src, xs)]
            right_view[y] = image[y, np.where(right_src >= 0, right_src, xs)]

        return left_view, right_view

    def create_vr180_frame(self, left_view: np.ndarray, right_view: np.ndarray) -> np.ndarray:
        """Combine left and right views into a side-by-side VR180 frame."""
        height, width = left_view.shape[:2]
        vr180_frame = np.zeros((height, width * 2, 3), dtype=np.uint8)
        vr180_frame[:, :width] = left_view   # left eye on the left half
        vr180_frame[:, width:] = right_view  # right eye on the right half
        return vr180_frame

    def process_video(self, input_path: str, output_path: str) -> Dict:
        """Convert a 2D video to side-by-side VR180."""
        start_time = time.time()
        out = None

        try:
            video = mp.VideoFileClip(input_path)
            fps = video.fps
            duration = video.duration
            total_frames = int(duration * fps)

            print(f"Processing video: {input_path}")
            print(f"Duration: {duration}s, FPS: {fps}")

            for i, frame in enumerate(video.iter_frames()):
                if i % 10 == 0:  # print progress every 10 frames
                    print(f"Processing frame {i}/{total_frames}")

                frame_array = np.asarray(frame)
                depth = self.estimate_depth(frame_array)
                left_view, right_view = self.create_stereo_pair(frame_array, depth)
                vr180_frame = self.create_vr180_frame(left_view, right_view)

                # Open the writer once the first frame's size is known; writing
                # incrementally avoids holding every processed frame in memory
                if out is None:
                    height, width = vr180_frame.shape[:2]
                    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
                    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

                # moviepy yields RGB frames; OpenCV expects BGR
                out.write(cv2.cvtColor(vr180_frame, cv2.COLOR_RGB2BGR))

            if out is not None:
                out.release()
            video.close()

            return {
                "success": True,
                "processing_time": time.time() - start_time,
                "output_path": output_path,
            }
        except Exception as e:
            if out is not None:
                out.release()
            return {"success": False, "error": str(e)}

    def create_preview_frame(self, input_path: str) -> Optional[np.ndarray]:
        """Create a single VR180 preview frame for the UI."""
        try:
            video = mp.VideoFileClip(input_path)
            frame = video.get_frame(0)  # first frame, as RGB
            video.close()

            frame_array = np.asarray(frame)
            depth = self.estimate_depth(frame_array)
            left_view, right_view = self.create_stereo_pair(frame_array, depth)
            return self.create_vr180_frame(left_view, right_view)
        except Exception as e:
            print(f"Error creating preview: {e}")
            return None
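

# Example usage: a minimal sketch showing the intended entry point. The
# "input.mp4" and "output_vr180.mp4" paths are placeholder assumptions,
# not part of the module.
if __name__ == "__main__":
    processor = VideoProcessor()
    result = processor.process_video("input.mp4", "output_vr180.mp4")
    if result["success"]:
        print(f"Done in {result['processing_time']:.1f}s -> {result['output_path']}")
    else:
        print(f"Failed: {result['error']}")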