import time
from typing import Dict, Optional, Tuple

import cv2
import moviepy.editor as mp
import numpy as np
import torch
from PIL import Image
from transformers import pipeline


class VideoProcessor:
    def __init__(self):
        self.depth_estimator = None
        self._load_models()

    def _load_models(self):
        """Load a depth estimation model, falling back to a smaller one."""
        try:
            self.depth_estimator = pipeline(
                "depth-estimation",
                model="Intel/dpt-large",
                device=0 if torch.cuda.is_available() else -1,
            )
        except Exception as e:
            print(f"Warning: Could not load depth estimation model: {e}")
            # Try a smaller fallback model
            try:
                self.depth_estimator = pipeline(
                    "depth-estimation",
                    model="Intel/dpt-hybrid-midas",
                    device=0 if torch.cuda.is_available() else -1,
                )
            except Exception as e2:
                print(f"Warning: Could not load fallback model either: {e2}")
                self.depth_estimator = None

    def _gradient_depth(self, image: np.ndarray) -> np.ndarray:
        """Approximate a depth map from image gradients when no model is available."""
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        depth = np.abs(cv2.Laplacian(gray, cv2.CV_64F))
        depth = cv2.GaussianBlur(depth, (5, 5), 0)
        # Normalize to [0, 1]; the epsilon guards against flat frames
        return (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)

    def estimate_depth(self, image: np.ndarray) -> np.ndarray:
        """Estimate a normalized depth map for a single RGB frame."""
        if self.depth_estimator is None:
            return self._gradient_depth(image)

        try:
            # Frames from moviepy are already RGB, so no channel swap is needed
            pil_image = Image.fromarray(image)
            result = self.depth_estimator(pil_image)
            depth = np.array(result["depth"], dtype=np.float64)
            return (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)
        except Exception as e:
            print(f"Error in depth estimation: {e}")
            # Fall back to gradient-based depth
            return self._gradient_depth(image)

    def create_stereo_pair(self, image: np.ndarray, depth: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Create left- and right-eye views for VR180 by shifting pixels along the disparity map."""
        height, width = image.shape[:2]

        # Disparity is the inverse of depth: near pixels shift more than far ones
        disparity = ((1.0 - depth) * 30).astype(np.int32)  # scale to a max shift of 30 px

        left_view = image.copy()
        right_view = image.copy()

        # Apply the horizontal shift one row at a time (vectorized per row);
        # pixels whose source would fall outside the frame keep their original value
        xs = np.arange(width)
        for y in range(height):
            shift = disparity[y]
            left_src = xs + shift   # left eye samples from the right
            right_src = xs - shift  # right eye samples from the left
            left_view[y] = image[y, np.where(left_src < width, left_src, xs)]
            right_view[y] = image[y, np.where(right_src >= 0, right_src, xs)]

        return left_view, right_view

    def create_vr180_frame(self, left_view: np.ndarray, right_view: np.ndarray) -> np.ndarray:
        """Combine left and right views into a side-by-side VR180 frame."""
        height, width = left_view.shape[:2]
        vr180_frame = np.zeros((height, width * 2, 3), dtype=np.uint8)
        vr180_frame[:, :width] = left_view   # left eye on the left half
        vr180_frame[:, width:] = right_view  # right eye on the right half
        return vr180_frame

    def process_video(self, input_path: str, output_path: str) -> Dict:
        """Convert a 2D video to side-by-side VR180."""
        start_time = time.time()
        out = None

        try:
            video = mp.VideoFileClip(input_path)
            fps = video.fps
            duration = video.duration
            total_frames = int(duration * fps)

            print(f"Processing video: {input_path}")
            print(f"Duration: {duration}s, FPS: {fps}")

            for i, frame in enumerate(video.iter_frames()):
                if i % 10 == 0:  # print progress every 10 frames
                    print(f"Processing frame {i}/{total_frames}")

                frame_array = np.asarray(frame)
                depth = self.estimate_depth(frame_array)
                left_view, right_view = self.create_stereo_pair(frame_array, depth)
                vr180_frame = self.create_vr180_frame(left_view, right_view)

                # Open the writer once the first frame's size is known; writing
                # incrementally avoids holding every processed frame in memory
                if out is None:
                    height, width = vr180_frame.shape[:2]
                    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
                    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

                # moviepy yields RGB frames; OpenCV expects BGR
                out.write(cv2.cvtColor(vr180_frame, cv2.COLOR_RGB2BGR))

            if out is not None:
                out.release()
            video.close()

            return {
                "success": True,
                "processing_time": time.time() - start_time,
                "output_path": output_path,
            }
        except Exception as e:
            if out is not None:
                out.release()
            return {"success": False, "error": str(e)}

    def create_preview_frame(self, input_path: str) -> Optional[np.ndarray]:
        """Create a single VR180 preview frame for the UI."""
        try:
            video = mp.VideoFileClip(input_path)
            frame = video.get_frame(0)  # first frame, as RGB
            video.close()

            frame_array = np.asarray(frame)
            depth = self.estimate_depth(frame_array)
            left_view, right_view = self.create_stereo_pair(frame_array, depth)
            return self.create_vr180_frame(left_view, right_view)
        except Exception as e:
            print(f"Error creating preview: {e}")
            return None
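

# Example usage: a minimal sketch showing the intended entry point. The
# "input.mp4" and "output_vr180.mp4" paths are placeholder assumptions,
# not part of the module.
if __name__ == "__main__":
    processor = VideoProcessor()
    result = processor.process_video("input.mp4", "output_vr180.mp4")
    if result["success"]:
        print(f"Done in {result['processing_time']:.1f}s -> {result['output_path']}")
    else:
        print(f"Failed: {result['error']}")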