Spaces:

Prithwis
/

vr180-converter

Runtime error

App Files Files Community

Prithwis committed on Sep 8, 2025

Commit

eba4376

verified ·

1 Parent(s): ed6d04b

Force update backend\video_processor.py - 1757355553

Browse files

Files changed (1) hide show

backend/video_processor.py +195 -195

backend/video_processor.py CHANGED Viewed

@@ -1,195 +1,195 @@
-import cv2
-import numpy as np
-from PIL import Image
-import moviepy.editor as mp
-import time
-import os
-from typing import Dict, Tuple
-import torch
-from transformers import pipeline
-class VideoProcessor:
-    def __init__(self):
-        self.depth_estimator = None
-        self._load_models()
-    def _load_models(self):
-        """Load depth estimation model"""
-        try:
-            # Use a lightweight depth estimation model
-            self.depth_estimator = pipeline(
-                "depth-estimation",
-                model="Intel/dpt-large",
-                device=0 if torch.cuda.is_available() else -1
-            )
-        except Exception as e:
-            print(f"Warning: Could not load depth estimation model: {e}")
-            # Try a simpler fallback model
-            try:
-                self.depth_estimator = pipeline(
-                    "depth-estimation",
-                    model="Intel/dpt-hybrid-midas",
-                    device=0 if torch.cuda.is_available() else -1
-                )
-            except Exception as e2:
-                print(f"Warning: Could not load fallback model either: {e2}")
-                self.depth_estimator = None
-    def estimate_depth(self, image: np.ndarray) -> np.ndarray:
-        """Estimate depth map for a single frame"""
-        if self.depth_estimator is None:
-            # Fallback: create a simple depth map based on image gradients
-            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
-            depth = cv2.Laplacian(gray, cv2.CV_64F)
-            depth = np.abs(depth)
-            depth = cv2.GaussianBlur(depth, (5, 5), 0)
-            depth = (depth - depth.min()) / (depth.max() - depth.min())
-            return depth
-        try:
-            # Convert to PIL Image for the model
-            pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
-            # Get depth prediction
-            result = self.depth_estimator(pil_image)
-            depth = np.array(result['depth'])
-            # Normalize depth map
-            depth = (depth - depth.min()) / (depth.max() - depth.min())
-            return depth
-        except Exception as e:
-            print(f"Error in depth estimation: {e}")
-            # Fallback to gradient-based depth
-            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
-            depth = cv2.Laplacian(gray, cv2.CV_64F)
-            depth = np.abs(depth)
-            depth = cv2.GaussianBlur(depth, (5, 5), 0)
-            depth = (depth - depth.min()) / (depth.max() - depth.min())
-            return depth
-    def create_stereo_pair(self, image: np.ndarray, depth: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
-        """Create left and right eye views for VR180"""
-        height, width = image.shape[:2]
-        # Create disparity map (inverse of depth for stereo effect)
-        disparity = 1.0 - depth
-        disparity = disparity * 30  # Scale disparity
-        # Create left and right views
-        left_view = image.copy()
-        right_view = image.copy()
-        # Apply horizontal shift based on disparity
-        for y in range(height):
-            for x in range(width):
-                shift = int(disparity[y, x])
-                # Left view: shift pixels to the right
-                if x + shift < width:
-                    left_view[y, x] = image[y, min(x + shift, width - 1)]
-                # Right view: shift pixels to the left
-                if x - shift >= 0:
-                    right_view[y, x] = image[y, max(x - shift, 0)]
-        return left_view, right_view
-    def create_vr180_frame(self, left_view: np.ndarray, right_view: np.ndarray) -> np.ndarray:
-        """Combine left and right views into VR180 format"""
-        height, width = left_view.shape[:2]
-        # Create VR180 frame (side-by-side)
-        vr180_frame = np.zeros((height, width * 2, 3), dtype=np.uint8)
-        # Place left view on the left half
-        vr180_frame[:, :width] = left_view
-        # Place right view on the right half
-        vr180_frame[:, width:] = right_view
-        return vr180_frame
-    def process_video(self, input_path: str, output_path: str) -> Dict:
-        """Process video from 2D to VR180"""
-        start_time = time.time()
-        try:
-            # Load video
-            video = mp.VideoFileClip(input_path)
-            fps = video.fps
-            duration = video.duration
-            print(f"Processing video: {input_path}")
-            print(f"Duration: {duration}s, FPS: {fps}")
-            # Process frames
-            processed_frames = []
-            total_frames = int(duration * fps)
-            for i, frame in enumerate(video.iter_frames()):
-                if i % 10 == 0:  # Print progress every 10 frames
-                    print(f"Processing frame {i}/{total_frames}")
-                # Convert frame to numpy array
-                frame_array = np.array(frame)
-                # Estimate depth
-                depth = self.estimate_depth(frame_array)
-                # Create stereo pair
-                left_view, right_view = self.create_stereo_pair(frame_array, depth)
-                # Create VR180 frame
-                vr180_frame = self.create_vr180_frame(left_view, right_view)
-                processed_frames.append(vr180_frame)
-            # Save processed video
-            if processed_frames:
-                # Create video writer
-                height, width = processed_frames[0].shape[:2]
-                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-                out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-                for frame in processed_frames:
-                    # Convert RGB to BGR for OpenCV
-                    frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
-                    out.write(frame_bgr)
-                out.release()
-            video.close()
-            processing_time = time.time() - start_time
-            return {
-                'success': True,
-                'processing_time': processing_time,
-                'output_path': output_path
-            }
-        except Exception as e:
-            return {
-                'success': False,
-                'error': str(e)
-            }
-    def create_preview_frame(self, input_path: str) -> np.ndarray:
-        """Create a preview frame for the UI"""
-        try:
-            video = mp.VideoFileClip(input_path)
-            frame = video.get_frame(0)  # Get first frame
-            video.close()
-            # Process the frame
-            frame_array = np.array(frame)
-            depth = self.estimate_depth(frame_array)
-            left_view, right_view = self.create_stereo_pair(frame_array, depth)
-            vr180_frame = self.create_vr180_frame(left_view, right_view)
-            return vr180_frame
-        except Exception as e:
-            print(f"Error creating preview: {e}")
-            return None

+import cv2
+import numpy as np
+from PIL import Image
+import moviepy.editor as mp
+import time
+import os
+from typing import Dict, Tuple
+import torch
+from transformers import pipeline
class VideoProcessor:
    """Turn a 2D video into a side-by-side stereo video.

    For every frame a depth map is estimated (with a Hugging Face
    ``depth-estimation`` pipeline when one can be loaded, otherwise with an
    image-gradient heuristic), the frame is shifted horizontally per pixel to
    synthesise a left-eye and a right-eye view, and the two views are placed
    next to each other in one output frame.

    Frames are handled as RGB arrays throughout; conversion to BGR happens
    only when a frame is handed to ``cv2.VideoWriter``.
    """

    def __init__(self):
        """Create the processor and try to load a depth-estimation model."""
        self.depth_estimator = None
        self._load_models()

    def _load_models(self):
        """Load the depth-estimation pipeline, trying a fallback model.

        Leaves ``self.depth_estimator`` as ``None`` when neither model can be
        loaded; ``estimate_depth`` then uses the gradient heuristic instead.
        """
        # Preferred model first, then the fallback; each carries the warning
        # printed when loading it fails.
        candidates = (
            ("Intel/dpt-large",
             "Warning: Could not load depth estimation model"),
            ("Intel/dpt-hybrid-midas",
             "Warning: Could not load fallback model either"),
        )
        for model_name, warning in candidates:
            try:
                self.depth_estimator = pipeline(
                    "depth-estimation",
                    model=model_name,
                    device=0 if torch.cuda.is_available() else -1
                )
                return
            except Exception as e:
                print(f"{warning}: {e}")
        self.depth_estimator = None

    @staticmethod
    def _normalize_depth(depth: np.ndarray) -> np.ndarray:
        """Min-max scale ``depth`` to ``[0, 1]`` as float64.

        A map with no usable range (constant, empty, or containing NaN/inf)
        yields all zeros instead of the NaNs a division by zero would produce.
        """
        depth = np.asarray(depth, dtype=np.float64)
        if depth.size == 0:
            return depth
        low = depth.min()
        span = depth.max() - low
        if not np.isfinite(span) or span <= 0:
            return np.zeros_like(depth)
        return (depth - low) / span

    @staticmethod
    def _gradient_depth(image: np.ndarray) -> np.ndarray:
        """Heuristic depth map: blurred absolute Laplacian of the RGB frame."""
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        depth = np.abs(cv2.Laplacian(gray, cv2.CV_64F))
        depth = cv2.GaussianBlur(depth, (5, 5), 0)
        return VideoProcessor._normalize_depth(depth)

    def estimate_depth(self, image: np.ndarray) -> np.ndarray:
        """Estimate a depth map for a single RGB frame.

        Args:
            image: RGB frame as an array of shape (height, width, 3).

        Returns:
            A float64 array of shape (height, width) scaled to ``[0, 1]``.
            When no model is loaded, or the model call fails, the gradient
            heuristic is used instead.
        """
        if self.depth_estimator is None:
            return self._gradient_depth(image)
        try:
            # The frame is already RGB (the fallback path and the writer in
            # process_video both treat it as RGB), so it is passed to PIL
            # unchanged; swapping channels here would feed the model a
            # red/blue-swapped picture.
            pil_image = Image.fromarray(image)
            result = self.depth_estimator(pil_image)
            depth = np.array(result['depth'])
            # Guard: create_stereo_pair indexes the depth map per pixel, so it
            # must have the frame's resolution.
            if depth.shape[:2] != image.shape[:2]:
                depth = cv2.resize(depth, (image.shape[1], image.shape[0]))
            return self._normalize_depth(depth)
        except Exception as e:
            print(f"Error in depth estimation: {e}")
            return self._gradient_depth(image)

    def create_stereo_pair(self, image: np.ndarray, depth: np.ndarray,
                           max_disparity: float = 30.0) -> Tuple[np.ndarray, np.ndarray]:
        """Create left-eye and right-eye views by depth-dependent shifting.

        Each output pixel ``(y, x)`` is sampled from the source frame at
        ``x + shift`` (left view) or ``x - shift`` (right view), where
        ``shift = trunc((1 - depth[y, x]) * max_disparity)``. Where the source
        column would fall outside the frame, the original pixel is kept.

        Args:
            image: Source frame, shape (height, width[, channels]).
            depth: Depth map in ``[0, 1]`` with shape (height, width).
            max_disparity: Shift in pixels applied where ``depth`` is 0.

        Returns:
            ``(left_view, right_view)``, new arrays with the shape and dtype
            of ``image``.

        Raises:
            ValueError: If ``depth`` does not have shape (height, width).
        """
        height, width = image.shape[:2]
        depth = np.asarray(depth)
        if depth.shape != (height, width):
            raise ValueError(
                f"depth shape {depth.shape} does not match image size "
                f"{(height, width)}"
            )
        # NOTE(review): `1 - depth` treats larger depth values as farther
        # away. DPT/MiDaS-style models are usually described as predicting
        # inverse depth (larger = nearer) - confirm the intended parallax.
        disparity = (1.0 - depth) * max_disparity
        # NaN becomes "no shift"; clipping keeps the integer conversion well
        # defined, and any |shift| >= width is out of range anyway.
        shift = np.clip(
            np.nan_to_num(disparity, nan=0.0), -width, width
        ).astype(np.intp)

        rows = np.arange(height, dtype=np.intp)[:, np.newaxis]
        cols = np.arange(width, dtype=np.intp)[np.newaxis, :]
        left_src = cols + shift
        right_src = cols - shift
        # Out-of-range source columns fall back to the pixel's own column, so
        # the original pixel is kept there.
        left_src = np.where((left_src >= 0) & (left_src < width), left_src, cols)
        right_src = np.where((right_src >= 0) & (right_src < width), right_src, cols)

        left_view = image[rows, left_src]
        right_view = image[rows, right_src]
        return left_view, right_view

    def create_vr180_frame(self, left_view: np.ndarray, right_view: np.ndarray) -> np.ndarray:
        """Place the two eye views side by side in a single uint8 frame.

        Args:
            left_view: Left-eye image, shape (height, width[, channels]).
            right_view: Right-eye image with the same shape.

        Returns:
            A uint8 array of shape (height, 2 * width[, channels]) with the
            left view in the left half and the right view in the right half.

        Raises:
            ValueError: If the two views do not have the same shape.
        """
        if left_view.shape != right_view.shape:
            raise ValueError(
                f"left view shape {left_view.shape} differs from right view "
                f"shape {right_view.shape}"
            )
        height, width = left_view.shape[:2]
        vr180_frame = np.zeros(
            (height, width * 2) + tuple(left_view.shape[2:]), dtype=np.uint8
        )
        vr180_frame[:, :width] = left_view
        vr180_frame[:, width:] = right_view
        return vr180_frame

    def process_video(self, input_path: str, output_path: str) -> Dict:
        """Convert the video at ``input_path`` and write it to ``output_path``.

        Frames are written as they are produced, so memory use does not grow
        with the length of the video. Only the picture is written; nothing in
        this method copies an audio track.

        Returns:
            ``{'success': True, 'processing_time': seconds, 'output_path':
            output_path}`` on success, otherwise ``{'success': False,
            'error': message}``. Exceptions are reported through the returned
            dict rather than raised.
        """
        start_time = time.time()
        video = None
        writer = None
        output_started = False
        frames_written = 0
        try:
            try:
                video = mp.VideoFileClip(input_path)
                fps = video.fps
                duration = video.duration
                print(f"Processing video: {input_path}")
                print(f"Duration: {duration}s, FPS: {fps}")
                total_frames = int(duration * fps)  # used for progress output only
                for i, frame in enumerate(video.iter_frames()):
                    if i % 10 == 0:  # Print progress every 10 frames
                        print(f"Processing frame {i}/{total_frames}")
                    frame_array = np.array(frame)
                    depth = self.estimate_depth(frame_array)
                    left_view, right_view = self.create_stereo_pair(frame_array, depth)
                    vr180_frame = self.create_vr180_frame(left_view, right_view)
                    if writer is None:
                        # The writer is opened lazily because the output size
                        # is only known once the first frame is built.
                        height, width = vr180_frame.shape[:2]
                        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                        writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
                        if not writer.isOpened():
                            raise IOError(f"Could not open video writer for {output_path}")
                        output_started = True
                    # Convert RGB to BGR for OpenCV
                    writer.write(cv2.cvtColor(vr180_frame, cv2.COLOR_RGB2BGR))
                    frames_written += 1
            finally:
                # Release both handles whether or not processing succeeded.
                if writer is not None:
                    writer.release()
                if video is not None:
                    video.close()
            if frames_written == 0:
                return {
                    'success': False,
                    'error': 'No frames could be read from the input video'
                }
            processing_time = time.time() - start_time
            return {
                'success': True,
                'processing_time': processing_time,
                'output_path': output_path
            }
        except Exception as e:
            if output_started:
                # Best effort: do not leave a truncated output file behind.
                try:
                    os.remove(output_path)
                except OSError:
                    pass
            return {
                'success': False,
                'error': str(e)
            }

    def create_preview_frame(self, input_path: str) -> np.ndarray:
        """Build a side-by-side preview from the first frame of a video.

        Returns:
            The side-by-side frame, or ``None`` if anything fails (the error
            is printed).
        """
        try:
            video = mp.VideoFileClip(input_path)
            try:
                frame = video.get_frame(0)  # Get first frame
            finally:
                # Close the clip even when decoding the frame fails.
                video.close()
            frame_array = np.array(frame)
            depth = self.estimate_depth(frame_array)
            left_view, right_view = self.create_stereo_pair(frame_array, depth)
            return self.create_vr180_frame(left_view, right_view)
        except Exception as e:
            print(f"Error creating preview: {e}")
            return None