Prithwis commited on
Commit
eba4376
·
verified ·
1 Parent(s): ed6d04b

Force update backend/video_processor.py - 1757355553

Browse files
Files changed (1) hide show
  1. backend/video_processor.py +195 -195
backend/video_processor.py CHANGED
@@ -1,195 +1,195 @@
1
- import cv2
2
- import numpy as np
3
- from PIL import Image
4
- import moviepy.editor as mp
5
- import time
6
- import os
7
- from typing import Dict, Tuple
8
- import torch
9
- from transformers import pipeline
10
-
11
class VideoProcessor:
    """Convert standard 2D video into side-by-side VR180 stereo video.

    Depth is estimated per frame with a transformers depth-estimation
    pipeline when one can be loaded; otherwise a cheap gradient-based
    heuristic (blurred Laplacian magnitude) is used as a stand-in.
    """

    def __init__(self):
        # None means "no model available" -> estimate_depth uses the
        # gradient-based fallback instead.
        self.depth_estimator = None
        self._load_models()

    def _load_models(self):
        """Load depth estimation model"""
        try:
            # Use a lightweight depth estimation model
            self.depth_estimator = pipeline(
                "depth-estimation",
                model="Intel/dpt-large",
                device=0 if torch.cuda.is_available() else -1
            )
        except Exception as e:
            print(f"Warning: Could not load depth estimation model: {e}")
            # Try a simpler fallback model
            try:
                self.depth_estimator = pipeline(
                    "depth-estimation",
                    model="Intel/dpt-hybrid-midas",
                    device=0 if torch.cuda.is_available() else -1
                )
            except Exception as e2:
                print(f"Warning: Could not load fallback model either: {e2}")
                self.depth_estimator = None

    @staticmethod
    def _normalize(depth: np.ndarray) -> np.ndarray:
        """Scale *depth* into [0, 1].

        Guards the constant-map case (max == min), which previously
        produced a division by zero; a flat map normalizes to all zeros.
        """
        span = depth.max() - depth.min()
        if span == 0:
            return np.zeros_like(depth, dtype=np.float64)
        return (depth - depth.min()) / span

    def _gradient_depth(self, image: np.ndarray) -> np.ndarray:
        """Model-free depth proxy: normalized, blurred Laplacian magnitude.

        Shared by the no-model path and the model-error path of
        ``estimate_depth`` (previously duplicated inline).
        """
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        depth = np.abs(cv2.Laplacian(gray, cv2.CV_64F))
        depth = cv2.GaussianBlur(depth, (5, 5), 0)
        return self._normalize(depth)

    def estimate_depth(self, image: np.ndarray) -> np.ndarray:
        """Estimate a depth map in [0, 1] for a single RGB frame.

        Frames arrive in RGB order (moviepy's ``iter_frames``/``get_frame``),
        which the RGB2GRAY fallback below also assumes.
        """
        if self.depth_estimator is None:
            # Fallback: simple depth map based on image gradients.
            return self._gradient_depth(image)

        try:
            # Frame is already RGB, which is what PIL expects — the old
            # BGR2RGB conversion here swapped channels and fed the model
            # BGR data.
            pil_image = Image.fromarray(image)

            # Get depth prediction and normalize it.
            result = self.depth_estimator(pil_image)
            depth = np.array(result['depth'])
            return self._normalize(depth)
        except Exception as e:
            print(f"Error in depth estimation: {e}")
            # Fallback to gradient-based depth
            return self._gradient_depth(image)

    def create_stereo_pair(self, image: np.ndarray, depth: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Create left and right eye views for VR180.

        Vectorized replacement for the original per-pixel Python loop;
        produces identical output. Pixels whose shifted source would fall
        outside the frame keep their original value, as before.
        """
        height, width = image.shape[:2]

        # Disparity is the inverse of depth (near objects shift more),
        # scaled by 30 and truncated to int exactly like int() did.
        shift = ((1.0 - depth) * 30).astype(int)

        cols = np.arange(width)
        rows = np.arange(height)[:, None]

        # Left view samples from the right; right view samples from the
        # left. Out-of-range sources fall back to the unshifted column.
        left_src = np.where(cols + shift < width, cols + shift, cols)
        right_src = np.where(cols - shift >= 0, cols - shift, cols)

        left_view = image[rows, left_src]
        right_view = image[rows, right_src]
        return left_view, right_view

    def create_vr180_frame(self, left_view: np.ndarray, right_view: np.ndarray) -> np.ndarray:
        """Combine left and right views into a side-by-side VR180 frame.

        Left eye occupies the left half, right eye the right half; output
        is uint8 as the original zero-buffer construction guaranteed.
        """
        return np.concatenate([left_view, right_view], axis=1).astype(np.uint8)

    def process_video(self, input_path: str, output_path: str) -> Dict:
        """Process video from 2D to VR180.

        Returns ``{'success': True, 'processing_time': ..., 'output_path': ...}``
        on success, ``{'success': False, 'error': ...}`` on failure.

        Frames are now written as they are produced instead of being
        accumulated in a list (the old approach held the entire converted
        video in memory). The clip and writer are released in ``finally``
        so an exception mid-stream no longer leaks them.
        """
        start_time = time.time()
        video = None
        out = None
        try:
            # Load video
            video = mp.VideoFileClip(input_path)
            fps = video.fps
            duration = video.duration

            print(f"Processing video: {input_path}")
            print(f"Duration: {duration}s, FPS: {fps}")

            total_frames = int(duration * fps)

            for i, frame in enumerate(video.iter_frames()):
                if i % 10 == 0:  # Print progress every 10 frames
                    print(f"Processing frame {i}/{total_frames}")

                frame_array = np.array(frame)
                depth = self.estimate_depth(frame_array)
                left_view, right_view = self.create_stereo_pair(frame_array, depth)
                vr180_frame = self.create_vr180_frame(left_view, right_view)

                if out is None:
                    # Open the writer lazily, once the output size is known.
                    height, width = vr180_frame.shape[:2]
                    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

                # Convert RGB to BGR for OpenCV
                out.write(cv2.cvtColor(vr180_frame, cv2.COLOR_RGB2BGR))

            return {
                'success': True,
                'processing_time': time.time() - start_time,
                'output_path': output_path
            }

        except Exception as e:
            return {
                'success': False,
                'error': str(e)
            }
        finally:
            if out is not None:
                out.release()
            if video is not None:
                video.close()

    def create_preview_frame(self, input_path: str) -> np.ndarray:
        """Create a VR180 preview of the first frame for the UI.

        Returns None when the frame cannot be read or processed. The clip
        is closed even if ``get_frame`` raises.
        """
        try:
            video = mp.VideoFileClip(input_path)
            try:
                frame = video.get_frame(0)  # first frame only
            finally:
                video.close()

            # Run the full 2D -> VR180 pipeline on the single frame.
            frame_array = np.array(frame)
            depth = self.estimate_depth(frame_array)
            left_view, right_view = self.create_stereo_pair(frame_array, depth)
            return self.create_vr180_frame(left_view, right_view)

        except Exception as e:
            print(f"Error creating preview: {e}")
            return None
 
1
+ import cv2
2
+ import numpy as np
3
+ from PIL import Image
4
+ import moviepy.editor as mp
5
+ import time
6
+ import os
7
+ from typing import Dict, Tuple
8
+ import torch
9
+ from transformers import pipeline
10
+
11
class VideoProcessor:
    """Convert standard 2D video into side-by-side VR180 stereo video.

    Depth is estimated per frame with a transformers depth-estimation
    pipeline when one can be loaded; otherwise a cheap gradient-based
    heuristic (blurred Laplacian magnitude) is used as a stand-in.
    """

    def __init__(self):
        # None means "no model available" -> estimate_depth uses the
        # gradient-based fallback instead.
        self.depth_estimator = None
        self._load_models()

    def _load_models(self):
        """Load depth estimation model"""
        try:
            # Use a lightweight depth estimation model
            self.depth_estimator = pipeline(
                "depth-estimation",
                model="Intel/dpt-large",
                device=0 if torch.cuda.is_available() else -1
            )
        except Exception as e:
            print(f"Warning: Could not load depth estimation model: {e}")
            # Try a simpler fallback model
            try:
                self.depth_estimator = pipeline(
                    "depth-estimation",
                    model="Intel/dpt-hybrid-midas",
                    device=0 if torch.cuda.is_available() else -1
                )
            except Exception as e2:
                print(f"Warning: Could not load fallback model either: {e2}")
                self.depth_estimator = None

    @staticmethod
    def _normalize(depth: np.ndarray) -> np.ndarray:
        """Scale *depth* into [0, 1].

        Guards the constant-map case (max == min), which previously
        produced a division by zero; a flat map normalizes to all zeros.
        """
        span = depth.max() - depth.min()
        if span == 0:
            return np.zeros_like(depth, dtype=np.float64)
        return (depth - depth.min()) / span

    def _gradient_depth(self, image: np.ndarray) -> np.ndarray:
        """Model-free depth proxy: normalized, blurred Laplacian magnitude.

        Shared by the no-model path and the model-error path of
        ``estimate_depth`` (previously duplicated inline).
        """
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        depth = np.abs(cv2.Laplacian(gray, cv2.CV_64F))
        depth = cv2.GaussianBlur(depth, (5, 5), 0)
        return self._normalize(depth)

    def estimate_depth(self, image: np.ndarray) -> np.ndarray:
        """Estimate a depth map in [0, 1] for a single RGB frame.

        Frames arrive in RGB order (moviepy's ``iter_frames``/``get_frame``),
        which the RGB2GRAY fallback below also assumes.
        """
        if self.depth_estimator is None:
            # Fallback: simple depth map based on image gradients.
            return self._gradient_depth(image)

        try:
            # Frame is already RGB, which is what PIL expects — the old
            # BGR2RGB conversion here swapped channels and fed the model
            # BGR data.
            pil_image = Image.fromarray(image)

            # Get depth prediction and normalize it.
            result = self.depth_estimator(pil_image)
            depth = np.array(result['depth'])
            return self._normalize(depth)
        except Exception as e:
            print(f"Error in depth estimation: {e}")
            # Fallback to gradient-based depth
            return self._gradient_depth(image)

    def create_stereo_pair(self, image: np.ndarray, depth: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Create left and right eye views for VR180.

        Vectorized replacement for the original per-pixel Python loop;
        produces identical output. Pixels whose shifted source would fall
        outside the frame keep their original value, as before.
        """
        height, width = image.shape[:2]

        # Disparity is the inverse of depth (near objects shift more),
        # scaled by 30 and truncated to int exactly like int() did.
        shift = ((1.0 - depth) * 30).astype(int)

        cols = np.arange(width)
        rows = np.arange(height)[:, None]

        # Left view samples from the right; right view samples from the
        # left. Out-of-range sources fall back to the unshifted column.
        left_src = np.where(cols + shift < width, cols + shift, cols)
        right_src = np.where(cols - shift >= 0, cols - shift, cols)

        left_view = image[rows, left_src]
        right_view = image[rows, right_src]
        return left_view, right_view

    def create_vr180_frame(self, left_view: np.ndarray, right_view: np.ndarray) -> np.ndarray:
        """Combine left and right views into a side-by-side VR180 frame.

        Left eye occupies the left half, right eye the right half; output
        is uint8 as the original zero-buffer construction guaranteed.
        """
        return np.concatenate([left_view, right_view], axis=1).astype(np.uint8)

    def process_video(self, input_path: str, output_path: str) -> Dict:
        """Process video from 2D to VR180.

        Returns ``{'success': True, 'processing_time': ..., 'output_path': ...}``
        on success, ``{'success': False, 'error': ...}`` on failure.

        Frames are now written as they are produced instead of being
        accumulated in a list (the old approach held the entire converted
        video in memory). The clip and writer are released in ``finally``
        so an exception mid-stream no longer leaks them.
        """
        start_time = time.time()
        video = None
        out = None
        try:
            # Load video
            video = mp.VideoFileClip(input_path)
            fps = video.fps
            duration = video.duration

            print(f"Processing video: {input_path}")
            print(f"Duration: {duration}s, FPS: {fps}")

            total_frames = int(duration * fps)

            for i, frame in enumerate(video.iter_frames()):
                if i % 10 == 0:  # Print progress every 10 frames
                    print(f"Processing frame {i}/{total_frames}")

                frame_array = np.array(frame)
                depth = self.estimate_depth(frame_array)
                left_view, right_view = self.create_stereo_pair(frame_array, depth)
                vr180_frame = self.create_vr180_frame(left_view, right_view)

                if out is None:
                    # Open the writer lazily, once the output size is known.
                    height, width = vr180_frame.shape[:2]
                    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

                # Convert RGB to BGR for OpenCV
                out.write(cv2.cvtColor(vr180_frame, cv2.COLOR_RGB2BGR))

            return {
                'success': True,
                'processing_time': time.time() - start_time,
                'output_path': output_path
            }

        except Exception as e:
            return {
                'success': False,
                'error': str(e)
            }
        finally:
            if out is not None:
                out.release()
            if video is not None:
                video.close()

    def create_preview_frame(self, input_path: str) -> np.ndarray:
        """Create a VR180 preview of the first frame for the UI.

        Returns None when the frame cannot be read or processed. The clip
        is closed even if ``get_frame`` raises.
        """
        try:
            video = mp.VideoFileClip(input_path)
            try:
                frame = video.get_frame(0)  # first frame only
            finally:
                video.close()

            # Run the full 2D -> VR180 pipeline on the single frame.
            frame_array = np.array(frame)
            depth = self.estimate_depth(frame_array)
            left_view, right_view = self.create_stereo_pair(frame_array, depth)
            return self.create_vr180_frame(left_view, right_view)

        except Exception as e:
            print(f"Error creating preview: {e}")
            return None