H022329 committed 15 days ago

Commit

df9fb03

verified ·

1 Parent(s): 9cf08e9

Upload folder using huggingface_hub

Browse files

Files changed (35) hide show

.gitattributes +4 -0
README.md +2 -1
VideoAgent/__pycache__/__init__.cpython-310.pyc +0 -0
VideoAgent/__pycache__/_utils.cpython-310.pyc +0 -0
VideoAgent/__pycache__/base.cpython-310.pyc +0 -0
VideoAgent/__pycache__/chunk.cpython-310.pyc +0 -0
VideoAgent/__pycache__/prompt.cpython-310.pyc +0 -0
VideoAgent/__pycache__/query.cpython-310.pyc +0 -0
VideoAgent/__pycache__/vidrag_pipeline.cpython-310.pyc +0 -0
VideoAgent/_llm/__pycache__/__init__.cpython-310.pyc +0 -0
VideoAgent/_llm/__pycache__/asr_model.cpython-310.pyc +0 -0
VideoAgent/_llm/__pycache__/embedding_model.cpython-310.pyc +0 -0
VideoAgent/_llm/__pycache__/llm_model.cpython-310.pyc +0 -0
VideoAgent/_llm/__pycache__/tokenizer_model.cpython-310.pyc +0 -0
VideoAgent/_llm/__pycache__/vlm_model.cpython-310.pyc +0 -0
VideoAgent/_server/sherpa_asr_server.py +1 -1
VideoAgent/_storage/__pycache__/__init__.cpython-310.pyc +0 -0
VideoAgent/_storage/__pycache__/kv_json.cpython-310.pyc +0 -0
VideoAgent/_storage/__pycache__/vdb_nanovectordb.cpython-310.pyc +0 -0
VideoAgent/_storage/vdb_nanovectordb.py +2 -3
VideoAgent/_videoutil/__pycache__/__init__.cpython-310.pyc +0 -0
VideoAgent/_videoutil/__pycache__/asr.cpython-310.pyc +0 -0
VideoAgent/_videoutil/__pycache__/caption.cpython-310.pyc +0 -0
VideoAgent/_videoutil/__pycache__/feature.cpython-310.pyc +0 -0
VideoAgent/_videoutil/__pycache__/split.cpython-310.pyc +0 -0
VideoAgent/_videoutil/split.py +16 -3
image-10.png +3 -0
image-17.png +0 -0
image-2.png +0 -0
image-7.png +3 -0
image-8.png +3 -0
image-9.png +3 -0
requirements.txt +1 -3
videorag_longervideos.py +2 -2
webui.py +10 -3

.gitattributes CHANGED Viewed

@@ -40,3 +40,7 @@ VideoAgent/_llm/tokenizer_model/Qwen/Qwen3-4B/tokenizer.json filter=lfs diff=lfs
 VideoAgent/_llm/tokenizer_model/Qwen/Qwen3-4B-Instruct-2507/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 videos/origin/sanguo.mp4 filter=lfs diff=lfs merge=lfs -text
 videos/processed/sanguo.mp4 filter=lfs diff=lfs merge=lfs -text

 VideoAgent/_llm/tokenizer_model/Qwen/Qwen3-4B-Instruct-2507/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 videos/origin/sanguo.mp4 filter=lfs diff=lfs merge=lfs -text
 videos/processed/sanguo.mp4 filter=lfs diff=lfs merge=lfs -text
+image-10.png filter=lfs diff=lfs merge=lfs -text
+image-7.png filter=lfs diff=lfs merge=lfs -text
+image-8.png filter=lfs diff=lfs merge=lfs -text
+image-9.png filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -128,7 +128,7 @@ print(result)
 ### 视频索引流程
-![索引流程](mermaid-1778811634860.png)
 ### 查询流程
@@ -158,3 +158,4 @@ VideoAgent-AX650N/
 ## 参考项目
 - 香港大学数据科学实验室（HKUDS）— [VideoRAG](https://github.com/HKUDS/VideoRAG)：超长视频跨模态检索增强生成框架

 ### 视频索引流程
+![索引流程](image-2.png)
 ### 查询流程
 ## 参考项目
 - 香港大学数据科学实验室（HKUDS）— [VideoRAG](https://github.com/HKUDS/VideoRAG)：超长视频跨模态检索增强生成框架

VideoAgent/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/__pycache__/__init__.cpython-310.pyc and b/VideoAgent/__pycache__/__init__.cpython-310.pyc differ

VideoAgent/__pycache__/_utils.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/__pycache__/_utils.cpython-310.pyc and b/VideoAgent/__pycache__/_utils.cpython-310.pyc differ

VideoAgent/__pycache__/base.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/__pycache__/base.cpython-310.pyc and b/VideoAgent/__pycache__/base.cpython-310.pyc differ

VideoAgent/__pycache__/chunk.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/__pycache__/chunk.cpython-310.pyc and b/VideoAgent/__pycache__/chunk.cpython-310.pyc differ

VideoAgent/__pycache__/prompt.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/__pycache__/prompt.cpython-310.pyc and b/VideoAgent/__pycache__/prompt.cpython-310.pyc differ

VideoAgent/__pycache__/query.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/__pycache__/query.cpython-310.pyc and b/VideoAgent/__pycache__/query.cpython-310.pyc differ

VideoAgent/__pycache__/vidrag_pipeline.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/__pycache__/vidrag_pipeline.cpython-310.pyc and b/VideoAgent/__pycache__/vidrag_pipeline.cpython-310.pyc differ

VideoAgent/_llm/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/_llm/__pycache__/__init__.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/__init__.cpython-310.pyc differ

VideoAgent/_llm/__pycache__/asr_model.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/_llm/__pycache__/asr_model.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/asr_model.cpython-310.pyc differ

VideoAgent/_llm/__pycache__/embedding_model.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/_llm/__pycache__/embedding_model.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/embedding_model.cpython-310.pyc differ

VideoAgent/_llm/__pycache__/llm_model.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/_llm/__pycache__/llm_model.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/llm_model.cpython-310.pyc differ

VideoAgent/_llm/__pycache__/tokenizer_model.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/_llm/__pycache__/tokenizer_model.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/tokenizer_model.cpython-310.pyc differ

VideoAgent/_llm/__pycache__/vlm_model.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/_llm/__pycache__/vlm_model.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/vlm_model.cpython-310.pyc differ

VideoAgent/_server/sherpa_asr_server.py CHANGED Viewed

@@ -44,7 +44,7 @@ class SherpaASREngine:
             "sherpa-onnx-offline",
         )
         self.provider = provider or os.getenv("SHERPA_PROVIDER", "axera")
-        self.vad = vad or os.getenv("vad-model", "/root/huangjie/AXERA-TECH/SenseVoice/silero_vad.onnx")
         if os.path.exists(self.sherpa_bin):
             os.chmod(self.sherpa_bin, 0o755)

             "sherpa-onnx-offline",
         )
         self.provider = provider or os.getenv("SHERPA_PROVIDER", "axera")
+        # self.vad = vad or os.getenv("vad-model", "/root/huangjie/AXERA-TECH/SenseVoice/silero_vad.onnx")
         if os.path.exists(self.sherpa_bin):
             os.chmod(self.sherpa_bin, 0o755)

VideoAgent/_storage/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/_storage/__pycache__/__init__.cpython-310.pyc and b/VideoAgent/_storage/__pycache__/__init__.cpython-310.pyc differ

VideoAgent/_storage/__pycache__/kv_json.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/_storage/__pycache__/kv_json.cpython-310.pyc and b/VideoAgent/_storage/__pycache__/kv_json.cpython-310.pyc differ

VideoAgent/_storage/__pycache__/vdb_nanovectordb.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/_storage/__pycache__/vdb_nanovectordb.cpython-310.pyc and b/VideoAgent/_storage/__pycache__/vdb_nanovectordb.cpython-310.pyc differ

VideoAgent/_storage/vdb_nanovectordb.py CHANGED Viewed

@@ -103,7 +103,7 @@ class NanoVectorDBVideoSegmentStorage(BaseVectorStorage):
     async def upsert(self, video_name, segment_index2name, video_output_format):
-        logger.info(f"Inserting {len(segment_index2name)} segments to {self.namespace}")
         if not len(segment_index2name):
             logger.warning("You insert an empty data to vector DB")
             return []
@@ -130,8 +130,7 @@ class NanoVectorDBVideoSegmentStorage(BaseVectorStorage):
                 embeddings.append(batch_embeddings)
         embeddings = torch.concat(embeddings, dim=0)
         embeddings = embeddings.cpu().numpy()
-        print("e", embeddings.shape)
-        print("d", len(list_data))
         for i, d in enumerate(list_data):
             d["__vector__"] = embeddings[i]

     async def upsert(self, video_name, segment_index2name, video_output_format):
         if not len(segment_index2name):
             logger.warning("You insert an empty data to vector DB")
             return []
                 embeddings.append(batch_embeddings)
         embeddings = torch.concat(embeddings, dim=0)
         embeddings = embeddings.cpu().numpy()
         for i, d in enumerate(list_data):
             d["__vector__"] = embeddings[i]

VideoAgent/_videoutil/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/_videoutil/__pycache__/__init__.cpython-310.pyc and b/VideoAgent/_videoutil/__pycache__/__init__.cpython-310.pyc differ

VideoAgent/_videoutil/__pycache__/asr.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/_videoutil/__pycache__/asr.cpython-310.pyc and b/VideoAgent/_videoutil/__pycache__/asr.cpython-310.pyc differ

VideoAgent/_videoutil/__pycache__/caption.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/_videoutil/__pycache__/caption.cpython-310.pyc and b/VideoAgent/_videoutil/__pycache__/caption.cpython-310.pyc differ

VideoAgent/_videoutil/__pycache__/feature.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/_videoutil/__pycache__/feature.cpython-310.pyc and b/VideoAgent/_videoutil/__pycache__/feature.cpython-310.pyc differ

VideoAgent/_videoutil/__pycache__/split.cpython-310.pyc CHANGED Viewed

Binary files a/VideoAgent/_videoutil/__pycache__/split.cpython-310.pyc and b/VideoAgent/_videoutil/__pycache__/split.cpython-310.pyc differ

VideoAgent/_videoutil/split.py CHANGED Viewed

@@ -9,6 +9,19 @@ from moviepy.video.io.VideoFileClip import VideoFileClip
 import logging
 import multiprocessing
 logger = logging.getLogger(__name__)
@@ -30,17 +43,17 @@ def preprocess_video(
     if os.path.exists(output_path):
-        # logger.info(f"Preprocessed video already exists: {output_path}")
         return output_path
     logger.info(f"Preprocessing video {video_name}: {target_width}x{target_height} @ {target_fps}fps -> {output_path}")
     cmd = [
-        "ffmpeg", "-y",
         "-i", video_path,
         "-vf", f"scale={target_width}:{target_height}",
         "-r", str(target_fps),
         "-c:v", "libx264",
-        "-c:a", "aac",  # 确保音频被编码
         "-b:a", "128k",
         "-loglevel", "error",
         output_path,

 import logging
 import multiprocessing
+def _get_ffmpeg_bin():
+    """返回支持 libx264 的 ffmpeg 路径"""
+    env_bin = os.getenv("FFMPEG_BIN")
+    if env_bin:
+        return env_bin
+    try:
+        import imageio_ffmpeg
+        return imageio_ffmpeg.get_ffmpeg_exe()
+    except ImportError:
+        return "ffmpeg"
+_FFMPEG_BIN = _get_ffmpeg_bin()
 logger = logging.getLogger(__name__)
     if os.path.exists(output_path):
+        logger.info(f"Preprocessed video already exists: {output_path}")
         return output_path
     logger.info(f"Preprocessing video {video_name}: {target_width}x{target_height} @ {target_fps}fps -> {output_path}")
     cmd = [
+        _FFMPEG_BIN, "-y",
         "-i", video_path,
         "-vf", f"scale={target_width}:{target_height}",
         "-r", str(target_fps),
         "-c:v", "libx264",
+        "-c:a", "aac",
         "-b:a", "128k",
         "-loglevel", "error",
         output_path,

image-10.png ADDED Viewed

Git LFS Details

SHA256: 0d1a52489cbfc32c4af39c37a3a87737119e1b278d7982ce127f0e4ae708d932
Pointer size: 131 Bytes
Size of remote file: 137 kB

image-17.png ADDED Viewed

image-2.png CHANGED Viewed

image-7.png ADDED Viewed

Git LFS Details

SHA256: 0a3a2f441cf87e2854e12c9aa51d0edeb6da7497737766046b07b27c5e75ea6a
Pointer size: 131 Bytes
Size of remote file: 256 kB

image-8.png ADDED Viewed

Git LFS Details

SHA256: df062c8de9c9047e558adcfd9b0d670dcf6bdcff878ec37e7eccf240df1b93f0
Pointer size: 131 Bytes
Size of remote file: 412 kB

image-9.png ADDED Viewed

Git LFS Details

SHA256: 342c4041f440420d215802a0555770ee0f6534c33a0268c9ac01282c8e400004
Pointer size: 131 Bytes
Size of remote file: 348 kB

requirements.txt CHANGED Viewed

@@ -1,7 +1,5 @@
 python-dotenv==1.2.1
-torch
-torchvision
-transformers
 numpy==2.2.6
 Pillow==12.0.0
 fastapi==0.135.1

 python-dotenv==1.2.1
 numpy==2.2.6
 Pillow==12.0.0
 fastapi==0.135.1

videorag_longervideos.py CHANGED Viewed

@@ -21,12 +21,12 @@ if __name__ == '__main__':
     multiprocessing.set_start_method('spawn', force=True)
     # video文件地址:
-    video_base_path = f'/root/xxx'
     video_files = sorted(os.listdir(video_base_path))
     video_paths = [os.path.join(video_base_path, f) for f in video_files]
     #工作目录
-    videorag = VideoRAG( working_dir=f"/root/xxx")
     videorag.insert_video(video_path_list=video_paths)
     querys = "SP视频开头前10秒的内容"

     multiprocessing.set_start_method('spawn', force=True)
     # video文件地址:
+    video_base_path = f'/root/huangjie/VideoAgent_api513/videos/origin'
     video_files = sorted(os.listdir(video_base_path))
     video_paths = [os.path.join(video_base_path, f) for f in video_files]
     #工作目录
+    videorag = VideoRAG( working_dir=f"/root/huangjie/VideoAgent_api513/working_dir")
     videorag.insert_video(video_path_list=video_paths)
     querys = "SP视频开头前10秒的内容"

webui.py CHANGED Viewed

@@ -850,8 +850,15 @@ def _export_clip(video_path: str, start: float, end: float, working_dir: str, ca
     if os.path.exists(clip_path):
         return clip_path
     cmd = [
-        "ffmpeg",
         "-y",
         "-ss",
         f"{start:.3f}",
@@ -1508,7 +1515,7 @@ with gr.Blocks(
                             precision=0,
                         )
                         retrieval_topk_chunks_input = gr.Number(
-                            label="检索 Top-K 片段数",
                             value=_rag_runtime_settings["retrieval_topk_chunks"],
                             minimum=1,
                             info="检索相关片段的数量",
@@ -1523,7 +1530,7 @@ with gr.Blocks(
                             precision=0,
                         )
                         segment_retrieval_top_k_input = gr.Number(
-                            label="视频段检索 Top-K 数",
                             value=_rag_runtime_settings["segment_retrieval_top_k"],
                             minimum=1,
                             info="检索相关视频段的数量",

     if os.path.exists(clip_path):
         return clip_path
+    # 使用与 split.py 相同的 ffmpeg 选择逻辑
+    try:
+        import imageio_ffmpeg
+        _ffmpeg = imageio_ffmpeg.get_ffmpeg_exe()
+    except ImportError:
+        _ffmpeg = os.getenv("FFMPEG_BIN", "ffmpeg")
     cmd = [
+        _ffmpeg,
         "-y",
         "-ss",
         f"{start:.3f}",
                             precision=0,
                         )
                         retrieval_topk_chunks_input = gr.Number(
+                            label="文本段检索 Top-K",
                             value=_rag_runtime_settings["retrieval_topk_chunks"],
                             minimum=1,
                             info="检索相关片段的数量",
                             precision=0,
                         )
                         segment_retrieval_top_k_input = gr.Number(
+                            label="视频段检索 Top-K",
                             value=_rag_runtime_settings["segment_retrieval_top_k"],
                             minimum=1,
                             info="检索相关视频段的数量",