H022329 committed on
Commit
df9fb03
·
verified ·
1 Parent(s): 9cf08e9

Upload folder using huggingface_hub

Browse files
Files changed (35) hide show
  1. .gitattributes +4 -0
  2. README.md +2 -1
  3. VideoAgent/__pycache__/__init__.cpython-310.pyc +0 -0
  4. VideoAgent/__pycache__/_utils.cpython-310.pyc +0 -0
  5. VideoAgent/__pycache__/base.cpython-310.pyc +0 -0
  6. VideoAgent/__pycache__/chunk.cpython-310.pyc +0 -0
  7. VideoAgent/__pycache__/prompt.cpython-310.pyc +0 -0
  8. VideoAgent/__pycache__/query.cpython-310.pyc +0 -0
  9. VideoAgent/__pycache__/vidrag_pipeline.cpython-310.pyc +0 -0
  10. VideoAgent/_llm/__pycache__/__init__.cpython-310.pyc +0 -0
  11. VideoAgent/_llm/__pycache__/asr_model.cpython-310.pyc +0 -0
  12. VideoAgent/_llm/__pycache__/embedding_model.cpython-310.pyc +0 -0
  13. VideoAgent/_llm/__pycache__/llm_model.cpython-310.pyc +0 -0
  14. VideoAgent/_llm/__pycache__/tokenizer_model.cpython-310.pyc +0 -0
  15. VideoAgent/_llm/__pycache__/vlm_model.cpython-310.pyc +0 -0
  16. VideoAgent/_server/sherpa_asr_server.py +1 -1
  17. VideoAgent/_storage/__pycache__/__init__.cpython-310.pyc +0 -0
  18. VideoAgent/_storage/__pycache__/kv_json.cpython-310.pyc +0 -0
  19. VideoAgent/_storage/__pycache__/vdb_nanovectordb.cpython-310.pyc +0 -0
  20. VideoAgent/_storage/vdb_nanovectordb.py +2 -3
  21. VideoAgent/_videoutil/__pycache__/__init__.cpython-310.pyc +0 -0
  22. VideoAgent/_videoutil/__pycache__/asr.cpython-310.pyc +0 -0
  23. VideoAgent/_videoutil/__pycache__/caption.cpython-310.pyc +0 -0
  24. VideoAgent/_videoutil/__pycache__/feature.cpython-310.pyc +0 -0
  25. VideoAgent/_videoutil/__pycache__/split.cpython-310.pyc +0 -0
  26. VideoAgent/_videoutil/split.py +16 -3
  27. image-10.png +3 -0
  28. image-17.png +0 -0
  29. image-2.png +0 -0
  30. image-7.png +3 -0
  31. image-8.png +3 -0
  32. image-9.png +3 -0
  33. requirements.txt +1 -3
  34. videorag_longervideos.py +2 -2
  35. webui.py +10 -3
.gitattributes CHANGED
@@ -40,3 +40,7 @@ VideoAgent/_llm/tokenizer_model/Qwen/Qwen3-4B/tokenizer.json filter=lfs diff=lfs
40
  VideoAgent/_llm/tokenizer_model/Qwen/Qwen3-4B-Instruct-2507/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
  videos/origin/sanguo.mp4 filter=lfs diff=lfs merge=lfs -text
42
  videos/processed/sanguo.mp4 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
40
  VideoAgent/_llm/tokenizer_model/Qwen/Qwen3-4B-Instruct-2507/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
  videos/origin/sanguo.mp4 filter=lfs diff=lfs merge=lfs -text
42
  videos/processed/sanguo.mp4 filter=lfs diff=lfs merge=lfs -text
43
+ image-10.png filter=lfs diff=lfs merge=lfs -text
44
+ image-7.png filter=lfs diff=lfs merge=lfs -text
45
+ image-8.png filter=lfs diff=lfs merge=lfs -text
46
+ image-9.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -128,7 +128,7 @@ print(result)
128
 
129
  ### 视频索引流程
130
 
131
- ![索引流程](mermaid-1778811634860.png)
132
  ### 查询流程
133
 
134
 
@@ -158,3 +158,4 @@ VideoAgent-AX650N/
158
  ## 参考项目
159
 
160
  - 香港大学数据科学实验室(HKUDS)— [VideoRAG](https://github.com/HKUDS/VideoRAG):超长视频跨模态检索增强生成框架
 
 
128
 
129
  ### 视频索引流程
130
 
131
+ ![索引流程](image-2.png)
132
  ### 查询流程
133
 
134
 
 
158
  ## 参考项目
159
 
160
  - 香港大学数据科学实验室(HKUDS)— [VideoRAG](https://github.com/HKUDS/VideoRAG):超长视频跨模态检索增强生成框架
161
+
VideoAgent/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/VideoAgent/__pycache__/__init__.cpython-310.pyc and b/VideoAgent/__pycache__/__init__.cpython-310.pyc differ
 
VideoAgent/__pycache__/_utils.cpython-310.pyc CHANGED
Binary files a/VideoAgent/__pycache__/_utils.cpython-310.pyc and b/VideoAgent/__pycache__/_utils.cpython-310.pyc differ
 
VideoAgent/__pycache__/base.cpython-310.pyc CHANGED
Binary files a/VideoAgent/__pycache__/base.cpython-310.pyc and b/VideoAgent/__pycache__/base.cpython-310.pyc differ
 
VideoAgent/__pycache__/chunk.cpython-310.pyc CHANGED
Binary files a/VideoAgent/__pycache__/chunk.cpython-310.pyc and b/VideoAgent/__pycache__/chunk.cpython-310.pyc differ
 
VideoAgent/__pycache__/prompt.cpython-310.pyc CHANGED
Binary files a/VideoAgent/__pycache__/prompt.cpython-310.pyc and b/VideoAgent/__pycache__/prompt.cpython-310.pyc differ
 
VideoAgent/__pycache__/query.cpython-310.pyc CHANGED
Binary files a/VideoAgent/__pycache__/query.cpython-310.pyc and b/VideoAgent/__pycache__/query.cpython-310.pyc differ
 
VideoAgent/__pycache__/vidrag_pipeline.cpython-310.pyc CHANGED
Binary files a/VideoAgent/__pycache__/vidrag_pipeline.cpython-310.pyc and b/VideoAgent/__pycache__/vidrag_pipeline.cpython-310.pyc differ
 
VideoAgent/_llm/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/VideoAgent/_llm/__pycache__/__init__.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/__init__.cpython-310.pyc differ
 
VideoAgent/_llm/__pycache__/asr_model.cpython-310.pyc CHANGED
Binary files a/VideoAgent/_llm/__pycache__/asr_model.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/asr_model.cpython-310.pyc differ
 
VideoAgent/_llm/__pycache__/embedding_model.cpython-310.pyc CHANGED
Binary files a/VideoAgent/_llm/__pycache__/embedding_model.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/embedding_model.cpython-310.pyc differ
 
VideoAgent/_llm/__pycache__/llm_model.cpython-310.pyc CHANGED
Binary files a/VideoAgent/_llm/__pycache__/llm_model.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/llm_model.cpython-310.pyc differ
 
VideoAgent/_llm/__pycache__/tokenizer_model.cpython-310.pyc CHANGED
Binary files a/VideoAgent/_llm/__pycache__/tokenizer_model.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/tokenizer_model.cpython-310.pyc differ
 
VideoAgent/_llm/__pycache__/vlm_model.cpython-310.pyc CHANGED
Binary files a/VideoAgent/_llm/__pycache__/vlm_model.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/vlm_model.cpython-310.pyc differ
 
VideoAgent/_server/sherpa_asr_server.py CHANGED
@@ -44,7 +44,7 @@ class SherpaASREngine:
44
  "sherpa-onnx-offline",
45
  )
46
  self.provider = provider or os.getenv("SHERPA_PROVIDER", "axera")
47
- self.vad = vad or os.getenv("vad-model", "/root/huangjie/AXERA-TECH/SenseVoice/silero_vad.onnx")
48
 
49
  if os.path.exists(self.sherpa_bin):
50
  os.chmod(self.sherpa_bin, 0o755)
 
44
  "sherpa-onnx-offline",
45
  )
46
  self.provider = provider or os.getenv("SHERPA_PROVIDER", "axera")
47
+ # self.vad = vad or os.getenv("vad-model", "/root/huangjie/AXERA-TECH/SenseVoice/silero_vad.onnx")
48
 
49
  if os.path.exists(self.sherpa_bin):
50
  os.chmod(self.sherpa_bin, 0o755)
VideoAgent/_storage/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/VideoAgent/_storage/__pycache__/__init__.cpython-310.pyc and b/VideoAgent/_storage/__pycache__/__init__.cpython-310.pyc differ
 
VideoAgent/_storage/__pycache__/kv_json.cpython-310.pyc CHANGED
Binary files a/VideoAgent/_storage/__pycache__/kv_json.cpython-310.pyc and b/VideoAgent/_storage/__pycache__/kv_json.cpython-310.pyc differ
 
VideoAgent/_storage/__pycache__/vdb_nanovectordb.cpython-310.pyc CHANGED
Binary files a/VideoAgent/_storage/__pycache__/vdb_nanovectordb.cpython-310.pyc and b/VideoAgent/_storage/__pycache__/vdb_nanovectordb.cpython-310.pyc differ
 
VideoAgent/_storage/vdb_nanovectordb.py CHANGED
@@ -103,7 +103,7 @@ class NanoVectorDBVideoSegmentStorage(BaseVectorStorage):
103
 
104
  async def upsert(self, video_name, segment_index2name, video_output_format):
105
 
106
- logger.info(f"Inserting {len(segment_index2name)} segments to {self.namespace}")
107
  if not len(segment_index2name):
108
  logger.warning("You insert an empty data to vector DB")
109
  return []
@@ -130,8 +130,7 @@ class NanoVectorDBVideoSegmentStorage(BaseVectorStorage):
130
  embeddings.append(batch_embeddings)
131
  embeddings = torch.concat(embeddings, dim=0)
132
  embeddings = embeddings.cpu().numpy()
133
- print("e", embeddings.shape)
134
- print("d", len(list_data))
135
  for i, d in enumerate(list_data):
136
  d["__vector__"] = embeddings[i]
137
 
 
103
 
104
  async def upsert(self, video_name, segment_index2name, video_output_format):
105
 
106
+
107
  if not len(segment_index2name):
108
  logger.warning("You insert an empty data to vector DB")
109
  return []
 
130
  embeddings.append(batch_embeddings)
131
  embeddings = torch.concat(embeddings, dim=0)
132
  embeddings = embeddings.cpu().numpy()
133
+
 
134
  for i, d in enumerate(list_data):
135
  d["__vector__"] = embeddings[i]
136
 
VideoAgent/_videoutil/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/VideoAgent/_videoutil/__pycache__/__init__.cpython-310.pyc and b/VideoAgent/_videoutil/__pycache__/__init__.cpython-310.pyc differ
 
VideoAgent/_videoutil/__pycache__/asr.cpython-310.pyc CHANGED
Binary files a/VideoAgent/_videoutil/__pycache__/asr.cpython-310.pyc and b/VideoAgent/_videoutil/__pycache__/asr.cpython-310.pyc differ
 
VideoAgent/_videoutil/__pycache__/caption.cpython-310.pyc CHANGED
Binary files a/VideoAgent/_videoutil/__pycache__/caption.cpython-310.pyc and b/VideoAgent/_videoutil/__pycache__/caption.cpython-310.pyc differ
 
VideoAgent/_videoutil/__pycache__/feature.cpython-310.pyc CHANGED
Binary files a/VideoAgent/_videoutil/__pycache__/feature.cpython-310.pyc and b/VideoAgent/_videoutil/__pycache__/feature.cpython-310.pyc differ
 
VideoAgent/_videoutil/__pycache__/split.cpython-310.pyc CHANGED
Binary files a/VideoAgent/_videoutil/__pycache__/split.cpython-310.pyc and b/VideoAgent/_videoutil/__pycache__/split.cpython-310.pyc differ
 
VideoAgent/_videoutil/split.py CHANGED
@@ -9,6 +9,19 @@ from moviepy.video.io.VideoFileClip import VideoFileClip
9
  import logging
10
  import multiprocessing
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  logger = logging.getLogger(__name__)
13
 
14
 
@@ -30,17 +43,17 @@ def preprocess_video(
30
 
31
 
32
  if os.path.exists(output_path):
33
- # logger.info(f"Preprocessed video already exists: {output_path}")
34
  return output_path
35
 
36
  logger.info(f"Preprocessing video {video_name}: {target_width}x{target_height} @ {target_fps}fps -> {output_path}")
37
  cmd = [
38
- "ffmpeg", "-y",
39
  "-i", video_path,
40
  "-vf", f"scale={target_width}:{target_height}",
41
  "-r", str(target_fps),
42
  "-c:v", "libx264",
43
- "-c:a", "aac", # 确保音频被编码
44
  "-b:a", "128k",
45
  "-loglevel", "error",
46
  output_path,
 
9
  import logging
10
  import multiprocessing
11
 
12
+ def _get_ffmpeg_bin():
13
+ """返回支持 libx264 的 ffmpeg 路径"""
14
+ env_bin = os.getenv("FFMPEG_BIN")
15
+ if env_bin:
16
+ return env_bin
17
+ try:
18
+ import imageio_ffmpeg
19
+ return imageio_ffmpeg.get_ffmpeg_exe()
20
+ except ImportError:
21
+ return "ffmpeg"
22
+
23
+ _FFMPEG_BIN = _get_ffmpeg_bin()
24
+
25
  logger = logging.getLogger(__name__)
26
 
27
 
 
43
 
44
 
45
  if os.path.exists(output_path):
46
+ logger.info(f"Preprocessed video already exists: {output_path}")
47
  return output_path
48
 
49
  logger.info(f"Preprocessing video {video_name}: {target_width}x{target_height} @ {target_fps}fps -> {output_path}")
50
  cmd = [
51
+ _FFMPEG_BIN, "-y",
52
  "-i", video_path,
53
  "-vf", f"scale={target_width}:{target_height}",
54
  "-r", str(target_fps),
55
  "-c:v", "libx264",
56
+ "-c:a", "aac",
57
  "-b:a", "128k",
58
  "-loglevel", "error",
59
  output_path,
image-10.png ADDED

Git LFS Details

  • SHA256: 0d1a52489cbfc32c4af39c37a3a87737119e1b278d7982ce127f0e4ae708d932
  • Pointer size: 131 Bytes
  • Size of remote file: 137 kB
image-17.png ADDED
image-2.png CHANGED
image-7.png ADDED

Git LFS Details

  • SHA256: 0a3a2f441cf87e2854e12c9aa51d0edeb6da7497737766046b07b27c5e75ea6a
  • Pointer size: 131 Bytes
  • Size of remote file: 256 kB
image-8.png ADDED

Git LFS Details

  • SHA256: df062c8de9c9047e558adcfd9b0d670dcf6bdcff878ec37e7eccf240df1b93f0
  • Pointer size: 131 Bytes
  • Size of remote file: 412 kB
image-9.png ADDED

Git LFS Details

  • SHA256: 342c4041f440420d215802a0555770ee0f6534c33a0268c9ac01282c8e400004
  • Pointer size: 131 Bytes
  • Size of remote file: 348 kB
requirements.txt CHANGED
@@ -1,7 +1,5 @@
1
  python-dotenv==1.2.1
2
- torch
3
- torchvision
4
- transformers
5
  numpy==2.2.6
6
  Pillow==12.0.0
7
  fastapi==0.135.1
 
1
  python-dotenv==1.2.1
2
+
 
 
3
  numpy==2.2.6
4
  Pillow==12.0.0
5
  fastapi==0.135.1
videorag_longervideos.py CHANGED
@@ -21,12 +21,12 @@ if __name__ == '__main__':
21
  multiprocessing.set_start_method('spawn', force=True)
22
 
23
  # video文件地址:
24
- video_base_path = f'/root/xxx'
25
  video_files = sorted(os.listdir(video_base_path))
26
  video_paths = [os.path.join(video_base_path, f) for f in video_files]
27
 
28
  #工作目录
29
- videorag = VideoRAG( working_dir=f"/root/xxx")
30
  videorag.insert_video(video_path_list=video_paths)
31
 
32
  querys = "SP视频开头前10秒的内容"
 
21
  multiprocessing.set_start_method('spawn', force=True)
22
 
23
  # video文件地址:
24
+ video_base_path = f'/root/huangjie/VideoAgent_api513/videos/origin'
25
  video_files = sorted(os.listdir(video_base_path))
26
  video_paths = [os.path.join(video_base_path, f) for f in video_files]
27
 
28
  #工作目录
29
+ videorag = VideoRAG( working_dir=f"/root/huangjie/VideoAgent_api513/working_dir")
30
  videorag.insert_video(video_path_list=video_paths)
31
 
32
  querys = "SP视频开头前10秒的内容"
webui.py CHANGED
@@ -850,8 +850,15 @@ def _export_clip(video_path: str, start: float, end: float, working_dir: str, ca
850
  if os.path.exists(clip_path):
851
  return clip_path
852
 
 
 
 
 
 
 
 
853
  cmd = [
854
- "ffmpeg",
855
  "-y",
856
  "-ss",
857
  f"{start:.3f}",
@@ -1508,7 +1515,7 @@ with gr.Blocks(
1508
  precision=0,
1509
  )
1510
  retrieval_topk_chunks_input = gr.Number(
1511
- label="检索 Top-K 片段数",
1512
  value=_rag_runtime_settings["retrieval_topk_chunks"],
1513
  minimum=1,
1514
  info="检索相关片段的数量",
@@ -1523,7 +1530,7 @@ with gr.Blocks(
1523
  precision=0,
1524
  )
1525
  segment_retrieval_top_k_input = gr.Number(
1526
- label="视频段检索 Top-K",
1527
  value=_rag_runtime_settings["segment_retrieval_top_k"],
1528
  minimum=1,
1529
  info="检索相关视频段的数量",
 
850
  if os.path.exists(clip_path):
851
  return clip_path
852
 
853
+ # 使用与 split.py 相同的 ffmpeg 选择逻辑
854
+ try:
855
+ import imageio_ffmpeg
856
+ _ffmpeg = imageio_ffmpeg.get_ffmpeg_exe()
857
+ except ImportError:
858
+ _ffmpeg = os.getenv("FFMPEG_BIN", "ffmpeg")
859
+
860
  cmd = [
861
+ _ffmpeg,
862
  "-y",
863
  "-ss",
864
  f"{start:.3f}",
 
1515
  precision=0,
1516
  )
1517
  retrieval_topk_chunks_input = gr.Number(
1518
+ label="文本段检索 Top-K",
1519
  value=_rag_runtime_settings["retrieval_topk_chunks"],
1520
  minimum=1,
1521
  info="检索相关片段的数量",
 
1530
  precision=0,
1531
  )
1532
  segment_retrieval_top_k_input = gr.Number(
1533
+ label="视频段检索 Top-K",
1534
  value=_rag_runtime_settings["segment_retrieval_top_k"],
1535
  minimum=1,
1536
  info="检索相关视频段的数量",