Upload folder using huggingface_hub
Browse files
- .gitattributes +4 -0
- README.md +2 -1
- VideoAgent/__pycache__/__init__.cpython-310.pyc +0 -0
- VideoAgent/__pycache__/_utils.cpython-310.pyc +0 -0
- VideoAgent/__pycache__/base.cpython-310.pyc +0 -0
- VideoAgent/__pycache__/chunk.cpython-310.pyc +0 -0
- VideoAgent/__pycache__/prompt.cpython-310.pyc +0 -0
- VideoAgent/__pycache__/query.cpython-310.pyc +0 -0
- VideoAgent/__pycache__/vidrag_pipeline.cpython-310.pyc +0 -0
- VideoAgent/_llm/__pycache__/__init__.cpython-310.pyc +0 -0
- VideoAgent/_llm/__pycache__/asr_model.cpython-310.pyc +0 -0
- VideoAgent/_llm/__pycache__/embedding_model.cpython-310.pyc +0 -0
- VideoAgent/_llm/__pycache__/llm_model.cpython-310.pyc +0 -0
- VideoAgent/_llm/__pycache__/tokenizer_model.cpython-310.pyc +0 -0
- VideoAgent/_llm/__pycache__/vlm_model.cpython-310.pyc +0 -0
- VideoAgent/_server/sherpa_asr_server.py +1 -1
- VideoAgent/_storage/__pycache__/__init__.cpython-310.pyc +0 -0
- VideoAgent/_storage/__pycache__/kv_json.cpython-310.pyc +0 -0
- VideoAgent/_storage/__pycache__/vdb_nanovectordb.cpython-310.pyc +0 -0
- VideoAgent/_storage/vdb_nanovectordb.py +2 -3
- VideoAgent/_videoutil/__pycache__/__init__.cpython-310.pyc +0 -0
- VideoAgent/_videoutil/__pycache__/asr.cpython-310.pyc +0 -0
- VideoAgent/_videoutil/__pycache__/caption.cpython-310.pyc +0 -0
- VideoAgent/_videoutil/__pycache__/feature.cpython-310.pyc +0 -0
- VideoAgent/_videoutil/__pycache__/split.cpython-310.pyc +0 -0
- VideoAgent/_videoutil/split.py +16 -3
- image-10.png +3 -0
- image-17.png +0 -0
- image-2.png +0 -0
- image-7.png +3 -0
- image-8.png +3 -0
- image-9.png +3 -0
- requirements.txt +1 -3
- videorag_longervideos.py +2 -2
- webui.py +10 -3
.gitattributes
CHANGED
|
@@ -40,3 +40,7 @@ VideoAgent/_llm/tokenizer_model/Qwen/Qwen3-4B/tokenizer.json filter=lfs diff=lfs
|
|
| 40 |
VideoAgent/_llm/tokenizer_model/Qwen/Qwen3-4B-Instruct-2507/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 41 |
videos/origin/sanguo.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 42 |
videos/processed/sanguo.mp4 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
VideoAgent/_llm/tokenizer_model/Qwen/Qwen3-4B-Instruct-2507/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 41 |
videos/origin/sanguo.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 42 |
videos/processed/sanguo.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
image-10.png filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
image-7.png filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
image-8.png filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
image-9.png filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -128,7 +128,7 @@ print(result)
|
|
| 128 |
|
| 129 |
### 视频索引流程
|
| 130 |
|
| 131 |
-
— [VideoRAG](https://github.com/HKUDS/VideoRAG):超长视频跨模态检索增强生成框架
|
|
|
|
|
|
| 128 |
|
| 129 |
### 视频索引流程
|
| 130 |
|
| 131 |
+

|
| 132 |
### 查询流程
|
| 133 |
|
| 134 |
|
|
|
|
| 158 |
## 参考项目
|
| 159 |
|
| 160 |
- 香港大学数据科学实验室(HKUDS)— [VideoRAG](https://github.com/HKUDS/VideoRAG):超长视频跨模态检索增强生成框架
|
| 161 |
+
|
VideoAgent/__pycache__/__init__.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/__pycache__/__init__.cpython-310.pyc and b/VideoAgent/__pycache__/__init__.cpython-310.pyc differ
|
|
|
VideoAgent/__pycache__/_utils.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/__pycache__/_utils.cpython-310.pyc and b/VideoAgent/__pycache__/_utils.cpython-310.pyc differ
|
|
|
VideoAgent/__pycache__/base.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/__pycache__/base.cpython-310.pyc and b/VideoAgent/__pycache__/base.cpython-310.pyc differ
|
|
|
VideoAgent/__pycache__/chunk.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/__pycache__/chunk.cpython-310.pyc and b/VideoAgent/__pycache__/chunk.cpython-310.pyc differ
|
|
|
VideoAgent/__pycache__/prompt.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/__pycache__/prompt.cpython-310.pyc and b/VideoAgent/__pycache__/prompt.cpython-310.pyc differ
|
|
|
VideoAgent/__pycache__/query.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/__pycache__/query.cpython-310.pyc and b/VideoAgent/__pycache__/query.cpython-310.pyc differ
|
|
|
VideoAgent/__pycache__/vidrag_pipeline.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/__pycache__/vidrag_pipeline.cpython-310.pyc and b/VideoAgent/__pycache__/vidrag_pipeline.cpython-310.pyc differ
|
|
|
VideoAgent/_llm/__pycache__/__init__.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/_llm/__pycache__/__init__.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/__init__.cpython-310.pyc differ
|
|
|
VideoAgent/_llm/__pycache__/asr_model.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/_llm/__pycache__/asr_model.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/asr_model.cpython-310.pyc differ
|
|
|
VideoAgent/_llm/__pycache__/embedding_model.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/_llm/__pycache__/embedding_model.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/embedding_model.cpython-310.pyc differ
|
|
|
VideoAgent/_llm/__pycache__/llm_model.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/_llm/__pycache__/llm_model.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/llm_model.cpython-310.pyc differ
|
|
|
VideoAgent/_llm/__pycache__/tokenizer_model.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/_llm/__pycache__/tokenizer_model.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/tokenizer_model.cpython-310.pyc differ
|
|
|
VideoAgent/_llm/__pycache__/vlm_model.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/_llm/__pycache__/vlm_model.cpython-310.pyc and b/VideoAgent/_llm/__pycache__/vlm_model.cpython-310.pyc differ
|
|
|
VideoAgent/_server/sherpa_asr_server.py
CHANGED
|
@@ -44,7 +44,7 @@ class SherpaASREngine:
|
|
| 44 |
"sherpa-onnx-offline",
|
| 45 |
)
|
| 46 |
self.provider = provider or os.getenv("SHERPA_PROVIDER", "axera")
|
| 47 |
-
self.vad = vad or os.getenv("vad-model", "/root/huangjie/AXERA-TECH/SenseVoice/silero_vad.onnx")
|
| 48 |
|
| 49 |
if os.path.exists(self.sherpa_bin):
|
| 50 |
os.chmod(self.sherpa_bin, 0o755)
|
|
|
|
| 44 |
"sherpa-onnx-offline",
|
| 45 |
)
|
| 46 |
self.provider = provider or os.getenv("SHERPA_PROVIDER", "axera")
|
| 47 |
+
# self.vad = vad or os.getenv("vad-model", "/root/huangjie/AXERA-TECH/SenseVoice/silero_vad.onnx")
|
| 48 |
|
| 49 |
if os.path.exists(self.sherpa_bin):
|
| 50 |
os.chmod(self.sherpa_bin, 0o755)
|
VideoAgent/_storage/__pycache__/__init__.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/_storage/__pycache__/__init__.cpython-310.pyc and b/VideoAgent/_storage/__pycache__/__init__.cpython-310.pyc differ
|
|
|
VideoAgent/_storage/__pycache__/kv_json.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/_storage/__pycache__/kv_json.cpython-310.pyc and b/VideoAgent/_storage/__pycache__/kv_json.cpython-310.pyc differ
|
|
|
VideoAgent/_storage/__pycache__/vdb_nanovectordb.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/_storage/__pycache__/vdb_nanovectordb.cpython-310.pyc and b/VideoAgent/_storage/__pycache__/vdb_nanovectordb.cpython-310.pyc differ
|
|
|
VideoAgent/_storage/vdb_nanovectordb.py
CHANGED
|
@@ -103,7 +103,7 @@ class NanoVectorDBVideoSegmentStorage(BaseVectorStorage):
|
|
| 103 |
|
| 104 |
async def upsert(self, video_name, segment_index2name, video_output_format):
|
| 105 |
|
| 106 |
-
|
| 107 |
if not len(segment_index2name):
|
| 108 |
logger.warning("You insert an empty data to vector DB")
|
| 109 |
return []
|
|
@@ -130,8 +130,7 @@ class NanoVectorDBVideoSegmentStorage(BaseVectorStorage):
|
|
| 130 |
embeddings.append(batch_embeddings)
|
| 131 |
embeddings = torch.concat(embeddings, dim=0)
|
| 132 |
embeddings = embeddings.cpu().numpy()
|
| 133 |
-
|
| 134 |
-
print("d", len(list_data))
|
| 135 |
for i, d in enumerate(list_data):
|
| 136 |
d["__vector__"] = embeddings[i]
|
| 137 |
|
|
|
|
| 103 |
|
| 104 |
async def upsert(self, video_name, segment_index2name, video_output_format):
|
| 105 |
|
| 106 |
+
|
| 107 |
if not len(segment_index2name):
|
| 108 |
logger.warning("You insert an empty data to vector DB")
|
| 109 |
return []
|
|
|
|
| 130 |
embeddings.append(batch_embeddings)
|
| 131 |
embeddings = torch.concat(embeddings, dim=0)
|
| 132 |
embeddings = embeddings.cpu().numpy()
|
| 133 |
+
|
|
|
|
| 134 |
for i, d in enumerate(list_data):
|
| 135 |
d["__vector__"] = embeddings[i]
|
| 136 |
|
VideoAgent/_videoutil/__pycache__/__init__.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/_videoutil/__pycache__/__init__.cpython-310.pyc and b/VideoAgent/_videoutil/__pycache__/__init__.cpython-310.pyc differ
|
|
|
VideoAgent/_videoutil/__pycache__/asr.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/_videoutil/__pycache__/asr.cpython-310.pyc and b/VideoAgent/_videoutil/__pycache__/asr.cpython-310.pyc differ
|
|
|
VideoAgent/_videoutil/__pycache__/caption.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/_videoutil/__pycache__/caption.cpython-310.pyc and b/VideoAgent/_videoutil/__pycache__/caption.cpython-310.pyc differ
|
|
|
VideoAgent/_videoutil/__pycache__/feature.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/_videoutil/__pycache__/feature.cpython-310.pyc and b/VideoAgent/_videoutil/__pycache__/feature.cpython-310.pyc differ
|
|
|
VideoAgent/_videoutil/__pycache__/split.cpython-310.pyc
CHANGED
|
Binary files a/VideoAgent/_videoutil/__pycache__/split.cpython-310.pyc and b/VideoAgent/_videoutil/__pycache__/split.cpython-310.pyc differ
|
|
|
VideoAgent/_videoutil/split.py
CHANGED
|
@@ -9,6 +9,19 @@ from moviepy.video.io.VideoFileClip import VideoFileClip
|
|
| 9 |
import logging
|
| 10 |
import multiprocessing
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
| 14 |
|
|
@@ -30,17 +43,17 @@ def preprocess_video(
|
|
| 30 |
|
| 31 |
|
| 32 |
if os.path.exists(output_path):
|
| 33 |
-
|
| 34 |
return output_path
|
| 35 |
|
| 36 |
logger.info(f"Preprocessing video {video_name}: {target_width}x{target_height} @ {target_fps}fps -> {output_path}")
|
| 37 |
cmd = [
|
| 38 |
-
|
| 39 |
"-i", video_path,
|
| 40 |
"-vf", f"scale={target_width}:{target_height}",
|
| 41 |
"-r", str(target_fps),
|
| 42 |
"-c:v", "libx264",
|
| 43 |
-
"-c:a", "aac",
|
| 44 |
"-b:a", "128k",
|
| 45 |
"-loglevel", "error",
|
| 46 |
output_path,
|
|
|
|
| 9 |
import logging
|
| 10 |
import multiprocessing
|
| 11 |
|
| 12 |
+
def _get_ffmpeg_bin():
|
| 13 |
+
"""返回支持 libx264 的 ffmpeg 路径"""
|
| 14 |
+
env_bin = os.getenv("FFMPEG_BIN")
|
| 15 |
+
if env_bin:
|
| 16 |
+
return env_bin
|
| 17 |
+
try:
|
| 18 |
+
import imageio_ffmpeg
|
| 19 |
+
return imageio_ffmpeg.get_ffmpeg_exe()
|
| 20 |
+
except ImportError:
|
| 21 |
+
return "ffmpeg"
|
| 22 |
+
|
| 23 |
+
_FFMPEG_BIN = _get_ffmpeg_bin()
|
| 24 |
+
|
| 25 |
logger = logging.getLogger(__name__)
|
| 26 |
|
| 27 |
|
|
|
|
| 43 |
|
| 44 |
|
| 45 |
if os.path.exists(output_path):
|
| 46 |
+
logger.info(f"Preprocessed video already exists: {output_path}")
|
| 47 |
return output_path
|
| 48 |
|
| 49 |
logger.info(f"Preprocessing video {video_name}: {target_width}x{target_height} @ {target_fps}fps -> {output_path}")
|
| 50 |
cmd = [
|
| 51 |
+
_FFMPEG_BIN, "-y",
|
| 52 |
"-i", video_path,
|
| 53 |
"-vf", f"scale={target_width}:{target_height}",
|
| 54 |
"-r", str(target_fps),
|
| 55 |
"-c:v", "libx264",
|
| 56 |
+
"-c:a", "aac",
|
| 57 |
"-b:a", "128k",
|
| 58 |
"-loglevel", "error",
|
| 59 |
output_path,
|
image-10.png
ADDED
|
Git LFS Details
|
image-17.png
ADDED
|
image-2.png
CHANGED
|
|
image-7.png
ADDED
|
Git LFS Details
|
image-8.png
ADDED
|
Git LFS Details
|
image-9.png
ADDED
|
Git LFS Details
|
requirements.txt
CHANGED
|
@@ -1,7 +1,5 @@
|
|
| 1 |
python-dotenv==1.2.1
|
| 2 |
-
|
| 3 |
-
torchvision
|
| 4 |
-
transformers
|
| 5 |
numpy==2.2.6
|
| 6 |
Pillow==12.0.0
|
| 7 |
fastapi==0.135.1
|
|
|
|
| 1 |
python-dotenv==1.2.1
|
| 2 |
+
|
|
|
|
|
|
|
| 3 |
numpy==2.2.6
|
| 4 |
Pillow==12.0.0
|
| 5 |
fastapi==0.135.1
|
videorag_longervideos.py
CHANGED
|
@@ -21,12 +21,12 @@ if __name__ == '__main__':
|
|
| 21 |
multiprocessing.set_start_method('spawn', force=True)
|
| 22 |
|
| 23 |
# video文件地址:
|
| 24 |
-
video_base_path = f'/root/
|
| 25 |
video_files = sorted(os.listdir(video_base_path))
|
| 26 |
video_paths = [os.path.join(video_base_path, f) for f in video_files]
|
| 27 |
|
| 28 |
#工作目录
|
| 29 |
-
videorag = VideoRAG( working_dir=f"/root/
|
| 30 |
videorag.insert_video(video_path_list=video_paths)
|
| 31 |
|
| 32 |
querys = "SP视频开头前10秒的内容"
|
|
|
|
| 21 |
multiprocessing.set_start_method('spawn', force=True)
|
| 22 |
|
| 23 |
# video文件地址:
|
| 24 |
+
video_base_path = f'/root/huangjie/VideoAgent_api513/videos/origin'
|
| 25 |
video_files = sorted(os.listdir(video_base_path))
|
| 26 |
video_paths = [os.path.join(video_base_path, f) for f in video_files]
|
| 27 |
|
| 28 |
#工作目录
|
| 29 |
+
videorag = VideoRAG( working_dir=f"/root/huangjie/VideoAgent_api513/working_dir")
|
| 30 |
videorag.insert_video(video_path_list=video_paths)
|
| 31 |
|
| 32 |
querys = "SP视频开头前10秒的内容"
|
webui.py
CHANGED
|
@@ -850,8 +850,15 @@ def _export_clip(video_path: str, start: float, end: float, working_dir: str, ca
|
|
| 850 |
if os.path.exists(clip_path):
|
| 851 |
return clip_path
|
| 852 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 853 |
cmd = [
|
| 854 |
-
|
| 855 |
"-y",
|
| 856 |
"-ss",
|
| 857 |
f"{start:.3f}",
|
|
@@ -1508,7 +1515,7 @@ with gr.Blocks(
|
|
| 1508 |
precision=0,
|
| 1509 |
)
|
| 1510 |
retrieval_topk_chunks_input = gr.Number(
|
| 1511 |
-
label="检索 Top-K
|
| 1512 |
value=_rag_runtime_settings["retrieval_topk_chunks"],
|
| 1513 |
minimum=1,
|
| 1514 |
info="检索相关片段的数量",
|
|
@@ -1523,7 +1530,7 @@ with gr.Blocks(
|
|
| 1523 |
precision=0,
|
| 1524 |
)
|
| 1525 |
segment_retrieval_top_k_input = gr.Number(
|
| 1526 |
-
label="视频段检索 Top-K
|
| 1527 |
value=_rag_runtime_settings["segment_retrieval_top_k"],
|
| 1528 |
minimum=1,
|
| 1529 |
info="检索相关视频段的数量",
|
|
|
|
| 850 |
if os.path.exists(clip_path):
|
| 851 |
return clip_path
|
| 852 |
|
| 853 |
+
# 使用与 split.py 相同的 ffmpeg 选择逻辑
|
| 854 |
+
try:
|
| 855 |
+
import imageio_ffmpeg
|
| 856 |
+
_ffmpeg = imageio_ffmpeg.get_ffmpeg_exe()
|
| 857 |
+
except ImportError:
|
| 858 |
+
_ffmpeg = os.getenv("FFMPEG_BIN", "ffmpeg")
|
| 859 |
+
|
| 860 |
cmd = [
|
| 861 |
+
_ffmpeg,
|
| 862 |
"-y",
|
| 863 |
"-ss",
|
| 864 |
f"{start:.3f}",
|
|
|
|
| 1515 |
precision=0,
|
| 1516 |
)
|
| 1517 |
retrieval_topk_chunks_input = gr.Number(
|
| 1518 |
+
label="文本段检索 Top-K",
|
| 1519 |
value=_rag_runtime_settings["retrieval_topk_chunks"],
|
| 1520 |
minimum=1,
|
| 1521 |
info="检索相关片段的数量",
|
|
|
|
| 1530 |
precision=0,
|
| 1531 |
)
|
| 1532 |
segment_retrieval_top_k_input = gr.Number(
|
| 1533 |
+
label="视频段检索 Top-K",
|
| 1534 |
value=_rag_runtime_settings["segment_retrieval_top_k"],
|
| 1535 |
minimum=1,
|
| 1536 |
info="检索相关视频段的数量",
|