File size: 3,393 Bytes
e884643 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
"""
Configuration management for GraphLLM system
"""
import os
from typing import Optional
from urllib.parse import quote

from pydantic import Field, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Values are read from the process environment and from a ``.env`` file.
    Field names are matched to variable names case-insensitively, so e.g.
    ``GEMINI_API_KEY`` populates ``gemini_api_key``. Any field that is not
    supplied keeps the default declared below.
    """

    # pydantic v2 configuration (replaces the deprecated nested ``class Config``).
    model_config = SettingsConfigDict(
        env_file=".env",
        case_sensitive=False,
    )

    # Application
    app_name: str = "GraphLLM"
    app_version: str = "1.0.0"
    environment: str = "development"
    debug: bool = True

    # API
    api_host: str = "0.0.0.0"
    api_port: int = 8000
    api_workers: int = 4

    # LLM Settings - Gemini (Primary)
    # NOTE: the pydantic v1 ``env=`` keyword is not honoured by
    # pydantic-settings v2; the variable name is derived from the field name.
    gemini_api_key: str = Field(
        default="",
        description="Gemini API key (environment variable GEMINI_API_KEY)",
    )
    gemini_model: str = "gemini-2.5-flash"

    # LLM Settings - Mistral (Fallback)
    mistral_api_key: str = Field(
        default="",
        description="Mistral API key (environment variable MISTRAL_API_KEY)",
    )
    mistral_model: str = "mistral-7b-instruct-v0.1"

    # LLM Parameters
    llm_temperature: float = 0.0
    llm_max_tokens: int = 2048
    llm_timeout: int = 120

    # Embedding Settings
    embedding_model: str = "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
    embedding_dimension: int = 384
    embedding_batch_size: int = 32

    # FAISS Vector DB
    faiss_index_path: str = "./data/faiss_index"
    faiss_metric: str = "cosine"

    # Neo4j Graph DB
    neo4j_uri: str = "bolt://localhost:7687"
    neo4j_user: str = "neo4j"
    neo4j_password: str = Field(
        default="",
        description="Neo4j password (environment variable NEO4J_PASSWORD)",
    )
    neo4j_database: str = "neo4j"

    # PostgreSQL
    postgres_host: str = "localhost"
    postgres_port: int = 5432
    postgres_db: str = "graphllm"
    postgres_user: str = "postgres"
    postgres_password: str = Field(
        default="",
        description="PostgreSQL password (environment variable POSTGRES_PASSWORD)",
    )

    # MongoDB (optional)
    mongodb_uri: str = "mongodb://localhost:27017"
    mongodb_database: str = "graphllm"

    # Chunking
    chunk_size: int = 512
    chunk_overlap: int = 128
    min_chunk_size: int = 100

    # Triplet Extraction
    triplet_confidence_threshold: float = 0.6
    entity_similarity_threshold: float = 0.85
    max_triples_per_chunk: int = 10

    # Graph Pruning
    node_importance_threshold: float = 0.3
    edge_confidence_threshold: float = 0.5
    min_node_mentions: int = 2

    # RAG
    rag_top_k: int = 10
    rag_rerank_top_k: int = 5
    max_context_length: int = 4000

    # File Upload
    max_file_size_mb: int = 50
    allowed_extensions: str = "pdf"
    upload_dir: str = "./data/uploads"

    # Storage
    data_dir: str = "./data"
    logs_dir: str = "./logs"
    cache_dir: str = "./cache"

    # Monitoring
    enable_metrics: bool = True
    metrics_port: int = 9090
    log_level: str = "INFO"

    @property
    def postgres_url(self) -> str:
        """Build the PostgreSQL connection URL.

        The user name and password are percent-encoded so that reserved
        characters in them (``@``, ``:``, ``/``, ``#`` ...) cannot be
        mistaken for URL delimiters. Values made only of unreserved
        characters are emitted unchanged.

        Returns:
            A ``postgresql://user:password@host:port/db`` URL string.
        """
        user = quote(self.postgres_user, safe="")
        password = quote(self.postgres_password, safe="")
        return (
            f"postgresql://{user}:{password}"
            f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_db}"
        )

    @property
    def max_file_size_bytes(self) -> int:
        """Return the upload size limit (``max_file_size_mb``) in bytes."""
        return self.max_file_size_mb * 1024 * 1024
# Global settings instance, built once when this module is imported;
# ensure_directories() below reads its directory paths from it.
settings = Settings()
def ensure_directories():
    """Create each directory named in the global settings if it is missing."""
    required_paths = (
        settings.data_dir,
        settings.upload_dir,
        settings.logs_dir,
        settings.cache_dir,
        # The FAISS index path is created as a directory as well.
        settings.faiss_index_path,
    )
    for path in required_paths:
        # exist_ok=True: an already-existing directory is not an error.
        os.makedirs(path, exist_ok=True)
|