23f3003322 commited on
Commit
dc1c6a7
Β·
1 Parent(s): e071e70

new changes to handle dificulty level 1

Browse files
.gitignore CHANGED
@@ -41,3 +41,13 @@ Thumbs.db
41
 
42
  new.txt
43
  .DS_Store
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  new.txt
43
  .DS_Store
44
+
45
+
46
+ analysis.md
47
+ data_fetching.md
48
+ dynamic_scraper.md
49
+ orchestrator.md
50
+ questions.md
51
+ task_processor.md
52
+ unified.md
53
+
app/__pycache__/main.cpython-313.pyc CHANGED
Binary files a/app/__pycache__/main.cpython-313.pyc and b/app/__pycache__/main.cpython-313.pyc differ
 
app/api/routes/__pycache__/task.cpython-313.pyc CHANGED
Binary files a/app/api/routes/__pycache__/task.cpython-313.pyc and b/app/api/routes/__pycache__/task.cpython-313.pyc differ
 
app/api/routes/task.py CHANGED
@@ -5,15 +5,17 @@ Handles task submission and processing
5
 
6
  from fastapi import APIRouter, Request, status, BackgroundTasks, HTTPException
7
  from datetime import datetime
8
- from typing import Dict, Any
9
 
10
- from app.models.request import TaskRequest
11
- from app.models.response import TaskResponse, ImmediateResponse
12
  from app.core.logging import get_logger
13
- from app.core.security import verify_authentication, AuthenticationError
14
  from app.core.exceptions import TaskProcessingError
15
  from app.services.task_processor import TaskProcessor
16
 
 
 
17
  logger = get_logger(__name__)
18
 
19
  router = APIRouter()
@@ -36,18 +38,7 @@ async def handle_task(
36
  request: Request,
37
  background_tasks: BackgroundTasks
38
  ):
39
- """
40
- Main API endpoint for handling task requests
41
-
42
- Flow:
43
- 1. Validate JSON format (HTTP 400 if invalid)
44
- 2. Verify secret (HTTP 403 if invalid)
45
- 3. Respond immediately with HTTP 200
46
- 4. Process task in background
47
-
48
- Returns:
49
- Immediate HTTP 200 response with task accepted message
50
- """
51
  start_time = datetime.now()
52
 
53
  logger.info("πŸ“₯ Task request received")
@@ -58,13 +49,13 @@ async def handle_task(
58
  # ================================================================
59
  try:
60
  body = await request.json()
61
- task_data = TaskRequest(**body)
62
- except ValueError as e:
63
- logger.error(f"❌ Invalid JSON format: {str(e)}")
64
- raise HTTPException(
65
- status_code=status.HTTP_400_BAD_REQUEST,
66
- detail=f"Invalid JSON format: {str(e)}"
67
- )
68
  except Exception as e:
69
  logger.error(f"❌ Request validation failed: {str(e)}")
70
  raise HTTPException(
@@ -72,28 +63,26 @@ async def handle_task(
72
  detail=f"Invalid request data: {str(e)}"
73
  )
74
 
75
- logger.info(f"βœ… Request validated for: {task_data.email}")
76
 
77
- # ================================================================
78
- # STEP 2: VERIFY AUTHENTICATION (HTTP 403 if invalid)
79
- # ================================================================
80
- logger.info("πŸ” Verifying authentication")
81
- try:
82
- verify_authentication(task_data.secret)
83
- except AuthenticationError as e:
84
- logger.error(f"❌ Authentication failed: {str(e)}")
85
- raise HTTPException(
86
- status_code=status.HTTP_403_FORBIDDEN,
87
- detail="Invalid secret. Authentication failed."
88
- )
89
 
90
- logger.info("βœ… Authentication successful")
91
 
92
  # ================================================================
93
  # STEP 3: RESPOND IMMEDIATELY WITH HTTP 200
94
  # ================================================================
95
  logger.info("βœ… Request accepted - processing in background")
96
-
97
  # Add task processing to background
98
  background_tasks.add_task(
99
  process_task_background,
@@ -105,7 +94,6 @@ async def handle_task(
105
  response = ImmediateResponse(
106
  success=True,
107
  message="Task accepted and processing started",
108
- email=task_data.email,
109
  task_url=str(task_data.url),
110
  status="processing",
111
  timestamp=datetime.now().isoformat()
@@ -127,8 +115,11 @@ async def handle_task(
127
  )
128
 
129
 
 
 
 
130
  async def process_task_background(
131
- task_data: TaskRequest,
132
  start_time: datetime
133
  ):
134
  """
@@ -149,7 +140,7 @@ async def process_task_background(
149
  try:
150
  # Process the task
151
  result_data = await task_processor.process(task_data)
152
-
153
  # Calculate execution time
154
  execution_time = (datetime.now() - start_time).total_seconds()
155
 
 
5
 
6
  from fastapi import APIRouter, Request, status, BackgroundTasks, HTTPException
7
  from datetime import datetime
8
+ # from typing import Dict, Any
9
 
10
+ from app.models.request import ManualTriggeredRequestBody
11
+ from app.models.response import ImmediateResponse
12
  from app.core.logging import get_logger
13
+ # from app.core.security import verify_authentication, AuthenticationError
14
  from app.core.exceptions import TaskProcessingError
15
  from app.services.task_processor import TaskProcessor
16
 
17
+ import requests
18
+
19
  logger = get_logger(__name__)
20
 
21
  router = APIRouter()
 
38
  request: Request,
39
  background_tasks: BackgroundTasks
40
  ):
41
+
 
 
 
 
 
 
 
 
 
 
 
42
  start_time = datetime.now()
43
 
44
  logger.info("πŸ“₯ Task request received")
 
49
  # ================================================================
50
  try:
51
  body = await request.json()
52
+ task_data = ManualTriggeredRequestBody(**body)
53
+ # except ValueError as e:
54
+ # logger.error(f"❌ Invalid JSON format: {str(e)}")
55
+ # raise HTTPException(
56
+ # status_code=status.HTTP_400_BAD_REQUEST,
57
+ # detail=f"Invalid JSON format: {str(e)}"
58
+ # )
59
  except Exception as e:
60
  logger.error(f"❌ Request validation failed: {str(e)}")
61
  raise HTTPException(
 
63
  detail=f"Invalid request data: {str(e)}"
64
  )
65
 
 
66
 
67
+ # # ================================================================
68
+ # # STEP 2: VERIFY AUTHENTICATION (HTTP 403 if invalid)
69
+ # # ================================================================
70
+ # logger.info("πŸ” Verifying authentication")
71
+ # try:
72
+ # verify_authentication(task_data.secret)
73
+ # except AuthenticationError as e:
74
+ # logger.error(f"❌ Authentication failed: {str(e)}")
75
+ # raise HTTPException(
76
+ # status_code=status.HTTP_403_FORBIDDEN,
77
+ # detail="Invalid secret. Authentication failed."
78
+ # )
79
 
80
+ # logger.info("βœ… Authentication successful")
81
 
82
  # ================================================================
83
  # STEP 3: RESPOND IMMEDIATELY WITH HTTP 200
84
  # ================================================================
85
  logger.info("βœ… Request accepted - processing in background")
 
86
  # Add task processing to background
87
  background_tasks.add_task(
88
  process_task_background,
 
94
  response = ImmediateResponse(
95
  success=True,
96
  message="Task accepted and processing started",
 
97
  task_url=str(task_data.url),
98
  status="processing",
99
  timestamp=datetime.now().isoformat()
 
115
  )
116
 
117
 
118
+
119
+
120
+
121
  async def process_task_background(
122
+ task_data: ManualTriggeredRequestBody,
123
  start_time: datetime
124
  ):
125
  """
 
140
  try:
141
  # Process the task
142
  result_data = await task_processor.process(task_data)
143
+
144
  # Calculate execution time
145
  execution_time = (datetime.now() - start_time).total_seconds()
146
 
app/core/__pycache__/config.cpython-313.pyc CHANGED
Binary files a/app/core/__pycache__/config.cpython-313.pyc and b/app/core/__pycache__/config.cpython-313.pyc differ
 
app/core/__pycache__/exceptions.cpython-313.pyc CHANGED
Binary files a/app/core/__pycache__/exceptions.cpython-313.pyc and b/app/core/__pycache__/exceptions.cpython-313.pyc differ
 
app/core/config.py CHANGED
@@ -26,6 +26,7 @@ class Settings(BaseSettings):
26
 
27
  # Security
28
  API_SECRET: str = Field(default="", env="API_SECRET")
 
29
  ALLOWED_ORIGINS: List[str] = Field(default=["*"], env="ALLOWED_ORIGINS")
30
 
31
  # Logging
 
26
 
27
  # Security
28
  API_SECRET: str = Field(default="", env="API_SECRET")
29
+ USER_EMAIL: str = Field(default="", env="USER_EMAIL")
30
  ALLOWED_ORIGINS: List[str] = Field(default=["*"], env="ALLOWED_ORIGINS")
31
 
32
  # Logging
app/core/exceptions.py CHANGED
@@ -19,6 +19,10 @@ class TaskProcessingError(Exception):
19
  """Raised when task processing fails"""
20
  pass
21
 
 
 
 
 
22
 
23
  class AuthenticationError(Exception):
24
  """Raised when authentication fails"""
 
19
  """Raised when task processing fails"""
20
  pass
21
 
22
+ class AnswerGenerationError(Exception):
23
+ """Raised when answer generation fails"""
24
+ pass
25
+
26
 
27
  class AuthenticationError(Exception):
28
  """Raised when authentication fails"""
app/main.py CHANGED
@@ -29,13 +29,13 @@ async def lifespan(app: FastAPI):
29
  logger.info(f"Environment: {settings.ENVIRONMENT}")
30
  logger.info("=" * 80)
31
  import os
32
- if os.getenv('ENVIRONMENT') == 'production':
33
- from app.modules.scrapers.browser_pool import get_pooled_browser
34
- from app.modules.scrapers.browser_config import PRODUCTION_CONFIG
35
 
36
- logger.info("Pre-warming browser pool...")
37
- await get_pooled_browser(PRODUCTION_CONFIG)
38
- logger.info("βœ“ Browser pool ready")
39
 
40
  yield
41
 
@@ -57,8 +57,8 @@ def create_application() -> FastAPI:
57
  description=settings.APP_DESCRIPTION,
58
  version=settings.APP_VERSION,
59
  lifespan=lifespan,
60
- docs_url="/docs" if settings.ENVIRONMENT == "development" else None,
61
- redoc_url="/redoc" if settings.ENVIRONMENT == "development" else None,
62
  )
63
 
64
  # Configure CORS
@@ -76,7 +76,7 @@ def create_application() -> FastAPI:
76
  # Register exception handlers
77
  register_exception_handlers(app)
78
  registry = register_all_modules()
79
- orchestrator = OrchestratorEngine(registry)
80
 
81
  # Include routers
82
  app.include_router(health.router, tags=["Health"])
 
29
  logger.info(f"Environment: {settings.ENVIRONMENT}")
30
  logger.info("=" * 80)
31
  import os
32
+ # if os.getenv('ENVIRONMENT') == 'production':
33
+ # from app.modules.scrapers.browser_pool import get_pooled_browser
34
+ # from app.modules.scrapers.browser_config import PRODUCTION_CONFIG
35
 
36
+ # logger.info("Pre-warming browser pool...")
37
+ # await get_pooled_browser(PRODUCTION_CONFIG)
38
+ # logger.info("βœ“ Browser pool ready")
39
 
40
  yield
41
 
 
57
  description=settings.APP_DESCRIPTION,
58
  version=settings.APP_VERSION,
59
  lifespan=lifespan,
60
+ # docs_url="/docs" if settings.ENVIRONMENT == "development" else None,
61
+ # redoc_url="/redoc" if settings.ENVIRONMENT == "development" else None,
62
  )
63
 
64
  # Configure CORS
 
76
  # Register exception handlers
77
  register_exception_handlers(app)
78
  registry = register_all_modules()
79
+ # orchestrator = OrchestratorEngine(registry)
80
 
81
  # Include routers
82
  app.include_router(health.router, tags=["Health"])
app/models/__pycache__/request.cpython-313.pyc CHANGED
Binary files a/app/models/__pycache__/request.cpython-313.pyc and b/app/models/__pycache__/request.cpython-313.pyc differ
 
app/models/__pycache__/response.cpython-313.pyc CHANGED
Binary files a/app/models/__pycache__/response.cpython-313.pyc and b/app/models/__pycache__/response.cpython-313.pyc differ
 
app/models/analysis.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import Dict, Any, List, Optional, Literal
3
+
4
+ class QuestionAnalysis(BaseModel):
5
+ """
6
+ Analysis focused on generating the correct answer.
7
+ No redirect/entry page logic needed.
8
+ """
9
+
10
+ # ===== QUESTION CLASSIFICATION =====
11
+ question_type: Literal[
12
+ 'cli_command', # Q2, Q3: shell commands
13
+ 'file_path', # Q4: paths/URLs
14
+ 'data_processing', # Q7, Q9, Q11: CSV/JSON processing
15
+ 'image_analysis', # Q6, Q17: image operations
16
+ 'audio_transcription', # Q5: audio to text
17
+ 'api_interaction', # Q8: external API calls
18
+ 'document_parsing', # Q10: PDF extraction
19
+ 'calculation', # Q20, Q21: mathematical computations
20
+ 'text_generation', # Q12, Q13, Q19: YAML, prompts
21
+ 'optimization', # Q14, Q18: constraint solving
22
+ 'llm_reasoning' # Q16: tool planning/reasoning
23
+ ] = Field(description="Type of task to solve")
24
+
25
+ # ===== ANSWER FORMAT =====
26
+ answer_format: Literal[
27
+ 'plain_string', # Q2, Q3, Q4: raw text
28
+ 'json_object', # Q11, Q14, Q16, Q21: {"key": "value"}
29
+ 'json_array', # Q20: ["a", "b", "c"]
30
+ 'number', # Q8, Q9, Q10, Q17, Q18: integer/float
31
+ 'single_letter' # Q12: A, B, or C
32
+ ] = Field(description="How to format the final answer")
33
+
34
+ # ===== ANSWER COMPONENTS =====
35
+ key_components: Dict[str, Any] = Field(
36
+ default_factory=dict,
37
+ description="Extracted components needed to generate answer"
38
+ )
39
+
40
+ # ===== PERSONALIZATION =====
41
+ requires_personalization: bool = Field(
42
+ default=False,
43
+ description="Does answer depend on user email?"
44
+ )
45
+
46
+ personalization_type: Optional[Literal[
47
+ 'email_in_url', # Q2: ?email=<user_email>
48
+ 'email_length_offset', # Q8, Q9, Q15, Q18: offset = len(email) mod N
49
+ 'email_length_conditional' # Q15: if even/odd
50
+ ]] = None
51
+
52
+ personalization_details: Optional[str] = Field(
53
+ default=None,
54
+ description="Specific personalization logic"
55
+ )
56
+
57
+ # ===== FILE REQUIREMENTS =====
58
+ requires_files: bool = Field(
59
+ default=False,
60
+ description="Does question need file downloads?"
61
+ )
62
+
63
+ required_file_types: List[str] = Field(
64
+ default_factory=list,
65
+ description="File types needed: csv, json, png, pdf, opus, zip"
66
+ )
67
+
68
+ # ===== EXTERNAL RESOURCES =====
69
+ requires_external_fetch: bool = Field(
70
+ default=False,
71
+ description="Need to fetch data from another URL (not just files)?"
72
+ )
73
+
74
+ external_resources: List[str] = Field(
75
+ default_factory=list,
76
+ description="URLs/endpoints to fetch before solving"
77
+ )
78
+
79
+ # ===== CRITICAL CONSTRAINTS =====
80
+ critical_constraints: List[str] = Field(
81
+ default_factory=list,
82
+ description="Must-follow rules for answer format"
83
+ )
84
+
85
+ # ===== SUBMISSION INFO =====
86
+ submission_url_path: str = Field(
87
+ description="URL path for this question (e.g., '/project2-uv')"
88
+ )
89
+
90
+ # ===== CONFIDENCE & REASONING =====
91
+ reasoning: str = Field(
92
+ description="Why this classification and components were chosen"
93
+ )
94
+
95
+ confidence: float = Field(
96
+ ge=0.0,
97
+ le=1.0,
98
+ description="Confidence in analysis (0.0-1.0)"
99
+ )
app/models/answer.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import Dict, Any, Optional
3
+
4
+ class AnswerResult(BaseModel):
5
+ """Structured output from answer generation"""
6
+
7
+ answer: str = Field(
8
+ description="The exact answer to submit (final output)"
9
+ )
10
+
11
+ reasoning: str = Field(
12
+ description="Step-by-step explanation of how answer was generated"
13
+ )
14
+
15
+ components_used: Dict[str, Any] = Field(
16
+ default_factory=dict,
17
+ description="Which components from analysis were used"
18
+ )
19
+
20
+ confidence: float = Field(
21
+ ge=0.0,
22
+ le=1.0,
23
+ description="Confidence in answer correctness (0.0-1.0)"
24
+ )
25
+
26
+ personalization_applied: bool = Field(
27
+ default=False,
28
+ description="Whether personalization was applied"
29
+ )
30
+
31
+ validation_notes: str = Field(
32
+ default="",
33
+ description="Notes about format validation"
34
+ )
app/models/request.py CHANGED
@@ -7,6 +7,12 @@ from typing import Optional, Dict, Any
7
  from pydantic import BaseModel, Field, EmailStr, HttpUrl, validator
8
 
9
 
 
 
 
 
 
 
10
  class TaskRequest(BaseModel):
11
  """
12
  Schema for task request validation
 
7
  from pydantic import BaseModel, Field, EmailStr, HttpUrl, validator
8
 
9
 
10
+ class ManualTriggeredRequestBody(BaseModel):
11
+ """Request body format for quiz submission"""
12
+ url: str
13
+
14
+
15
+
16
  class TaskRequest(BaseModel):
17
  """
18
  Schema for task request validation
app/models/response.py CHANGED
@@ -7,6 +7,14 @@ from typing import Optional, Dict, Any
7
  from datetime import datetime
8
  from pydantic import BaseModel, Field
9
 
 
 
 
 
 
 
 
 
10
  class ImmediateResponse(BaseModel):
11
  """
12
  Immediate response sent after validation
@@ -19,10 +27,7 @@ class ImmediateResponse(BaseModel):
19
  message: str = Field(
20
  description="Status message"
21
  )
22
-
23
- email: str = Field(
24
- description="Student email from request"
25
- )
26
 
27
  task_url: str = Field(
28
  description="Task URL from request"
 
7
  from datetime import datetime
8
  from pydantic import BaseModel, Field
9
 
10
+ class SubmissionBody(BaseModel):
11
+ """Request body format for quiz submission"""
12
+ email: str
13
+ secret: str
14
+ url: str
15
+ answer: int
16
+
17
+
18
  class ImmediateResponse(BaseModel):
19
  """
20
  Immediate response sent after validation
 
27
  message: str = Field(
28
  description="Status message"
29
  )
30
+
 
 
 
31
 
32
  task_url: str = Field(
33
  description="Task URL from request"
app/modules/scrapers/base_scraper.py CHANGED
@@ -26,6 +26,7 @@ class ScraperResult(BaseModel):
26
  encoding: str = "utf-8"
27
  response_time: float = 0.0
28
  status_code: int = 200
 
29
 
30
  # Scraping details
31
  selectors_used: List[str] = Field(default_factory=list)
@@ -35,6 +36,14 @@ class ScraperResult(BaseModel):
35
  error: Optional[str] = None
36
  warnings: List[str] = Field(default_factory=list)
37
 
 
 
 
 
 
 
 
 
38
  class Config:
39
  arbitrary_types_allowed = True
40
 
 
26
  encoding: str = "utf-8"
27
  response_time: float = 0.0
28
  status_code: int = 200
29
+ raw_html: Optional[str] = None
30
 
31
  # Scraping details
32
  selectors_used: List[str] = Field(default_factory=list)
 
36
  error: Optional[str] = None
37
  warnings: List[str] = Field(default_factory=list)
38
 
39
+ def __post_init__(self):
40
+ if self.data is None:
41
+ self.data = []
42
+ if self.columns_extracted is None:
43
+ self.columns_extracted = []
44
+ if self.selectors_used is None:
45
+ self.selectors_used = []
46
+
47
  class Config:
48
  arbitrary_types_allowed = True
49
 
app/modules/scrapers/dynamic_scraper.py CHANGED
@@ -154,6 +154,7 @@ class DynamicScraper(BaseScraper):
154
  wait_for: Optional[str] = None,
155
  click_selectors: List[str] = None,
156
  scroll: bool = False,
 
157
  take_screenshot: bool = False,
158
  **kwargs
159
  ) -> ScraperResult:
@@ -209,7 +210,7 @@ class DynamicScraper(BaseScraper):
209
  url=url,
210
  error="Failed to load page"
211
  )
212
-
213
  status_code = response.status
214
  logger.info(f"Page loaded | Status: {status_code}")
215
 
@@ -246,6 +247,15 @@ class DynamicScraper(BaseScraper):
246
  else:
247
  data = await self._extract_auto(page)
248
 
 
 
 
 
 
 
 
 
 
249
  # Build result
250
  columns = list(data[0].keys()) if data else []
251
 
@@ -257,6 +267,8 @@ class DynamicScraper(BaseScraper):
257
  columns_extracted=columns,
258
  status_code=status_code,
259
  selectors_used=list(selectors.keys()) if selectors else []
 
 
260
  )
261
 
262
  logger.info(f"βœ“ Scraped {len(data)} rows with browser")
 
154
  wait_for: Optional[str] = None,
155
  click_selectors: List[str] = None,
156
  scroll: bool = False,
157
+ return_html: bool = True,
158
  take_screenshot: bool = False,
159
  **kwargs
160
  ) -> ScraperResult:
 
210
  url=url,
211
  error="Failed to load page"
212
  )
213
+
214
  status_code = response.status
215
  logger.info(f"Page loaded | Status: {status_code}")
216
 
 
247
  else:
248
  data = await self._extract_auto(page)
249
 
250
+ if selectors:
251
+ data = await self._extract_with_selectors(page, selectors)
252
+ else:
253
+ data = await self._extract_auto(page)
254
+
255
+ rendered_html = None
256
+ if return_html:
257
+ rendered_html = await page.content()
258
+
259
  # Build result
260
  columns = list(data[0].keys()) if data else []
261
 
 
267
  columns_extracted=columns,
268
  status_code=status_code,
269
  selectors_used=list(selectors.keys()) if selectors else []
270
+ ,
271
+ raw_html=rendered_html
272
  )
273
 
274
  logger.info(f"βœ“ Scraped {len(data)} rows with browser")
app/services/__pycache__/task_processor.cpython-313.pyc CHANGED
Binary files a/app/services/__pycache__/task_processor.cpython-313.pyc and b/app/services/__pycache__/task_processor.cpython-313.pyc differ
 
app/services/analyser.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, List
2
+ from app.core.logging import get_logger
3
+ from app.core.exceptions import TaskProcessingError
4
+ from app.models.analysis import QuestionAnalysis
5
+ from app.utils.prompts import AnalysisPrompts
6
+ logger = get_logger(__name__)
7
+
8
+
9
+ class QuestionAnalyzer:
10
+ """
11
+ Analyzes questions to determine how to generate answers.
12
+ No entry page or redirect logic - assumes all content is solvable questions.
13
+ """
14
+
15
+ def __init__(self, llm_client):
16
+ """
17
+ Args:
18
+ llm_client: LLM client with run_agent() method
19
+ """
20
+ self.llm_client = llm_client
21
+ self._analyzer_agent = None
22
+
23
+ async def initialize(self):
24
+ """Initialize LLM agent"""
25
+ self._analyzer_agent = self.llm_client.create_agent(
26
+ output_type=QuestionAnalysis,
27
+ system_prompt=(
28
+ "You are an expert at analyzing technical quiz questions. "
29
+ "Extract precise information needed to generate correct answers. "
30
+ "Be thorough and accurate."
31
+ ),
32
+ retries=2
33
+ )
34
+ logger.info("βœ“ Question analyzer initialized")
35
+
36
+ async def analyze_question(
37
+ self,
38
+ question_metadata: Dict[str, Any],
39
+ base_url: str,
40
+ user_email: str,
41
+ downloaded_files: List[Dict[str, Any]]
42
+ ) -> QuestionAnalysis:
43
+ """
44
+ Analyze question to determine how to generate the answer.
45
+
46
+ Args:
47
+ question_metadata: Parsed metadata from scraping
48
+ - title: Question title
49
+ - heading: Question heading
50
+ - difficulty: 1-5
51
+ - is_personalized: bool
52
+ - instructions: List of instruction strings
53
+ - file_links: List of file references
54
+ base_url: Base URL for the quiz
55
+ user_email: User's email address
56
+ downloaded_files: List of downloaded file info
57
+ - filename, type, path, size
58
+
59
+ Returns:
60
+ QuestionAnalysis: Structured analysis
61
+
62
+ Raises:
63
+ TaskProcessingError: If analysis fails
64
+ """
65
+ logger.info(f"πŸ€– Analyzing question: {question_metadata.get('title', 'unknown')}")
66
+
67
+ # Build prompt
68
+ prompt = AnalysisPrompts.question_analysis_prompt(
69
+ instructions=question_metadata.get('instructions', []),
70
+ difficulty=question_metadata.get('difficulty', 1),
71
+ is_personalized=question_metadata.get('is_personalized', False),
72
+ title=question_metadata.get('title', ''),
73
+ heading=question_metadata.get('heading', ''),
74
+ base_url=base_url,
75
+ user_email=user_email,
76
+ available_files=downloaded_files
77
+ )
78
+
79
+ try:
80
+ # Run LLM analysis
81
+ analysis: QuestionAnalysis = await self.llm_client.run_agent(
82
+ self._analyzer_agent,
83
+ prompt
84
+ )
85
+
86
+ # Log analysis results
87
+ logger.info(f"βœ“ Question type: {analysis.question_type}")
88
+ logger.info(f"βœ“ Answer format: {analysis.answer_format}")
89
+ logger.info(f"βœ“ Personalization: {analysis.requires_personalization}")
90
+ logger.info(f"βœ“ Files needed: {analysis.requires_files}")
91
+ logger.info(f"βœ“ Confidence: {analysis.confidence:.2f}")
92
+
93
+ # Validate analysis
94
+ self._validate_analysis(analysis, question_metadata)
95
+
96
+ return analysis
97
+
98
+ except Exception as e:
99
+ logger.error(f"❌ Question analysis failed: {e}", exc_info=True)
100
+ raise TaskProcessingError(
101
+ f"Cannot analyze question: {str(e)}. "
102
+ "LLM analysis is required for unknown question types."
103
+ )
104
+
105
+ def _validate_analysis(
106
+ self,
107
+ analysis: QuestionAnalysis,
108
+ metadata: Dict[str, Any]
109
+ ):
110
+ """
111
+ Validate analysis results make sense.
112
+
113
+ Args:
114
+ analysis: LLM analysis result
115
+ metadata: Original question metadata
116
+
117
+ Raises:
118
+ TaskProcessingError: If validation fails
119
+ """
120
+ # Check confidence threshold
121
+ if analysis.confidence < 0.5:
122
+ logger.warning(
123
+ f"⚠️ Low confidence analysis: {analysis.confidence:.2f}"
124
+ )
125
+
126
+ # Check personalization consistency
127
+ if metadata.get('is_personalized') and not analysis.requires_personalization:
128
+ logger.warning(
129
+ "⚠️ Metadata says personalized but analysis disagrees"
130
+ )
131
+
132
+ # Check file requirements
133
+ if analysis.requires_files and not analysis.required_file_types:
134
+ logger.warning(
135
+ "⚠️ Requires files but no file types specified"
136
+ )
137
+
138
+ # Check submission URL
139
+ if not analysis.submission_url_path:
140
+ raise TaskProcessingError(
141
+ "Analysis missing submission_url_path"
142
+ )
143
+
144
+ if not analysis.submission_url_path.startswith('/'):
145
+ logger.warning(
146
+ f"⚠️ Submission URL should start with '/': {analysis.submission_url_path}"
147
+ )
148
+
149
+ logger.debug("βœ“ Analysis validation passed")
app/services/answer_generator.py ADDED
@@ -0,0 +1,492 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, List, Optional
2
+ import json
3
+ import re
4
+ from app.core.logging import get_logger
5
+ from app.core.exceptions import AnswerGenerationError
6
+ from app.models.answer import AnswerResult
7
+ from app.models.analysis import QuestionAnalysis
8
+ logger = get_logger(__name__)
9
+
10
+
11
+ class AnswerGenerator:
12
+ """
13
+ Generates answers based on question analysis.
14
+ Uses LLM with rich context for flexibility with unknown questions.
15
+ """
16
+
17
+ def __init__(self, llm_client):
18
+ """
19
+ Args:
20
+ llm_client: LLM client with run_agent() method
21
+ """
22
+ self.llm_client = llm_client
23
+ self._generator_agent = None
24
+
25
+ async def initialize(self):
26
+ """Initialize LLM agent for answer generation"""
27
+ self._generator_agent = self.llm_client.create_agent(
28
+ output_type=AnswerResult,
29
+ system_prompt=(
30
+ "You are an expert at solving technical quiz questions. "
31
+ "Generate precise, exact answers based on the provided analysis and context. "
32
+ "Follow all constraints strictly. "
33
+ "Be thorough in your reasoning to ensure correctness."
34
+ ),
35
+ retries=2
36
+ )
37
+ logger.info("βœ“ Answer generator initialized")
38
+
39
+ async def generate(
40
+ self,
41
+ analysis: 'QuestionAnalysis',
42
+ question_metadata: Dict[str, Any],
43
+ base_url: str,
44
+ user_email: str,
45
+ downloaded_files: List[Dict[str, Any]]
46
+ ) -> str:
47
+ """
48
+ Generate answer based on question analysis.
49
+
50
+ Args:
51
+ analysis: Question analysis from analyzer
52
+ question_metadata: Original metadata with instructions
53
+ base_url: Base URL for the quiz
54
+ user_email: User's email address
55
+ downloaded_files: List of downloaded files with local_path
56
+
57
+ Returns:
58
+ str: Final answer ready to submit
59
+
60
+ Raises:
61
+ AnswerGenerationError: If generation fails
62
+ """
63
+ logger.info(f"πŸ’‘ Generating answer for {analysis.question_type}...")
64
+
65
+ try:
66
+ # Step 1: Build comprehensive context for LLM
67
+ context = self._build_generation_context(
68
+ analysis=analysis,
69
+ question_metadata=question_metadata,
70
+ base_url=base_url,
71
+ user_email=user_email,
72
+ downloaded_files=downloaded_files
73
+ )
74
+
75
+ # Step 2: Generate answer with LLM
76
+ result = await self._generate_with_llm(context)
77
+
78
+ logger.info(f"βœ“ Generated answer (confidence: {result.confidence:.2f})")
79
+ logger.debug(f"Reasoning: {result.reasoning}")
80
+
81
+ # Step 3: Apply personalization if needed
82
+ if analysis.requires_personalization and not result.personalization_applied:
83
+ logger.info("Applying personalization...")
84
+ result.answer = self._apply_personalization(
85
+ answer=result.answer,
86
+ analysis=analysis,
87
+ user_email=user_email
88
+ )
89
+ result.personalization_applied = True
90
+
91
+ # Step 4: Validate format
92
+ is_valid, validation_message = self._validate_format(
93
+ result.answer,
94
+ analysis
95
+ )
96
+
97
+ if not is_valid:
98
+ logger.warning(f"Format validation issue: {validation_message}")
99
+ # Try to auto-correct common issues
100
+ result.answer = self._auto_correct_format(
101
+ result.answer,
102
+ analysis,
103
+ validation_message
104
+ )
105
+ logger.info("Applied auto-correction")
106
+
107
+ # Step 5: Check constraints
108
+ constraints_met, violations = self._check_constraints(
109
+ result.answer,
110
+ analysis
111
+ )
112
+
113
+ if not constraints_met:
114
+ logger.warning(f"Constraint violations: {violations}")
115
+ if result.confidence < 0.8:
116
+ raise AnswerGenerationError(
117
+ f"Low confidence ({result.confidence}) with constraint violations: {violations}"
118
+ )
119
+
120
+ logger.info(f"βœ“ Final answer: {result.answer[:100]}...")
121
+
122
+ return result.answer
123
+
124
+ except Exception as e:
125
+ logger.error(f"❌ Answer generation failed: {e}", exc_info=True)
126
+ raise AnswerGenerationError(f"Failed to generate answer: {str(e)}")
127
+
128
+ def _build_generation_context(
129
+ self,
130
+ analysis: 'QuestionAnalysis',
131
+ question_metadata: Dict[str, Any],
132
+ base_url: str,
133
+ user_email: str,
134
+ downloaded_files: List[Dict[str, Any]]
135
+ ) -> str:
136
+ """
137
+ Build comprehensive context prompt for LLM.
138
+
139
+ Returns:
140
+ str: Rich context prompt
141
+ """
142
+ # Format instructions
143
+ instructions_text = "\n".join(
144
+ f"{i+1}. {inst}"
145
+ for i, inst in enumerate(question_metadata.get('instructions', []))
146
+ )
147
+
148
+ # Format key components
149
+ components_text = json.dumps(analysis.key_components, indent=2)
150
+
151
+ # Format constraints
152
+ constraints_text = "\n".join(
153
+ f"- {constraint}"
154
+ for constraint in analysis.critical_constraints
155
+ ) if analysis.critical_constraints else "None specified"
156
+
157
+ # Format files
158
+ files_text = "\n".join(
159
+ f"- {f['filename']} (type: {f['type']}, path: {f['local_path']})"
160
+ for f in downloaded_files
161
+ ) if downloaded_files else "None"
162
+
163
+ # Build personalization info
164
+ personalization_text = "Not required"
165
+ if analysis.requires_personalization:
166
+ personalization_text = f"""
167
+ Required: Yes
168
+ Type: {analysis.personalization_type}
169
+ Details: {analysis.personalization_details}
170
+ User Email: {user_email}
171
+ Email Length: {len(user_email)}
172
+ """
173
+
174
+ # Build complete prompt
175
+ prompt = f"""Generate the exact answer for this technical quiz question.
176
+
177
+ # QUESTION METADATA
178
+ - Title: {question_metadata.get('title', 'Unknown')}
179
+ - Difficulty: {question_metadata.get('difficulty', 'Unknown')}/5
180
+ - Question Type: {analysis.question_type}
181
+ - Answer Format: {analysis.answer_format}
182
+
183
+ # ORIGINAL INSTRUCTIONS
184
+ {instructions_text}
185
+
186
+ # EXTRACTED COMPONENTS
187
+ The following components were extracted from the instructions:
188
+ {components_text}
189
+
190
+ # USER CONTEXT
191
+ - Base URL: {base_url}
192
+ - User Email: {user_email}
193
+
194
+ # PERSONALIZATION
195
+ {personalization_text}
196
+
197
+ # AVAILABLE FILES
198
+ {files_text}
199
+
200
+ # CRITICAL CONSTRAINTS
201
+ {constraints_text}
202
+
203
+ # ANSWER FORMAT REQUIREMENTS
204
+ Format: {analysis.answer_format}
205
+
206
+ """
207
+
208
+ # Add format-specific guidance
209
+ if analysis.answer_format == 'plain_string':
210
+ prompt += """
211
+ Return PLAIN TEXT ONLY:
212
+ - No JSON wrapping
213
+ - No quotes around the answer
214
+ - No extra formatting
215
+ - Just the raw string
216
+ """
217
+ elif analysis.answer_format == 'json_object':
218
+ prompt += """
219
+ Return VALID JSON OBJECT:
220
+ - Must be a dictionary {{"key": "value"}}
221
+ - Properly escaped quotes
222
+ - Valid JSON syntax
223
+ """
224
+ elif analysis.answer_format == 'json_array':
225
+ prompt += """
226
+ Return VALID JSON ARRAY:
227
+ - Must be a list ["item1", "item2"]
228
+ - Properly formatted
229
+ - Valid JSON syntax
230
+ """
231
+ elif analysis.answer_format == 'number':
232
+ prompt += """
233
+ Return NUMBER ONLY:
234
+ - Just the numeric value
235
+ - No units or extra text
236
+ - Integer or float as appropriate
237
+ """
238
+ elif analysis.answer_format == 'single_letter':
239
+ prompt += """
240
+ Return SINGLE LETTER:
241
+ - Just one character (A, B, C, etc.)
242
+ - No explanation or extra text
243
+ """
244
+
245
+ # Add question-type specific guidance
246
+ if analysis.question_type == 'cli_command':
247
+ prompt += """
248
+
249
+ # COMMAND GENERATION GUIDANCE
250
+ - Assemble command from components in correct order
251
+ - Use exact formatting for flags and arguments
252
+ - Pay attention to quote style (single vs double)
253
+ - Include all required parts (tool, subcommand, arguments, flags)
254
+ - Do NOT include shell prompt ($, >, #)
255
+ - Return the COMMAND STRING itself, not its output
256
+ """
257
+ elif analysis.question_type == 'file_path':
258
+ prompt += """
259
+
260
+ # FILE PATH GUIDANCE
261
+ - Return the exact path as specified
262
+ - No markdown formatting []()
263
+ - No HTML tags
264
+ - No quotes unless specifically required
265
+ - Exact string match is critical
266
+ """
267
+
268
+ prompt += """
269
+
270
+ # YOUR TASK
271
+ Generate the EXACT answer that should be submitted based on all the information above.
272
+
273
+ IMPORTANT:
274
+ 1. Use the extracted components to build the answer
275
+ 2. Replace any placeholders with actual values (base_url, user_email)
276
+ 3. Follow ALL critical constraints precisely
277
+ 4. Match the required answer format exactly
278
+ 5. Provide detailed reasoning for your answer
279
+
280
+ Generate the answer now.
281
+ """
282
+
283
+ return prompt
284
+
285
+ async def _generate_with_llm(self, context: str) -> AnswerResult:
286
+ """
287
+ Call LLM to generate answer.
288
+
289
+ Args:
290
+ context: Rich context prompt
291
+
292
+ Returns:
293
+ AnswerResult: Structured answer with reasoning
294
+ """
295
+ try:
296
+ result: AnswerResult = await self.llm_client.run_agent(
297
+ self._generator_agent,
298
+ context
299
+ )
300
+ return result
301
+
302
+ except Exception as e:
303
+ logger.error(f"LLM generation failed: {e}")
304
+ raise AnswerGenerationError(f"LLM generation failed: {str(e)}")
305
+
306
+ def _apply_personalization(
307
+ self,
308
+ answer: str,
309
+ analysis: 'QuestionAnalysis',
310
+ user_email: str
311
+ ) -> str:
312
+ """
313
+ Apply email-based personalization to answer.
314
+
315
+ Args:
316
+ answer: Base answer from LLM
317
+ analysis: Question analysis
318
+ user_email: User's email
319
+
320
+ Returns:
321
+ str: Personalized answer
322
+ """
323
+ if not analysis.requires_personalization:
324
+ return answer
325
+
326
+ email_length = len(user_email)
327
+
328
+ if analysis.personalization_type == 'email_length_offset':
329
+ # Parse offset formula from personalization_details
330
+ # Example: "Add (len(email) mod 5) to base sum"
331
+
332
+ match = re.search(r'mod\s+(\d+)', analysis.personalization_details or '')
333
+ if match:
334
+ mod_value = int(match.group(1))
335
+ offset = email_length % mod_value
336
+
337
+ # Try to parse answer as number and add offset
338
+ try:
339
+ base_value = float(answer)
340
+ final_value = base_value + offset
341
+
342
+ # Return as int if it's a whole number
343
+ if final_value.is_integer():
344
+ return str(int(final_value))
345
+ return str(final_value)
346
+
347
+ except ValueError:
348
+ logger.warning(f"Cannot apply offset to non-numeric answer: {answer}")
349
+ return answer
350
+
351
+ elif analysis.personalization_type == 'email_length_conditional':
352
+ # Example: If even, use option A; if odd, use option B
353
+ # This should already be handled by LLM based on email_length in context
354
+ pass
355
+
356
+ return answer
357
+
358
+ def _validate_format(
359
+ self,
360
+ answer: str,
361
+ analysis: 'QuestionAnalysis'
362
+ ) -> tuple[bool, str]:
363
+ """
364
+ Validate answer matches expected format.
365
+
366
+ Returns:
367
+ tuple: (is_valid, message)
368
+ """
369
+ answer_format = analysis.answer_format
370
+
371
+ if answer_format == 'plain_string':
372
+ # Should not be JSON
373
+ if answer.strip().startswith(('{', '[')):
374
+ return False, "Should be plain string, not JSON"
375
+ return True, "Valid plain string"
376
+
377
+ elif answer_format == 'json_object':
378
+ try:
379
+ parsed = json.loads(answer)
380
+ if not isinstance(parsed, dict):
381
+ return False, "Should be JSON object (dict), not array"
382
+ return True, "Valid JSON object"
383
+ except json.JSONDecodeError as e:
384
+ return False, f"Invalid JSON: {str(e)}"
385
+
386
+ elif answer_format == 'json_array':
387
+ try:
388
+ parsed = json.loads(answer)
389
+ if not isinstance(parsed, list):
390
+ return False, "Should be JSON array (list), not object"
391
+ return True, "Valid JSON array"
392
+ except json.JSONDecodeError as e:
393
+ return False, f"Invalid JSON: {str(e)}"
394
+
395
+ elif answer_format == 'number':
396
+ try:
397
+ float(answer.strip())
398
+ return True, "Valid number"
399
+ except ValueError:
400
+ return False, "Should be a numeric value"
401
+
402
+ elif answer_format == 'single_letter':
403
+ if len(answer.strip()) == 1 and answer.strip().isalpha():
404
+ return True, "Valid single letter"
405
+ return False, "Should be exactly one letter"
406
+
407
+ return True, "Format not strictly validated"
408
+
409
+ def _check_constraints(
410
+ self,
411
+ answer: str,
412
+ analysis: 'QuestionAnalysis'
413
+ ) -> tuple[bool, List[str]]:
414
+ """
415
+ Check answer against critical constraints.
416
+
417
+ Returns:
418
+ tuple: (all_met, violations_list)
419
+ """
420
+ violations = []
421
+
422
+ for constraint in analysis.critical_constraints:
423
+ constraint_lower = constraint.lower()
424
+
425
+ # Check: "command string not output"
426
+ if 'command string' in constraint_lower and 'not output' in constraint_lower:
427
+ if answer.startswith(('$', '>', '#', 'Output:', 'Result:')):
428
+ violations.append("Answer looks like output/prompt, should be command only")
429
+
430
+ # Check: "no markdown formatting"
431
+ if 'no markdown' in constraint_lower or 'no formatting' in constraint_lower:
432
+ if re.search(r'\[.+\]\(.+\)', answer):
433
+ violations.append("Should not have markdown links []() formatting")
434
+
435
+ # Check: "double quotes"
436
+ if 'double quote' in constraint_lower:
437
+ if "'" in answer and '"' not in answer:
438
+ violations.append("Should use double quotes, not single quotes")
439
+
440
+ # Check: "exact string"
441
+ if 'exact string' in constraint_lower:
442
+ # Can't validate without knowing expected value
443
+ pass
444
+
445
+ # Check: "lowercase"
446
+ if 'lowercase' in constraint_lower:
447
+ if answer != answer.lower():
448
+ violations.append("Should be lowercase")
449
+
450
+ # Check: "no quotes" or "plain path"
451
+ if 'no quotes' in constraint_lower or 'plain' in constraint_lower:
452
+ if answer.startswith(('"', "'")) and answer.endswith(('"', "'")):
453
+ violations.append("Should not be wrapped in quotes")
454
+
455
+ return len(violations) == 0, violations
456
+
457
+ def _auto_correct_format(
458
+ self,
459
+ answer: str,
460
+ analysis: 'QuestionAnalysis',
461
+ validation_message: str
462
+ ) -> str:
463
+ """
464
+ Attempt to auto-correct common format issues.
465
+
466
+ Args:
467
+ answer: Original answer
468
+ analysis: Question analysis
469
+ validation_message: What was wrong
470
+
471
+ Returns:
472
+ str: Corrected answer
473
+ """
474
+ corrected = answer
475
+
476
+ # Remove JSON wrapping if should be plain string
477
+ if analysis.answer_format == 'plain_string':
478
+ if corrected.startswith('"') and corrected.endswith('"'):
479
+ corrected = corrected[1:-1]
480
+ if corrected.startswith("'") and corrected.endswith("'"):
481
+ corrected = corrected[1:-1]
482
+
483
+ # Strip whitespace
484
+ corrected = corrected.strip()
485
+
486
+ # Remove shell prompts
487
+ for prefix in ['$ ', '> ', '# ', 'Output: ', 'Result: ']:
488
+ if corrected.startswith(prefix):
489
+ corrected = corrected[len(prefix):]
490
+
491
+ return corrected
492
+
app/services/task_fetcher.py CHANGED
@@ -15,7 +15,7 @@ from app.core.logging import get_logger
15
  from app.core.exceptions import TaskProcessingError
16
  from app.utils.llm_client import get_llm_client
17
  from app.utils.prompts import AnalysisPrompts
18
-
19
  logger = get_logger(__name__)
20
 
21
 
@@ -32,6 +32,7 @@ class TaskFetcher:
32
  self.timeout = timeout
33
  self.client: Optional[httpx.AsyncClient] = None
34
  self.llm_client = get_llm_client()
 
35
 
36
  # Import here to avoid circular imports
37
  from app.orchestrator.models import UnifiedTaskAnalysis
@@ -79,52 +80,85 @@ class TaskFetcher:
79
 
80
  # Step 1: Fetch visible content (with fallback)
81
  content = await self._fetch_content(url)
82
-
83
  logger.debug(f"Task description length after fetch: {len(content['task_description'])}")
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  # Step 2: Unified LLM analysis
86
- analysis = await self._analyze_content_with_llm(
87
- task_description=content['task_description'],
88
- raw_content=content['raw_content'],
89
- url=url,
90
- base_url=base_url
 
 
 
 
91
  )
92
-
93
- # Merge content + analysis
94
  result = {
95
- **content,
96
- 'is_redirect': analysis.is_redirect,
97
- 'question_url': analysis.question_url,
98
- 'submission_url': analysis.submission_url,
99
- 'instructions': self._format_instructions(analysis.instructions),
100
- 'overall_goal': analysis.overall_goal,
101
- 'complexity': analysis.complexity,
102
- 'llm_analysis': {
103
- 'redirect_reasoning': analysis.redirect_reasoning,
104
- 'submission_reasoning': analysis.submission_reasoning,
105
- 'confidence': analysis.confidence,
106
- }
107
  }
 
 
 
 
 
 
 
 
108
 
109
- # Resolve relative submission URL if needed
110
- if analysis.submission_url and analysis.submission_url_is_relative:
111
- absolute = str(httpx.URL(base_url).join(analysis.submission_url))
112
- logger.info(f"βœ“ Resolved relative submission URL: {analysis.submission_url} β†’ {absolute}")
113
- result['submission_url'] = absolute
 
 
 
 
 
 
 
 
 
 
114
 
115
- # Resolve relative question URL if needed
116
- if analysis.question_url and analysis.question_url.startswith('/'):
117
- absolute_q = str(httpx.URL(base_url).join(analysis.question_url))
118
- logger.info(f"βœ“ Resolved relative question URL: {analysis.question_url} β†’ {absolute_q}")
119
- result['question_url'] = absolute_q
120
 
121
- logger.info("βœ… Analysis complete:")
122
- logger.info(f" Is Redirect: {result['is_redirect']}")
123
- logger.info(f" Submission URL: {result['submission_url']}")
124
- logger.info(f" Instructions: {len(result['instructions'])} steps")
125
- logger.info(f" Complexity: {result['complexity']}")
126
 
127
- return result
 
 
 
 
 
 
128
 
129
  # ======================================================================
130
  # FETCHING WITH FALLBACK TO DYNAMIC SCRAPER
@@ -139,30 +173,43 @@ class TaskFetcher:
139
  if not self._is_valid_url(url):
140
  raise TaskProcessingError(f"Invalid URL format: {url}")
141
 
 
 
 
 
 
142
  try:
143
  response = await self._fetch_url(url)
144
  content_type = self._detect_content_type(response)
145
-
 
 
 
 
146
  # Basic extraction
147
- task_description = await self._extract_basic_content(response, content_type)
148
  raw_content = response.text[:5000]
149
 
 
150
  # Heuristic: if nothing useful, try dynamic scraper
151
  if self._looks_js_only(task_description, raw_content):
152
  logger.warning("⚠️ Content looks JS-only/empty. Falling back to DynamicScraper for instructions.")
153
  dyn = await self._fetch_with_dynamic_scraper(url)
154
  task_description = dyn['task_description']
155
  raw_content = dyn['raw_content']
156
-
 
157
  return {
158
- 'task_description': task_description,
159
- 'raw_content': raw_content,
160
- 'content_type': content_type,
161
- 'url': url,
162
- 'metadata': {
163
- 'content_length': len(response.content),
164
- 'status_code': response.status_code,
165
- }
 
 
166
  }
167
 
168
  except Exception as e:
@@ -212,15 +259,43 @@ class TaskFetcher:
212
  for instruction pages.
213
  """
214
  from app.modules.scrapers.dynamic_scraper import DynamicScraper
 
 
 
 
 
215
 
216
  scraper = DynamicScraper(use_pool=True)
217
  await scraper.initialize()
218
  try:
219
  # Auto-extract text blocks
220
  result = await scraper.scrape_url(url)
 
221
  if not result.success:
222
  raise RuntimeError(result.error or "Dynamic scraping failed")
223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  # DynamicScraper._extract_auto returns list of dicts with 'text' for paragraphs
225
  texts: List[str] = []
226
  if isinstance(result.data, list):
@@ -234,8 +309,10 @@ class TaskFetcher:
234
 
235
  # Best-effort raw_content: you could extend DynamicScraper to return page.content()
236
  return {
237
- 'task_description': task_text,
238
- 'raw_content': task_text[:5000], # at least something readable
 
 
239
  }
240
  finally:
241
  await scraper.cleanup()
@@ -244,31 +321,169 @@ class TaskFetcher:
244
  # BASIC EXTRACTION (NO LLM)
245
  # ======================================================================
246
 
247
- async def _extract_basic_content(self, response: httpx.Response, content_type: str) -> str:
248
- """Fast, no-JS extraction for instruction pages."""
 
 
 
 
 
 
249
  if content_type == 'json':
250
  try:
251
- data = response.json()
252
  for field in ['task', 'description', 'question', 'content', 'text']:
253
  if isinstance(data, dict) and field in data:
254
  return str(data[field])
255
  return json.dumps(data)
256
  except Exception:
257
- return response.text
258
 
259
  if content_type == 'html':
260
  try:
261
- html = response.text
262
- soup = BeautifulSoup(html, 'html.parser')
 
 
263
  for script in soup(['script', 'style', 'nav', 'header', 'footer']):
264
  script.decompose()
 
265
  text = soup.get_text(strip=True, separator=' ')
266
  return text
267
  except Exception as e:
268
  logger.error(f"HTML basic extraction failed: {e}")
269
- return response.text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
- return response.text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
 
273
  def _detect_content_type(self, response: httpx.Response) -> str:
274
  ct = response.headers.get('content-type', '').lower()
@@ -320,57 +535,7 @@ class TaskFetcher:
320
  return analysis
321
  except Exception as e:
322
  logger.error(f"❌ LLM analysis failed: {e}", exc_info=True)
323
- return self._fallback_analysis(task_description, all_urls, url, base_url)
324
-
325
- def _fallback_analysis(
326
- self,
327
- task_description: str,
328
- all_urls: List[str],
329
- url: str,
330
- base_url: str
331
- ):
332
- """Very simple fallback if LLM fails."""
333
- from app.orchestrator.models import UnifiedTaskAnalysis, InstructionStep
334
-
335
- logger.warning("⚠️ Using fallback pattern-based analysis")
336
-
337
- is_redirect = False
338
- submission_url = None
339
-
340
- for pattern in [r'POST\s+(?:to\s+)?([^\s<>"\']+)', r'submit\s+(?:to\s+)?([^\s<>"\']+)']:
341
- m = re.search(pattern, task_description, re.IGNORECASE)
342
- if m:
343
- submission_url = m.group(1).rstrip('.,;:)')
344
- break
345
-
346
- sentences = re.split(r'[.;\n]', task_description)
347
- instructions = []
348
- step = 1
349
- for s in sentences:
350
- s = s.strip()
351
- if len(s) > 5:
352
- instructions.append(InstructionStep(
353
- step_number=step,
354
- action='unknown',
355
- description=s,
356
- target=None,
357
- dependencies=[]
358
- ))
359
- step += 1
360
-
361
- return UnifiedTaskAnalysis(
362
- is_redirect=is_redirect,
363
- question_url=None,
364
- redirect_reasoning="Fallback: no redirect detection",
365
- submission_url=submission_url,
366
- submission_url_is_relative=submission_url.startswith('/') if submission_url else False,
367
- submission_reasoning="Fallback: simple regex match",
368
- instructions=instructions,
369
- overall_goal="Unknown (fallback)",
370
- complexity="unknown",
371
- confidence=0.3
372
- )
373
-
374
  def _format_instructions(self, steps) -> List[Dict[str, Any]]:
375
  return [
376
  {
 
15
  from app.core.exceptions import TaskProcessingError
16
  from app.utils.llm_client import get_llm_client
17
  from app.utils.prompts import AnalysisPrompts
18
+ from app.services.analyser import QuestionAnalyzer
19
  logger = get_logger(__name__)
20
 
21
 
 
32
  self.timeout = timeout
33
  self.client: Optional[httpx.AsyncClient] = None
34
  self.llm_client = get_llm_client()
35
+ self.question_analyzer = QuestionAnalyzer(self.llm_client)
36
 
37
  # Import here to avoid circular imports
38
  from app.orchestrator.models import UnifiedTaskAnalysis
 
80
 
81
  # Step 1: Fetch visible content (with fallback)
82
  content = await self._fetch_content(url)
83
+ print(content)
84
  logger.debug(f"Task description length after fetch: {len(content['task_description'])}")
85
 
86
+ file_links = content['question_metadata'].get('file_links', [])
87
+
88
+ if file_links:
89
+ # Download files to disk
90
+ downloaded_files = await self._download_files(
91
+ file_links,
92
+ content['base_url'],
93
94
+ )
95
+ content['downloaded_files'] = downloaded_files
96
+ else:
97
+ content['downloaded_files'] = []
98
+
99
  # Step 2: Unified LLM analysis
100
+ logger.info("πŸ” Analyzing question...")
101
+ if not getattr(self.question_analyzer, "_analyzer_agent", None):
102
+ await self.question_analyzer.initialize()
103
+
104
+ analysis = await self.question_analyzer.analyze_question(
105
+ question_metadata=content["question_metadata"],
106
+ base_url=base_url,
107
+ user_email="[email protected]",
108
+ downloaded_files=content["downloaded_files"]
109
  )
 
 
110
  result = {
111
+ 'analysis': analysis,
112
+ 'question_metadata': content['question_metadata'],
113
+ 'base_url':base_url,
114
+ 'user_email':"23f3003322@ds.study.iitm.ac.in",
115
+ 'downloaded_files':content["downloaded_files"]
116
+
 
 
 
 
 
 
117
  }
118
+
119
+ return result
120
+ # analysis = await self._analyze_content_with_llm(
121
+ # task_description=content['task_description'],
122
+ # raw_content=content['raw_content'],
123
+ # url=url,
124
+ # base_url=base_url
125
+ # )
126
 
127
+ # # Merge content + analysis
128
+ # result = {
129
+ # **content,
130
+ # 'is_redirect': analysis.is_redirect,
131
+ # 'question_url': analysis.question_url,
132
+ # 'submission_url': analysis.submission_url,
133
+ # 'instructions': self._format_instructions(analysis.instructions),
134
+ # 'overall_goal': analysis.overall_goal,
135
+ # 'complexity': analysis.complexity,
136
+ # 'llm_analysis': {
137
+ # 'redirect_reasoning': analysis.redirect_reasoning,
138
+ # 'submission_reasoning': analysis.submission_reasoning,
139
+ # 'confidence': analysis.confidence,
140
+ # }
141
+ # }
142
 
143
+ # # Resolve relative submission URL if needed
144
+ # if analysis.submission_url and analysis.submission_url_is_relative:
145
+ # absolute = str(httpx.URL(base_url).join(analysis.submission_url))
146
+ # logger.info(f"βœ“ Resolved relative submission URL: {analysis.submission_url} β†’ {absolute}")
147
+ # result['submission_url'] = absolute
148
 
149
+ # # Resolve relative question URL if needed
150
+ # if analysis.question_url and analysis.question_url.startswith('/'):
151
+ # absolute_q = str(httpx.URL(base_url).join(analysis.question_url))
152
+ # logger.info(f"βœ“ Resolved relative question URL: {analysis.question_url} β†’ {absolute_q}")
153
+ # result['question_url'] = absolute_q
154
 
155
+ # logger.info("βœ… Analysis complete:")
156
+ # logger.info(f" Is Redirect: {result['is_redirect']}")
157
+ # logger.info(f" Submission URL: {result['submission_url']}")
158
+ # logger.info(f" Instructions: {len(result['instructions'])} steps")
159
+ # logger.info(f" Complexity: {result['complexity']}")
160
+
161
+ # return result
162
 
163
  # ======================================================================
164
  # FETCHING WITH FALLBACK TO DYNAMIC SCRAPER
 
173
  if not self._is_valid_url(url):
174
  raise TaskProcessingError(f"Invalid URL format: {url}")
175
 
176
+ from urllib.parse import urlparse
177
+
178
+ parsed = urlparse(url)
179
+ base_url = f"{parsed.scheme}://{parsed.netloc}"
180
+
181
  try:
182
  response = await self._fetch_url(url)
183
  content_type = self._detect_content_type(response)
184
+ html_content = response.text # ← This is html_content
185
+ html_content = html_content.replace(
186
+ '<span class="origin"></span>',
187
+ base_url
188
+ )
189
  # Basic extraction
190
+ task_description = await self._extract_basic_content_from_html(html_content, content_type)
191
  raw_content = response.text[:5000]
192
 
193
+ metadata = self._parse_question_metadata(html_content)
194
  # Heuristic: if nothing useful, try dynamic scraper
195
  if self._looks_js_only(task_description, raw_content):
196
  logger.warning("⚠️ Content looks JS-only/empty. Falling back to DynamicScraper for instructions.")
197
  dyn = await self._fetch_with_dynamic_scraper(url)
198
  task_description = dyn['task_description']
199
  raw_content = dyn['raw_content']
200
+ metadata = dyn['question_metadata']
201
+
202
  return {
203
+ 'task_description': task_description,
204
+ 'raw_content': raw_content,
205
+ 'content_type': content_type,
206
+ 'url': url,
207
+ 'base_url': base_url,
208
+ 'question_metadata': metadata, # βœ“ ADDED
209
+ 'metadata': {
210
+ 'content_length': len(response.content),
211
+ 'status_code': response.status_code,
212
+ }
213
  }
214
 
215
  except Exception as e:
 
259
  for instruction pages.
260
  """
261
  from app.modules.scrapers.dynamic_scraper import DynamicScraper
262
+ from urllib.parse import urlparse
263
+
264
+ # Extract base URL
265
+ parsed = urlparse(url)
266
+ base_url = f"{parsed.scheme}://{parsed.netloc}"
267
 
268
  scraper = DynamicScraper(use_pool=True)
269
  await scraper.initialize()
270
  try:
271
  # Auto-extract text blocks
272
  result = await scraper.scrape_url(url)
273
+
274
  if not result.success:
275
  raise RuntimeError(result.error or "Dynamic scraping failed")
276
 
277
+ rendered_html = result.raw_html if hasattr(result, 'raw_html') else None
278
+ if rendered_html:
279
+ rendered_html = rendered_html.replace(
280
+ '<span class="origin"></span>',
281
+ base_url
282
+ )
283
+
284
+ question_metadata = None
285
+ if rendered_html:
286
+ soup = BeautifulSoup(rendered_html, 'html.parser')
287
+ question_metadata = self._parse_question_metadata_from_soup(soup)
288
+ file_links = []
289
+ if rendered_html:
290
+ soup = BeautifulSoup(rendered_html, 'html.parser')
291
+ for a in soup.find_all('a', href=True):
292
+ href = a['href']
293
+ if href.startswith('/project2/'):
294
+ file_links.append({
295
+ 'href': href,
296
+ 'text': a.get_text(strip=True)
297
+ })
298
+
299
  # DynamicScraper._extract_auto returns list of dicts with 'text' for paragraphs
300
  texts: List[str] = []
301
  if isinstance(result.data, list):
 
309
 
310
  # Best-effort raw_content: you could extend DynamicScraper to return page.content()
311
  return {
312
+ 'task_description': task_text,
313
+ 'raw_content': rendered_html if rendered_html else task_text[:5000],
314
+ 'base_url': base_url,
315
+ 'question_metadata': question_metadata, # NEW
316
  }
317
  finally:
318
  await scraper.cleanup()
 
321
  # BASIC EXTRACTION (NO LLM)
322
  # ======================================================================
323
 
324
+ async def _extract_basic_content_from_html(
325
+ self,
326
+ html_content: str, # ← Changed from response
327
+ content_type: str
328
+ ) -> str:
329
+ """
330
+ Fast extraction from HTML string (no JS execution).
331
+ """
332
  if content_type == 'json':
333
  try:
334
+ data = json.loads(html_content)
335
  for field in ['task', 'description', 'question', 'content', 'text']:
336
  if isinstance(data, dict) and field in data:
337
  return str(data[field])
338
  return json.dumps(data)
339
  except Exception:
340
+ return html_content
341
 
342
  if content_type == 'html':
343
  try:
344
+ from bs4 import BeautifulSoup
345
+ soup = BeautifulSoup(html_content, 'html.parser')
346
+
347
+ # Remove scripts (but origin already replaced before this)
348
  for script in soup(['script', 'style', 'nav', 'header', 'footer']):
349
  script.decompose()
350
+
351
  text = soup.get_text(strip=True, separator=' ')
352
  return text
353
  except Exception as e:
354
  logger.error(f"HTML basic extraction failed: {e}")
355
+ return html_content
356
+
357
+ return html_content
358
+
359
+ def _parse_question_metadata(self, html: str) -> Dict[str, Any]:
360
+ """
361
+ Extract structured metadata from question HTML.
362
+ """
363
+ from bs4 import BeautifulSoup
364
+ soup = BeautifulSoup(html, 'html.parser')
365
+
366
+ metadata = {
367
+ 'title': None,
368
+ 'heading': None,
369
+ 'difficulty': None,
370
+ 'is_personalized': False,
371
+ 'instructions': [],
372
+ 'file_links': []
373
+ }
374
+
375
+ # Extract title
376
+ title_tag = soup.find('title')
377
+ if title_tag:
378
+ metadata['title'] = title_tag.text.strip()
379
+
380
+ # Extract heading
381
+ h1_tag = soup.find('h1')
382
+ if h1_tag:
383
+ metadata['heading'] = h1_tag.text.strip()
384
+
385
+ # Extract difficulty and personalization
386
+ for p in soup.find_all('p'):
387
+ text = p.get_text()
388
+
389
+ # Difficulty: "Difficulty: 1 (next URL revealed even if wrong)"
390
+ if 'Difficulty:' in text:
391
+ import re
392
+ match = re.search(r'Difficulty:\s*(\d+)', text)
393
+ if match:
394
+ metadata['difficulty'] = int(match.group(1))
395
+
396
+ # Personalization: "Personalized: Yes" or "Personalized: No"
397
+ if 'Personalized:' in text:
398
+ metadata['is_personalized'] = 'Yes' in text
399
+
400
+ # Extract ordered instructions
401
+ ol_tag = soup.find('ol')
402
+ if ol_tag:
403
+ for li in ol_tag.find_all('li', recursive=False):
404
+ metadata['instructions'].append(li.get_text(strip=True))
405
+
406
+ # Extract file links
407
+ for a in soup.find_all('a', href=True):
408
+ href = a['href']
409
+ if href.startswith('/project2/'):
410
+ metadata['file_links'].append({
411
+ 'href': href,
412
+ 'text': a.get_text(strip=True)
413
+ })
414
+
415
+ return metadata
416
+
417
+ def _parse_question_metadata_from_soup(self, soup) -> Dict[str, Any]:
418
+ """
419
+ Extract structured metadata from BeautifulSoup object.
420
+ Helper method for both httpx and dynamic scraper paths.
421
+
422
+ Args:
423
+ soup: BeautifulSoup parsed HTML
424
+
425
+ Returns:
426
+ Dict with title, difficulty, personalization, instructions, file_links
427
+ """
428
+ metadata = {
429
+ 'title': None,
430
+ 'heading': None,
431
+ 'difficulty': None,
432
+ 'is_personalized': False,
433
+ 'instructions': [],
434
+ 'file_links': []
435
+ }
436
+
437
+ # Extract title
438
+ title_tag = soup.find('title')
439
+ if title_tag:
440
+ metadata['title'] = title_tag.text.strip()
441
 
442
+ # Extract heading
443
+ h1_tag = soup.find('h1')
444
+ if h1_tag:
445
+ metadata['heading'] = h1_tag.text.strip()
446
+
447
+ # Extract difficulty and personalization from paragraphs
448
+ for p in soup.find_all('p'):
449
+ text = p.get_text()
450
+
451
+ # Parse difficulty: "Difficulty: 1 (next URL revealed even if wrong)"
452
+ if 'Difficulty:' in text or 'difficulty:' in text.lower():
453
+ import re
454
+ match = re.search(r'[Dd]ifficulty:\s*(\d+)', text)
455
+ if match:
456
+ metadata['difficulty'] = int(match.group(1))
457
+ logger.debug(f"Parsed difficulty: {metadata['difficulty']}")
458
+
459
+ # Parse personalization: "Personalized: Yes" or "Personalized: No"
460
+ if 'Personalized:' in text or 'personalized:' in text.lower():
461
+ metadata['is_personalized'] = 'yes' in text.lower()
462
+ logger.debug(f"Parsed personalization: {metadata['is_personalized']}")
463
+
464
+ # Extract ordered instructions from <ol> tag
465
+ ol_tag = soup.find('ol')
466
+ if ol_tag:
467
+ for li in ol_tag.find_all('li', recursive=False):
468
+ instruction_text = li.get_text(separator=' ', strip=True)
469
+ metadata['instructions'].append(instruction_text)
470
+ logger.debug(f"Parsed {len(metadata['instructions'])} instructions")
471
+
472
+ # Extract file links from <a> tags
473
+ for a in soup.find_all('a', href=True):
474
+ href = a['href']
475
+ # Look for project files
476
+ if href.startswith('/project2/') or '/project2/' in href:
477
+ metadata['file_links'].append({
478
+ 'href': href,
479
+ 'text': a.get_text(strip=True)
480
+ })
481
+
482
+ if metadata['file_links']:
483
+ logger.debug(f"Found {len(metadata['file_links'])} file links")
484
+
485
+ return metadata
486
+
487
 
488
  def _detect_content_type(self, response: httpx.Response) -> str:
489
  ct = response.headers.get('content-type', '').lower()
 
535
  return analysis
536
  except Exception as e:
537
  logger.error(f"❌ LLM analysis failed: {e}", exc_info=True)
538
+ return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
  def _format_instructions(self, steps) -> List[Dict[str, Any]]:
540
  return [
541
  {
app/services/task_processor.py CHANGED
@@ -5,14 +5,16 @@ Simplified with unified LLM analysis in task_fetcher + AnswerSubmitter integrati
5
 
6
  from typing import Dict, Any, Optional
7
  import asyncio
8
- from app.models.request import TaskRequest
9
  from app.core.logging import get_logger
10
  from app.core.exceptions import TaskProcessingError
11
  from app.orchestrator.orchestrator_engine import OrchestratorEngine
12
  from app.modules import get_fully_loaded_registry # βœ… AUTO-REGISTRATION
13
  from app.services.task_fetcher import TaskFetcher
14
  from app.modules.submitters.answer_submitter import AnswerSubmitter # βœ… NEW
15
-
 
 
16
  logger = get_logger(__name__)
17
 
18
  class TaskProcessor:
@@ -28,13 +30,15 @@ class TaskProcessor:
28
  # βœ… AUTO-REGISTER ALL MODULES
29
  self.registry = get_fully_loaded_registry()
30
  self.answer_submitter = AnswerSubmitter()
 
 
31
 
32
  # Initialize orchestrator engine
33
  self.orchestrator = OrchestratorEngine(self.registry)
34
 
35
  logger.info(f"βœ… TaskProcessor initialized with {len(self.registry.modules)} modules")
36
 
37
- async def process(self, task_data: TaskRequest) -> Dict[str, Any]:
38
  """
39
  Process TDS quiz task - COMPLETE END-TO-END FLOW
40
 
@@ -47,7 +51,7 @@ class TaskProcessor:
47
  6. Build response
48
  """
49
  logger.info("=" * 80)
50
- logger.info(f"πŸ”„ Processing task for: {task_data.email}")
51
  logger.info(f"πŸ“‹ Request URL: {task_data.url}")
52
  logger.info("=" * 80)
53
 
@@ -65,101 +69,31 @@ class TaskProcessor:
65
 
66
  # βœ… FIXED: Use proper async context manager pattern
67
  async with TaskFetcher() as fetcher:
68
- analysis = await fetcher.fetch_and_analyze(url=request_url)
69
-
70
- logger.info(f"βœ“ Request URL analyzed")
71
- logger.info(f" Submission URL: {analysis.get('submission_url')}")
72
-
73
- # Extract key information
74
- task_description = analysis['task_description']
75
- submission_url = analysis.get('submission_url')
76
- instructions = analysis.get('instructions', [])
77
- question_url = request_url # Default to request URL
78
-
79
- logger.info(f"πŸ“ Submission URL: {submission_url}")
80
- logger.info(f"πŸ“‹ Instructions: {len(instructions)} steps")
81
-
82
- # ===================================================================
83
- # STEP 2: EXECUTE ORCHESTRATION (Scrape β†’ Extract β†’ Answer)
84
- # ===================================================================
85
- logger.info("\n" + "=" * 80)
86
- logger.info("STEP 2: EXECUTING ORCHESTRATION")
87
- logger.info("=" * 80)
88
 
89
- orchestration_result = await self.orchestrator.execute_task(
90
- task_input=task_description,
91
- task_url=question_url,
92
- context={
93
- 'email': task_data.email,
94
- 'request_url': request_url,
95
- 'question_url': question_url,
96
- 'submission_url': submission_url,
97
- 'instructions': instructions
98
- }
99
  )
100
-
101
- logger.info(f"βœ“ Orchestration completed")
102
- logger.info(f" Success: {orchestration_result['success']}")
103
-
104
- # ===================================================================
105
- # STEP 3: EXTRACT ANSWER
106
- # ===================================================================
107
- answer = self._extract_answer(orchestration_result)
108
- logger.info(f"βœ“ Answer extracted: {str(answer)[:100]}")
109
-
110
- if not answer or answer == "No answer found":
111
- logger.warning("⚠️ No valid answer extracted")
112
- return self._build_response(
113
- task_data, request_url, question_url, submission_url,
114
- analysis, orchestration_result, None, answer
115
- )
116
 
117
- # ===================================================================
118
- # STEP 4: SUBMIT ANSWER & HANDLE CHAINING
119
- # ===================================================================
120
- logger.info("\n" + "=" * 80)
121
- logger.info("STEP 4: SUBMITTING & CHAINING")
122
- logger.info("=" * 80)
123
-
124
- submission_result = await self.answer_submitter.execute({
125
- 'submission_url': submission_url,
126
- 'email': task_data.email,
127
- 'secret': str(answer),
128
- 'quiz_url': question_url,
129
- 'answer': answer
130
- })
131
-
132
- logger.info(f"βœ“ Submission completed: {getattr(submission_result, 'success', False)}")
133
-
134
- # βœ… ALWAYS check for new URL first
135
- if (hasattr(submission_result, 'data') and
136
- submission_result.data and
137
- (next_url := submission_result.data.get('next_quiz_url'))):
138
-
139
- logger.info(f"πŸ”„ NEW QUIZ DETECTED: {next_url}")
140
-
141
- # βœ… FIXED: Proper background task handling with reference tracking
142
- background_tasks = set()
143
- task = asyncio.create_task(self._process_chained_quiz(task_data.email, next_url, submission_url))
144
- background_tasks.add(task)
145
- task.add_done_callback(background_tasks.discard)
146
-
147
- return {
148
- 'success': True,
149
- 'status': 'chained',
150
- 'message': f'Submitted & chained to next quiz: {next_url}',
151
- 'next_url': next_url,
152
- 'correct': submission_result.data.get('correct', False)
153
- }
154
-
155
- # βœ… No new URL = SUCCESS (whether correct or not)
156
- logger.info("βœ… No new quiz - Task completed successfully")
157
- return {
158
- 'success': True,
159
- 'status': 'completed',
160
- 'message': 'Answer submitted successfully to TDS',
161
- 'correct': getattr(submission_result, 'data', {}).get('correct', False)
162
- }
163
 
164
  except Exception as e:
165
  logger.error(f"❌ Task processing failed: {str(e)}", exc_info=True)
@@ -226,7 +160,7 @@ class TaskProcessor:
226
 
227
  def _build_response(
228
  self,
229
- task_data: TaskRequest,
230
  request_url: str,
231
  question_url: str,
232
  submission_url: str,
 
5
 
6
  from typing import Dict, Any, Optional
7
  import asyncio
8
+ from app.models.request import ManualTriggeredRequestBody
9
  from app.core.logging import get_logger
10
  from app.core.exceptions import TaskProcessingError
11
  from app.orchestrator.orchestrator_engine import OrchestratorEngine
12
  from app.modules import get_fully_loaded_registry # βœ… AUTO-REGISTRATION
13
  from app.services.task_fetcher import TaskFetcher
14
  from app.modules.submitters.answer_submitter import AnswerSubmitter # βœ… NEW
15
+ from app.services.answer_generator import AnswerGenerator
16
+ from app.utils.llm_client import get_llm_client
17
+ from app.utils.submit_answer import submit_answer
18
  logger = get_logger(__name__)
19
 
20
  class TaskProcessor:
 
30
  # βœ… AUTO-REGISTER ALL MODULES
31
  self.registry = get_fully_loaded_registry()
32
  self.answer_submitter = AnswerSubmitter()
33
+ self.llm_client = get_llm_client()
34
+ self.answer_generator = AnswerGenerator(self.llm_client)
35
 
36
  # Initialize orchestrator engine
37
  self.orchestrator = OrchestratorEngine(self.registry)
38
 
39
  logger.info(f"βœ… TaskProcessor initialized with {len(self.registry.modules)} modules")
40
 
41
+ async def process(self, task_data: ManualTriggeredRequestBody) -> Dict[str, Any]:
42
  """
43
  Process TDS quiz task - COMPLETE END-TO-END FLOW
44
 
 
51
  6. Build response
52
  """
53
  logger.info("=" * 80)
54
+ # logger.info(f"πŸ”„ Processing task for: {task_data.email}")
55
  logger.info(f"πŸ“‹ Request URL: {task_data.url}")
56
  logger.info("=" * 80)
57
 
 
69
 
70
  # βœ… FIXED: Use proper async context manager pattern
71
  async with TaskFetcher() as fetcher:
72
+ result = await fetcher.fetch_and_analyze(url=request_url)
73
+ print("========")
74
+ print("analysis")
75
+ print(result)
76
+ # Initialize answer generator if needed
77
+ if not getattr(self.answer_generator, "_generator_agent", None):
78
+ await self.answer_generator.initialize()
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
+ answer = await self.answer_generator.generate(
81
+ analysis=result["analysis"],
82
+ question_metadata=result["question_metadata"],
83
+ base_url=result["base_url"],
84
+ user_email=result["user_email"],
85
+ downloaded_files=result["downloaded_files"]
 
 
 
 
86
  )
87
+ print("================================= answer")
88
+ print(answer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
+ return submit_answer(
91
+ submit_url="https://tds-llm-analysis.s-anand.net/submit",
92
+ answer=answer,
93
+ req_url=request_url,
94
+ background_tasks=None
95
+ )
96
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  except Exception as e:
99
  logger.error(f"❌ Task processing failed: {str(e)}", exc_info=True)
 
160
 
161
  def _build_response(
162
  self,
163
+ task_data: ManualTriggeredRequestBody,
164
  request_url: str,
165
  question_url: str,
166
  submission_url: str,
app/utils/prompts.py CHANGED
@@ -132,6 +132,259 @@ class AnalysisPrompts:
132
 
133
  Now analyze the content above."""
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  @staticmethod
136
  def analysis_planning_prompt(
137
  question: str,
 
132
 
133
  Now analyze the content above."""
134
 
135
+ @staticmethod
136
+ def question_analysis_prompt(
137
+ instructions: List[str],
138
+ difficulty: int,
139
+ is_personalized: bool,
140
+ title: str,
141
+ heading: str,
142
+ base_url: str,
143
+ user_email: str,
144
+ available_files: List[Dict[str, Any]]
145
+ ) -> str:
146
+ """
147
+ Generate prompt for analyzing question.
148
+ Focused on extracting what's needed to generate the answer.
149
+ """
150
+
151
+ files_text = "\n".join(
152
+ f"- {f.get('filename', 'unknown')} ({f.get('type', 'unknown')})"
153
+ for f in available_files
154
+ ) if available_files else "None"
155
+
156
+ instructions_text = "\n".join(
157
+ f"{i+1}. {inst}"
158
+ for i, inst in enumerate(instructions)
159
+ )
160
+
161
+ return f"""Analyze this technical quiz question to determine how to generate the correct answer.
162
+
163
+ # QUESTION METADATA
164
+ - **Title**: {title}
165
+ - **Heading**: {heading}
166
+ - **Difficulty**: {difficulty}/5 (1=easiest, 5=hardest)
167
+ - **Personalized**: {is_personalized}
168
+ - **Base URL**: {base_url}
169
+ - **User Email**: {user_email}
170
+
171
+ # INSTRUCTIONS
172
+ {instructions_text}
173
+
174
+ # AVAILABLE FILES
175
+ {files_text}
176
+
177
+ ---
178
+
179
+ # YOUR ANALYSIS TASK
180
+
181
+ Extract the following information to enable answer generation:
182
+
183
+ ## 1. QUESTION TYPE
184
+ Categorize the task:
185
+ - **cli_command**: Generate command strings (uv, git, curl, docker)
186
+ - **file_path**: Return file paths or URLs
187
+ - **data_processing**: Process CSV/JSON/ZIP files
188
+ - **image_analysis**: Analyze images (colors, pixels, differences)
189
+ - **audio_transcription**: Transcribe audio to text
190
+ - **api_interaction**: Make API calls (GitHub, REST APIs)
191
+ - **document_parsing**: Extract data from PDFs
192
+ - **calculation**: Mathematical computations (sums, F1 scores)
193
+ - **text_generation**: Generate YAML, prompts, configuration
194
+ - **optimization**: Solve constraint/optimization problems
195
+ - **llm_reasoning**: Multi-step reasoning or tool planning
196
+
197
+ ## 2. ANSWER FORMAT
198
+ How should the final answer be formatted?
199
+ - **plain_string**: Raw text, no quotes, no JSON (e.g., "uv http get ...")
200
+ - **json_object**: JSON dictionary (e.g., {{"key": "value"}})
201
+ - **json_array**: JSON list (e.g., ["a", "b", "c"])
202
+ - **number**: Integer or float (e.g., 42 or 3.14)
203
+ - **single_letter**: One character (e.g., A, B, or C)
204
+
205
+ ## 3. KEY COMPONENTS
206
+ Extract specific data needed to generate the answer:
207
+
208
+ **For cli_command:**
209
+ - tool: "uv", "git", "curl"
210
+ - subcommand: "http get", "add", "commit"
211
+ - url_template: Pattern with placeholders
212
+ - flags: ["-H", "-m", "-p"]
213
+ - arguments: Headers, messages, parameters
214
+
215
+ **For file_path:**
216
+ - path: Exact path or pattern
217
+
218
+ **For data_processing:**
219
+ - operations: ["normalize", "filter", "aggregate"]
220
+ - output_format: "json", "csv"
221
+ - sorting: Field and direction
222
+
223
+ **For calculations:**
224
+ - formula: Mathematical expression
225
+ - input_sources: Where data comes from
226
+ - precision: Decimal places
227
+
228
+ **For any type:**
229
+ - Any other relevant details from instructions
230
+
231
+ ## 4. PERSONALIZATION
232
+ Determine if answer depends on user's email:
233
+
234
+ **Types:**
235
+ - **email_in_url**: Email appears in URL (e.g., ?email={{user_email}})
236
+ - **email_length_offset**: offset = len(email) mod N, add to result
237
+ - **email_length_conditional**: Different answer based on email length (even/odd)
238
+
239
+ **Details:**
240
+ - Which mod value? (mod 2, mod 3, mod 5)
241
+ - How to apply? (add to result, choose option)
242
+
243
+ ## 5. FILE REQUIREMENTS
244
+ Does the question need files from available_files list?
245
+ - Which file types? (csv, json, png, pdf, opus, zip)
246
+ - What to do with them? (process, analyze, extract)
247
+
248
+ ## 6. EXTERNAL RESOURCES
249
+ Does the question require fetching from another URL/endpoint?
250
+ - API endpoints mentioned in instructions
251
+ - Data sources not in available_files
252
+ - Example: "Use GitHub API with params in /project2/gh-tree.json"
253
+
254
+ ## 7. CRITICAL CONSTRAINTS
255
+ Extract must-follow rules:
256
+ - "command string" not "command output"
257
+ - Exact decimal places (2, 4)
258
+ - Sorting order (ascending, descending)
259
+ - Case sensitivity (lowercase, uppercase)
260
+ - Separators (comma, space, newline)
261
+ - Quote style ("double", 'single', none)
262
+ - No markdown formatting
263
+ - Specific value ranges
264
+
265
+ ## 8. SUBMISSION URL PATH
266
+ The URL path for THIS specific question (from title/heading).
267
+ Pattern: /project2-{{question-name}}
268
+ Example: /project2-uv, /project2-git, /project2-md
269
+
270
+ ---
271
+
272
+ # EXAMPLES
273
+
274
+ ## Example 1: CLI Command (Q2-like)
275
+
276
+ **Instructions:**
277
+ 1. Craft the command string using uv http get on {{{{base_url}}}}/project2/uv.json?email=<your email>
278
+ 2. Include header Accept: application/json
279
+ 3. POST that exact command string as answer
280
+
281
+ **Analysis:**
282
+ {{
283
+ "question_type": "cli_command",
284
+ "answer_format": "plain_string",
285
+ "key_components": {{
286
+ "tool": "uv",
287
+ "subcommand": "http get",
288
+ "url_template": "{{{{base_url}}}}/project2/uv.json?email={{{{user_email}}}}",
289
+ "headers": [{{"name": "Accept", "value": "application/json"}}],
290
+ "header_flag": "-H"
291
+ }},
292
+ "requires_personalization": true,
293
+ "personalization_type": "email_in_url",
294
+ "personalization_details": "User email in URL query parameter",
295
+ "requires_files": false,
296
+ "required_file_types": [],
297
+ "requires_external_fetch": false,
298
+ "external_resources": [],
299
+ "critical_constraints": [
300
+ "Return command string only, not output",
301
+ "Use double quotes for header value",
302
+ "Format: tool subcommand url -H \"header: value\""
303
+ ],
304
+ "submission_url_path": "/project2-uv",
305
+ "reasoning": "Instructions explicitly ask for 'command string' using specific tool and parameters",
306
+ "confidence": 0.98
307
+ }}
308
+
309
+ text
310
+
311
+ ## Example 2: File Path (Q4-like)
312
+
313
+ **Instructions:**
314
+ 1. The correct relative link target is exactly /project2/data-preparation.md
315
+ 2. Submit that exact string. Do not wrap in Markdown/HTML
316
+
317
+ **Analysis:**
318
+ {{
319
+ "question_type": "file_path",
320
+ "answer_format": "plain_string",
321
+ "key_components": {{
322
+ "path": "/project2/data-preparation.md"
323
+ }},
324
+ "requires_personalization": false,
325
+ "requires_files": false,
326
+ "requires_external_fetch": false,
327
+ "critical_constraints": [
328
+ "Exact string: /project2/data-preparation.md",
329
+ "No markdown formatting",
330
+ "No HTML tags",
331
+ "No quotes"
332
+ ],
333
+ "submission_url_path": "/project2-md",
334
+ "reasoning": "Instructions provide exact path to return",
335
+ "confidence": 1.0
336
+ }}
337
+
338
+ text
339
+
340
+ ## Example 3: Data Processing with Personalization (Q9-like)
341
+
342
+ **Instructions:**
343
+ 1. Download logs.zip and sum bytes where event=="download"
344
+ 2. Compute offset = (length of your email) mod 5
345
+ 3. Final answer = base sum + offset
346
+
347
+ **Available Files:**
348
+ - logs.zip (zip)
349
+
350
+ **Analysis:**
351
+ {{
352
+ "question_type": "data_processing",
353
+ "answer_format": "number",
354
+ "key_components": {{
355
+ "file": "logs.zip",
356
+ "operation": "sum",
357
+ "field": "bytes",
358
+ "filter": {{"event": "download"}},
359
+ "offset_formula": "len(user_email) mod 5"
360
+ }},
361
+ "requires_personalization": true,
362
+ "personalization_type": "email_length_offset",
363
+ "personalization_details": "Add (len(email) mod 5) to base sum",
364
+ "requires_files": true,
365
+ "required_file_types": ["zip"],
366
+ "requires_external_fetch": false,
367
+ "critical_constraints": [
368
+ "Filter: event == 'download'",
369
+ "Sum the bytes field",
370
+ "Add email length offset",
371
+ "Return integer only"
372
+ ],
373
+ "submission_url_path": "/project2-logs",
374
+ "reasoning": "File processing with email-based offset calculation",
375
+ "confidence": 0.92
376
+ }}
377
+
378
+ text
379
+
380
+ ---
381
+
382
+ # NOW ANALYZE
383
+
384
+ Analyze the question above and return a complete QuestionAnalysis object.
385
+ Be precise and extract ALL relevant details from the instructions.
386
+ """
387
+
388
  @staticmethod
389
  def analysis_planning_prompt(
390
  question: str,
app/utils/submit_answer.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Answer Submission Utility
3
+ Handles answer submission and chained quiz processing
4
+ """
5
+
6
+ from datetime import datetime
7
+ from fastapi import HTTPException, BackgroundTasks
8
+ from app.core.logging import get_logger
9
+ from app.models.request import ManualTriggeredRequestBody
10
+ logger = get_logger(__name__)
11
+ import requests
12
+
13
+
14
+ def submit_answer(submit_url: str, req_url: str ,answer:str, background_tasks: BackgroundTasks = None) -> dict:
15
+ """
16
+ Submit an answer to submit_url and trigger the next quiz in the chain if the response contains a follow-up URL.
17
+
18
+ Args:
19
+ submit_url: The URL endpoint to submit the answer to
20
+ req_url: The quiz URL this answer corresponds to
+ answer: The answer string to submit
21
+ background_tasks: FastAPI BackgroundTasks for chained processing
22
+
23
+ Returns:
24
+ The response from the server containing correct status, reason, url, and delay
25
+
26
+ Raises:
27
+ HTTPException on request failure
28
+ """
29
+ try:
30
+ logger.info(f"Submitting answer to {submit_url}")
31
+
32
+ # Get email and secret from environment
33
+ from app.core.config import settings
34
+
35
+ answer_body = {
36
+ "email": settings.USER_EMAIL,
37
+ "secret": settings.API_SECRET,
38
+ "url": req_url,
39
+ "answer": answer
40
+ }
41
+ response = requests.post(submit_url, json=answer_body, timeout=15)
42
+ response.raise_for_status()
43
+
44
+ result = response.json()
45
+ logger.info(f"Submission response: {result}")
46
+
47
+ print(f"[submit_answer] Response from {submit_url}:")
48
+
49
+ print ("="* 8)
50
+ print("answer")
51
+ print(result.get("correct"))
52
+ print(result)
53
+ print ("="* 8)
54
+
55
+ # If response contains a url, process it as the next quiz in background
56
+ if result.get("url"):
57
+ next_url = result["url"]
58
+ logger.info(f"πŸ”— Chained quiz detected: {next_url}")
59
+ print(f"\n[submit_answer] Adding next quiz to background tasks: {next_url}")
60
+
61
+ # If background_tasks available (from FastAPI), use it
62
+ if background_tasks:
63
+ background_tasks.add_task(
64
+ process_next_quiz,
65
+ next_url=next_url,
66
+ email=answer_body.get("email"),
67
+ start_time=datetime.now()
68
+ )
69
+ else:
70
+ # Fallback: run in background thread
71
+ import threading
72
+ thread = threading.Thread(
73
+ target=process_next_quiz,
74
+ args=(next_url, answer_body.get("email"), datetime.now()),
75
+ daemon=True
76
+ )
77
+ thread.start()
78
+ logger.info(f"βœ“ Started background thread for chained quiz")
79
+
80
+ return result
81
+
82
+ except requests.exceptions.RequestException as exc:
83
+ logger.error(f"Failed to submit answer to {submit_url}: {exc}")
84
+ raise HTTPException(status_code=400, detail=f"Submission failed: {exc}")
85
+
86
+
87
+ def process_next_quiz(next_url: str, email: str, start_time: datetime):
88
+ """
89
+ Process the next quiz in the chain as a background task.
90
+
91
+ Args:
92
+ next_url: URL of the next quiz to process
93
+ email: User's email address
94
+ start_time: Start time for tracking
95
+ """
96
+ try:
97
+ logger.info(f"πŸ”„ Processing chained quiz: {next_url}")
98
+
99
+ # Import here to avoid circular dependency
100
+ from app.services.task_processor import TaskProcessor
101
+
102
+ # Create task data for next quiz
103
+ task_data = ManualTriggeredRequestBody(url=next_url)
104
+
105
+ # Process the next quiz
106
+ processor = TaskProcessor()
107
+ import asyncio
108
+ result = asyncio.run(processor.process(task_data))
109
+
110
+ elapsed = (datetime.now() - start_time).total_seconds()
111
+ logger.info(f"βœ… Chained quiz completed in {elapsed:.2f}s")
112
+
113
+ except Exception as e:
114
+ logger.error(f"❌ Failed to process chained quiz: {e}", exc_info=True)