# gateway/modules/services/serviceAi/subResponseParsing.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Response Parsing Module

Handles parsing of AI responses, including:
- Section extraction from responses
- JSON completeness detection
- Loop detection
- Document metadata extraction
- Final result building
"""
import json
import logging
from typing import Dict, Any, List, Optional, Tuple

from modules.shared.jsonUtils import extractJsonString, repairBrokenJson, extractSectionsFromDocument
from .subJsonResponseHandling import JsonResponseHandler
from modules.datamodels.datamodelAi import JsonAccumulationState

logger = logging.getLogger(__name__)


class ResponseParser:
    """Handles parsing of AI responses and completion detection."""

    def __init__(self, services):
        """Initialize ResponseParser with service center access."""
        self.services = services

    def extractSectionsFromResponse(
        self,
        result: str,
        iteration: int,
        debugPrefix: str,
        allSections: List[Dict[str, Any]] = None,
        accumulationState: Optional[JsonAccumulationState] = None
    ) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional[JsonAccumulationState]]:
        """
        Extract sections from AI response, handling both valid and broken JSON.

        NEW BEHAVIOR:
        - First iteration: Check if complete, if not start accumulation
        - Subsequent iterations: Accumulate strings, parse when complete

        Returns:
            Tuple of:
            - sections: Extracted sections
            - wasJsonComplete: True if JSON is complete
            - parsedResult: Parsed JSON object
            - updatedAccumulationState: Updated accumulation state (None if not in accumulation mode)
        """
        if allSections is None:
            allSections = []

        if iteration == 1:
            # First iteration - check if complete
            parsed = None
            try:
                extracted = extractJsonString(result)
                parsed = json.loads(extracted)

                # Check completeness
                if JsonResponseHandler.isJsonComplete(parsed):
                    # Complete JSON - no accumulation needed
                    sections = extractSectionsFromDocument(parsed)
                    logger.info(f"Iteration 1: Complete JSON detected, no accumulation needed")
                    return sections, True, parsed, None  # No accumulation
            except Exception:
                pass

            # Incomplete - try to extract partial sections from broken JSON
            logger.info(f"Iteration 1: Incomplete JSON detected, attempting to extract partial sections")

            partialSections = []
            if parsed:
                # Try to extract sections from parsed (even if incomplete)
                partialSections = extractSectionsFromDocument(parsed)
            else:
                # Try to repair broken JSON and extract sections
                try:
                    repaired = repairBrokenJson(result)
                    if repaired:
                        partialSections = extractSectionsFromDocument(repaired)
                        parsed = repaired  # Use repaired version for accumulation state
                except Exception:
                    pass  # If repair fails, continue with empty sections


            # Define KPIs (async call - need to handle this)
            # For now, create accumulation state without KPIs, will be updated after async call
            accumulationState = JsonAccumulationState(
                accumulatedJsonString=result,
                isAccumulationMode=True,
                lastParsedResult=parsed,
                allSections=partialSections,
                kpis=[]
            )

            # Note: KPI definition will be done in the caller (async context)
            return partialSections, False, parsed, accumulationState

        else:
            # Subsequent iterations - accumulate
            if accumulationState and accumulationState.isAccumulationMode:
                accumulated, sections, isComplete, parsedResult = \
                    JsonResponseHandler.accumulateAndParseJsonFragments(
                        accumulationState.accumulatedJsonString,
                        result,
                        allSections,
                        iteration
                    )

                # Update accumulation state
                accumulationState.accumulatedJsonString = accumulated
                accumulationState.lastParsedResult = parsedResult
                accumulationState.allSections = allSections + sections if sections else allSections
                accumulationState.isAccumulationMode = not isComplete

                # Log accumulated JSON for debugging
                if parsedResult:
                    accumulated_json_str = json.dumps(parsedResult, indent=2, ensure_ascii=False)
                    self.services.utils.writeDebugFile(accumulated_json_str, f"{debugPrefix}_accumulated_json_iteration_{iteration}.json")

                return sections, isComplete, parsedResult, accumulationState
            else:
                # No accumulation mode - process normally (shouldn't happen)
                logger.warning(f"Iteration {iteration}: No accumulation state but iteration > 1")
                return [], False, None, None

    def shouldContinueGeneration(
        self,
        allSections: List[Dict[str, Any]],
        iteration: int,
        wasJsonComplete: bool,
        rawResponse: str = None
    ) -> bool:
        """
        Determine if AI generation loop should continue.

        CRITICAL: This is ONLY about AI Loop Completion, NOT Action DoD!
        Action DoD is checked AFTER the AI Loop completes in _refineDecide.

        Simple logic:
        - If JSON parsing failed or incomplete → continue (needs more content)
        - If JSON parses successfully and is complete → stop (all content delivered)
        - Loop detection prevents infinite loops

        CRITICAL: JSON completeness is determined by parsing, NOT by last character check!
        Returns True if we should continue, False if AI Loop is done.
        """
        if len(allSections) == 0:
            return True  # No sections yet, continue

        # CRITERION 1: If JSON was incomplete/broken (parsing failed or incomplete) - continue to repair/complete
        if not wasJsonComplete:
            logger.info(f"Iteration {iteration}: JSON incomplete/broken - continuing to complete")
            return True

        # CRITERION 2: JSON is complete (parsed successfully) - check for loop detection
        if self._isStuckInLoop(allSections, iteration):
            logger.warning(f"Iteration {iteration}: Detected potential infinite loop - stopping AI loop")
            return False

        # JSON is complete and not stuck in loop - done
        logger.info(f"Iteration {iteration}: JSON complete - AI loop done")
        return False

    def _isStuckInLoop(
        self,
        allSections: List[Dict[str, Any]],
        iteration: int
    ) -> bool:
        """
        Detect if we're stuck in a loop (same content being repeated).

        Generic approach: Check if recent iterations are adding minimal or duplicate content.
        """
        if iteration < 3:
            return False  # Need at least 3 iterations to detect a loop

        if len(allSections) == 0:
            return False

        # Check if last section is very small (might be stuck)
        lastSection = allSections[-1]
        elements = lastSection.get("elements", [])

        if isinstance(elements, list) and elements:
            lastElem = elements[-1] if elements else {}
        else:
            lastElem = elements if isinstance(elements, dict) else {}

        # Check content size of last section
        lastSectionSize = 0
        if isinstance(lastElem, dict):
            for key, value in lastElem.items():
                if isinstance(value, str):
                    lastSectionSize += len(value)
                elif isinstance(value, list):
                    lastSectionSize += len(str(value))

        # If last section is very small and we've done many iterations, might be stuck
        if lastSectionSize < 100 and iteration > 10:
            logger.warning(f"Potential loop detected: iteration {iteration}, last section size {lastSectionSize}")
            return True

        return False

    def extractDocumentMetadata(
        self,
        parsedResult: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """
        Extract document metadata (title, filename) from parsed AI response.
        Returns dict with 'title' and 'filename' keys if found, None otherwise.
        """
        if not isinstance(parsedResult, dict):
            return None

        # Try to get from documents array (preferred structure)
        if "documents" in parsedResult and isinstance(parsedResult["documents"], list) and len(parsedResult["documents"]) > 0:
            firstDoc = parsedResult["documents"][0]
            if isinstance(firstDoc, dict):
                title = firstDoc.get("title")
                filename = firstDoc.get("filename")
                if title or filename:
                    return {
                        "title": title,
                        "filename": filename
                    }

        return None

    def buildFinalResultFromSections(
        self,
        allSections: List[Dict[str, Any]],
        documentMetadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Build final JSON result from accumulated sections.
        Uses AI-provided metadata (title, filename) if available.
        """
        if not allSections:
            return ""

        # Extract metadata from AI response if available
        title = "Generated Document"
        filename = "document.json"
        if documentMetadata:
            if documentMetadata.get("title"):
                title = documentMetadata["title"]
            if documentMetadata.get("filename"):
                filename = documentMetadata["filename"]

        # Build documents structure
        # Assuming single document for now
        documents = [{
            "id": "doc_1",
            "title": title,
            "filename": filename,
            "sections": allSections
        }]

        result = {
            "metadata": {
                "split_strategy": "single_document",
                "source_documents": [],
                "extraction_method": "ai_generation"
            },
            "documents": documents
        }

        return json.dumps(result, indent=2)