# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""
Response Parsing Module

Handles parsing of AI responses, including:
- Section extraction from responses
- JSON completeness detection
- Loop detection
- Document metadata extraction
- Final result building
"""

import json
import logging
from typing import Dict, Any, List, Optional, Tuple

from modules.shared.jsonUtils import extractJsonString, repairBrokenJson, extractSectionsFromDocument
from .subJsonResponseHandling import JsonResponseHandler
from modules.datamodels.datamodelAi import JsonAccumulationState

logger = logging.getLogger(__name__)


class ResponseParser:
    """Handles parsing of AI responses and completion detection."""

    def __init__(self, services):
        """Initialize ResponseParser with service center access."""
        self.services = services

    def extractSectionsFromResponse(
        self,
        result: str,
        iteration: int,
        debugPrefix: str,
        allSections: Optional[List[Dict[str, Any]]] = None,
        accumulationState: Optional[JsonAccumulationState] = None
    ) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional[JsonAccumulationState]]:
        """
        Extract sections from AI response, handling both valid and broken JSON.

        NEW BEHAVIOR:
        - First iteration: Check if complete, if not start accumulation
        - Subsequent iterations: Accumulate strings, parse when complete

        Args:
            result: Raw AI response text for this iteration.
            iteration: 1-based iteration counter of the generation loop.
            debugPrefix: Filename prefix for debug dumps.
            allSections: Sections collected in previous iterations (defaults to []).
            accumulationState: State carried between iterations while JSON is
                still incomplete; None once accumulation is finished/not needed.

        Returns:
            Tuple of:
            - sections: Extracted sections
            - wasJsonComplete: True if JSON is complete
            - parsedResult: Parsed JSON object
            - updatedAccumulationState: Updated accumulation state
              (None if not in accumulation mode)
        """
        if allSections is None:
            allSections = []

        if iteration == 1:
            # First iteration - check whether the response already contains
            # a complete JSON document.
            parsed = None
            try:
                extracted = extractJsonString(result)
                parsed = json.loads(extracted)

                if JsonResponseHandler.isJsonComplete(parsed):
                    # Complete JSON - no accumulation needed
                    sections = extractSectionsFromDocument(parsed)
                    logger.info("Iteration 1: Complete JSON detected, no accumulation needed")
                    return sections, True, parsed, None  # No accumulation
            except Exception:
                # Best-effort: broken/unparseable JSON falls through to the
                # repair path below. Log at debug so failures are visible.
                logger.debug("Iteration 1: JSON extraction/parsing failed", exc_info=True)

            # Incomplete - try to extract partial sections from broken JSON
            logger.info("Iteration 1: Incomplete JSON detected, attempting to extract partial sections")
            partialSections: List[Dict[str, Any]] = []
            if parsed:
                # Parsed but incomplete: salvage whatever sections exist.
                partialSections = extractSectionsFromDocument(parsed)
            else:
                # Try to repair broken JSON and extract sections
                try:
                    repaired = repairBrokenJson(result)
                    if repaired:
                        partialSections = extractSectionsFromDocument(repaired)
                        parsed = repaired  # Use repaired version for accumulation state
                except Exception:
                    # If repair fails, continue with empty sections.
                    logger.debug("Iteration 1: JSON repair failed", exc_info=True)

            # KPI definition is an async call and therefore happens in the
            # caller; create the accumulation state without KPIs for now.
            accumulationState = JsonAccumulationState(
                accumulatedJsonString=result,
                isAccumulationMode=True,
                lastParsedResult=parsed,
                allSections=partialSections,
                kpis=[]
            )
            return partialSections, False, parsed, accumulationState

        # Subsequent iterations - accumulate fragments until JSON is complete.
        if accumulationState and accumulationState.isAccumulationMode:
            accumulated, sections, isComplete, parsedResult = \
                JsonResponseHandler.accumulateAndParseJsonFragments(
                    accumulationState.accumulatedJsonString,
                    result,
                    allSections,
                    iteration
                )

            # Update accumulation state
            accumulationState.accumulatedJsonString = accumulated
            accumulationState.lastParsedResult = parsedResult
            accumulationState.allSections = allSections + sections if sections else allSections
            accumulationState.isAccumulationMode = not isComplete

            # Log accumulated JSON for debugging
            if parsedResult:
                accumulated_json_str = json.dumps(parsedResult, indent=2, ensure_ascii=False)
                self.services.utils.writeDebugFile(
                    accumulated_json_str,
                    f"{debugPrefix}_accumulated_json_iteration_{iteration}.json"
                )

            return sections, isComplete, parsedResult, accumulationState

        # No accumulation mode - shouldn't happen for iteration > 1.
        logger.warning("Iteration %s: No accumulation state but iteration > 1", iteration)
        return [], False, None, None

    def shouldContinueGeneration(
        self,
        allSections: List[Dict[str, Any]],
        iteration: int,
        wasJsonComplete: bool,
        rawResponse: Optional[str] = None
    ) -> bool:
        """
        Determine if AI generation loop should continue.

        CRITICAL: This is ONLY about AI Loop Completion, NOT Action DoD!
        Action DoD is checked AFTER the AI Loop completes in _refineDecide.

        Simple logic:
        - If JSON parsing failed or incomplete -> continue (needs more content)
        - If JSON parses successfully and is complete -> stop (all content delivered)
        - Loop detection prevents infinite loops

        CRITICAL: JSON completeness is determined by parsing, NOT by last
        character check!

        Args:
            allSections: All sections accumulated so far.
            iteration: Current 1-based iteration number.
            wasJsonComplete: Result of the completeness check for this iteration.
            rawResponse: Raw AI response (currently unused; kept for interface
                compatibility with callers).

        Returns:
            True if we should continue, False if AI Loop is done.
        """
        if not allSections:
            return True  # No sections yet, continue

        # CRITERION 1: If JSON was incomplete/broken (parsing failed or
        # incomplete) - continue to repair/complete
        if not wasJsonComplete:
            logger.info("Iteration %s: JSON incomplete/broken - continuing to complete", iteration)
            return True

        # CRITERION 2: JSON is complete (parsed successfully) - check for loop detection
        if self._isStuckInLoop(allSections, iteration):
            logger.warning("Iteration %s: Detected potential infinite loop - stopping AI loop", iteration)
            return False

        # JSON is complete and not stuck in loop - done
        logger.info("Iteration %s: JSON complete - AI loop done", iteration)
        return False

    def _isStuckInLoop(
        self,
        allSections: List[Dict[str, Any]],
        iteration: int
    ) -> bool:
        """
        Detect if we're stuck in a loop (same content being repeated).

        Generic approach: Check if recent iterations are adding minimal or
        duplicate content.

        Args:
            allSections: All sections accumulated so far.
            iteration: Current 1-based iteration number.

        Returns:
            True if the loop appears stuck, False otherwise.
        """
        if iteration < 3:
            return False  # Need at least 3 iterations to detect a loop

        if not allSections:
            return False

        # Inspect the last element of the most recent section; tiny content
        # after many iterations suggests the model is no longer progressing.
        lastSection = allSections[-1]
        elements = lastSection.get("elements", [])
        if isinstance(elements, list) and elements:
            lastElem = elements[-1]
        else:
            lastElem = elements if isinstance(elements, dict) else {}

        # Approximate the content size of the last element.
        lastSectionSize = 0
        if isinstance(lastElem, dict):
            for value in lastElem.values():
                if isinstance(value, str):
                    lastSectionSize += len(value)
                elif isinstance(value, list):
                    lastSectionSize += len(str(value))

        # If last section is very small and we've done many iterations, might be stuck
        if lastSectionSize < 100 and iteration > 10:
            logger.warning(
                "Potential loop detected: iteration %s, last section size %s",
                iteration, lastSectionSize
            )
            return True

        return False

    def extractDocumentMetadata(
        self,
        parsedResult: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """
        Extract document metadata (title, filename) from parsed AI response.

        Args:
            parsedResult: Parsed JSON object from the AI response.

        Returns:
            Dict with 'title' and 'filename' keys if found, None otherwise.
        """
        if not isinstance(parsedResult, dict):
            return None

        # Try to get from documents array (preferred structure); only the
        # first document is consulted.
        documents = parsedResult.get("documents")
        if isinstance(documents, list) and documents:
            firstDoc = documents[0]
            if isinstance(firstDoc, dict):
                title = firstDoc.get("title")
                filename = firstDoc.get("filename")
                if title or filename:
                    return {
                        "title": title,
                        "filename": filename
                    }

        return None

    def buildFinalResultFromSections(
        self,
        allSections: List[Dict[str, Any]],
        documentMetadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Build final JSON result from accumulated sections.

        Uses AI-provided metadata (title, filename) if available; otherwise
        falls back to generic defaults.

        Args:
            allSections: All accumulated sections to embed in the document.
            documentMetadata: Optional dict with 'title'/'filename' overrides.

        Returns:
            Pretty-printed JSON string, or "" when there are no sections.
        """
        if not allSections:
            return ""

        # Extract metadata from AI response if available
        title = "Generated Document"
        filename = "document.json"
        if documentMetadata:
            if documentMetadata.get("title"):
                title = documentMetadata["title"]
            if documentMetadata.get("filename"):
                filename = documentMetadata["filename"]

        # Build documents structure - assuming single document for now
        documents = [{
            "id": "doc_1",
            "title": title,
            "filename": filename,
            "sections": allSections
        }]

        result = {
            "metadata": {
                "split_strategy": "single_document",
                "source_documents": [],
                "extraction_method": "ai_generation"
            },
            "documents": documents
        }

        # ensure_ascii=False keeps non-ASCII titles/content readable,
        # consistent with the debug dump in extractSectionsFromResponse.
        return json.dumps(result, indent=2, ensure_ascii=False)