275 lines
11 KiB
Python
275 lines
11 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
"""
|
|
Response Parsing Module
|
|
|
|
Handles parsing of AI responses, including:
|
|
- Section extraction from responses
|
|
- JSON completeness detection
|
|
- Loop detection
|
|
- Document metadata extraction
|
|
- Final result building
|
|
"""
|
|
import json
|
|
import logging
|
|
from typing import Dict, Any, List, Optional, Tuple
|
|
|
|
from modules.shared.jsonUtils import extractJsonString, repairBrokenJson, extractSectionsFromDocument
|
|
from .subJsonResponseHandling import JsonResponseHandler
|
|
from modules.datamodels.datamodelAi import JsonAccumulationState
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ResponseParser:
    """Handles parsing of AI responses and completion detection.

    Responsibilities:
    - Section extraction from (possibly partial/broken) JSON responses
    - JSON completeness detection across accumulation iterations
    - Loop detection to stop a stuck generation loop
    - Document metadata extraction and final result assembly
    """

    def __init__(self, services):
        """Initialize ResponseParser with service center access.

        Args:
            services: Service center object; only
                ``services.utils.writeDebugFile`` is used here (debug dumps
                of the accumulated JSON).
        """
        self.services = services

    def extractSectionsFromResponse(
        self,
        result: str,
        iteration: int,
        debugPrefix: str,
        allSections: Optional[List[Dict[str, Any]]] = None,
        accumulationState: Optional["JsonAccumulationState"] = None
    ) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional["JsonAccumulationState"]]:
        """
        Extract sections from AI response, handling both valid and broken JSON.

        Behavior:
        - First iteration: check if complete; if not, start accumulation.
        - Subsequent iterations: accumulate string fragments, parse when
          complete.

        Args:
            result: Raw response text from the AI.
            iteration: 1-based AI-loop iteration number.
            debugPrefix: Prefix for debug dump filenames.
            allSections: Sections collected so far (defaults to empty list).
            accumulationState: State carried across iterations, or None on
                the first iteration.

        Returns:
            Tuple of:
            - sections: Extracted sections
            - wasJsonComplete: True if JSON is complete
            - parsedResult: Parsed JSON object (None if unparseable)
            - updatedAccumulationState: Updated accumulation state
              (None if not in accumulation mode)
        """
        if allSections is None:
            allSections = []

        if iteration == 1:
            # First iteration - check if the response is already complete.
            parsed = None
            try:
                extracted = extractJsonString(result)
                parsed = json.loads(extracted)

                # Check completeness
                if JsonResponseHandler.isJsonComplete(parsed):
                    # Complete JSON - no accumulation needed
                    sections = extractSectionsFromDocument(parsed)
                    logger.info("Iteration 1: Complete JSON detected, no accumulation needed")
                    return sections, True, parsed, None  # No accumulation
            except Exception:
                # Deliberate best-effort: broken/partial JSON is expected on
                # the first iteration; fall through to repair/accumulation.
                logger.debug("Iteration 1: initial JSON parse failed", exc_info=True)

            # Incomplete - try to extract partial sections from broken JSON
            logger.info("Iteration 1: Incomplete JSON detected, attempting to extract partial sections")

            partialSections = []
            if parsed:
                # Parsed but incomplete - still try to pull out any sections.
                partialSections = extractSectionsFromDocument(parsed)
            else:
                # Try to repair broken JSON and extract sections
                try:
                    repaired = repairBrokenJson(result)
                    if repaired:
                        partialSections = extractSectionsFromDocument(repaired)
                        parsed = repaired  # Use repaired version for accumulation state
                except Exception:
                    # If repair fails, continue with empty sections.
                    logger.debug("Iteration 1: JSON repair failed", exc_info=True)

            # KPI definition is an async call and is therefore performed by
            # the caller; start accumulation with an empty KPI list for now.
            accumulationState = JsonAccumulationState(
                accumulatedJsonString=result,
                isAccumulationMode=True,
                lastParsedResult=parsed,
                allSections=partialSections,
                kpis=[]
            )

            # Note: KPI definition will be done in the caller (async context)
            return partialSections, False, parsed, accumulationState

        else:
            # Subsequent iterations - accumulate fragments until complete.
            if accumulationState and accumulationState.isAccumulationMode:
                accumulated, sections, isComplete, parsedResult = \
                    JsonResponseHandler.accumulateAndParseJsonFragments(
                        accumulationState.accumulatedJsonString,
                        result,
                        allSections,
                        iteration
                    )

                # Update accumulation state in place.
                accumulationState.accumulatedJsonString = accumulated
                accumulationState.lastParsedResult = parsedResult
                accumulationState.allSections = allSections + sections if sections else allSections
                accumulationState.isAccumulationMode = not isComplete

                # Log accumulated JSON for debugging
                if parsedResult:
                    accumulated_json_str = json.dumps(parsedResult, indent=2, ensure_ascii=False)
                    self.services.utils.writeDebugFile(accumulated_json_str, f"{debugPrefix}_accumulated_json_iteration_{iteration}.json")

                return sections, isComplete, parsedResult, accumulationState
            else:
                # No accumulation mode on iteration > 1 should not happen.
                logger.warning("Iteration %d: No accumulation state but iteration > 1", iteration)
                return [], False, None, None

    def shouldContinueGeneration(
        self,
        allSections: List[Dict[str, Any]],
        iteration: int,
        wasJsonComplete: bool,
        rawResponse: Optional[str] = None
    ) -> bool:
        """
        Determine if the AI generation loop should continue.

        CRITICAL: This is ONLY about AI Loop Completion, NOT Action DoD!
        Action DoD is checked AFTER the AI Loop completes in _refineDecide.

        Simple logic:
        - If JSON parsing failed or incomplete -> continue (needs more content)
        - If JSON parses successfully and is complete -> stop
        - Loop detection prevents infinite loops

        CRITICAL: JSON completeness is determined by parsing, NOT by a
        last-character check!

        Args:
            allSections: Sections accumulated so far.
            iteration: Current 1-based iteration number.
            wasJsonComplete: Whether the last parse produced complete JSON.
            rawResponse: Unused; kept for interface compatibility.

        Returns:
            True if we should continue, False if the AI Loop is done.
        """
        if not allSections:
            return True  # No sections yet, continue

        # CRITERION 1: JSON incomplete/broken - continue to repair/complete.
        if not wasJsonComplete:
            logger.info("Iteration %d: JSON incomplete/broken - continuing to complete", iteration)
            return True

        # CRITERION 2: JSON is complete - guard against infinite loops.
        if self._isStuckInLoop(allSections, iteration):
            logger.warning("Iteration %d: Detected potential infinite loop - stopping AI loop", iteration)
            return False

        # JSON is complete and not stuck in loop - done.
        logger.info("Iteration %d: JSON complete - AI loop done", iteration)
        return False

    def _isStuckInLoop(
        self,
        allSections: List[Dict[str, Any]],
        iteration: int
    ) -> bool:
        """
        Detect if we're stuck in a loop (same content being repeated).

        Generic heuristic: after many iterations, a very small last section
        suggests the AI is adding minimal or duplicate content.

        Args:
            allSections: Sections accumulated so far.
            iteration: Current 1-based iteration number.

        Returns:
            True if a loop is suspected, False otherwise.
        """
        if iteration < 3:
            return False  # Need at least 3 iterations to detect a loop

        if not allSections:
            return False

        # Inspect the last element of the last section.
        lastSection = allSections[-1]
        elements = lastSection.get("elements", [])

        if isinstance(elements, list) and elements:
            lastElem = elements[-1]
        else:
            # 'elements' may also be a single dict; anything else counts as empty.
            lastElem = elements if isinstance(elements, dict) else {}

        # Sum up the textual content size of the last element.
        lastSectionSize = 0
        if isinstance(lastElem, dict):
            for key, value in lastElem.items():
                if isinstance(value, str):
                    lastSectionSize += len(value)
                elif isinstance(value, list):
                    lastSectionSize += len(str(value))

        # Tiny last section after many iterations -> probably stuck.
        if lastSectionSize < 100 and iteration > 10:
            logger.warning("Potential loop detected: iteration %d, last section size %d", iteration, lastSectionSize)
            return True

        return False

    def extractDocumentMetadata(
        self,
        parsedResult: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """
        Extract document metadata (title, filename) from a parsed AI response.

        Args:
            parsedResult: Parsed JSON object from the AI response.

        Returns:
            Dict with 'title' and 'filename' keys if either is present on the
            first document, None otherwise.
        """
        if not isinstance(parsedResult, dict):
            return None

        # Try to get from documents array (preferred structure).
        documents = parsedResult.get("documents")
        if isinstance(documents, list) and documents:
            firstDoc = documents[0]
            if isinstance(firstDoc, dict):
                title = firstDoc.get("title")
                filename = firstDoc.get("filename")
                if title or filename:
                    return {
                        "title": title,
                        "filename": filename
                    }

        return None

    def buildFinalResultFromSections(
        self,
        allSections: List[Dict[str, Any]],
        documentMetadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Build the final JSON result string from accumulated sections.

        Uses AI-provided metadata (title, filename) if available; otherwise
        falls back to generic defaults.

        Args:
            allSections: All sections collected during generation.
            documentMetadata: Optional dict with 'title'/'filename' overrides.

        Returns:
            Pretty-printed JSON string, or "" when there are no sections.
        """
        if not allSections:
            return ""

        # Defaults, overridden by AI-provided metadata when present.
        title = "Generated Document"
        filename = "document.json"
        if documentMetadata:
            if documentMetadata.get("title"):
                title = documentMetadata["title"]
            if documentMetadata.get("filename"):
                filename = documentMetadata["filename"]

        # Build documents structure (assuming a single document for now).
        documents = [{
            "id": "doc_1",
            "title": title,
            "filename": filename,
            "sections": allSections
        }]

        result = {
            "metadata": {
                "split_strategy": "single_document",
                "source_documents": [],
                "extraction_method": "ai_generation"
            },
            "documents": documents
        }

        return json.dumps(result, indent=2)
|
|
|