gateway/modules/services/serviceAi/subResponseParsing.py
2026-01-23 01:10:00 +01:00

275 lines
11 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Response Parsing Module
Handles parsing of AI responses, including:
- Section extraction from responses
- JSON completeness detection
- Loop detection
- Document metadata extraction
- Final result building
"""
import json
import logging
from typing import Dict, Any, List, Optional, Tuple
from modules.shared.jsonUtils import extractJsonString, repairBrokenJson, extractSectionsFromDocument
from .subJsonResponseHandling import JsonResponseHandler
from modules.datamodels.datamodelAi import JsonAccumulationState
logger = logging.getLogger(__name__)


class ResponseParser:
    """Handles parsing of AI responses and completion detection.

    The parser is stateless apart from the ``services`` handle; accumulation
    state for multi-iteration responses is passed in and returned explicitly.
    """

    # Thresholds for the loop-detection heuristic in _isStuckInLoop.
    _LOOP_MIN_ITERATION = 3          # below this, never report a loop
    _LOOP_ITERATION_THRESHOLD = 10   # a loop is only reported above this
    _LOOP_SMALL_SECTION_CHARS = 100  # "very small" last-element size

    def __init__(self, services):
        """Initialize ResponseParser with service center access."""
        self.services = services

    def extractSectionsFromResponse(
        self,
        result: str,
        iteration: int,
        debugPrefix: str,
        allSections: Optional[List[Dict[str, Any]]] = None,
        accumulationState: "Optional[JsonAccumulationState]" = None,
    ) -> "Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional[JsonAccumulationState]]":
        """
        Extract sections from an AI response, handling valid and broken JSON.

        Behavior:
        - First iteration: check if the JSON is complete; if not, start
          accumulation (any incoming ``allSections``/``accumulationState``
          is ignored on this path).
        - Subsequent iterations: append the fragment to the accumulated
          string and parse once it is complete.

        Args:
            result: Raw response text of the current iteration.
            iteration: 1-based iteration counter of the AI loop.
            debugPrefix: Prefix for the debug file written on iterations > 1.
            allSections: Sections collected so far (defaults to empty list).
            accumulationState: State returned by the previous iteration.

        Returns:
            Tuple of:
            - sections: Sections extracted in this iteration
            - wasJsonComplete: True if JSON is complete
            - parsedResult: Parsed JSON object (may be None)
            - updatedAccumulationState: Updated accumulation state
              (None if not in accumulation mode)
        """
        if allSections is None:
            allSections = []

        if iteration == 1:
            return self._parseFirstIteration(result)

        if not (accumulationState and accumulationState.isAccumulationMode):
            # No accumulation mode - shouldn't happen for iteration > 1
            logger.warning(f"Iteration {iteration}: No accumulation state but iteration > 1")
            return [], False, None, None

        accumulated, sections, isComplete, parsedResult = (
            JsonResponseHandler.accumulateAndParseJsonFragments(
                accumulationState.accumulatedJsonString,
                result,
                allSections,
                iteration,
            )
        )

        # Update accumulation state in place; the caller gets the same object back.
        accumulationState.accumulatedJsonString = accumulated
        accumulationState.lastParsedResult = parsedResult
        accumulationState.allSections = (allSections + sections) if sections else allSections
        accumulationState.isAccumulationMode = not isComplete

        # Dump the accumulated JSON for debugging
        if parsedResult:
            accumulatedJsonStr = json.dumps(parsedResult, indent=2, ensure_ascii=False)
            self.services.utils.writeDebugFile(
                accumulatedJsonStr,
                f"{debugPrefix}_accumulated_json_iteration_{iteration}.json",
            )

        return sections, isComplete, parsedResult, accumulationState

    def _parseFirstIteration(self, result: str):
        """Handle iteration 1: return complete JSON directly or start accumulation."""
        parsed = None
        try:
            parsed = json.loads(extractJsonString(result))
            if JsonResponseHandler.isJsonComplete(parsed):
                # Complete JSON - no accumulation needed
                sections = extractSectionsFromDocument(parsed)
                logger.info("Iteration 1: Complete JSON detected, no accumulation needed")
                return sections, True, parsed, None
        except Exception:
            # Broad on purpose: the helpers are project code and any failure
            # here simply means "treat the response as incomplete".
            logger.debug("Iteration 1: strict JSON parsing failed", exc_info=True)

        # Incomplete - try to salvage partial sections
        logger.info("Iteration 1: Incomplete JSON detected, attempting to extract partial sections")
        partialSections = []
        if parsed:
            # JSON parsed but is incomplete (or section extraction failed above)
            try:
                partialSections = extractSectionsFromDocument(parsed)
            except Exception:
                # Best effort, same as the repair path: continue with no sections.
                logger.debug(
                    "Iteration 1: section extraction from parsed JSON failed",
                    exc_info=True,
                )
        else:
            # Try to repair broken JSON and extract sections
            try:
                repaired = repairBrokenJson(result)
                if repaired:
                    partialSections = extractSectionsFromDocument(repaired)
                    parsed = repaired  # Use repaired version for accumulation state
            except Exception:
                # If repair fails, continue with empty sections
                logger.debug("Iteration 1: JSON repair failed", exc_info=True)

        # KPIs are left empty here; per the original note they are defined by
        # the caller, which runs in an async context.
        newState = JsonAccumulationState(
            accumulatedJsonString=result,
            isAccumulationMode=True,
            lastParsedResult=parsed,
            allSections=partialSections,
            kpis=[],
        )
        return partialSections, False, parsed, newState

    def shouldContinueGeneration(
        self,
        allSections: List[Dict[str, Any]],
        iteration: int,
        wasJsonComplete: bool,
        rawResponse: Optional[str] = None,
    ) -> bool:
        """
        Determine if the AI generation loop should continue.

        This is ONLY about AI loop completion, not the action's definition of
        done (per the original author's note that is checked after the loop,
        in _refineDecide).

        Logic:
        - No sections yet -> continue
        - JSON parsing failed or incomplete -> continue (needs more content)
        - JSON complete -> stop

        JSON completeness is whatever the parser reported via
        ``wasJsonComplete``, not a last-character check. ``rawResponse`` is
        accepted for interface compatibility and is not used.

        NOTE(review): the empty-sections check runs before the completeness
        check, so a complete response without sections still continues.
        NOTE(review): loop detection runs only once JSON is already complete,
        so it changes the log message but not the return value - confirm
        whether it was meant to stop incomplete loops as well.

        Returns True if we should continue, False if the AI loop is done.
        """
        if not allSections:
            return True  # No sections yet, continue

        # CRITERION 1: incomplete/broken JSON - continue to repair/complete
        if not wasJsonComplete:
            logger.info(f"Iteration {iteration}: JSON incomplete/broken - continuing to complete")
            return True

        # CRITERION 2: JSON is complete - report a suspected loop, then stop
        if self._isStuckInLoop(allSections, iteration):
            logger.warning(f"Iteration {iteration}: Detected potential infinite loop - stopping AI loop")
            return False

        logger.info(f"Iteration {iteration}: JSON complete - AI loop done")
        return False

    def _isStuckInLoop(
        self,
        allSections: List[Dict[str, Any]],
        iteration: int,
    ) -> bool:
        """
        Heuristically detect a stuck generation loop.

        Reports a loop when more than ``_LOOP_ITERATION_THRESHOLD`` iterations
        have run and the last element of the last section carries fewer than
        ``_LOOP_SMALL_SECTION_CHARS`` characters (string values counted by
        length, list values by the length of their ``str()``). Malformed
        sections (not a dict, or without usable ``elements``) count as size 0.
        """
        if iteration < self._LOOP_MIN_ITERATION or not allSections:
            return False

        lastSection = allSections[-1]
        # Sections come from AI output, so tolerate a non-dict entry.
        elements = lastSection.get("elements", []) if isinstance(lastSection, dict) else []

        if isinstance(elements, list) and elements:
            lastElem = elements[-1]
        elif isinstance(elements, dict):
            lastElem = elements
        else:
            lastElem = {}

        # Measure the content size of the last element
        lastSectionSize = 0
        if isinstance(lastElem, dict):
            for value in lastElem.values():
                if isinstance(value, str):
                    lastSectionSize += len(value)
                elif isinstance(value, list):
                    lastSectionSize += len(str(value))

        if (
            lastSectionSize < self._LOOP_SMALL_SECTION_CHARS
            and iteration > self._LOOP_ITERATION_THRESHOLD
        ):
            logger.warning(f"Potential loop detected: iteration {iteration}, last section size {lastSectionSize}")
            return True
        return False

    def extractDocumentMetadata(
        self,
        parsedResult: Dict[str, Any],
    ) -> Optional[Dict[str, Any]]:
        """
        Extract document metadata (title, filename) from a parsed AI response.

        Only the first entry of the top-level ``documents`` list is inspected.

        Returns a dict with 'title' and 'filename' keys when at least one of
        them is truthy (the other may be None), otherwise None.
        """
        if not isinstance(parsedResult, dict):
            return None

        documents = parsedResult.get("documents")
        if not isinstance(documents, list) or not documents:
            return None

        firstDoc = documents[0]
        if not isinstance(firstDoc, dict):
            return None

        title = firstDoc.get("title")
        filename = firstDoc.get("filename")
        if title or filename:
            return {"title": title, "filename": filename}
        return None

    def buildFinalResultFromSections(
        self,
        allSections: List[Dict[str, Any]],
        documentMetadata: Optional[Dict[str, Any]] = None,
    ) -> str:
        """
        Build the final JSON result from accumulated sections.

        Uses AI-provided metadata (title, filename) when available and falls
        back to "Generated Document" / "document.json" otherwise. All sections
        are placed into a single document with id "doc_1".

        Returns an empty string when there are no sections, else the JSON
        text (indent 2, non-ASCII characters kept as-is, matching the debug
        dump written during accumulation).
        """
        if not allSections:
            return ""

        metadata = documentMetadata or {}
        title = metadata.get("title") or "Generated Document"
        filename = metadata.get("filename") or "document.json"

        # Assuming single document for now
        result = {
            "metadata": {
                "split_strategy": "single_document",
                "source_documents": [],
                "extraction_method": "ai_generation",
            },
            "documents": [
                {
                    "id": "doc_1",
                    "title": title,
                    "filename": filename,
                    "sections": allSections,
                }
            ],
        }
        return json.dumps(result, indent=2, ensure_ascii=False)