From 3ccd284a587af03db896fc619aa86d4b8f2b61fa Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Sun, 30 Nov 2025 17:35:19 +0100
Subject: [PATCH] fixed json merging chain for cut-off mapping with fully dynamic
json merger engine for any json structure and complexity
---
modules/services/serviceAi/mainServiceAi.py | 533 +--------
.../serviceAi/subJsonResponseHandling.py | 1022 +++++++++++++++++
.../renderers/rendererDocx.py | 3 -
.../renderers/rendererXlsx.py | 11 +-
.../services/serviceUtils/mainServiceUtils.py | 6 -
modules/shared/debugLogger.py | 1 -
modules/shared/jsonUtils.py | 41 -
modules/workflows/methods/methodAi.py | 30 +-
.../processing/adaptive/contentValidator.py | 102 +-
.../workflows/processing/modes/modeDynamic.py | 69 +-
.../shared/promptGenerationActionsDynamic.py | 22 +-
11 files changed, 1263 insertions(+), 577 deletions(-)
create mode 100644 modules/services/serviceAi/subJsonResponseHandling.py
diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py
index 98f9d0ed..592099f3 100644
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -16,6 +16,7 @@ from modules.shared.jsonUtils import (
buildContinuationContext,
parseJsonWithModel
)
+from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
logger = logging.getLogger(__name__)
@@ -304,7 +305,39 @@ Respond with ONLY a JSON object in this exact format:
# Extract sections from response (handles both valid and broken JSON)
# Only for document generation (JSON responses)
- extractedSections, wasJsonComplete, parsedResult = self._extractSectionsFromResponse(result, iteration, debugPrefix)
+ # CRITICAL: Pass allSections to enable fragment detection and merging
+ extractedSections, wasJsonComplete, parsedResult = self._extractSectionsFromResponse(
+ result, iteration, debugPrefix, allSections
+ )
+
+ # CRITICAL: Handle JSON fragments (continuation content)
+ # Fragment merging happens inside _extractSectionsFromResponse and updates allSections in place
+ # If no sections extracted but fragment was merged, allSections was updated in place
+ # Check if fragment was merged by checking if allSections was modified
+ if not extractedSections and allSections:
+ # Fragment was detected and merged directly into allSections (side effect in _extractSectionsFromResponse)
+ logger.info(f"Iteration {iteration}: JSON fragment detected and merged, continuing")
+ # Don't break - fragment was merged, continue to get more content if needed
+ # Check if we should continue based on JSON completeness
+ shouldContinue = self._shouldContinueGeneration(
+ allSections,
+ iteration,
+ wasJsonComplete,
+ result
+ )
+ if shouldContinue:
+ if iterationOperationId:
+ self.services.chat.progressLogUpdate(iterationOperationId, 0.8, "Fragment merged, continuing")
+ self.services.chat.progressLogFinish(iterationOperationId, True)
+ continue
+ else:
+ # Done - fragment was merged and JSON is complete
+ if iterationOperationId:
+ self.services.chat.progressLogFinish(iterationOperationId, True)
+ if operationId:
+ self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, fragment merged)")
+ logger.info(f"Generation complete after {iteration} iterations: fragment merged")
+ break
# Extract document metadata from first iteration if available
if iteration == 1 and parsedResult and not documentMetadata:
@@ -321,14 +354,15 @@ Respond with ONLY a JSON object in this exact format:
if not wasJsonComplete:
logger.warning(f"Iteration {iteration}: No sections extracted from broken JSON, continuing for another attempt")
continue
- # If JSON was complete but no sections extracted - this is an error, stop
+ # If JSON was complete but no sections extracted - check if it was a fragment
+ # Fragments are handled above, so if we get here and it's complete, it's an error
logger.warning(f"Iteration {iteration}: No sections extracted from complete JSON, stopping")
break
# Merge new sections with existing sections intelligently
# This handles the STANDARD CASE: broken JSON iterations must be merged together
# The break can occur anywhere - in any section, at any depth
- allSections = self._mergeSectionsIntelligently(allSections, extractedSections, iteration)
+ allSections = JsonResponseHandler.mergeSectionsIntelligently(allSections, extractedSections, iteration)
# Check if we should continue (completion detection)
# Simple logic: JSON completeness determines continuation
@@ -370,484 +404,24 @@ Respond with ONLY a JSON object in this exact format:
return final_result
- def _mergeSectionsIntelligently(
- self,
- existingSections: List[Dict[str, Any]],
- newSections: List[Dict[str, Any]],
- iteration: int
- ) -> List[Dict[str, Any]]:
- """
- Intelligently merge sections from multiple iterations.
-
- This is a GENERIC merging strategy that handles broken JSON iterations.
- The break can occur anywhere - in any section, at any depth.
-
- Merging strategies (in order of priority):
- 1. Same Section ID: Merge sections with identical IDs
- 2. Same Content-Type + Position: If last section is incomplete and new section continues it
- 3. Same Order: Merge sections with same order value
- 4. Structural Analysis: Detect continuation based on content structure
-
- Args:
- existingSections: Sections accumulated from previous iterations
- newSections: Sections extracted from current iteration
- iteration: Current iteration number
-
- Returns:
- Merged list of sections
- """
- if not newSections:
- return existingSections
-
- if not existingSections:
- return newSections
-
- mergedSections = existingSections.copy()
-
- for newSection in newSections:
- merged = False
-
- # Strategy 1: Same Section ID - merge directly
- newSectionId = newSection.get("id")
- if newSectionId:
- for i, existingSection in enumerate(mergedSections):
- if existingSection.get("id") == newSectionId:
- # Merge sections with same ID
- mergedSections[i] = self._mergeSectionContent(existingSection, newSection, iteration)
- merged = True
- logger.debug(f"Iteration {iteration}: Merged section by ID '{newSectionId}'")
- break
-
- if merged:
- continue
-
- # Strategy 2: Same Content-Type + Position (continuation detection)
- # Check if last section is incomplete and new section continues it
- if mergedSections:
- lastSection = mergedSections[-1]
- lastContentType = lastSection.get("content_type")
- newContentType = newSection.get("content_type")
-
- if lastContentType == newContentType:
- # Same content type - check if last section is incomplete
- if self._isSectionIncomplete(lastSection):
- # Last section is incomplete, merge with new section
- mergedSections[-1] = self._mergeSectionContent(lastSection, newSection, iteration)
- merged = True
- logger.debug(f"Iteration {iteration}: Merged section by content-type continuation ({lastContentType})")
- continue
-
- # Strategy 3: Same Order value
- newOrder = newSection.get("order")
- if newOrder is not None:
- for i, existingSection in enumerate(mergedSections):
- existingOrder = existingSection.get("order")
- if existingOrder is not None and existingOrder == newOrder:
- # Merge sections with same order
- mergedSections[i] = self._mergeSectionContent(existingSection, newSection, iteration)
- merged = True
- logger.debug(f"Iteration {iteration}: Merged section by order {newOrder}")
- break
-
- if merged:
- continue
-
- # Strategy 4: Structural Analysis - detect continuation
- # For code_block and table: if last section matches new section type, merge them
- if mergedSections:
- lastSection = mergedSections[-1]
- lastContentType = lastSection.get("content_type")
- newContentType = newSection.get("content_type")
-
- # Both are code blocks - merge them
- if lastContentType == "code_block" and newContentType == "code_block":
- mergedSections[-1] = self._mergeSectionContent(lastSection, newSection, iteration)
- merged = True
- logger.debug(f"Iteration {iteration}: Merged code_block sections by structural analysis")
- continue
-
- # Both are tables - merge them (common case for broken JSON iterations)
- if lastContentType == "table" and newContentType == "table":
- mergedSections[-1] = self._mergeSectionContent(lastSection, newSection, iteration)
- merged = True
- logger.debug(f"Iteration {iteration}: Merged table sections by structural analysis")
- continue
-
- # No merge strategy matched - add as new section
- if not merged:
- mergedSections.append(newSection)
- logger.debug(f"Iteration {iteration}: Added new section '{newSection.get('id', 'no-id')}' ({newSection.get('content_type', 'unknown')})")
-
- return mergedSections
-
- def _isSectionIncomplete(self, section: Dict[str, Any]) -> bool:
- """
- Check if a section is incomplete (broken at the end).
-
- This detects incomplete sections based on content analysis:
- - Code blocks: ends mid-line, ends with comma, ends with incomplete structure
- - Text sections: ends mid-sentence, ends with incomplete structure
- - Other types: check for incomplete elements
- """
- contentType = section.get("content_type", "")
- elements = section.get("elements", [])
-
- if not elements:
- return False
-
- # Handle list of elements
- if isinstance(elements, list) and len(elements) > 0:
- lastElement = elements[-1]
- else:
- lastElement = elements
-
- if not isinstance(lastElement, dict):
- return False
-
- # Check code_block for incomplete code
- if contentType == "code_block":
- code = lastElement.get("code", "")
- if code:
- # Check if code ends incompletely:
- # - Ends with comma (incomplete CSV line)
- # - Ends with number but no newline (incomplete line)
- # - Ends mid-token (e.g., "23431,23" - incomplete number)
- codeStripped = code.rstrip()
- if codeStripped:
- # Check for incomplete patterns
- if codeStripped.endswith(',') or (',' in codeStripped and not codeStripped.endswith('\n')):
- # Ends with comma or has comma but no final newline - likely incomplete
- return True
- # Check if last line is incomplete (doesn't end with newline and has partial content)
- if not code.endswith('\n') and codeStripped:
- # No final newline - might be incomplete
- # More sophisticated: check if last number is complete
- lastLine = codeStripped.split('\n')[-1]
- if lastLine and ',' in lastLine:
- # Has commas but might be incomplete
- parts = lastLine.split(',')
- if parts and len(parts[-1]) < 5: # Last part is very short - might be incomplete
- return True
-
- # Check table for incomplete rows
- if contentType == "table":
- rows = lastElement.get("rows", [])
- if rows:
- # Check if last row is incomplete (ends with incomplete data)
- lastRow = rows[-1] if isinstance(rows, list) else []
- if isinstance(lastRow, list) and lastRow:
- # Check if last row ends with incomplete data (e.g., incomplete string)
- lastCell = lastRow[-1] if lastRow else ""
- if isinstance(lastCell, str):
- # If last cell is incomplete (ends with quote or is very short), section might be incomplete
- if lastCell.endswith('"') or (len(lastCell) < 3 and lastCell):
- return True
- # Also check if last row doesn't have expected number of columns (if headers exist)
- headers = lastElement.get("headers", [])
- if headers and isinstance(headers, list):
- expectedCols = len(headers)
- if len(lastRow) < expectedCols:
- return True
-
- # Check paragraph/text for incomplete sentences
- if contentType in ["paragraph", "heading"]:
- text = lastElement.get("text", "")
- if text:
- # Simple heuristic: if doesn't end with sentence-ending punctuation
- textStripped = text.rstrip()
- if textStripped and not textStripped[-1] in '.!?':
- # Might be incomplete, but this is less reliable
- # Only mark as incomplete if very short (likely cut off)
- if len(textStripped) < 20:
- return True
-
- # Check lists for incomplete items
- if contentType in ["bullet_list", "numbered_list"]:
- items = lastElement.get("items", [])
- if items and isinstance(items, list):
- # Check if last item is incomplete (very short or ends with incomplete string)
- lastItem = items[-1] if items else None
- if isinstance(lastItem, str) and len(lastItem) < 3:
- return True
- # Check if items array seems incomplete (e.g., expected count not reached)
- # This is harder to detect without context, so we rely on other heuristics
-
- # Check image for incomplete base64 data
- if contentType == "image":
- imageData = lastElement.get("base64Data", "")
- if imageData:
- # Base64 strings should end with padding ('=' or '==')
- # If it doesn't, it might be incomplete
- stripped = imageData.rstrip()
- if stripped and not stripped.endswith(('=', '==')):
- # Check if it's a valid base64 character sequence that was cut off
- # Base64 uses A-Z, a-z, 0-9, +, /, and = for padding
- if len(stripped) > 0 and stripped[-1] not in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=':
- return True
- # If length is not a multiple of 4 (base64 requirement), it might be incomplete
- if len(stripped) % 4 != 0:
- return True
-
- # GENERIC CHECK: Look for incomplete structures in any element
- # Check if element has arrays/lists that might be incomplete
- for key, value in lastElement.items():
- if isinstance(value, list) and len(value) > 0:
- # Check last item in list
- lastItem = value[-1]
- if isinstance(lastItem, str):
- # If last string item is very short, might be incomplete
- if len(lastItem) < 3:
- return True
- elif isinstance(lastItem, dict):
- # If last dict item has very few keys, might be incomplete
- if len(lastItem) < 2:
- return True
- elif isinstance(value, str):
- # Check if string ends abruptly (no punctuation, very short)
- if len(value) > 0 and len(value) < 10 and not value[-1] in '.!?\n':
- return True
-
- return False
-
- def _mergeSectionContent(
- self,
- existingSection: Dict[str, Any],
- newSection: Dict[str, Any],
- iteration: int
- ) -> Dict[str, Any]:
- """
- Merge content from two sections.
-
- Handles different content types:
- - code_block: Append code, handle overlaps, merge incomplete lines
- - paragraph/heading: Append text
- - table: Merge rows
- - list: Merge items
- - Other: Merge elements
- """
- contentType = existingSection.get("content_type", "")
- existingElements = existingSection.get("elements", [])
- newElements = newSection.get("elements", [])
-
- if not newElements:
- return existingSection
-
- # Handle list of elements
- if isinstance(existingElements, list):
- existingElem = existingElements[-1] if existingElements else {}
- else:
- existingElem = existingElements
-
- if isinstance(newElements, list):
- newElem = newElements[0] if newElements else {}
- else:
- newElem = newElements
-
- if not isinstance(existingElem, dict) or not isinstance(newElem, dict):
- return existingSection
-
- # Merge based on content type
- if contentType == "code_block":
- existingCode = existingElem.get("code", "")
- newCode = newElem.get("code", "")
-
- if existingCode and newCode:
- mergedCode = self._mergeCodeBlocks(existingCode, newCode, iteration)
- existingElem["code"] = mergedCode
- # Preserve language from existing or new
- if "language" not in existingElem and "language" in newElem:
- existingElem["language"] = newElem["language"]
-
- elif contentType in ["paragraph", "heading"]:
- existingText = existingElem.get("text", "")
- newText = newElem.get("text", "")
-
- if existingText and newText:
- # Append text with space if needed
- if existingText.rstrip() and not existingText.rstrip()[-1] in '.!?\n':
- mergedText = existingText.rstrip() + " " + newText.lstrip()
- else:
- mergedText = existingText.rstrip() + "\n" + newText.lstrip()
- existingElem["text"] = mergedText
-
- elif contentType == "table":
- # Merge table rows with overlap detection
- existingRows = existingElem.get("rows", [])
- newRows = newElem.get("rows", [])
- if existingRows and newRows:
- # CRITICAL: Detect and remove overlaps before merging
- # Check if last existing row matches first new row (exact overlap)
- if len(existingRows) > 0 and len(newRows) > 0:
- lastExistingRow = existingRows[-1]
- firstNewRow = newRows[0]
- # Compare rows (handle both list and tuple formats)
- if isinstance(lastExistingRow, (list, tuple)) and isinstance(firstNewRow, (list, tuple)):
- if list(lastExistingRow) == list(firstNewRow):
- # Exact duplicate - remove first new row
- newRows = newRows[1:]
- logger.debug(f"Iteration {iteration}: Removed duplicate table row (exact match)")
-
- # Combine rows from both sections (after removing overlaps)
- existingElem["rows"] = existingRows + newRows
- logger.debug(f"Iteration {iteration}: Merged table rows - existing: {len(existingRows)}, new: {len(newRows)}, total: {len(existingRows) + len(newRows)}")
- elif newRows:
- # If existing has no rows but new does, use new rows
- existingElem["rows"] = newRows
- # Preserve headers from existing (or use new if existing has none)
- if not existingElem.get("headers") and newElem.get("headers"):
- existingElem["headers"] = newElem["headers"]
- # Preserve caption from existing (or use new if existing has none)
- if not existingElem.get("caption") and newElem.get("caption"):
- existingElem["caption"] = newElem["caption"]
-
- elif contentType in ["bullet_list", "numbered_list"]:
- # Merge list items
- existingItems = existingElem.get("items", [])
- newItems = newElem.get("items", [])
- if existingItems and newItems:
- existingElem["items"] = existingItems + newItems
-
- elif contentType == "image":
- # Images are typically complete - if new image is provided, replace existing
- # But check if existing image data is incomplete (e.g., base64 string cut off)
- existingImageData = existingElem.get("base64Data", "")
- newImageData = newElem.get("base64Data", "")
- if existingImageData and newImageData:
- # If existing image data doesn't end with valid base64 padding, it might be incomplete
- # Base64 padding is '=' or '==' at the end
- if not existingImageData.rstrip().endswith(('=', '==')):
- # Existing image might be incomplete - merge by appending new data
- # This handles cases where base64 string was cut off
- existingElem["base64Data"] = existingImageData + newImageData
- logger.debug(f"Iteration {iteration}: Merged incomplete image base64 data")
- else:
- # Existing image is complete - replace with new (or keep existing if new is empty)
- if newImageData:
- existingElem["base64Data"] = newImageData
- elif newImageData:
- existingElem["base64Data"] = newImageData
- # Preserve other image metadata
- if not existingElem.get("altText") and newElem.get("altText"):
- existingElem["altText"] = newElem["altText"]
- if not existingElem.get("caption") and newElem.get("caption"):
- existingElem["caption"] = newElem["caption"]
-
- else:
- # GENERIC FALLBACK: Handle any other content types or unknown structures
- # Try to merge common array/list fields generically
- for key in ["items", "rows", "columns", "cells", "elements", "data", "content"]:
- if key in existingElem and key in newElem:
- existingValue = existingElem[key]
- newValue = newElem[key]
- if isinstance(existingValue, list) and isinstance(newValue, list):
- # Merge lists by concatenation
- existingElem[key] = existingValue + newValue
- logger.debug(f"Iteration {iteration}: Merged generic list field '{key}' - existing: {len(existingValue)}, new: {len(newValue)}")
- break
-
- # If no common list fields found, try to merge all fields from newElem into existingElem
- # This handles cases where objects have different structures
- for key, value in newElem.items():
- if key not in existingElem:
- # New field - add it
- existingElem[key] = value
- elif isinstance(existingElem[key], list) and isinstance(value, list):
- # Both are lists - merge them
- existingElem[key] = existingElem[key] + value
- elif isinstance(existingElem[key], dict) and isinstance(value, dict):
- # Both are dicts - recursively merge (shallow merge)
- existingElem[key].update(value)
- elif isinstance(existingElem[key], str) and isinstance(value, str):
- # Both are strings - append new to existing
- existingElem[key] = existingElem[key] + "\n" + value
-
- # Update section with merged content
- mergedSection = existingSection.copy()
- if isinstance(existingElements, list):
- # Update the last element in the list with merged content
- if existingElements:
- existingElements[-1] = existingElem
- mergedSection["elements"] = existingElements
- else:
- mergedSection["elements"] = existingElem
-
- # Preserve metadata from new section if missing in existing
- if "order" not in mergedSection and "order" in newSection:
- mergedSection["order"] = newSection["order"]
-
- return mergedSection
-
- def _mergeCodeBlocks(self, existingCode: str, newCode: str, iteration: int) -> str:
- """
- Merge two code blocks intelligently, handling overlaps and incomplete lines.
- """
- if not existingCode:
- return newCode
- if not newCode:
- return existingCode
-
- existingLines = existingCode.rstrip().split('\n')
- newLines = newCode.strip().split('\n')
-
- if not existingLines or not newLines:
- return existingCode + "\n" + newCode
-
- lastExistingLine = existingLines[-1].strip()
- firstNewLine = newLines[0].strip()
-
- # Strategy 1: Exact overlap - remove duplicate line
- if lastExistingLine == firstNewLine:
- newLines = newLines[1:]
- logger.debug(f"Iteration {iteration}: Removed exact duplicate line in code merge")
-
- # Strategy 2: Incomplete line merge
- # If last existing line ends with comma or is incomplete, merge with first new line
- elif lastExistingLine.endswith(',') or (',' in lastExistingLine and len(lastExistingLine.split(',')[-1]) < 5):
- # Last line is incomplete - merge with first new line
- # Remove trailing comma from existing line
- mergedLine = lastExistingLine.rstrip(',') + ',' + firstNewLine.lstrip()
- existingLines[-1] = mergedLine
- newLines = newLines[1:]
- logger.debug(f"Iteration {iteration}: Merged incomplete line with continuation")
-
- # Strategy 3: Partial overlap detection
- # Check if first new line starts with the end of last existing line
- elif ',' in lastExistingLine and ',' in firstNewLine:
- lastExistingParts = lastExistingLine.split(',')
- firstNewParts = firstNewLine.split(',')
-
- # Check for overlap: if last part of existing matches first part of new
- if lastExistingParts and firstNewParts:
- lastExistingPart = lastExistingParts[-1].strip()
- firstNewPart = firstNewParts[0].strip()
-
- # If they match, there's overlap
- if lastExistingPart == firstNewPart and len(lastExistingParts) > 1:
- # Remove overlapping part from new line
- newLines[0] = ','.join(firstNewParts[1:])
- logger.debug(f"Iteration {iteration}: Removed partial overlap in code merge")
-
- # Reconstruct merged code
- mergedCode = '\n'.join(existingLines)
- if newLines:
- if mergedCode and not mergedCode.endswith('\n'):
- mergedCode += '\n'
- mergedCode += '\n'.join(newLines)
-
- return mergedCode
+ # JSON merging logic moved to subJsonResponseHandling.py
def _extractSectionsFromResponse(
self,
result: str,
iteration: int,
- debugPrefix: str
+ debugPrefix: str,
+ allSections: List[Dict[str, Any]] = None
) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]]]:
"""
Extract sections from AI response, handling both valid and broken JSON.
Uses repair mechanism for broken JSON.
+ Handles JSON fragments (continuation content) that need to be merged into existing sections.
Determines completion based on JSON structure (complete JSON = complete, broken/incomplete = incomplete).
Returns (sections, wasJsonComplete, parsedResult)
"""
+ if allSections is None:
+ allSections = []
# First, try to parse as valid JSON
# CRITICAL: JSON completeness is determined by parsing, NOT by last character check!
@@ -862,6 +436,20 @@ Respond with ONLY a JSON object in this exact format:
# Extract sections from parsed JSON
sections = extractSectionsFromDocument(parsed_result)
+ # CRITICAL: If no sections extracted but we have existing sections, check if it's a fragment
+ if not sections and allSections:
+ fragment = JsonResponseHandler.detectAndParseJsonFragment(result, allSections)
+ if fragment:
+ logger.info(f"Iteration {iteration}: Detected JSON fragment ({fragment.get('fragment_type')}), merging into existing sections")
+ # Merge fragment into existing sections
+ merged_sections = JsonResponseHandler.mergeFragmentIntoSection(fragment, allSections, iteration)
+ # Update allSections in place (this is a side effect, but necessary for continuation)
+ # Note: This modifies the caller's allSections list
+ allSections[:] = merged_sections
+ # Return empty list to indicate we merged directly (not new sections)
+ # But mark as incomplete so loop continues if needed
+ return [], False, parsed_result
+
# JSON parsed successfully = complete
logger.info(f"Iteration {iteration}: JSON parsed successfully - marking as complete")
return sections, True, parsed_result
@@ -885,7 +473,7 @@ Respond with ONLY a JSON object in this exact format:
# Repair failed - but we should still continue to allow AI to retry
logger.warning(f"Iteration {iteration}: All repair strategies failed, but continuing to allow retry")
return [], False, None # Mark as incomplete so loop continues
-
+
except Exception as e:
logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
return [], False, None
@@ -1413,8 +1001,3 @@ Respond with ONLY a JSON object in this exact format:
self.services.chat.progressLogFinish(aiOperationId, False)
raise
- # DEPRECATED METHODS REMOVED:
- # - callAiDocuments() - replaced by callAiContent()
- # - callAiText() - replaced by callAiContent()
- # All call sites have been updated to use callAiContent()
-
diff --git a/modules/services/serviceAi/subJsonResponseHandling.py b/modules/services/serviceAi/subJsonResponseHandling.py
new file mode 100644
index 00000000..5a6ec965
--- /dev/null
+++ b/modules/services/serviceAi/subJsonResponseHandling.py
@@ -0,0 +1,1022 @@
+"""
+JSON Response Handling Module
+
+Handles merging of JSON responses from multiple AI iterations, including:
+- Section merging with intelligent overlap detection
+- JSON fragment detection and merging
+- Deep recursive structure merging
+- Overlap detection for complex nested structures
+"""
+import json
+import logging
+from typing import Dict, Any, List, Optional, Tuple
+
+from modules.shared.jsonUtils import extractJsonString
+
+logger = logging.getLogger(__name__)
+
+
+class JsonResponseHandler:
+ """Handles JSON response merging and fragment detection for iterative AI generation."""
+
+ @staticmethod
+ def mergeSectionsIntelligently(
+ existingSections: List[Dict[str, Any]],
+ newSections: List[Dict[str, Any]],
+ iteration: int
+ ) -> List[Dict[str, Any]]:
+ """
+ Intelligently merge sections from multiple iterations.
+
+ This is a GENERIC merging strategy that handles broken JSON iterations.
+ The break can occur anywhere - in any section, at any depth.
+
+ Merging strategies (in order of priority):
+ 1. Same Section ID: Merge sections with identical IDs
+ 2. Same Content-Type + Position: If last section is incomplete and new section continues it
+ 3. Same Order: Merge sections with same order value
+ 4. Structural Analysis: Detect continuation based on content structure
+
+ Args:
+ existingSections: Sections accumulated from previous iterations
+ newSections: Sections extracted from current iteration
+ iteration: Current iteration number
+
+ Returns:
+ Merged list of sections
+ """
+ if not newSections:
+ return existingSections
+
+ if not existingSections:
+ return newSections
+
+ mergedSections = existingSections.copy()
+
+ for newSection in newSections:
+ merged = False
+
+ # Strategy 1: Same Section ID - merge directly
+ newSectionId = newSection.get("id")
+ if newSectionId:
+ for i, existingSection in enumerate(mergedSections):
+ if existingSection.get("id") == newSectionId:
+ # Merge sections with same ID
+ mergedSections[i] = JsonResponseHandler.mergeSectionContent(
+ existingSection, newSection, iteration
+ )
+ merged = True
+ logger.debug(f"Iteration {iteration}: Merged section by ID '{newSectionId}'")
+ break
+
+ if merged:
+ continue
+
+ # Strategy 2: Same Content-Type + Position (continuation detection)
+ # Check if last section is incomplete and new section continues it
+ if mergedSections:
+ lastSection = mergedSections[-1]
+ lastContentType = lastSection.get("content_type")
+ newContentType = newSection.get("content_type")
+
+ if lastContentType == newContentType:
+ # Same content type - check if last section is incomplete
+ if JsonResponseHandler.isSectionIncomplete(lastSection):
+ # Last section is incomplete, merge with new section
+ mergedSections[-1] = JsonResponseHandler.mergeSectionContent(
+ lastSection, newSection, iteration
+ )
+ merged = True
+ logger.debug(f"Iteration {iteration}: Merged section by content-type continuation ({lastContentType})")
+ continue
+
+ # Strategy 3: Same Order value
+ newOrder = newSection.get("order")
+ if newOrder is not None:
+ for i, existingSection in enumerate(mergedSections):
+ existingOrder = existingSection.get("order")
+ if existingOrder is not None and existingOrder == newOrder:
+ # Merge sections with same order
+ mergedSections[i] = JsonResponseHandler.mergeSectionContent(
+ existingSection, newSection, iteration
+ )
+ merged = True
+ logger.debug(f"Iteration {iteration}: Merged section by order {newOrder}")
+ break
+
+ if merged:
+ continue
+
+ # Strategy 4: Structural Analysis - detect continuation
+ # For code_block and table: if last section matches new section type, merge them
+ if mergedSections:
+ lastSection = mergedSections[-1]
+ lastContentType = lastSection.get("content_type")
+ newContentType = newSection.get("content_type")
+
+ # Both are code blocks - merge them
+ if lastContentType == "code_block" and newContentType == "code_block":
+ mergedSections[-1] = JsonResponseHandler.mergeSectionContent(
+ lastSection, newSection, iteration
+ )
+ merged = True
+ logger.debug(f"Iteration {iteration}: Merged code_block sections by structural analysis")
+ continue
+
+ # Both are tables - merge them (common case for broken JSON iterations)
+ if lastContentType == "table" and newContentType == "table":
+ mergedSections[-1] = JsonResponseHandler.mergeSectionContent(
+ lastSection, newSection, iteration
+ )
+ merged = True
+ logger.debug(f"Iteration {iteration}: Merged table sections by structural analysis")
+ continue
+
+ # No merge strategy matched - add as new section
+ if not merged:
+ mergedSections.append(newSection)
+ logger.debug(f"Iteration {iteration}: Added new section '{newSection.get('id', 'no-id')}' ({newSection.get('content_type', 'unknown')})")
+
+ return mergedSections
+
    @staticmethod
    def isSectionIncomplete(section: Dict[str, Any]) -> bool:
        """
        Heuristically detect whether a section was truncated mid-generation.

        Only the LAST element of the section is inspected, since a token-limit
        cut-off can only affect the tail of the content. The checks are
        content-type specific with a generic fallback scan; every check is a
        heuristic and can yield false positives/negatives.

        Args:
            section: Canonical section dict ("content_type", "elements", ...).

        Returns:
            True if the section looks cut off at the end, False otherwise
            (including when there are no elements to inspect).
        """
        contentType = section.get("content_type", "")
        elements = section.get("elements", [])

        if not elements:
            return False

        # "elements" may be a list of element dicts or a single element dict;
        # only the final element can have been truncated.
        if isinstance(elements, list) and len(elements) > 0:
            lastElement = elements[-1]
        else:
            lastElement = elements

        if not isinstance(lastElement, dict):
            return False

        # code_block: look for code that stops mid-line or mid-token
        # (CSV-style numeric output is the primary target of these checks).
        if contentType == "code_block":
            code = lastElement.get("code", "")
            if code:
                codeStripped = code.rstrip()
                if codeStripped:
                    # NOTE(review): codeStripped is rstrip()ed, so the
                    # "not endswith('\n')" half of this test is always True
                    # whenever a comma is present - effectively any
                    # comma-containing code trips this branch. Confirm intent.
                    if codeStripped.endswith(',') or (',' in codeStripped and not codeStripped.endswith('\n')):
                        # Ends with comma or has comma but no final newline - likely incomplete
                        return True
                    # No final newline: check whether the last comma-separated
                    # field of the last line looks like a cut-off token.
                    if not code.endswith('\n') and codeStripped:
                        lastLine = codeStripped.split('\n')[-1]
                        if lastLine and ',' in lastLine:
                            parts = lastLine.split(',')
                            if parts and len(parts[-1]) < 5:  # very short final field - might be cut off
                                return True

        # table: check the last row for a truncated cell or missing columns.
        if contentType == "table":
            rows = lastElement.get("rows", [])
            if rows:
                lastRow = rows[-1] if isinstance(rows, list) else []
                if isinstance(lastRow, list) and lastRow:
                    lastCell = lastRow[-1] if lastRow else ""
                    if isinstance(lastCell, str):
                        # A trailing quote or a near-empty final cell suggests
                        # the row text was cut off mid-string.
                        if lastCell.endswith('"') or (len(lastCell) < 3 and lastCell):
                            return True
                    # A row with fewer cells than there are headers is almost
                    # certainly truncated.
                    headers = lastElement.get("headers", [])
                    if headers and isinstance(headers, list):
                        expectedCols = len(headers)
                        if len(lastRow) < expectedCols:
                            return True

        # paragraph/heading: weak signal - only flag very short fragments
        # that do not end in sentence-ending punctuation.
        if contentType in ["paragraph", "heading"]:
            text = lastElement.get("text", "")
            if text:
                textStripped = text.rstrip()
                if textStripped and not textStripped[-1] in '.!?':
                    # Less reliable heuristic: only treat as incomplete when
                    # the text is short enough to look like a cut-off.
                    if len(textStripped) < 20:
                        return True

        # lists: a near-empty final item suggests a cut-off.
        if contentType in ["bullet_list", "numbered_list"]:
            items = lastElement.get("items", [])
            if items and isinstance(items, list):
                lastItem = items[-1] if items else None
                if isinstance(lastItem, str) and len(lastItem) < 3:
                    return True

        # image: base64 payloads that do not satisfy base64 shape rules.
        if contentType == "image":
            imageData = lastElement.get("base64Data", "")
            if imageData:
                # NOTE(review): valid base64 may legitimately end without '='
                # padding when its length is already a multiple of 4; the
                # length check below is what covers that case.
                stripped = imageData.rstrip()
                if stripped and not stripped.endswith(('=', '==')):
                    # Final character outside the base64 alphabet - cut off.
                    if len(stripped) > 0 and stripped[-1] not in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=':
                        return True
                    # Base64 length must be a multiple of 4; anything else was cut off.
                    if len(stripped) % 4 != 0:
                        return True

        # GENERIC FALLBACK for any other (or additional) fields: very short
        # trailing strings, or near-empty trailing dicts, in any list-valued
        # field are treated as truncation markers. NOTE(review): this scan
        # also runs for the specific content types above and may flag short
        # but legitimately complete values - confirm acceptable.
        for key, value in lastElement.items():
            if isinstance(value, list) and len(value) > 0:
                lastItem = value[-1]
                if isinstance(lastItem, str):
                    # Very short trailing string - might be incomplete.
                    if len(lastItem) < 3:
                        return True
                elif isinstance(lastItem, dict):
                    # Trailing dict with almost no keys - might be incomplete.
                    if len(lastItem) < 2:
                        return True
            elif isinstance(value, str):
                # Short free-text value without terminal punctuation/newline.
                if len(value) > 0 and len(value) < 10 and not value[-1] in '.!?\n':
                    return True

        return False
+
    @staticmethod
    def mergeSectionContent(
        existingSection: Dict[str, Any],
        newSection: Dict[str, Any],
        iteration: int
    ) -> Dict[str, Any]:
        """
        Merge continuation content from newSection into existingSection.

        Only the LAST element of the existing section and the FIRST element
        of the new section take part in the merge, since that is where a
        token-limit cut-off splits the content. Dispatch is by content type:

        - code_block:                append code via mergeCodeBlocks
        - paragraph/heading:         concatenate text
        - table:                     merge rows via mergeRowsWithOverlap
        - bullet_list/numbered_list: merge items via mergeItemsWithOverlap
        - image:                     append/replace base64 payload
        - anything else:             deep recursive merge (mergeDeepStructures)

        NOTE(review): existingSection is only shallow-copied at the end; the
        element dicts (and the elements list) are mutated in place, so the
        caller's original section objects are updated as a side effect -
        confirm callers rely on the returned value, not on isolation.

        Args:
            existingSection: Section accumulated so far (mutated in place).
            newSection: Section carrying the continuation content.
            iteration: Current iteration number (logging only).

        Returns:
            The merged section dict.
        """
        contentType = existingSection.get("content_type", "")
        existingElements = existingSection.get("elements", [])
        newElements = newSection.get("elements", [])

        # Nothing to merge in.
        if not newElements:
            return existingSection

        # "elements" may be a list of element dicts or a single element dict;
        # the merge happens between existing[-1] and new[0].
        if isinstance(existingElements, list):
            existingElem = existingElements[-1] if existingElements else {}
        else:
            existingElem = existingElements

        if isinstance(newElements, list):
            newElem = newElements[0] if newElements else {}
        else:
            newElem = newElements

        if not isinstance(existingElem, dict) or not isinstance(newElem, dict):
            return existingSection

        # Merge based on content type
        if contentType == "code_block":
            existingCode = existingElem.get("code", "")
            newCode = newElem.get("code", "")

            if existingCode and newCode:
                mergedCode = JsonResponseHandler.mergeCodeBlocks(existingCode, newCode, iteration)
                existingElem["code"] = mergedCode
                # Preserve language from existing or new
                if "language" not in existingElem and "language" in newElem:
                    existingElem["language"] = newElem["language"]

        elif contentType in ["paragraph", "heading"]:
            existingText = existingElem.get("text", "")
            newText = newElem.get("text", "")

            if existingText and newText:
                # Mid-sentence cut-off (no terminal punctuation): join with a
                # space; otherwise start the continuation on a new line.
                if existingText.rstrip() and not existingText.rstrip()[-1] in '.!?\n':
                    mergedText = existingText.rstrip() + " " + newText.lstrip()
                else:
                    mergedText = existingText.rstrip() + "\n" + newText.lstrip()
                existingElem["text"] = mergedText

        elif contentType == "table":
            # Merge table rows with sophisticated overlap detection
            existingRows = existingElem.get("rows", [])
            newRows = newElem.get("rows", [])
            if existingRows and newRows:
                # Use sophisticated overlap detection that handles multiple overlapping rows
                mergedRows = JsonResponseHandler.mergeRowsWithOverlap(existingRows, newRows, iteration)
                existingElem["rows"] = mergedRows
                logger.debug(f"Iteration {iteration}: Merged table rows - existing: {len(existingRows)}, new: {len(newRows)}, total: {len(mergedRows)}")
            elif newRows:
                # If existing has no rows but new does, use new rows
                existingElem["rows"] = newRows
            # Preserve headers from existing (or use new if existing has none)
            if not existingElem.get("headers") and newElem.get("headers"):
                existingElem["headers"] = newElem["headers"]
            # Preserve caption from existing (or use new if existing has none)
            if not existingElem.get("caption") and newElem.get("caption"):
                existingElem["caption"] = newElem.get("caption")

        elif contentType in ["bullet_list", "numbered_list"]:
            # Merge list items with sophisticated overlap detection
            existingItems = existingElem.get("items", [])
            newItems = newElem.get("items", [])
            if existingItems and newItems:
                mergedItems = JsonResponseHandler.mergeItemsWithOverlap(existingItems, newItems, iteration)
                existingElem["items"] = mergedItems
            elif newItems:
                existingElem["items"] = newItems

        elif contentType == "image":
            # Images are usually atomic: replace when complete, append when
            # the existing base64 payload looks cut off (no '=' padding).
            existingImageData = existingElem.get("base64Data", "")
            newImageData = newElem.get("base64Data", "")
            if existingImageData and newImageData:
                # NOTE(review): valid base64 may legitimately end without
                # padding; this heuristic can append to an already-complete
                # payload in that case - confirm acceptable.
                if not existingImageData.rstrip().endswith(('=', '==')):
                    # Existing image might be incomplete - merge by appending new data
                    # This handles cases where base64 string was cut off
                    existingElem["base64Data"] = existingImageData + newImageData
                    logger.debug(f"Iteration {iteration}: Merged incomplete image base64 data")
                else:
                    # Existing image is complete - replace with new (or keep existing if new is empty)
                    if newImageData:
                        existingElem["base64Data"] = newImageData
            elif newImageData:
                existingElem["base64Data"] = newImageData
            # Preserve other image metadata
            if not existingElem.get("altText") and newElem.get("altText"):
                existingElem["altText"] = newElem["altText"]
            if not existingElem.get("caption") and newElem.get("caption"):
                existingElem["caption"] = newElem["caption"]

        else:
            # GENERIC FALLBACK: deep recursive merge for any other content
            # type. This rebinds the local existingElem; the write-back below
            # is what stores the merged element into the section.
            merged_element = JsonResponseHandler.mergeDeepStructures(
                existingElem,
                newElem,
                iteration,
                f"section.{contentType}"
            )
            existingElem = merged_element

        # Write the merged element back into the section.
        mergedSection = existingSection.copy()
        if isinstance(existingElements, list):
            # Update the last element in the list with merged content
            if existingElements:
                existingElements[-1] = existingElem
            mergedSection["elements"] = existingElements
        else:
            mergedSection["elements"] = existingElem

        # Preserve metadata from new section if missing in existing
        if "order" not in mergedSection and "order" in newSection:
            mergedSection["order"] = newSection["order"]

        return mergedSection
+
+ @staticmethod
+ def mergeCodeBlocks(existingCode: str, newCode: str, iteration: int) -> str:
+ """
+ Merge two code blocks intelligently, handling overlaps and incomplete lines.
+ """
+ if not existingCode:
+ return newCode
+ if not newCode:
+ return existingCode
+
+ existingLines = existingCode.rstrip().split('\n')
+ newLines = newCode.strip().split('\n')
+
+ if not existingLines or not newLines:
+ return existingCode + "\n" + newCode
+
+ lastExistingLine = existingLines[-1].strip()
+ firstNewLine = newLines[0].strip()
+
+ # Strategy 1: Exact overlap - remove duplicate line
+ if lastExistingLine == firstNewLine:
+ newLines = newLines[1:]
+ logger.debug(f"Iteration {iteration}: Removed exact duplicate line in code merge")
+
+ # Strategy 2: Incomplete line merge
+ # If last existing line ends with comma or is incomplete, merge with first new line
+ elif lastExistingLine.endswith(',') or (',' in lastExistingLine and len(lastExistingLine.split(',')[-1]) < 5):
+ # Last line is incomplete - merge with first new line
+ # Remove trailing comma from existing line
+ mergedLine = lastExistingLine.rstrip(',') + ',' + firstNewLine.lstrip()
+ existingLines[-1] = mergedLine
+ newLines = newLines[1:]
+ logger.debug(f"Iteration {iteration}: Merged incomplete line with continuation")
+
+ # Strategy 3: Partial overlap detection
+ # Check if first new line starts with the end of last existing line
+ elif ',' in lastExistingLine and ',' in firstNewLine:
+ lastExistingParts = lastExistingLine.split(',')
+ firstNewParts = firstNewLine.split(',')
+
+ # Check for overlap: if last part of existing matches first part of new
+ if lastExistingParts and firstNewParts:
+ lastExistingPart = lastExistingParts[-1].strip()
+ firstNewPart = firstNewParts[0].strip()
+
+ # If they match, there's overlap
+ if lastExistingPart == firstNewPart and len(lastExistingParts) > 1:
+ # Remove overlapping part from new line
+ newLines[0] = ','.join(firstNewParts[1:])
+ logger.debug(f"Iteration {iteration}: Removed partial overlap in code merge")
+
+ # Reconstruct merged code
+ mergedCode = '\n'.join(existingLines)
+ if newLines:
+ if mergedCode and not mergedCode.endswith('\n'):
+ mergedCode += '\n'
+ mergedCode += '\n'.join(newLines)
+
+ return mergedCode
+
    @staticmethod
    def detectAndParseJsonFragment(
        result: str,
        allSections: List[Dict[str, Any]]
    ) -> Optional[Dict[str, Any]]:
        """
        Detect whether the raw model response is a JSON *fragment*
        (continuation content) rather than a full document structure.

        A fragment is either a bare JSON array (table rows, code lines, list
        items) or an object carrying element-level fields ("rows"/"code")
        without the top-level "documents"/"sections" wrapper.

        Args:
            result: Raw model response text; passed through extractJsonString
                before parsing.
            allSections: Sections accumulated so far, used to pick the
                section a fragment should be merged into.

        Returns:
            A dict with "fragment_type" ("table_rows", "code_lines",
            "list_items", "table_element" or "code_element"),
            "fragment_data" (the parsed payload) and "target_section_id"
            (may be None), or None when the response is not a fragment or
            fails to parse.
        """
        try:
            extracted = extractJsonString(result)
            parsed = json.loads(extracted)

            # Fragment indicators:
            # 1. It's an array (not an object)
            # 2. It doesn't have "documents" or "sections" keys
            # 3. It's continuation content (rows, lines, items, etc.)

            if isinstance(parsed, list):
                # Empty arrays are not treated as fragments.
                if len(parsed) > 0:
                    first_item = parsed[0]

                    # Array of arrays -> table rows:
                    # [["col1", "col2"], ["col3", "col4"], ...]
                    if isinstance(first_item, list):
                        logger.debug("Detected JSON fragment: table rows array")
                        return {
                            "fragment_type": "table_rows",
                            "fragment_data": parsed,
                            "target_section_id": JsonResponseHandler.findTargetSectionId(allSections, "table")
                        }

                    # Array of strings -> code lines or list items, decided by
                    # which section type exists, checked in that priority order.
                    elif isinstance(first_item, str):
                        target_section_id = JsonResponseHandler.findTargetSectionId(allSections, "code_block")
                        if target_section_id:
                            logger.debug("Detected JSON fragment: code lines array")
                            return {
                                "fragment_type": "code_lines",
                                "fragment_data": parsed,
                                "target_section_id": target_section_id
                            }

                        target_section_id = JsonResponseHandler.findTargetSectionId(allSections, "bullet_list")
                        if target_section_id:
                            logger.debug("Detected JSON fragment: list items array")
                            return {
                                "fragment_type": "list_items",
                                "fragment_data": parsed,
                                "target_section_id": target_section_id
                            }

                        # No matching section exists - default to code lines
                        # (target_section_id will be None here).
                        logger.debug("Detected JSON fragment: string array (assuming code lines)")
                        return {
                            "fragment_type": "code_lines",
                            "fragment_data": parsed,
                            "target_section_id": JsonResponseHandler.findTargetSectionId(allSections, "code_block")
                        }

            # Partial object missing the document wrapper.
            elif isinstance(parsed, dict):
                # "rows" without "documents"/"sections" -> table element fragment.
                if "rows" in parsed and "documents" not in parsed and "sections" not in parsed:
                    logger.debug("Detected JSON fragment: table element with rows")
                    return {
                        "fragment_type": "table_element",
                        "fragment_data": parsed,
                        "target_section_id": JsonResponseHandler.findTargetSectionId(allSections, "table")
                    }

                # "code" without "documents"/"sections" -> code element fragment.
                if "code" in parsed and "documents" not in parsed and "sections" not in parsed:
                    logger.debug("Detected JSON fragment: code element")
                    return {
                        "fragment_type": "code_element",
                        "fragment_data": parsed,
                        "target_section_id": JsonResponseHandler.findTargetSectionId(allSections, "code_block")
                    }

        except Exception as e:
            # Anything that fails to extract/parse is simply "not a fragment".
            logger.debug(f"Error detecting JSON fragment: {e}")

        return None
+
+ @staticmethod
+ def findTargetSectionId(
+ allSections: List[Dict[str, Any]],
+ contentType: str
+ ) -> Optional[str]:
+ """Find the last incomplete section of the given content type."""
+ # Find the last section with matching content type
+ for section in reversed(allSections):
+ if section.get("content_type") == contentType:
+ # Check if it's incomplete
+ if JsonResponseHandler.isSectionIncomplete(section):
+ return section.get("id")
+ # If not incomplete but it's the right type, still return it
+ return section.get("id")
+ return None
+
+ @staticmethod
+ def mergeFragmentIntoSection(
+ fragment: Dict[str, Any],
+ allSections: List[Dict[str, Any]],
+ iteration: int
+ ) -> List[Dict[str, Any]]:
+ """
+ Merge a JSON fragment into the appropriate section.
+
+ This handles the special case where iteration N returns continuation content
+ that needs to be merged into the existing structure at the overlapping point.
+ """
+ fragment_type = fragment.get("fragment_type")
+ fragment_data = fragment.get("fragment_data")
+ target_section_id = fragment.get("target_section_id")
+
+ if not fragment_type or not fragment_data:
+ return allSections
+
+ # Find the target section
+ target_section = None
+ target_index = -1
+ for i, section in enumerate(allSections):
+ if section.get("id") == target_section_id:
+ target_section = section
+ target_index = i
+ break
+
+ # If no target section found, try to find last incomplete section of matching type
+ if not target_section:
+ for i, section in enumerate(allSections):
+ if section.get("content_type") == JsonResponseHandler.getContentTypeForFragment(fragment_type):
+ if JsonResponseHandler.isSectionIncomplete(section):
+ target_section = section
+ target_index = i
+ break
+
+ # If still no target, find last section of matching type
+ if not target_section:
+ for i, section in enumerate(reversed(allSections)):
+ if section.get("content_type") == JsonResponseHandler.getContentTypeForFragment(fragment_type):
+ target_section = section
+ target_index = len(allSections) - 1 - i
+ break
+
+ if not target_section:
+ logger.warning(f"Iteration {iteration}: No target section found for fragment type {fragment_type}")
+ return allSections
+
+ # Merge fragment into target section based on type
+ merged_section = target_section.copy()
+ elements = merged_section.get("elements", [])
+
+ if not isinstance(elements, list):
+ elements = [elements] if elements else []
+
+ if not elements:
+ # Create new element if none exists
+ elements = [{}]
+
+ last_element = elements[-1] if elements else {}
+ if not isinstance(last_element, dict):
+ last_element = {}
+ elements.append(last_element)
+
+ # Merge based on fragment type using deep recursive merging
+ if fragment_type == "table_rows":
+ existing_rows = last_element.get("rows", [])
+ if not isinstance(existing_rows, list):
+ existing_rows = []
+
+ # Merge rows with sophisticated overlap detection
+ new_rows = fragment_data
+ merged_rows = JsonResponseHandler.mergeRowsWithOverlap(existing_rows, new_rows, iteration)
+ last_element["rows"] = merged_rows
+
+ # Preserve headers if they exist
+ if not last_element.get("headers") and isinstance(fragment_data, list) and len(fragment_data) > 0:
+ # Try to infer headers from first row if it's a header row
+ first_row = fragment_data[0]
+ if isinstance(first_row, list) and len(first_row) > 0:
+ # Check if first row looks like headers (all strings, descriptive)
+ if all(isinstance(cell, str) for cell in first_row):
+ last_element["headers"] = first_row
+ merged_rows = merged_rows[1:] # Remove header row
+ last_element["rows"] = merged_rows
+
+ elif fragment_type == "code_lines":
+ existing_code = last_element.get("code", "")
+ new_lines = fragment_data
+
+ # Convert array of strings to code block
+ if isinstance(new_lines, list):
+ new_code = "\n".join(str(line) for line in new_lines)
+ else:
+ new_code = str(new_lines)
+
+ merged_code = JsonResponseHandler.mergeCodeBlocks(existing_code, new_code, iteration)
+ last_element["code"] = merged_code
+
+ elif fragment_type == "list_items":
+ existing_items = last_element.get("items", [])
+ if not isinstance(existing_items, list):
+ existing_items = []
+
+ new_items = fragment_data if isinstance(fragment_data, list) else [fragment_data]
+ merged_items = JsonResponseHandler.mergeItemsWithOverlap(existing_items, new_items, iteration)
+ last_element["items"] = merged_items
+
+ elif fragment_type == "table_element":
+ # Use deep recursive merge for complex table structures
+ # This handles nested structures, multiple overlapping rows, etc.
+ merged_element = JsonResponseHandler.mergeDeepStructures(
+ last_element,
+ fragment_data,
+ iteration,
+ f"section.{target_section_id}.table_element"
+ )
+ last_element = merged_element
+
+ elif fragment_type == "code_element":
+ # Use deep recursive merge for complex code structures
+ merged_element = JsonResponseHandler.mergeDeepStructures(
+ last_element,
+ fragment_data,
+ iteration,
+ f"section.{target_section_id}.code_element"
+ )
+ last_element = merged_element
+
+ else:
+ # Generic fragment - use deep recursive merge
+ # This handles any complex nested structure
+ merged_element = JsonResponseHandler.mergeDeepStructures(
+ last_element,
+ fragment_data,
+ iteration,
+ f"section.{target_section_id}.{fragment_type}"
+ )
+ last_element = merged_element
+
+ # Update elements
+ elements[-1] = last_element
+ merged_section["elements"] = elements
+
+ # Update allSections
+ merged_sections = allSections.copy()
+ merged_sections[target_index] = merged_section
+
+ logger.info(f"Iteration {iteration}: Merged {fragment_type} fragment into section '{target_section_id}'")
+ return merged_sections
+
+ @staticmethod
+ def getContentTypeForFragment(fragment_type: str) -> str:
+ """Map fragment type to content type."""
+ mapping = {
+ "table_rows": "table",
+ "table_element": "table",
+ "code_lines": "code_block",
+ "code_element": "code_block",
+ "list_items": "bullet_list"
+ }
+ return mapping.get(fragment_type, "paragraph")
+
+ @staticmethod
+ def deepCompare(obj1: Any, obj2: Any, max_depth: int = 10) -> bool:
+ """
+ Deep recursive comparison of two JSON-serializable objects.
+ Handles nested structures of any depth and complexity.
+
+ Args:
+ obj1: First object to compare
+ obj2: Second object to compare
+ max_depth: Maximum recursion depth to prevent infinite loops
+
+ Returns:
+ True if objects are deeply equal, False otherwise
+ """
+ if max_depth <= 0:
+ return False
+
+ # Type check
+ if type(obj1) != type(obj2):
+ return False
+
+ # Primitive types
+ if isinstance(obj1, (str, int, float, bool, type(None))):
+ return obj1 == obj2
+
+ # Lists/arrays - compare element by element
+ if isinstance(obj1, list):
+ if len(obj1) != len(obj2):
+ return False
+ return all(JsonResponseHandler.deepCompare(item1, item2, max_depth - 1)
+ for item1, item2 in zip(obj1, obj2))
+
+ # Dicts/objects - compare key by key
+ if isinstance(obj1, dict):
+ if set(obj1.keys()) != set(obj2.keys()):
+ return False
+ return all(JsonResponseHandler.deepCompare(obj1[key], obj2[key], max_depth - 1)
+ for key in obj1.keys())
+
+ # Fallback for other types
+ return obj1 == obj2
+
+ @staticmethod
+ def findLongestCommonSuffix(
+ existing_list: List[Any],
+ new_list: List[Any],
+ min_overlap: int = 1
+ ) -> int:
+ """
+ Find the longest common suffix of existing_list that matches a prefix of new_list.
+
+ This handles cases where multiple elements overlap:
+ - existing: [A, B, C, D]
+ - new: [C, D, E, F]
+ - overlap: [C, D] (length 2)
+
+ Returns the length of the overlap (0 if no overlap found).
+ """
+ if not existing_list or not new_list:
+ return 0
+
+ max_overlap = min(len(existing_list), len(new_list))
+
+ # Try all possible overlap lengths (from longest to shortest)
+ for overlap_len in range(max_overlap, min_overlap - 1, -1):
+ existing_suffix = existing_list[-overlap_len:]
+ new_prefix = new_list[:overlap_len]
+
+ # Deep compare suffix and prefix
+ if all(JsonResponseHandler.deepCompare(existing_suffix[i], new_prefix[i])
+ for i in range(overlap_len)):
+ return overlap_len
+
+ return 0
+
+ @staticmethod
+ def findPartialOverlap(
+ existing_item: Any,
+ new_item: Any
+ ) -> Tuple[bool, Optional[Any]]:
+ """
+ Detect if new_item completes an incomplete existing_item.
+
+ Handles cases like:
+ - existing: ["37643", "37649", "37657", "37663", "37691", "37693", "37699", "37717", "37747", "376"]
+ - new: ["37643", "37649", ...]
+
+ Returns (is_partial_overlap, merged_item) if partial overlap detected, else (False, None).
+ """
+ # Check if both are lists
+ if isinstance(existing_item, list) and isinstance(new_item, list):
+ if not existing_item or not new_item:
+ return False, None
+
+ # Check if last element of existing is incomplete and matches first of new
+ last_existing = existing_item[-1]
+ first_new = new_item[0]
+
+ # If last existing is a string and first new is a string
+ if isinstance(last_existing, str) and isinstance(first_new, str):
+ # Check if last existing is incomplete (very short, ends with number, etc.)
+ if len(last_existing) < 10 and first_new.startswith(last_existing):
+ # Partial overlap - merge them
+ merged_last = last_existing + first_new[len(last_existing):]
+ merged_item = existing_item[:-1] + [merged_last] + new_item[1:]
+ return True, merged_item
+
+ # Check if last existing is incomplete list and first new completes it
+ if isinstance(last_existing, list) and isinstance(first_new, list):
+ if len(last_existing) < len(first_new):
+ # Check if last existing is prefix of first new
+ if first_new[:len(last_existing)] == last_existing:
+ # Merge: replace incomplete last with complete first
+ merged_item = existing_item[:-1] + [first_new] + new_item[1:]
+ return True, merged_item
+
+ # Check if existing is incomplete string and new completes it
+ if isinstance(existing_item, str) and isinstance(new_item, str):
+ if len(existing_item) < 50 and new_item.startswith(existing_item):
+ # Partial overlap
+ merged = existing_item + new_item[len(existing_item):]
+ return True, merged
+
+ return False, None
+
+ @staticmethod
+ def mergeRowsWithOverlap(
+ existing_rows: List[List[str]],
+ new_rows: List[List[str]],
+ iteration: int
+ ) -> List[List[str]]:
+ """
+ Merge table rows with sophisticated overlap detection.
+ Handles multiple overlapping rows and partial overlaps.
+ """
+ if not new_rows:
+ return existing_rows
+ if not existing_rows:
+ return new_rows
+
+ # Strategy 1: Find longest common suffix/prefix overlap
+ overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing_rows, new_rows, min_overlap=1)
+ if overlap_len > 0:
+ logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping table rows, removing duplicates")
+ return existing_rows + new_rows[overlap_len:]
+
+ # Strategy 2: Check for partial overlap in last row
+ if len(existing_rows) > 0 and len(new_rows) > 0:
+ last_existing = existing_rows[-1]
+ first_new = new_rows[0]
+
+ is_partial, merged_row = JsonResponseHandler.findPartialOverlap(last_existing, first_new)
+ if is_partial:
+ logger.debug(f"Iteration {iteration}: Found partial overlap in table rows, merging")
+ return existing_rows[:-1] + [merged_row] + new_rows[1:]
+
+ # Strategy 3: Simple first/last comparison (fallback)
+ if isinstance(existing_rows[-1], list) and isinstance(new_rows[0], list):
+ if list(existing_rows[-1]) == list(new_rows[0]):
+ logger.debug(f"Iteration {iteration}: Removed duplicate table row (exact match)")
+ return existing_rows + new_rows[1:]
+
+ # No overlap detected - append all new rows
+ return existing_rows + new_rows
+
+ @staticmethod
+ def mergeItemsWithOverlap(
+ existing_items: List[str],
+ new_items: List[str],
+ iteration: int
+ ) -> List[str]:
+ """
+ Merge list items with sophisticated overlap detection.
+ Handles multiple overlapping items and partial overlaps.
+ """
+ if not new_items:
+ return existing_items
+ if not existing_items:
+ return new_items
+
+ # Strategy 1: Find longest common suffix/prefix overlap
+ overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing_items, new_items, min_overlap=1)
+ if overlap_len > 0:
+ logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping list items, removing duplicates")
+ return existing_items + new_items[overlap_len:]
+
+ # Strategy 2: Check for partial overlap in last item
+ if len(existing_items) > 0 and len(new_items) > 0:
+ is_partial, merged_item = JsonResponseHandler.findPartialOverlap(existing_items[-1], new_items[0])
+ if is_partial:
+ logger.debug(f"Iteration {iteration}: Found partial overlap in list items, merging")
+ return existing_items[:-1] + [merged_item] + new_items[1:]
+
+ # Strategy 3: Simple first/last comparison (fallback)
+ if existing_items[-1] == new_items[0]:
+ logger.debug(f"Iteration {iteration}: Removed duplicate list item (exact match)")
+ return existing_items + new_items[1:]
+
+ # No overlap detected - append all new items
+ return existing_items + new_items
+
+ @staticmethod
+ def mergeDeepStructures(
+ existing: Any,
+ new: Any,
+ iteration: int,
+ path: str = "root"
+ ) -> Any:
+ """
+ Recursively merge two JSON structures of arbitrary depth and complexity.
+ Handles overlaps at any nesting level.
+
+ Args:
+ existing: Existing structure to merge into
+ new: New structure to merge
+ iteration: Current iteration number for logging
+ path: Current path in structure (for debugging)
+
+ Returns:
+ Merged structure
+ """
+ # Type check
+ if type(existing) != type(new):
+ # Types don't match - return new (replacement)
+ logger.debug(f"Iteration {iteration}: Types don't match at {path}, replacing")
+ return new
+
+ # Lists/arrays - merge with overlap detection
+ if isinstance(existing, list) and isinstance(new, list):
+ if not new:
+ return existing
+ if not existing:
+ return new
+
+ # Try to find overlap
+ overlap_len = JsonResponseHandler.findLongestCommonSuffix(existing, new, min_overlap=1)
+ if overlap_len > 0:
+ logger.debug(f"Iteration {iteration}: Found {overlap_len} overlapping elements at {path}, removing duplicates")
+ return existing + new[overlap_len:]
+
+ # Check for partial overlap in last element
+ if len(existing) > 0 and len(new) > 0:
+ is_partial, merged_item = JsonResponseHandler.findPartialOverlap(existing[-1], new[0])
+ if is_partial:
+ logger.debug(f"Iteration {iteration}: Found partial overlap at {path}, merging")
+ return existing[:-1] + [merged_item] + new[1:]
+
+ # No overlap - append all
+ return existing + new
+
+ # Dicts/objects - merge recursively
+ if isinstance(existing, dict) and isinstance(new, dict):
+ merged = existing.copy()
+ for key, new_value in new.items():
+ if key in merged:
+ # Key exists - merge recursively
+ merged[key] = JsonResponseHandler.mergeDeepStructures(
+ merged[key],
+ new_value,
+ iteration,
+ f"{path}.{key}"
+ )
+ else:
+ # New key - add it
+ merged[key] = new_value
+ return merged
+
+ # Primitives - if equal, return existing; otherwise return new
+ if existing == new:
+ return existing
+ return new
+
diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py
index 61a645a7..179cbe75 100644
--- a/modules/services/serviceGeneration/renderers/rendererDocx.py
+++ b/modules/services/serviceGeneration/renderers/rendererDocx.py
@@ -497,13 +497,11 @@ class RendererDocx(BaseRenderer):
# Extract title from prompt if not provided
if not title or title == "Generated Document":
# Look for "create a ... document" or "generate a ... report"
- import re
title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', userPrompt.lower())
if title_match:
structure['title'] = title_match.group(1).strip().title()
# Extract sections from numbered lists in prompt
- import re
section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
sections = re.findall(section_pattern, userPrompt)
@@ -849,7 +847,6 @@ class RendererDocx(BaseRenderer):
Returns the content with tables replaced by placeholders.
"""
import csv
- import io
lines = content.split('\n')
processed_lines = []
diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py
index f90a0980..9fca82e9 100644
--- a/modules/services/serviceGeneration/renderers/rendererXlsx.py
+++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py
@@ -95,7 +95,7 @@ class RendererXlsx(BaseRenderer):
# Title
sheet['A1'] = title
sheet['A1'].font = Font(size=16, bold=True)
- sheet['A1'].alignment = Alignment(horizontal='center')
+ sheet['A1'].alignment = Alignment(horizontal='left')
# Generation info
sheet['A3'] = "Generated:"
@@ -325,7 +325,7 @@ class RendererXlsx(BaseRenderer):
def _getDefaultStyleSet(self) -> Dict[str, Any]:
"""Default Excel style set - used when no style instructions present."""
return {
- "title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
+ "title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "left"},
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
"table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
@@ -543,8 +543,9 @@ class RendererXlsx(BaseRenderer):
try:
# Sheet title
sheet['A1'] = sheetTitle
- sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(styles.get("title", {}).get("color", "FF1F4E79")))
- sheet['A1'].alignment = Alignment(horizontal="center")
+ title_style = styles.get("title", {})
+ sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(title_style.get("color", "FF1F4E79")))
+ sheet['A1'].alignment = Alignment(horizontal=title_style.get("align", "left"))
# Get table data from elements (canonical JSON format)
elements = section.get("elements", [])
@@ -592,7 +593,7 @@ class RendererXlsx(BaseRenderer):
sheet['A1'] = documentTitle
# Safety check for title style
- title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"})
+ title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "left"})
try:
safe_color = self._getSafeColor(title_style["color"])
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
diff --git a/modules/services/serviceUtils/mainServiceUtils.py b/modules/services/serviceUtils/mainServiceUtils.py
index 849cc3ef..bbee6540 100644
--- a/modules/services/serviceUtils/mainServiceUtils.py
+++ b/modules/services/serviceUtils/mainServiceUtils.py
@@ -271,12 +271,6 @@ class UtilsService:
def jsonTryParse(self, text) -> tuple:
return jsonUtils.tryParseJson(text)
- def jsonParseOrRaise(self, text):
- return jsonUtils.parseJsonOrRaise(text)
-
- def jsonMergeRootLists(self, parts):
- return jsonUtils.mergeRootLists(parts)
-
# ===== Enum utility functions =====
def mapToEnum(self, enum_class, value_str, default_value):
diff --git a/modules/shared/debugLogger.py b/modules/shared/debugLogger.py
index c68546bf..6ee78bc7 100644
--- a/modules/shared/debugLogger.py
+++ b/modules/shared/debugLogger.py
@@ -159,7 +159,6 @@ def storeDebugMessageAndDocuments(message, currentUser) -> None:
"""
try:
import json
- from datetime import datetime, UTC
# Create base debug directory (use base debug dir, not prompts subdirectory)
baseDebugDir = _getBaseDebugDir()
diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py
index dc51a349..3da04d21 100644
--- a/modules/shared/jsonUtils.py
+++ b/modules/shared/jsonUtils.py
@@ -97,47 +97,6 @@ def tryParseJson(text: Union[str, bytes]) -> Tuple[Optional[Union[Dict, List]],
return None, e, cleaned
-def parseJsonOrRaise(text: Union[str, bytes]) -> Union[Dict, List]:
- obj, err, cleaned = tryParseJson(text)
- if err is not None:
- logger.error(f"parse_json_or_raise failed: {err}. Cleaned preview: {cleaned[:200]}...")
- raise err
- return obj
-
-
-def mergeRootLists(jsonParts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
- """
- Generic merger for root-level lists: take first dict as base; for each subsequent part:
- - if value is list and same key exists as list, extend it
- - if key absent, add it
- - for non-list keys, keep the original (from the first part)
- Sets continuation=None if present in base.
- """
- base: Optional[Dict[str, Any]] = None
- parsed: List[Dict[str, Any]] = []
- for part in jsonParts:
- if isinstance(part, (dict, list)):
- obj = part
- else:
- obj, err, _ = tryParseJson(part)
- if err is not None or not isinstance(obj, (dict, list)):
- continue
- if isinstance(obj, dict):
- parsed.append(obj)
- if not parsed:
- return {}
- base = dict(parsed[0])
- for obj in parsed[1:]:
- for k, v in obj.items():
- if isinstance(v, list) and isinstance(base.get(k), list):
- base[k].extend(v)
- elif k not in base:
- base[k] = v
- if 'continuation' in base:
- base['continuation'] = None
- return base
-
-
def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
"""
Attempt to repair broken JSON using multiple strategies.
diff --git a/modules/workflows/methods/methodAi.py b/modules/workflows/methods/methodAi.py
index 183d4605..1e837f62 100644
--- a/modules/workflows/methods/methodAi.py
+++ b/modules/workflows/methods/methodAi.py
@@ -271,7 +271,20 @@ class MethodAi(MethodBase):
# Prepare extraction options
self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
- extractionOptions = parameters.extractionOptions
+ extractionOptionsParam = parameters.get("extractionOptions")
+
+ # Convert dict to ExtractionOptions object if needed, or create defaults
+ if extractionOptionsParam:
+ if isinstance(extractionOptionsParam, dict):
+ # Convert dict to ExtractionOptions object
+ extractionOptions = ExtractionOptions(**extractionOptionsParam)
+ elif isinstance(extractionOptionsParam, ExtractionOptions):
+ extractionOptions = extractionOptionsParam
+ else:
+ # Invalid type, use defaults
+ extractionOptions = None
+ else:
+ extractionOptions = None
# If extractionOptions not provided, create defaults
if not extractionOptions:
@@ -297,10 +310,21 @@ class MethodAi(MethodBase):
# Build ActionDocuments from ContentExtracted results
self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
actionDocuments = []
- for extracted in extractedResults:
+ # Map extracted results back to original documents by index (results are in same order)
+ for i, extracted in enumerate(extractedResults):
+ # Get original document name if available
+ originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
+ if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName:
+ # Use original filename with "extracted_" prefix
+ baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName
+ documentName = f"{baseName}_extracted_{extracted.id}.json"
+ else:
+ # Fallback to generic name with index
+ documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"
+
# Store ContentExtracted object in ActionDocument.documentData
actionDoc = ActionDocument(
- documentName=f"extracted_{extracted.id}.json",
+ documentName=documentName,
documentData=extracted, # ContentExtracted object
mimeType="application/json"
)
diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py
index be420b36..b24b4e52 100644
--- a/modules/workflows/processing/adaptive/contentValidator.py
+++ b/modules/workflows/processing/adaptive/contentValidator.py
@@ -22,7 +22,7 @@ class ContentValidator:
self.services = services
self.learningEngine = learningEngine
- async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None) -> Dict[str, Any]:
+ async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""Validates delivered content against user intent using AI (single attempt; parse-or-fail)
Args:
@@ -30,8 +30,9 @@ class ContentValidator:
intent: Workflow-level intent dict (for format requirements)
taskStep: Optional TaskStep object (preferred source for objective)
actionName: Optional action name (e.g., "ai.process", "ai.webResearch") that created the documents
+ actionParameters: Optional action parameters used during execution (e.g., {"columnsPerRow": 10, "researchDepth": "deep"})
"""
- return await self._validateWithAI(documents, intent, taskStep, actionName)
+ return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters)
def _analyzeDocuments(self, documents: List[Any]) -> List[Dict[str, Any]]:
"""Generic document analysis - create simple summaries with metadata."""
@@ -368,7 +369,7 @@ class ContentValidator:
return False
- async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None) -> Dict[str, Any]:
+ async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""AI-based comprehensive validation - generic approach"""
try:
if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
@@ -430,48 +431,91 @@ class ContentValidator:
actionDescription = "Content processing"
actionContext = f"\nDOCUMENTS CREATED BY: {actionDescription} ({actionName})"
- # Format success criteria for display
- criteriaDisplay = json.dumps(successCriteria, ensure_ascii=False) if successCriteria else "[]"
+ # Build action parameters context
+ actionParamsContext = ""
+ if actionParameters and isinstance(actionParameters, dict) and len(actionParameters) > 0:
+ # Filter out documentList and other large/redundant parameters for clarity
+ relevantParams = {k: v for k, v in actionParameters.items()
+ if k not in ['documentList', 'connections'] and v is not None}
+ if relevantParams:
+ paramsJson = json.dumps(relevantParams, ensure_ascii=False, indent=2)
+ actionParamsContext = f"\nACTION PARAMETERS USED: {paramsJson}"
- # Build successCriteriaMet example - show proper array format
- criteriaMetExample = json.dumps([False] * criteriaCount) if criteriaCount > 0 else "[]"
+ # Format success criteria for display with index numbers
+ if successCriteria:
+ criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)])
+ else:
+ criteriaDisplay = "[]"
promptBase = f"""TASK VALIDATION
+=== TASK INFORMATION ===
{objectiveLabel}: '{objectiveText}'
EXPECTED DATA TYPE: {dataType}
-EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}
-SUCCESS CRITERIA ({criteriaCount} items): {criteriaDisplay}{actionContext}
+EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}
+
+=== VALIDATION INSTRUCTIONS ===
VALIDATION CONTEXT:
You have METADATA (filename, format, size, mimeType) and STRUCTURE SUMMARY (if available: sections, tables, captions, IDs, statistics).
VALIDATION PRINCIPLES:
-1. Format compatibility: Match delivered format to expected format
-2. Structure validation: Use structure summary to verify requirements (section count, table captions, IDs, section types, etc.)
-3. Filename appropriateness: Check if filename suggests correct content type
-4. Document count: Verify number matches expectations
-5. Size sanity: Only flag if clearly wrong (<1KB for complex content or suspiciously large)
+1. EVIDENCE-BASED VALIDATION (CRITICAL): Claims must match structure evidence. If structure shows different values than claimed, trust the structure evidence, not claims.
+2. INDEPENDENT CRITERIA EVALUATION (CRITICAL): For criteriaMapping reason field - address ONLY the specific criterion requirement. Do not mention other criteria or other issues.
+3. PRIORITY: Missing data > Formatting issues. Always prioritize data completeness over format correctness.
+4. Structure validation: Use structure summary (statistics, counts, structure metadata) as PRIMARY evidence. Compare with task requirements.
+5. Discrepancy detection: If task requires specific quantities/amounts but structure shows different values, classify as missing_data or incomplete_data, not success.
+6. Format compatibility: Match delivered format to expected format (secondary priority after data completeness)
+7. Filename appropriateness: Check if filename suggests correct content type
+8. Document count: Verify number matches expectations
LIMITATIONS:
- Cannot validate: Content accuracy, data correctness, formatting details, or requirements requiring full content reading
- If structure summary unavailable, validate only metadata (format, filename, count, size)
SCORING GUIDELINES:
-- Format matches + reasonable structure → qualityScore: 0.8-1.0
-- Format matches but structure issues → qualityScore: 0.7-0.8
+- Data complete + format matches + structure matches requirements → qualityScore: 0.9-1.0
+- Data complete but format/structure issues → qualityScore: 0.7-0.9
+- Missing/incomplete data (even if format correct) → qualityScore: <0.7
+- Claims don't match structure evidence → qualityScore: <0.6 (trust structure, not claims)
- Format mismatch → qualityScore: <0.7
- Only suggest improvements for CLEAR metadata/structure issues
-OUTPUT FORMAT (JSON only):
+VALIDATION LOGIC:
+- If structure shows fewer quantities/amounts than required → gapType: missing_data or incomplete_data
+- If structure shows wrong organization but correct quantity → gapType: wrong_structure
+- If structure matches requirements but format wrong → gapType: wrong_format
+- If claims say "X delivered" but structure shows "Y" (Y < X) → overallSuccess: false, gapType: missing_data
+- Always trust structure statistics over any claims or descriptions
+
+IMPROVEMENT SUGGESTIONS PRIORITY (CRITICAL):
+- Order by CRITERIA PRIORITY first, then gapType priority: missing_data > incomplete_data > wrong_structure > wrong_format
+- [0] MUST address the HIGHEST PRIORITY unmet criterion (check criteriaMapping for which criteria are unmet)
+- If multiple criteria are unmet, prioritize by: data completeness > structure > format
+- gapType indicates the PRIMARY issue, but improvement suggestions must prioritize based on unmet criteria order
+
+=== OUTPUT FORMAT (JSON TEMPLATE) ===
{{
"overallSuccess": false,
"qualityScore": 0.0,
"dataTypeMatch": false,
"formatMatch": false,
"documentCount": {len(documents)},
- "successCriteriaMet": {criteriaMetExample},
+ "criteriaMapping": [
+ {{
+ "index": 0,
+ "criterion": "exact_criterion_text_from_data_section",
+ "met": false,
+ "reason": "explanation_about_this_criterion_based_on_structure_evidence"
+ }}
+ ],
"gapAnalysis": "Brief description of gaps based on metadata/structure only. If validation is limited, state this clearly.",
+ "gapType": "missing_data" | "wrong_structure" | "wrong_format" | "incomplete_data" | "no_gap",
+ "structureComparison": {{
+ "required": {{}},
+ "found": {{}},
+ "gap": {{}}
+ }},
"improvementSuggestions": [],
"validationDetails": [
{{
@@ -482,6 +526,15 @@ OUTPUT FORMAT (JSON only):
]
}}
+OUTPUT FORMAT NOTES:
+- criteriaMapping reason: Address ONLY the specific criterion requirement.
+- improvementSuggestions: [0] = highest priority unmet criterion from criteriaMapping. Order: unmet criteria by index first (data completeness > structure > format), then by gapType priority.
+
+=== DATA ===
+
+SUCCESS CRITERIA TO VALIDATE in criteriaMapping array:
+{criteriaDisplay}
+
DELIVERED DOCUMENTS ({len(documents)} items):
"""
@@ -522,7 +575,6 @@ DELIVERED DOCUMENTS ({len(documents)} items):
# Proactively fix Python-style booleans (False/True -> false/true) BEFORE parsing
# This handles booleans in any context: standalone, in lists, in dicts, etc.
- import re
# Use word boundaries but also handle cases where booleans are in brackets/arrays
# Replace False/True regardless of context (word boundary handles string matching correctly)
normalizedJson = re.sub(r'\bFalse\b', 'false', extractedJson)
@@ -544,8 +596,10 @@ DELIVERED DOCUMENTS ({len(documents)} items):
quality = aiResult.get("qualityScore")
details = aiResult.get("validationDetails")
gap = aiResult.get("gapAnalysis", "")
- criteria = aiResult.get("successCriteriaMet")
improvements = aiResult.get("improvementSuggestions", [])
+ gap_type = aiResult.get("gapType", "")
+ structure_comp = aiResult.get("structureComparison", {})
+ criteria_mapping = aiResult.get("criteriaMapping", [])
# Normalize while keeping failures explicit
normalized = {
@@ -553,10 +607,12 @@ DELIVERED DOCUMENTS ({len(documents)} items):
"qualityScore": float(quality) if isinstance(quality, (int, float)) else None,
"documentCount": len(documentSummaries),
"gapAnalysis": gap if gap else "",
+ "gapType": gap_type if gap_type else "",
+ "structureComparison": structure_comp if structure_comp else {},
+ "criteriaMapping": criteria_mapping if isinstance(criteria_mapping, list) else [],
"validationDetails": details if isinstance(details, list) else [{
"documentName": "AI Validation",
- "gapAnalysis": gap,
- "successCriteriaMet": criteria if isinstance(criteria, list) else []
+ "gapAnalysis": gap
}],
"improvementSuggestions": improvements,
"schemaCompliant": True,
@@ -585,7 +641,7 @@ DELIVERED DOCUMENTS ({len(documents)} items):
"dataTypeMatch": False,
"formatMatch": False,
"documentCount": 0,
- "successCriteriaMet": [],
+ "criteriaMapping": [],
"gapAnalysis": errorMessage,
"improvementSuggestions": [],
"validationDetails": [],
diff --git a/modules/workflows/processing/modes/modeDynamic.py b/modules/workflows/processing/modes/modeDynamic.py
index 43d4f2b7..f91a4080 100644
--- a/modules/workflows/processing/modes/modeDynamic.py
+++ b/modules/workflows/processing/modes/modeDynamic.py
@@ -133,8 +133,10 @@ class DynamicMode(BaseMode):
# Pass ALL documents to validator - validator decides what to validate (generic approach)
# Pass taskStep so validator can use task.objective and format fields
# Pass action name so validator knows which action created the documents
+ # Pass action parameters so validator can verify parameter-specific requirements
actionName = selection.get('action', 'unknown')
- validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName)
+ actionParameters = selection.get('parameters', {})
+ validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName, actionParameters)
observation.contentValidation = validationResult
quality_score = validationResult.get('qualityScore', 0.0)
if quality_score is None:
@@ -807,9 +809,9 @@ class DynamicMode(BaseMode):
'documentsCount': observation.documentsCount,
'previews': [p.model_dump(exclude_none=True) if hasattr(p, 'model_dump') else p.dict() for p in observation.previews] if observation.previews else [],
'notes': observation.notes,
- 'contentValidation': observation.contentValidation if observation.contentValidation else {},
'contentAnalysis': observation.contentAnalysis if observation.contentAnalysis else {}
}
+ # Note: contentValidation is shown separately in CONTENT VALIDATION section, not duplicated here
reviewContext = ReviewContext(
taskStep=context.taskStep,
taskActions=[],
@@ -822,21 +824,36 @@ class DynamicMode(BaseMode):
baseReviewContent = extractReviewContent(reviewContext)
placeholders = {"REVIEW_CONTENT": baseReviewContent}
- # NEW: Add content validation to review content
- enhancedReviewContent = placeholders.get("REVIEW_CONTENT", "")
+ # NEW: Add content validation to review content - extract separately for prominence
+ baseReviewContent = placeholders.get("REVIEW_CONTENT", "")
+ # Add observation title if there's content
+ if baseReviewContent.strip():
+ baseReviewContent = f"=== OBSERVATION ===\n{baseReviewContent}"
+ contentValidationSection = ""
if observation.contentValidation:
validation = observation.contentValidation
- enhancedReviewContent += f"\n\nCONTENT VALIDATION:\n"
- enhancedReviewContent += f"Overall Success: {validation.get('overallSuccess', False)}\n"
+ contentValidationSection += f"\n=== CONTENT VALIDATION ===\n"
+ gap_type = validation.get('gapType', '')
+ if gap_type:
+ contentValidationSection += f"Gap Type: {gap_type}\n"
+ contentValidationSection += f"Overall Success: {validation.get('overallSuccess', False)}\n"
quality_score = validation.get('qualityScore', 0.0)
if quality_score is None:
quality_score = 0.0
- enhancedReviewContent += f"Quality Score: {quality_score:.2f}\n"
+ contentValidationSection += f"Quality Score: {quality_score:.2f}\n"
gap_analysis = validation.get('gapAnalysis', '')
if gap_analysis:
- enhancedReviewContent += f"Gap Analysis: {gap_analysis}\n"
+ contentValidationSection += f"Gap Analysis: {gap_analysis}\n"
+ structure_comparison = validation.get('structureComparison', {})
+ if structure_comparison:
+ contentValidationSection += f"Structure Comparison: {json.dumps(structure_comparison, indent=2, ensure_ascii=False)}\n"
if validation.get('improvementSuggestions'):
- enhancedReviewContent += f"Improvement Suggestions: {', '.join(validation['improvementSuggestions'])}\n"
+ suggestions = validation['improvementSuggestions']
+ contentValidationSection += f"Next Actions (in sequence):\n"
+ for i, suggestion in enumerate(suggestions):
+ contentValidationSection += f" [{i}] {suggestion}\n"
+
+ enhancedReviewContent = baseReviewContent + contentValidationSection
# NEW: Add content analysis to review content
if observation.contentAnalysis:
@@ -854,9 +871,41 @@ class DynamicMode(BaseMode):
enhancedReviewContent += f"Partial Achievements: {len(progressState['partialAchievements'])}\n"
enhancedReviewContent += f"Failed Attempts: {len(progressState['failedAttempts'])}\n"
enhancedReviewContent += f"Current Phase: {progressState['currentPhase']}\n"
- if progressState['nextActionsSuggested']:
+ # Use content validation priorities if available, otherwise fall back to progress tracker suggestions
+ if observation.contentValidation and observation.contentValidation.get('improvementSuggestions'):
+ # Content validation already shown above, no need to repeat
+ pass
+ elif progressState['nextActionsSuggested']:
enhancedReviewContent += f"Next Action Suggestions: {', '.join(progressState['nextActionsSuggested'])}\n"
+ # NEW: Add action history to review content
+ if hasattr(context, 'previousReviewResult') and context.previousReviewResult:
+ actionHistory = []
+ for i, prevDecision in enumerate(context.previousReviewResult, 1):
+ if prevDecision and hasattr(prevDecision, 'nextAction') and prevDecision.nextAction:
+ action = prevDecision.nextAction
+ params = getattr(prevDecision, 'nextActionParameters', {}) or {}
+ # Filter out documentList for clarity
+ relevantParams = {k: v for k, v in params.items() if k not in ['documentList', 'connections']}
+ paramsStr = json.dumps(relevantParams, ensure_ascii=False) if relevantParams else "{}"
+ quality = getattr(prevDecision, 'qualityScore', None)
+ qualityStr = f" (quality: {quality:.2f})" if quality is not None else ""
+ actionHistory.append(f"Round {i}: {action} {paramsStr}{qualityStr}")
+
+ if actionHistory:
+ enhancedReviewContent += f"\nACTION HISTORY:\n"
+ enhancedReviewContent += "\n".join(f"- {entry}" for entry in actionHistory)
+ # Detect repeated actions
+ actionCounts = {}
+ for entry in actionHistory:
+                    # Extract action name: entries are "Round N: <action> ...", so the action is the third token
+                    actionName = entry.split()[2] if len(entry.split()) > 2 else "unknown"
+ actionCounts[actionName] = actionCounts.get(actionName, 0) + 1
+
+ repeatedActions = [action for action, count in actionCounts.items() if count >= 2]
+ if repeatedActions:
+ enhancedReviewContent += f"\nWARNING: Repeated actions detected: {', '.join(repeatedActions)}. Consider a fundamentally different approach.\n"
+
# Update placeholders with enhanced review content
placeholders["REVIEW_CONTENT"] = enhancedReviewContent
diff --git a/modules/workflows/processing/shared/promptGenerationActionsDynamic.py b/modules/workflows/processing/shared/promptGenerationActionsDynamic.py
index d9a699a6..c0af7adf 100644
--- a/modules/workflows/processing/shared/promptGenerationActionsDynamic.py
+++ b/modules/workflows/processing/shared/promptGenerationActionsDynamic.py
@@ -323,21 +323,22 @@ def generateDynamicRefinementPrompt(services, context: Any, reviewContent: str)
ACTIONS: {{KEY:AVAILABLE_METHODS}}
DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
-=== OBSERVATION ===
{{KEY:REVIEW_CONTENT}}
+=== NEXT ACTIONS ===
+Follow the improvement suggestions from CONTENT VALIDATION in priority order. Each suggestion indicates what action to take next.
+
=== OUTPUT FORMAT ===
{{
"status": "continue",
- "reason": "Brief reason",
- "nextAction": "ai.convert",
+ "reason": "Brief reason explaining why continuing",
+ "nextAction": "Selected_action_from_ACTIONS",
"nextActionParameters": {{
- "documentList": ["docItem:..."],
- "inputFormat": "json",
- "outputFormat": "csv",
- "columnsPerRow": 10
+ "documentList": ["docItem:reference_from_DOCUMENTS"],
+ "parameter1": "value1",
+ "parameter2": "value2"
}},
- "nextActionObjective": "Convert JSON to CSV with 10 columns per row"
+ "nextActionObjective": "Clear description of what this action will achieve based on improvement suggestions"
}}
=== RULES ===
@@ -345,9 +346,10 @@ DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
- nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent)
- nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names)
- documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent)
-- nextActionObjective: describe what this action will achieve
+- nextActionObjective: describe what this action will achieve based on the FIRST improvement suggestion from CONTENT VALIDATION
- Do NOT repeat failed actions - suggest DIFFERENT approach
-- Use improvement suggestions from content validation
+- If ACTION HISTORY shows repeated actions, suggest a fundamentally different approach
+- nextActionObjective must directly address the highest priority improvement suggestion from CONTENT VALIDATION
"""