Fixed the JSON merging chain for cut-off mapping with a fully dynamic JSON merger engine for any JSON structure and complexity
This commit is contained in:
parent
11bb127a43
commit
3ccd284a58
11 changed files with 1263 additions and 577 deletions
|
|
@ -16,6 +16,7 @@ from modules.shared.jsonUtils import (
|
|||
buildContinuationContext,
|
||||
parseJsonWithModel
|
||||
)
|
||||
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -304,7 +305,39 @@ Respond with ONLY a JSON object in this exact format:
|
|||
|
||||
# Extract sections from response (handles both valid and broken JSON)
|
||||
# Only for document generation (JSON responses)
|
||||
extractedSections, wasJsonComplete, parsedResult = self._extractSectionsFromResponse(result, iteration, debugPrefix)
|
||||
# CRITICAL: Pass allSections to enable fragment detection and merging
|
||||
extractedSections, wasJsonComplete, parsedResult = self._extractSectionsFromResponse(
|
||||
result, iteration, debugPrefix, allSections
|
||||
)
|
||||
|
||||
# CRITICAL: Handle JSON fragments (continuation content)
|
||||
# Fragment merging happens inside _extractSectionsFromResponse and updates allSections in place
|
||||
# If no sections extracted but fragment was merged, allSections was updated in place
|
||||
# Check if fragment was merged by checking if allSections was modified
|
||||
if not extractedSections and allSections:
|
||||
# Fragment was detected and merged directly into allSections (side effect in _extractSectionsFromResponse)
|
||||
logger.info(f"Iteration {iteration}: JSON fragment detected and merged, continuing")
|
||||
# Don't break - fragment was merged, continue to get more content if needed
|
||||
# Check if we should continue based on JSON completeness
|
||||
shouldContinue = self._shouldContinueGeneration(
|
||||
allSections,
|
||||
iteration,
|
||||
wasJsonComplete,
|
||||
result
|
||||
)
|
||||
if shouldContinue:
|
||||
if iterationOperationId:
|
||||
self.services.chat.progressLogUpdate(iterationOperationId, 0.8, "Fragment merged, continuing")
|
||||
self.services.chat.progressLogFinish(iterationOperationId, True)
|
||||
continue
|
||||
else:
|
||||
# Done - fragment was merged and JSON is complete
|
||||
if iterationOperationId:
|
||||
self.services.chat.progressLogFinish(iterationOperationId, True)
|
||||
if operationId:
|
||||
self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, fragment merged)")
|
||||
logger.info(f"Generation complete after {iteration} iterations: fragment merged")
|
||||
break
|
||||
|
||||
# Extract document metadata from first iteration if available
|
||||
if iteration == 1 and parsedResult and not documentMetadata:
|
||||
|
|
@ -321,14 +354,15 @@ Respond with ONLY a JSON object in this exact format:
|
|||
if not wasJsonComplete:
|
||||
logger.warning(f"Iteration {iteration}: No sections extracted from broken JSON, continuing for another attempt")
|
||||
continue
|
||||
# If JSON was complete but no sections extracted - this is an error, stop
|
||||
# If JSON was complete but no sections extracted - check if it was a fragment
|
||||
# Fragments are handled above, so if we get here and it's complete, it's an error
|
||||
logger.warning(f"Iteration {iteration}: No sections extracted from complete JSON, stopping")
|
||||
break
|
||||
|
||||
# Merge new sections with existing sections intelligently
|
||||
# This handles the STANDARD CASE: broken JSON iterations must be merged together
|
||||
# The break can occur anywhere - in any section, at any depth
|
||||
allSections = self._mergeSectionsIntelligently(allSections, extractedSections, iteration)
|
||||
allSections = JsonResponseHandler.mergeSectionsIntelligently(allSections, extractedSections, iteration)
|
||||
|
||||
# Check if we should continue (completion detection)
|
||||
# Simple logic: JSON completeness determines continuation
|
||||
|
|
@ -370,484 +404,24 @@ Respond with ONLY a JSON object in this exact format:
|
|||
|
||||
return final_result
|
||||
|
||||
def _mergeSectionsIntelligently(
    self,
    existingSections: List[Dict[str, Any]],
    newSections: List[Dict[str, Any]],
    iteration: int
) -> List[Dict[str, Any]]:
    """
    Combine the sections of the current iteration with the accumulated ones.

    A response may be cut off anywhere, so every new section is either folded
    into an already known section or appended. The first matching rule wins:
    1. Same "id" as an existing section.
    2. Same "content_type" as the last section, and the last section is
       reported incomplete by _isSectionIncomplete.
    3. Same non-None "order" as an existing section.
    4. Last section and new section are both "code_block" or both "table".

    Args:
        existingSections: Sections accumulated from previous iterations
        newSections: Sections extracted from the current iteration
        iteration: Current iteration number (only used in log messages)

    Returns:
        existingSections itself when newSections is empty, newSections itself
        when existingSections is empty, otherwise a new list holding the
        merged sections.
    """
    # Guard clauses: with one side empty there is nothing to merge.
    if not newSections:
        return existingSections
    if not existingSections:
        return newSections

    def _findSection(sections, key, wanted):
        # Index of the first section whose `key` equals `wanted`, or None.
        return next(
            (pos for pos, candidate in enumerate(sections) if candidate.get(key) == wanted),
            None,
        )

    mergedSections = list(existingSections)

    for newSection in newSections:
        # Rule 1: identical section ID
        newSectionId = newSection.get("id")
        matchIndex = _findSection(mergedSections, "id", newSectionId) if newSectionId else None
        if matchIndex is not None:
            mergedSections[matchIndex] = self._mergeSectionContent(
                mergedSections[matchIndex], newSection, iteration
            )
            logger.debug(f"Iteration {iteration}: Merged section by ID '{newSectionId}'")
            continue

        # Rule 2: the last section has the same type and looks cut off
        lastSection = mergedSections[-1]
        lastContentType = lastSection.get("content_type")
        continuesLastType = lastContentType == newSection.get("content_type")
        if continuesLastType and self._isSectionIncomplete(lastSection):
            mergedSections[-1] = self._mergeSectionContent(lastSection, newSection, iteration)
            logger.debug(f"Iteration {iteration}: Merged section by content-type continuation ({lastContentType})")
            continue

        # Rule 3: identical order value
        newOrder = newSection.get("order")
        matchIndex = _findSection(mergedSections, "order", newOrder) if newOrder is not None else None
        if matchIndex is not None:
            mergedSections[matchIndex] = self._mergeSectionContent(
                mergedSections[matchIndex], newSection, iteration
            )
            logger.debug(f"Iteration {iteration}: Merged section by order {newOrder}")
            continue

        # Rule 4: two consecutive code blocks or two consecutive tables
        if continuesLastType and lastContentType in ("code_block", "table"):
            mergedSections[-1] = self._mergeSectionContent(lastSection, newSection, iteration)
            if lastContentType == "code_block":
                logger.debug(f"Iteration {iteration}: Merged code_block sections by structural analysis")
            else:
                logger.debug(f"Iteration {iteration}: Merged table sections by structural analysis")
            continue

        # No rule matched: the section is new
        mergedSections.append(newSection)
        logger.debug(f"Iteration {iteration}: Added new section '{newSection.get('id', 'no-id')}' ({newSection.get('content_type', 'unknown')})")

    return mergedSections
|
||||
|
||||
def _isSectionIncomplete(self, section: Dict[str, Any]) -> bool:
    """
    Heuristically decide whether a section looks cut off at its end.

    Only the last element of the section is inspected. Type-specific checks
    run first, followed by a generic check over all fields of that element:
    - code_block: code ends with a comma, or has no final newline and the
      last comma-separated token of the last line is shorter than 5 chars
    - table: last cell ends with a quote or is 1-2 chars long, or the last
      row has fewer cells than there are headers
    - paragraph/heading: short text (< 20 chars) without sentence-ending
      punctuation
    - bullet_list/numbered_list: last item is a string shorter than 3 chars
    - image: base64 data without '=' padding that ends in a non-base64
      character or whose length is not a multiple of 4
    - generic: a list field whose last item is a very short string or a
      dict with fewer than 2 keys, or a short (< 10 chars) string field
      that does not end in '.', '!', '?' or a newline

    Args:
        section: Section dict with "content_type" and "elements"

    Returns:
        True if one of the heuristics fires, False otherwise (including for
        sections without elements or whose last element is not a dict).
    """
    contentType = section.get("content_type", "")
    elements = section.get("elements", [])

    if not elements:
        return False

    # Only the last element can be the one that was cut off.
    lastElement = elements[-1] if isinstance(elements, list) else elements
    if not isinstance(lastElement, dict):
        return False

    # Check code_block for incomplete code
    if contentType == "code_block":
        code = lastElement.get("code", "")
        codeStripped = code.rstrip() if code else ""
        if codeStripped:
            # A dangling comma means the line was cut between two values.
            # (The previous extra test looked for a trailing newline on the
            # already stripped text, which made every code block containing
            # a comma count as incomplete.)
            if codeStripped.endswith(','):
                return True
            # No final newline: the last line may be cut mid-token,
            # e.g. "23431,23".
            if not code.endswith('\n'):
                lastLine = codeStripped.split('\n')[-1]
                if ',' in lastLine and len(lastLine.split(',')[-1]) < 5:
                    return True

    # Check table for incomplete rows
    if contentType == "table":
        rows = lastElement.get("rows", [])
        if rows:
            lastRow = rows[-1] if isinstance(rows, list) else []
            if isinstance(lastRow, list) and lastRow:
                lastCell = lastRow[-1]
                if isinstance(lastCell, str):
                    # Cell ends with a quote or is only 1-2 characters long
                    if lastCell.endswith('"') or (len(lastCell) < 3 and lastCell):
                        return True
                # Fewer cells than headers: the row was cut off
                headers = lastElement.get("headers", [])
                if headers and isinstance(headers, list):
                    if len(lastRow) < len(headers):
                        return True

    # Check paragraph/heading for incomplete sentences
    if contentType in ["paragraph", "heading"]:
        text = lastElement.get("text", "")
        if text:
            textStripped = text.rstrip()
            # Missing end punctuation alone is unreliable, so the text must
            # also be very short to count as cut off.
            if textStripped and textStripped[-1] not in '.!?' and len(textStripped) < 20:
                return True

    # Check lists for incomplete items
    if contentType in ["bullet_list", "numbered_list"]:
        items = lastElement.get("items", [])
        if items and isinstance(items, list):
            lastItem = items[-1]
            if isinstance(lastItem, str) and len(lastItem) < 3:
                return True

    # Check image for incomplete base64 data
    if contentType == "image":
        imageData = lastElement.get("base64Data", "")
        if imageData:
            stripped = imageData.rstrip()
            # Data ending in '=' padding is treated as complete.
            if stripped and not stripped.endswith('='):
                if stripped[-1] not in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=':
                    return True
                # Unpadded length that is not a multiple of 4
                if len(stripped) % 4 != 0:
                    return True

    # GENERIC CHECK: look for incomplete structures in any field of the element
    # NOTE(review): this also fires on short metadata strings such as a
    # "language" value - confirm that this is intended.
    for key, value in lastElement.items():
        if isinstance(value, list) and len(value) > 0:
            lastItem = value[-1]
            if isinstance(lastItem, str):
                if len(lastItem) < 3:
                    return True
            elif isinstance(lastItem, dict):
                if len(lastItem) < 2:
                    return True
        elif isinstance(value, str):
            if 0 < len(value) < 10 and value[-1] not in '.!?\n':
                return True

    return False
|
||||
|
||||
def _mergeSectionContent(
    self,
    existingSection: Dict[str, Any],
    newSection: Dict[str, Any],
    iteration: int
) -> Dict[str, Any]:
    """
    Merge the content of newSection into existingSection.

    The last element of the existing section is merged with the first
    element of the new section, according to the existing section's
    "content_type":
    - code_block: code merged via _mergeCodeBlocks
    - paragraph/heading: text appended
    - table: rows appended (an exact duplicate of the last row is dropped)
    - bullet_list/numbered_list: items appended
    - image: base64 data appended if the existing data has no '=' padding,
      otherwise replaced
    - other: list fields concatenated, dict fields updated, string fields
      joined with a newline, missing fields added

    Any further elements of the new section are appended after the merged
    element. If the existing section has no elements, the new elements are
    adopted as they are.

    Args:
        existingSection: Section accumulated so far. Its last element dict
            and its elements list are updated in place.
        newSection: Section extracted from the current iteration
        iteration: Current iteration number (only used in log messages)

    Returns:
        existingSection itself if there is nothing to merge (no new elements,
        or one of the two elements is not a dict), otherwise a shallow copy
        of existingSection carrying the merged elements.
    """
    contentType = existingSection.get("content_type", "")
    existingElements = existingSection.get("elements", [])
    newElements = newSection.get("elements", [])

    if not newElements:
        return existingSection

    if not existingElements:
        # Nothing to merge into: adopt the new elements instead of dropping them.
        adoptedSection = existingSection.copy()
        if isinstance(newElements, list):
            adoptedSection["elements"] = list(newElements)
        elif isinstance(existingElements, list):
            adoptedSection["elements"] = [newElements]
        else:
            adoptedSection["elements"] = newElements
        if "order" not in adoptedSection and "order" in newSection:
            adoptedSection["order"] = newSection["order"]
        return adoptedSection

    # "elements" may be a list of elements or a single element
    existingIsList = isinstance(existingElements, list)
    existingElem = existingElements[-1] if existingIsList else existingElements

    if isinstance(newElements, list):
        newElem = newElements[0]
        remainingNewElements = newElements[1:]
    else:
        newElem = newElements
        remainingNewElements = []

    if not isinstance(existingElem, dict) or not isinstance(newElem, dict):
        return existingSection

    # Merge based on content type
    if contentType == "code_block":
        existingCode = existingElem.get("code", "")
        newCode = newElem.get("code", "")

        if existingCode and newCode:
            existingElem["code"] = self._mergeCodeBlocks(existingCode, newCode, iteration)
        elif newCode:
            existingElem["code"] = newCode
        # Preserve language from existing or new
        if "language" not in existingElem and "language" in newElem:
            existingElem["language"] = newElem["language"]

    elif contentType in ["paragraph", "heading"]:
        existingText = existingElem.get("text", "")
        newText = newElem.get("text", "")

        if existingText and newText:
            head = existingText.rstrip()
            # Join with a space while the sentence is still open, otherwise
            # start a new line.
            joiner = " " if head and head[-1] not in '.!?\n' else "\n"
            existingElem["text"] = head + joiner + newText.lstrip()
        elif newText:
            existingElem["text"] = newText

    elif contentType == "table":
        # Merge table rows with overlap detection
        existingRows = existingElem.get("rows", [])
        newRows = newElem.get("rows", [])
        if existingRows and newRows:
            lastExistingRow = existingRows[-1]
            firstNewRow = newRows[0]
            # A continuation may repeat the last row it had already produced
            if isinstance(lastExistingRow, (list, tuple)) and isinstance(firstNewRow, (list, tuple)):
                if list(lastExistingRow) == list(firstNewRow):
                    newRows = newRows[1:]
                    logger.debug(f"Iteration {iteration}: Removed duplicate table row (exact match)")

            existingElem["rows"] = existingRows + newRows
            logger.debug(f"Iteration {iteration}: Merged table rows - existing: {len(existingRows)}, new: {len(newRows)}, total: {len(existingRows) + len(newRows)}")
        elif newRows:
            existingElem["rows"] = newRows
        # Keep existing headers/caption, fall back to the new ones
        if not existingElem.get("headers") and newElem.get("headers"):
            existingElem["headers"] = newElem["headers"]
        if not existingElem.get("caption") and newElem.get("caption"):
            existingElem["caption"] = newElem["caption"]

    elif contentType in ["bullet_list", "numbered_list"]:
        existingItems = existingElem.get("items", [])
        newItems = newElem.get("items", [])
        if existingItems and newItems:
            existingElem["items"] = existingItems + newItems
        elif newItems:
            existingElem["items"] = newItems

    elif contentType == "image":
        existingImageData = existingElem.get("base64Data", "")
        newImageData = newElem.get("base64Data", "")
        if existingImageData and newImageData:
            if not existingImageData.rstrip().endswith('='):
                # No padding at the end: treat the existing data as cut off
                # and append the continuation.
                existingElem["base64Data"] = existingImageData + newImageData
                logger.debug(f"Iteration {iteration}: Merged incomplete image base64 data")
            else:
                # Existing data looks complete: the new image replaces it
                existingElem["base64Data"] = newImageData
        elif newImageData:
            existingElem["base64Data"] = newImageData
        # Preserve other image metadata
        if not existingElem.get("altText") and newElem.get("altText"):
            existingElem["altText"] = newElem["altText"]
        if not existingElem.get("caption") and newElem.get("caption"):
            existingElem["caption"] = newElem["caption"]

    else:
        # GENERIC FALLBACK for unknown content types
        # First pass: concatenate the first common, well-known list field.
        alreadyMergedKey = None
        for key in ["items", "rows", "columns", "cells", "elements", "data", "content"]:
            if key in existingElem and key in newElem:
                existingValue = existingElem[key]
                newValue = newElem[key]
                if isinstance(existingValue, list) and isinstance(newValue, list):
                    existingElem[key] = existingValue + newValue
                    logger.debug(f"Iteration {iteration}: Merged generic list field '{key}' - existing: {len(existingValue)}, new: {len(newValue)}")
                    alreadyMergedKey = key
                    break

        # Second pass: merge every other field by type
        for key, value in newElem.items():
            if key == alreadyMergedKey:
                # Already concatenated above; merging again would duplicate
                # the new entries.
                continue
            if key not in existingElem:
                existingElem[key] = value
            elif isinstance(existingElem[key], list) and isinstance(value, list):
                existingElem[key] = existingElem[key] + value
            elif isinstance(existingElem[key], dict) and isinstance(value, dict):
                # Shallow merge
                existingElem[key].update(value)
            elif isinstance(existingElem[key], str) and isinstance(value, str):
                existingElem[key] = existingElem[key] + "\n" + value

    # Update section with merged content
    mergedSection = existingSection.copy()
    if existingIsList:
        existingElements[-1] = existingElem
        # Keep the elements that follow the merged one instead of dropping them
        existingElements.extend(remainingNewElements)
        mergedSection["elements"] = existingElements
    elif remainingNewElements:
        mergedSection["elements"] = [existingElem] + list(remainingNewElements)
    else:
        mergedSection["elements"] = existingElem

    # Preserve metadata from new section if missing in existing
    if "order" not in mergedSection and "order" in newSection:
        mergedSection["order"] = newSection["order"]

    return mergedSection
|
||||
|
||||
def _mergeCodeBlocks(self, existingCode: str, newCode: str, iteration: int) -> str:
    """
    Merge two code fragments where the second one continues the first.

    Only the last line of existingCode and the first line of newCode are
    compared (ignoring surrounding whitespace):
    1. Identical lines: the repeated first new line is dropped.
    2. The last existing line ends with a comma, or its last comma-separated
       token is shorter than 5 characters: the first new line is appended to
       that line, keeping the line's indentation and all of its trailing
       commas.
    3. Both lines contain commas and the last token of the existing line
       equals the first token of the new line: the repeated token is removed
       from the new line.
    All remaining new lines are appended on new lines.

    Args:
        existingCode: Code accumulated so far (trailing whitespace is ignored)
        newCode: Continuation code (surrounding whitespace is ignored)
        iteration: Current iteration number (only used in log messages)

    Returns:
        The merged code. If one side is empty, the other side is returned
        unchanged.
    """
    if not existingCode:
        return newCode
    if not newCode:
        return existingCode

    # str.split always yields at least one item, so both lists are non-empty.
    existingLines = existingCode.rstrip().split('\n')
    newLines = newCode.strip().split('\n')

    lastExistingLine = existingLines[-1].strip()
    firstNewLine = newLines[0].strip()

    if lastExistingLine == firstNewLine:
        # Strategy 1: exact overlap - remove duplicate line
        newLines = newLines[1:]
        logger.debug(f"Iteration {iteration}: Removed exact duplicate line in code merge")

    elif lastExistingLine.endswith(',') or (',' in lastExistingLine and len(lastExistingLine.split(',')[-1]) < 5):
        # Strategy 2: the last line is incomplete - continue it with the
        # first new line. Work on the unstripped line so its indentation
        # survives, and only add a comma when none is there, so that
        # consecutive commas (empty fields) are kept.
        openLine = existingLines[-1].rstrip()
        separator = '' if openLine.endswith(',') else ','
        existingLines[-1] = openLine + separator + firstNewLine
        newLines = newLines[1:]
        logger.debug(f"Iteration {iteration}: Merged incomplete line with continuation")

    elif ',' in lastExistingLine and ',' in firstNewLine:
        # Strategy 3: the new line repeats the last token of the existing line
        lastExistingParts = lastExistingLine.split(',')
        firstNewParts = firstNewLine.split(',')
        if lastExistingParts[-1].strip() == firstNewParts[0].strip():
            newLines[0] = ','.join(firstNewParts[1:])
            logger.debug(f"Iteration {iteration}: Removed partial overlap in code merge")

    # Reconstruct merged code
    mergedCode = '\n'.join(existingLines)
    if newLines:
        if mergedCode:
            mergedCode += '\n'
        mergedCode += '\n'.join(newLines)

    return mergedCode
|
||||
# JSON merging logic moved to subJsonResponseHandling.py
|
||||
|
||||
def _extractSectionsFromResponse(
|
||||
self,
|
||||
result: str,
|
||||
iteration: int,
|
||||
debugPrefix: str
|
||||
debugPrefix: str,
|
||||
allSections: List[Dict[str, Any]] = None
|
||||
) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]]]:
|
||||
"""
|
||||
Extract sections from AI response, handling both valid and broken JSON.
|
||||
Uses repair mechanism for broken JSON.
|
||||
Handles JSON fragments (continuation content) that need to be merged into existing sections.
|
||||
Determines completion based on JSON structure (complete JSON = complete, broken/incomplete = incomplete).
|
||||
Returns (sections, wasJsonComplete, parsedResult)
|
||||
"""
|
||||
if allSections is None:
|
||||
allSections = []
|
||||
|
||||
# First, try to parse as valid JSON
|
||||
# CRITICAL: JSON completeness is determined by parsing, NOT by last character check!
|
||||
|
|
@ -862,6 +436,20 @@ Respond with ONLY a JSON object in this exact format:
|
|||
# Extract sections from parsed JSON
|
||||
sections = extractSectionsFromDocument(parsed_result)
|
||||
|
||||
# CRITICAL: If no sections extracted but we have existing sections, check if it's a fragment
|
||||
if not sections and allSections:
|
||||
fragment = JsonResponseHandler.detectAndParseJsonFragment(result, allSections)
|
||||
if fragment:
|
||||
logger.info(f"Iteration {iteration}: Detected JSON fragment ({fragment.get('fragment_type')}), merging into existing sections")
|
||||
# Merge fragment into existing sections
|
||||
merged_sections = JsonResponseHandler.mergeFragmentIntoSection(fragment, allSections, iteration)
|
||||
# Update allSections in place (this is a side effect, but necessary for continuation)
|
||||
# Note: This modifies the caller's allSections list
|
||||
allSections[:] = merged_sections
|
||||
# Return empty list to indicate we merged directly (not new sections)
|
||||
# But mark as incomplete so loop continues if needed
|
||||
return [], False, parsed_result
|
||||
|
||||
# JSON parsed successfully = complete
|
||||
logger.info(f"Iteration {iteration}: JSON parsed successfully - marking as complete")
|
||||
return sections, True, parsed_result
|
||||
|
|
@ -885,7 +473,7 @@ Respond with ONLY a JSON object in this exact format:
|
|||
# Repair failed - but we should still continue to allow AI to retry
|
||||
logger.warning(f"Iteration {iteration}: All repair strategies failed, but continuing to allow retry")
|
||||
return [], False, None # Mark as incomplete so loop continues
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
|
||||
return [], False, None
|
||||
|
|
@ -1413,8 +1001,3 @@ Respond with ONLY a JSON object in this exact format:
|
|||
self.services.chat.progressLogFinish(aiOperationId, False)
|
||||
raise
|
||||
|
||||
# DEPRECATED METHODS REMOVED:
|
||||
# - callAiDocuments() - replaced by callAiContent()
|
||||
# - callAiText() - replaced by callAiContent()
|
||||
# All call sites have been updated to use callAiContent()
|
||||
|
||||
|
|
|
|||
1022
modules/services/serviceAi/subJsonResponseHandling.py
Normal file
1022
modules/services/serviceAi/subJsonResponseHandling.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -497,13 +497,11 @@ class RendererDocx(BaseRenderer):
|
|||
# Extract title from prompt if not provided
|
||||
if not title or title == "Generated Document":
|
||||
# Look for "create a ... document" or "generate a ... report"
|
||||
import re
|
||||
title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', userPrompt.lower())
|
||||
if title_match:
|
||||
structure['title'] = title_match.group(1).strip().title()
|
||||
|
||||
# Extract sections from numbered lists in prompt
|
||||
import re
|
||||
section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
|
||||
sections = re.findall(section_pattern, userPrompt)
|
||||
|
||||
|
|
@ -849,7 +847,6 @@ class RendererDocx(BaseRenderer):
|
|||
Returns the content with tables replaced by placeholders.
|
||||
"""
|
||||
import csv
|
||||
import io
|
||||
|
||||
lines = content.split('\n')
|
||||
processed_lines = []
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ class RendererXlsx(BaseRenderer):
|
|||
# Title
|
||||
sheet['A1'] = title
|
||||
sheet['A1'].font = Font(size=16, bold=True)
|
||||
sheet['A1'].alignment = Alignment(horizontal='center')
|
||||
sheet['A1'].alignment = Alignment(horizontal='left')
|
||||
|
||||
# Generation info
|
||||
sheet['A3'] = "Generated:"
|
||||
|
|
@ -325,7 +325,7 @@ class RendererXlsx(BaseRenderer):
|
|||
def _getDefaultStyleSet(self) -> Dict[str, Any]:
|
||||
"""Default Excel style set - used when no style instructions present."""
|
||||
return {
|
||||
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
|
||||
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "left"},
|
||||
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
|
||||
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
|
||||
"table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
|
||||
|
|
@ -543,8 +543,9 @@ class RendererXlsx(BaseRenderer):
|
|||
try:
|
||||
# Sheet title
|
||||
sheet['A1'] = sheetTitle
|
||||
sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(styles.get("title", {}).get("color", "FF1F4E79")))
|
||||
sheet['A1'].alignment = Alignment(horizontal="center")
|
||||
title_style = styles.get("title", {})
|
||||
sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(title_style.get("color", "FF1F4E79")))
|
||||
sheet['A1'].alignment = Alignment(horizontal=title_style.get("align", "left"))
|
||||
|
||||
# Get table data from elements (canonical JSON format)
|
||||
elements = section.get("elements", [])
|
||||
|
|
@ -592,7 +593,7 @@ class RendererXlsx(BaseRenderer):
|
|||
sheet['A1'] = documentTitle
|
||||
|
||||
# Safety check for title style
|
||||
title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"})
|
||||
title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "left"})
|
||||
try:
|
||||
safe_color = self._getSafeColor(title_style["color"])
|
||||
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
|
||||
|
|
|
|||
|
|
@ -271,12 +271,6 @@ class UtilsService:
|
|||
def jsonTryParse(self, text) -> tuple:
    """Pass text to jsonUtils.tryParseJson and return its result unchanged."""
    parseOutcome = jsonUtils.tryParseJson(text)
    return parseOutcome
|
||||
|
||||
def jsonParseOrRaise(self, text):
    """Pass text to jsonUtils.parseJsonOrRaise and return its result unchanged."""
    parsedValue = jsonUtils.parseJsonOrRaise(text)
    return parsedValue
|
||||
|
||||
def jsonMergeRootLists(self, parts):
    """Pass parts to jsonUtils.mergeRootLists and return its result unchanged."""
    mergedParts = jsonUtils.mergeRootLists(parts)
    return mergedParts
|
||||
|
||||
# ===== Enum utility functions =====
|
||||
|
||||
def mapToEnum(self, enum_class, value_str, default_value):
|
||||
|
|
|
|||
|
|
@ -159,7 +159,6 @@ def storeDebugMessageAndDocuments(message, currentUser) -> None:
|
|||
"""
|
||||
try:
|
||||
import json
|
||||
from datetime import datetime, UTC
|
||||
|
||||
# Create base debug directory (use base debug dir, not prompts subdirectory)
|
||||
baseDebugDir = _getBaseDebugDir()
|
||||
|
|
|
|||
|
|
@ -97,47 +97,6 @@ def tryParseJson(text: Union[str, bytes]) -> Tuple[Optional[Union[Dict, List]],
|
|||
return None, e, cleaned
|
||||
|
||||
|
||||
def parseJsonOrRaise(text: Union[str, bytes]) -> Union[Dict, List]:
    """
    Parse text via tryParseJson and return the parsed object.

    When tryParseJson reports an error, that error is logged together with
    the first 200 characters of the cleaned text and then re-raised.
    """
    parsedObj, parseError, cleanedText = tryParseJson(text)
    # Success path first: nothing to log, just hand back the parsed object.
    if parseError is None:
        return parsedObj
    logger.error(f"parse_json_or_raise failed: {parseError}. Cleaned preview: {cleanedText[:200]}...")
    raise parseError
|
||||
|
||||
|
||||
def mergeRootLists(jsonParts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
    """
    Merge several JSON parts by concatenating their root-level lists.

    The first part that is (or parses to) a dict is the base. For each later dict part:
    - a list value whose key already holds a list in the base is appended to it
    - a key missing from the base is added
    - any other key keeps the value from the earlier part
    Parts that are not dicts (root-level lists, text that fails to parse) are skipped.
    If the merged result has a 'continuation' key it is reset to None.

    The input parts are never modified: root-level lists are copied before they
    are extended, so callers can keep using the objects they passed in.

    Args:
        jsonParts: Parts to merge; each is a dict, a list, or text handed to tryParseJson.

    Returns:
        The merged dict, or {} when no part is a dict.
    """
    parsed: List[Dict[str, Any]] = []
    for part in jsonParts:
        if isinstance(part, (dict, list)):
            obj = part
        else:
            obj, err, _ = tryParseJson(part)
            if err is not None:
                continue
        # Only dict roots can contribute keys; root-level lists are ignored.
        if isinstance(obj, dict):
            parsed.append(obj)
    if not parsed:
        return {}

    def _detach(value):
        # Copy root-level lists so later extend() calls never touch caller-owned data.
        return list(value) if isinstance(value, list) else value

    base: Dict[str, Any] = {k: _detach(v) for k, v in parsed[0].items()}
    for obj in parsed[1:]:
        for k, v in obj.items():
            if isinstance(v, list) and isinstance(base.get(k), list):
                base[k].extend(v)
            elif k not in base:
                base[k] = _detach(v)
    if 'continuation' in base:
        base['continuation'] = None
    return base
|
||||
|
||||
|
||||
def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Attempt to repair broken JSON using multiple strategies.
|
||||
|
|
|
|||
|
|
@ -271,7 +271,20 @@ class MethodAi(MethodBase):
|
|||
|
||||
# Prepare extraction options
|
||||
self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
|
||||
extractionOptions = parameters.extractionOptions
|
||||
extractionOptionsParam = parameters.get("extractionOptions")
|
||||
|
||||
# Convert dict to ExtractionOptions object if needed, or create defaults
|
||||
if extractionOptionsParam:
|
||||
if isinstance(extractionOptionsParam, dict):
|
||||
# Convert dict to ExtractionOptions object
|
||||
extractionOptions = ExtractionOptions(**extractionOptionsParam)
|
||||
elif isinstance(extractionOptionsParam, ExtractionOptions):
|
||||
extractionOptions = extractionOptionsParam
|
||||
else:
|
||||
# Invalid type, use defaults
|
||||
extractionOptions = None
|
||||
else:
|
||||
extractionOptions = None
|
||||
|
||||
# If extractionOptions not provided, create defaults
|
||||
if not extractionOptions:
|
||||
|
|
@ -297,10 +310,21 @@ class MethodAi(MethodBase):
|
|||
# Build ActionDocuments from ContentExtracted results
|
||||
self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
|
||||
actionDocuments = []
|
||||
for extracted in extractedResults:
|
||||
# Map extracted results back to original documents by index (results are in same order)
|
||||
for i, extracted in enumerate(extractedResults):
|
||||
# Get original document name if available
|
||||
originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
|
||||
if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName:
|
||||
# Use original filename with "extracted_" prefix
|
||||
baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName
|
||||
documentName = f"{baseName}_extracted_{extracted.id}.json"
|
||||
else:
|
||||
# Fallback to generic name with index
|
||||
documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"
|
||||
|
||||
# Store ContentExtracted object in ActionDocument.documentData
|
||||
actionDoc = ActionDocument(
|
||||
documentName=f"extracted_{extracted.id}.json",
|
||||
documentName=documentName,
|
||||
documentData=extracted, # ContentExtracted object
|
||||
mimeType="application/json"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ class ContentValidator:
|
|||
self.services = services
|
||||
self.learningEngine = learningEngine
|
||||
|
||||
async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None) -> Dict[str, Any]:
|
||||
async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
||||
"""Validates delivered content against user intent using AI (single attempt; parse-or-fail)
|
||||
|
||||
Args:
|
||||
|
|
@ -30,8 +30,9 @@ class ContentValidator:
|
|||
intent: Workflow-level intent dict (for format requirements)
|
||||
taskStep: Optional TaskStep object (preferred source for objective)
|
||||
actionName: Optional action name (e.g., "ai.process", "ai.webResearch") that created the documents
|
||||
actionParameters: Optional action parameters used during execution (e.g., {"columnsPerRow": 10, "researchDepth": "deep"})
|
||||
"""
|
||||
return await self._validateWithAI(documents, intent, taskStep, actionName)
|
||||
return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters)
|
||||
|
||||
def _analyzeDocuments(self, documents: List[Any]) -> List[Dict[str, Any]]:
|
||||
"""Generic document analysis - create simple summaries with metadata."""
|
||||
|
|
@ -368,7 +369,7 @@ class ContentValidator:
|
|||
|
||||
return False
|
||||
|
||||
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None) -> Dict[str, Any]:
|
||||
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
||||
"""AI-based comprehensive validation - generic approach"""
|
||||
try:
|
||||
if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
|
||||
|
|
@ -430,48 +431,91 @@ class ContentValidator:
|
|||
actionDescription = "Content processing"
|
||||
actionContext = f"\nDOCUMENTS CREATED BY: {actionDescription} ({actionName})"
|
||||
|
||||
# Format success criteria for display
|
||||
criteriaDisplay = json.dumps(successCriteria, ensure_ascii=False) if successCriteria else "[]"
|
||||
# Build action parameters context
|
||||
actionParamsContext = ""
|
||||
if actionParameters and isinstance(actionParameters, dict) and len(actionParameters) > 0:
|
||||
# Filter out documentList and other large/redundant parameters for clarity
|
||||
relevantParams = {k: v for k, v in actionParameters.items()
|
||||
if k not in ['documentList', 'connections'] and v is not None}
|
||||
if relevantParams:
|
||||
paramsJson = json.dumps(relevantParams, ensure_ascii=False, indent=2)
|
||||
actionParamsContext = f"\nACTION PARAMETERS USED: {paramsJson}"
|
||||
|
||||
# Build successCriteriaMet example - show proper array format
|
||||
criteriaMetExample = json.dumps([False] * criteriaCount) if criteriaCount > 0 else "[]"
|
||||
# Format success criteria for display with index numbers
|
||||
if successCriteria:
|
||||
criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)])
|
||||
else:
|
||||
criteriaDisplay = "[]"
|
||||
|
||||
promptBase = f"""TASK VALIDATION
|
||||
|
||||
=== TASK INFORMATION ===
|
||||
{objectiveLabel}: '{objectiveText}'
|
||||
EXPECTED DATA TYPE: {dataType}
|
||||
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}
|
||||
SUCCESS CRITERIA ({criteriaCount} items): {criteriaDisplay}{actionContext}
|
||||
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}
|
||||
|
||||
=== VALIDATION INSTRUCTIONS ===
|
||||
|
||||
VALIDATION CONTEXT:
|
||||
You have METADATA (filename, format, size, mimeType) and STRUCTURE SUMMARY (if available: sections, tables, captions, IDs, statistics).
|
||||
|
||||
VALIDATION PRINCIPLES:
|
||||
1. Format compatibility: Match delivered format to expected format
|
||||
2. Structure validation: Use structure summary to verify requirements (section count, table captions, IDs, section types, etc.)
|
||||
3. Filename appropriateness: Check if filename suggests correct content type
|
||||
4. Document count: Verify number matches expectations
|
||||
5. Size sanity: Only flag if clearly wrong (<1KB for complex content or suspiciously large)
|
||||
1. EVIDENCE-BASED VALIDATION (CRITICAL): Claims must match structure evidence. If structure shows different values than claimed, trust the structure evidence, not claims.
|
||||
2. INDEPENDENT CRITERIA EVALUATION (CRITICAL): For criteriaMapping reason field - address ONLY the specific criterion requirement. Do not mention other criteria or other issues.
|
||||
3. PRIORITY: Missing data > Formatting issues. Always prioritize data completeness over format correctness.
|
||||
4. Structure validation: Use structure summary (statistics, counts, structure metadata) as PRIMARY evidence. Compare with task requirements.
|
||||
5. Discrepancy detection: If task requires specific quantities/amounts but structure shows different values, classify as missing_data or incomplete_data, not success.
|
||||
6. Format compatibility: Match delivered format to expected format (secondary priority after data completeness)
|
||||
7. Filename appropriateness: Check if filename suggests correct content type
|
||||
8. Document count: Verify number matches expectations
|
||||
|
||||
LIMITATIONS:
|
||||
- Cannot validate: Content accuracy, data correctness, formatting details, or requirements requiring full content reading
|
||||
- If structure summary unavailable, validate only metadata (format, filename, count, size)
|
||||
|
||||
SCORING GUIDELINES:
|
||||
- Format matches + reasonable structure → qualityScore: 0.8-1.0
|
||||
- Format matches but structure issues → qualityScore: 0.7-0.8
|
||||
- Data complete + format matches + structure matches requirements → qualityScore: 0.9-1.0
|
||||
- Data complete but format/structure issues → qualityScore: 0.7-0.9
|
||||
- Missing/incomplete data (even if format correct) → qualityScore: <0.7
|
||||
- Claims don't match structure evidence → qualityScore: <0.6 (trust structure, not claims)
|
||||
- Format mismatch → qualityScore: <0.7
|
||||
- Only suggest improvements for CLEAR metadata/structure issues
|
||||
|
||||
OUTPUT FORMAT (JSON only):
|
||||
VALIDATION LOGIC:
|
||||
- If structure shows fewer quantities/amounts than required → gapType: missing_data or incomplete_data
|
||||
- If structure shows wrong organization but correct quantity → gapType: wrong_structure
|
||||
- If structure matches requirements but format wrong → gapType: wrong_format
|
||||
- If claims say "X delivered" but structure shows "Y" (Y < X) → overallSuccess: false, gapType: missing_data
|
||||
- Always trust structure statistics over any claims or descriptions
|
||||
|
||||
IMPROVEMENT SUGGESTIONS PRIORITY (CRITICAL):
|
||||
- Order by CRITERIA PRIORITY first, then gapType priority: missing_data > incomplete_data > wrong_structure > wrong_format
|
||||
- [0] MUST address the HIGHEST PRIORITY unmet criterion (check criteriaMapping for which criteria are unmet)
|
||||
- If multiple criteria are unmet, prioritize by: data completeness > structure > format
|
||||
- gapType indicates the PRIMARY issue, but improvement suggestions must prioritize based on unmet criteria order
|
||||
|
||||
=== OUTPUT FORMAT (JSON TEMPLATE) ===
|
||||
{{
|
||||
"overallSuccess": false,
|
||||
"qualityScore": 0.0,
|
||||
"dataTypeMatch": false,
|
||||
"formatMatch": false,
|
||||
"documentCount": {len(documents)},
|
||||
"successCriteriaMet": {criteriaMetExample},
|
||||
"criteriaMapping": [
|
||||
{{
|
||||
"index": 0,
|
||||
"criterion": "exact_criterion_text_from_data_section",
|
||||
"met": false,
|
||||
"reason": "explanation_about_this_criterion_based_on_structure_evidence"
|
||||
}}
|
||||
],
|
||||
"gapAnalysis": "Brief description of gaps based on metadata/structure only. If validation is limited, state this clearly.",
|
||||
"gapType": "missing_data" | "wrong_structure" | "wrong_format" | "incomplete_data" | "no_gap",
|
||||
"structureComparison": {{
|
||||
"required": {{}},
|
||||
"found": {{}},
|
||||
"gap": {{}}
|
||||
}},
|
||||
"improvementSuggestions": [],
|
||||
"validationDetails": [
|
||||
{{
|
||||
|
|
@ -482,6 +526,15 @@ OUTPUT FORMAT (JSON only):
|
|||
]
|
||||
}}
|
||||
|
||||
OUTPUT FORMAT NOTES:
|
||||
- criteriaMapping reason: Address ONLY the specific criterion requirement.
|
||||
- improvementSuggestions: [0] = highest priority unmet criterion from criteriaMapping. Order: unmet criteria by index first (data completeness > structure > format), then by gapType priority.
|
||||
|
||||
=== DATA ===
|
||||
|
||||
SUCCESS CRITERIA TO VALIDATE in criteriaMapping array:
|
||||
{criteriaDisplay}
|
||||
|
||||
DELIVERED DOCUMENTS ({len(documents)} items):
|
||||
"""
|
||||
|
||||
|
|
@ -522,7 +575,6 @@ DELIVERED DOCUMENTS ({len(documents)} items):
|
|||
|
||||
# Proactively fix Python-style booleans (False/True -> false/true) BEFORE parsing
|
||||
# This handles booleans in any context: standalone, in lists, in dicts, etc.
|
||||
import re
|
||||
# Use word boundaries but also handle cases where booleans are in brackets/arrays
|
||||
# Replace False/True regardless of context (word boundary handles string matching correctly)
|
||||
normalizedJson = re.sub(r'\bFalse\b', 'false', extractedJson)
|
||||
|
|
@ -544,8 +596,10 @@ DELIVERED DOCUMENTS ({len(documents)} items):
|
|||
quality = aiResult.get("qualityScore")
|
||||
details = aiResult.get("validationDetails")
|
||||
gap = aiResult.get("gapAnalysis", "")
|
||||
criteria = aiResult.get("successCriteriaMet")
|
||||
improvements = aiResult.get("improvementSuggestions", [])
|
||||
gap_type = aiResult.get("gapType", "")
|
||||
structure_comp = aiResult.get("structureComparison", {})
|
||||
criteria_mapping = aiResult.get("criteriaMapping", [])
|
||||
|
||||
# Normalize while keeping failures explicit
|
||||
normalized = {
|
||||
|
|
@ -553,10 +607,12 @@ DELIVERED DOCUMENTS ({len(documents)} items):
|
|||
"qualityScore": float(quality) if isinstance(quality, (int, float)) else None,
|
||||
"documentCount": len(documentSummaries),
|
||||
"gapAnalysis": gap if gap else "",
|
||||
"gapType": gap_type if gap_type else "",
|
||||
"structureComparison": structure_comp if structure_comp else {},
|
||||
"criteriaMapping": criteria_mapping if isinstance(criteria_mapping, list) else [],
|
||||
"validationDetails": details if isinstance(details, list) else [{
|
||||
"documentName": "AI Validation",
|
||||
"gapAnalysis": gap,
|
||||
"successCriteriaMet": criteria if isinstance(criteria, list) else []
|
||||
"gapAnalysis": gap
|
||||
}],
|
||||
"improvementSuggestions": improvements,
|
||||
"schemaCompliant": True,
|
||||
|
|
@ -585,7 +641,7 @@ DELIVERED DOCUMENTS ({len(documents)} items):
|
|||
"dataTypeMatch": False,
|
||||
"formatMatch": False,
|
||||
"documentCount": 0,
|
||||
"successCriteriaMet": [],
|
||||
"criteriaMapping": [],
|
||||
"gapAnalysis": errorMessage,
|
||||
"improvementSuggestions": [],
|
||||
"validationDetails": [],
|
||||
|
|
|
|||
|
|
@ -133,8 +133,10 @@ class DynamicMode(BaseMode):
|
|||
# Pass ALL documents to validator - validator decides what to validate (generic approach)
|
||||
# Pass taskStep so validator can use task.objective and format fields
|
||||
# Pass action name so validator knows which action created the documents
|
||||
# Pass action parameters so validator can verify parameter-specific requirements
|
||||
actionName = selection.get('action', 'unknown')
|
||||
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName)
|
||||
actionParameters = selection.get('parameters', {})
|
||||
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName, actionParameters)
|
||||
observation.contentValidation = validationResult
|
||||
quality_score = validationResult.get('qualityScore', 0.0)
|
||||
if quality_score is None:
|
||||
|
|
@ -807,9 +809,9 @@ class DynamicMode(BaseMode):
|
|||
'documentsCount': observation.documentsCount,
|
||||
'previews': [p.model_dump(exclude_none=True) if hasattr(p, 'model_dump') else p.dict() for p in observation.previews] if observation.previews else [],
|
||||
'notes': observation.notes,
|
||||
'contentValidation': observation.contentValidation if observation.contentValidation else {},
|
||||
'contentAnalysis': observation.contentAnalysis if observation.contentAnalysis else {}
|
||||
}
|
||||
# Note: contentValidation is shown separately in CONTENT VALIDATION section, not duplicated here
|
||||
reviewContext = ReviewContext(
|
||||
taskStep=context.taskStep,
|
||||
taskActions=[],
|
||||
|
|
@ -822,21 +824,36 @@ class DynamicMode(BaseMode):
|
|||
baseReviewContent = extractReviewContent(reviewContext)
|
||||
placeholders = {"REVIEW_CONTENT": baseReviewContent}
|
||||
|
||||
# NEW: Add content validation to review content
|
||||
enhancedReviewContent = placeholders.get("REVIEW_CONTENT", "")
|
||||
# NEW: Add content validation to review content - extract separately for prominence
|
||||
baseReviewContent = placeholders.get("REVIEW_CONTENT", "")
|
||||
# Add observation title if there's content
|
||||
if baseReviewContent.strip():
|
||||
baseReviewContent = f"=== OBSERVATION ===\n{baseReviewContent}"
|
||||
contentValidationSection = ""
|
||||
if observation.contentValidation:
|
||||
validation = observation.contentValidation
|
||||
enhancedReviewContent += f"\n\nCONTENT VALIDATION:\n"
|
||||
enhancedReviewContent += f"Overall Success: {validation.get('overallSuccess', False)}\n"
|
||||
contentValidationSection += f"\n=== CONTENT VALIDATION ===\n"
|
||||
gap_type = validation.get('gapType', '')
|
||||
if gap_type:
|
||||
contentValidationSection += f"Gap Type: {gap_type}\n"
|
||||
contentValidationSection += f"Overall Success: {validation.get('overallSuccess', False)}\n"
|
||||
quality_score = validation.get('qualityScore', 0.0)
|
||||
if quality_score is None:
|
||||
quality_score = 0.0
|
||||
enhancedReviewContent += f"Quality Score: {quality_score:.2f}\n"
|
||||
contentValidationSection += f"Quality Score: {quality_score:.2f}\n"
|
||||
gap_analysis = validation.get('gapAnalysis', '')
|
||||
if gap_analysis:
|
||||
enhancedReviewContent += f"Gap Analysis: {gap_analysis}\n"
|
||||
contentValidationSection += f"Gap Analysis: {gap_analysis}\n"
|
||||
structure_comparison = validation.get('structureComparison', {})
|
||||
if structure_comparison:
|
||||
contentValidationSection += f"Structure Comparison: {json.dumps(structure_comparison, indent=2, ensure_ascii=False)}\n"
|
||||
if validation.get('improvementSuggestions'):
|
||||
enhancedReviewContent += f"Improvement Suggestions: {', '.join(validation['improvementSuggestions'])}\n"
|
||||
suggestions = validation['improvementSuggestions']
|
||||
contentValidationSection += f"Next Actions (in sequence):\n"
|
||||
for i, suggestion in enumerate(suggestions):
|
||||
contentValidationSection += f" [{i}] {suggestion}\n"
|
||||
|
||||
enhancedReviewContent = baseReviewContent + contentValidationSection
|
||||
|
||||
# NEW: Add content analysis to review content
|
||||
if observation.contentAnalysis:
|
||||
|
|
@ -854,9 +871,41 @@ class DynamicMode(BaseMode):
|
|||
enhancedReviewContent += f"Partial Achievements: {len(progressState['partialAchievements'])}\n"
|
||||
enhancedReviewContent += f"Failed Attempts: {len(progressState['failedAttempts'])}\n"
|
||||
enhancedReviewContent += f"Current Phase: {progressState['currentPhase']}\n"
|
||||
if progressState['nextActionsSuggested']:
|
||||
# Use content validation priorities if available, otherwise fall back to progress tracker suggestions
|
||||
if observation.contentValidation and observation.contentValidation.get('improvementSuggestions'):
|
||||
# Content validation already shown above, no need to repeat
|
||||
pass
|
||||
elif progressState['nextActionsSuggested']:
|
||||
enhancedReviewContent += f"Next Action Suggestions: {', '.join(progressState['nextActionsSuggested'])}\n"
|
||||
|
||||
# NEW: Add action history to review content
|
||||
if hasattr(context, 'previousReviewResult') and context.previousReviewResult:
|
||||
actionHistory = []
|
||||
for i, prevDecision in enumerate(context.previousReviewResult, 1):
|
||||
if prevDecision and hasattr(prevDecision, 'nextAction') and prevDecision.nextAction:
|
||||
action = prevDecision.nextAction
|
||||
params = getattr(prevDecision, 'nextActionParameters', {}) or {}
|
||||
# Filter out documentList for clarity
|
||||
relevantParams = {k: v for k, v in params.items() if k not in ['documentList', 'connections']}
|
||||
paramsStr = json.dumps(relevantParams, ensure_ascii=False) if relevantParams else "{}"
|
||||
quality = getattr(prevDecision, 'qualityScore', None)
|
||||
qualityStr = f" (quality: {quality:.2f})" if quality is not None else ""
|
||||
actionHistory.append(f"Round {i}: {action} {paramsStr}{qualityStr}")
|
||||
|
||||
if actionHistory:
|
||||
enhancedReviewContent += f"\nACTION HISTORY:\n"
|
||||
enhancedReviewContent += "\n".join(f"- {entry}" for entry in actionHistory)
|
||||
# Detect repeated actions
|
||||
actionCounts = {}
|
||||
for entry in actionHistory:
|
||||
# Extract action name (before first space or {)
|
||||
actionName = entry.split()[1] if len(entry.split()) > 1 else "unknown"
|
||||
actionCounts[actionName] = actionCounts.get(actionName, 0) + 1
|
||||
|
||||
repeatedActions = [action for action, count in actionCounts.items() if count >= 2]
|
||||
if repeatedActions:
|
||||
enhancedReviewContent += f"\nWARNING: Repeated actions detected: {', '.join(repeatedActions)}. Consider a fundamentally different approach.\n"
|
||||
|
||||
# Update placeholders with enhanced review content
|
||||
placeholders["REVIEW_CONTENT"] = enhancedReviewContent
|
||||
|
||||
|
|
|
|||
|
|
@ -323,21 +323,22 @@ def generateDynamicRefinementPrompt(services, context: Any, reviewContent: str)
|
|||
ACTIONS: {{KEY:AVAILABLE_METHODS}}
|
||||
DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
|
||||
|
||||
=== OBSERVATION ===
|
||||
{{KEY:REVIEW_CONTENT}}
|
||||
|
||||
=== NEXT ACTIONS ===
|
||||
Follow the improvement suggestions from CONTENT VALIDATION in priority order. Each suggestion indicates what action to take next.
|
||||
|
||||
=== OUTPUT FORMAT ===
|
||||
{{
|
||||
"status": "continue",
|
||||
"reason": "Brief reason",
|
||||
"nextAction": "ai.convert",
|
||||
"reason": "Brief reason explaining why continuing",
|
||||
"nextAction": "Selected_action_from_ACTIONS",
|
||||
"nextActionParameters": {{
|
||||
"documentList": ["docItem:..."],
|
||||
"inputFormat": "json",
|
||||
"outputFormat": "csv",
|
||||
"columnsPerRow": 10
|
||||
"documentList": ["docItem:reference_from_DOCUMENTS"],
|
||||
"parameter1": "value1",
|
||||
"parameter2": "value2"
|
||||
}},
|
||||
"nextActionObjective": "Convert JSON to CSV with 10 columns per row"
|
||||
"nextActionObjective": "Clear description of what this action will achieve based on improvement suggestions"
|
||||
}}
|
||||
|
||||
=== RULES ===
|
||||
|
|
@ -345,9 +346,10 @@ DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
|
|||
- nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent)
|
||||
- nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names)
|
||||
- documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent)
|
||||
- nextActionObjective: describe what this action will achieve
|
||||
- nextActionObjective: describe what this action will achieve based on the FIRST improvement suggestion from CONTENT VALIDATION
|
||||
- Do NOT repeat failed actions - suggest DIFFERENT approach
|
||||
- Use improvement suggestions from content validation
|
||||
- If ACTION HISTORY shows repeated actions, suggest a fundamentally different approach
|
||||
- nextActionObjective must directly address the highest priority improvement suggestion from CONTENT VALIDATION
|
||||
|
||||
"""
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue