Fixed JSON merging chain for cut-off mapping with a fully dynamic JSON merger engine for any JSON structure and complexity
This commit is contained in:
parent
11bb127a43
commit
3ccd284a58
11 changed files with 1263 additions and 577 deletions
|
|
@ -16,6 +16,7 @@ from modules.shared.jsonUtils import (
|
||||||
buildContinuationContext,
|
buildContinuationContext,
|
||||||
parseJsonWithModel
|
parseJsonWithModel
|
||||||
)
|
)
|
||||||
|
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -304,7 +305,39 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
|
|
||||||
# Extract sections from response (handles both valid and broken JSON)
|
# Extract sections from response (handles both valid and broken JSON)
|
||||||
# Only for document generation (JSON responses)
|
# Only for document generation (JSON responses)
|
||||||
extractedSections, wasJsonComplete, parsedResult = self._extractSectionsFromResponse(result, iteration, debugPrefix)
|
# CRITICAL: Pass allSections to enable fragment detection and merging
|
||||||
|
extractedSections, wasJsonComplete, parsedResult = self._extractSectionsFromResponse(
|
||||||
|
result, iteration, debugPrefix, allSections
|
||||||
|
)
|
||||||
|
|
||||||
|
# CRITICAL: Handle JSON fragments (continuation content)
|
||||||
|
# Fragment merging happens inside _extractSectionsFromResponse and updates allSections in place
|
||||||
|
# If no sections extracted but fragment was merged, allSections was updated in place
|
||||||
|
# Check if fragment was merged by checking if allSections was modified
|
||||||
|
if not extractedSections and allSections:
|
||||||
|
# Fragment was detected and merged directly into allSections (side effect in _extractSectionsFromResponse)
|
||||||
|
logger.info(f"Iteration {iteration}: JSON fragment detected and merged, continuing")
|
||||||
|
# Don't break - fragment was merged, continue to get more content if needed
|
||||||
|
# Check if we should continue based on JSON completeness
|
||||||
|
shouldContinue = self._shouldContinueGeneration(
|
||||||
|
allSections,
|
||||||
|
iteration,
|
||||||
|
wasJsonComplete,
|
||||||
|
result
|
||||||
|
)
|
||||||
|
if shouldContinue:
|
||||||
|
if iterationOperationId:
|
||||||
|
self.services.chat.progressLogUpdate(iterationOperationId, 0.8, "Fragment merged, continuing")
|
||||||
|
self.services.chat.progressLogFinish(iterationOperationId, True)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# Done - fragment was merged and JSON is complete
|
||||||
|
if iterationOperationId:
|
||||||
|
self.services.chat.progressLogFinish(iterationOperationId, True)
|
||||||
|
if operationId:
|
||||||
|
self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, fragment merged)")
|
||||||
|
logger.info(f"Generation complete after {iteration} iterations: fragment merged")
|
||||||
|
break
|
||||||
|
|
||||||
# Extract document metadata from first iteration if available
|
# Extract document metadata from first iteration if available
|
||||||
if iteration == 1 and parsedResult and not documentMetadata:
|
if iteration == 1 and parsedResult and not documentMetadata:
|
||||||
|
|
@ -321,14 +354,15 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
if not wasJsonComplete:
|
if not wasJsonComplete:
|
||||||
logger.warning(f"Iteration {iteration}: No sections extracted from broken JSON, continuing for another attempt")
|
logger.warning(f"Iteration {iteration}: No sections extracted from broken JSON, continuing for another attempt")
|
||||||
continue
|
continue
|
||||||
# If JSON was complete but no sections extracted - this is an error, stop
|
# If JSON was complete but no sections extracted - check if it was a fragment
|
||||||
|
# Fragments are handled above, so if we get here and it's complete, it's an error
|
||||||
logger.warning(f"Iteration {iteration}: No sections extracted from complete JSON, stopping")
|
logger.warning(f"Iteration {iteration}: No sections extracted from complete JSON, stopping")
|
||||||
break
|
break
|
||||||
|
|
||||||
# Merge new sections with existing sections intelligently
|
# Merge new sections with existing sections intelligently
|
||||||
# This handles the STANDARD CASE: broken JSON iterations must be merged together
|
# This handles the STANDARD CASE: broken JSON iterations must be merged together
|
||||||
# The break can occur anywhere - in any section, at any depth
|
# The break can occur anywhere - in any section, at any depth
|
||||||
allSections = self._mergeSectionsIntelligently(allSections, extractedSections, iteration)
|
allSections = JsonResponseHandler.mergeSectionsIntelligently(allSections, extractedSections, iteration)
|
||||||
|
|
||||||
# Check if we should continue (completion detection)
|
# Check if we should continue (completion detection)
|
||||||
# Simple logic: JSON completeness determines continuation
|
# Simple logic: JSON completeness determines continuation
|
||||||
|
|
@ -370,484 +404,24 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
|
|
||||||
return final_result
|
return final_result
|
||||||
|
|
||||||
def _mergeSectionsIntelligently(
|
# JSON merging logic moved to subJsonResponseHandling.py
|
||||||
self,
|
|
||||||
existingSections: List[Dict[str, Any]],
|
|
||||||
newSections: List[Dict[str, Any]],
|
|
||||||
iteration: int
|
|
||||||
) -> List[Dict[str, Any]]:
|
|
||||||
"""
|
|
||||||
Intelligently merge sections from multiple iterations.
|
|
||||||
|
|
||||||
This is a GENERIC merging strategy that handles broken JSON iterations.
|
|
||||||
The break can occur anywhere - in any section, at any depth.
|
|
||||||
|
|
||||||
Merging strategies (in order of priority):
|
|
||||||
1. Same Section ID: Merge sections with identical IDs
|
|
||||||
2. Same Content-Type + Position: If last section is incomplete and new section continues it
|
|
||||||
3. Same Order: Merge sections with same order value
|
|
||||||
4. Structural Analysis: Detect continuation based on content structure
|
|
||||||
|
|
||||||
Args:
|
|
||||||
existingSections: Sections accumulated from previous iterations
|
|
||||||
newSections: Sections extracted from current iteration
|
|
||||||
iteration: Current iteration number
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Merged list of sections
|
|
||||||
"""
|
|
||||||
if not newSections:
|
|
||||||
return existingSections
|
|
||||||
|
|
||||||
if not existingSections:
|
|
||||||
return newSections
|
|
||||||
|
|
||||||
mergedSections = existingSections.copy()
|
|
||||||
|
|
||||||
for newSection in newSections:
|
|
||||||
merged = False
|
|
||||||
|
|
||||||
# Strategy 1: Same Section ID - merge directly
|
|
||||||
newSectionId = newSection.get("id")
|
|
||||||
if newSectionId:
|
|
||||||
for i, existingSection in enumerate(mergedSections):
|
|
||||||
if existingSection.get("id") == newSectionId:
|
|
||||||
# Merge sections with same ID
|
|
||||||
mergedSections[i] = self._mergeSectionContent(existingSection, newSection, iteration)
|
|
||||||
merged = True
|
|
||||||
logger.debug(f"Iteration {iteration}: Merged section by ID '{newSectionId}'")
|
|
||||||
break
|
|
||||||
|
|
||||||
if merged:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Strategy 2: Same Content-Type + Position (continuation detection)
|
|
||||||
# Check if last section is incomplete and new section continues it
|
|
||||||
if mergedSections:
|
|
||||||
lastSection = mergedSections[-1]
|
|
||||||
lastContentType = lastSection.get("content_type")
|
|
||||||
newContentType = newSection.get("content_type")
|
|
||||||
|
|
||||||
if lastContentType == newContentType:
|
|
||||||
# Same content type - check if last section is incomplete
|
|
||||||
if self._isSectionIncomplete(lastSection):
|
|
||||||
# Last section is incomplete, merge with new section
|
|
||||||
mergedSections[-1] = self._mergeSectionContent(lastSection, newSection, iteration)
|
|
||||||
merged = True
|
|
||||||
logger.debug(f"Iteration {iteration}: Merged section by content-type continuation ({lastContentType})")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Strategy 3: Same Order value
|
|
||||||
newOrder = newSection.get("order")
|
|
||||||
if newOrder is not None:
|
|
||||||
for i, existingSection in enumerate(mergedSections):
|
|
||||||
existingOrder = existingSection.get("order")
|
|
||||||
if existingOrder is not None and existingOrder == newOrder:
|
|
||||||
# Merge sections with same order
|
|
||||||
mergedSections[i] = self._mergeSectionContent(existingSection, newSection, iteration)
|
|
||||||
merged = True
|
|
||||||
logger.debug(f"Iteration {iteration}: Merged section by order {newOrder}")
|
|
||||||
break
|
|
||||||
|
|
||||||
if merged:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Strategy 4: Structural Analysis - detect continuation
|
|
||||||
# For code_block and table: if last section matches new section type, merge them
|
|
||||||
if mergedSections:
|
|
||||||
lastSection = mergedSections[-1]
|
|
||||||
lastContentType = lastSection.get("content_type")
|
|
||||||
newContentType = newSection.get("content_type")
|
|
||||||
|
|
||||||
# Both are code blocks - merge them
|
|
||||||
if lastContentType == "code_block" and newContentType == "code_block":
|
|
||||||
mergedSections[-1] = self._mergeSectionContent(lastSection, newSection, iteration)
|
|
||||||
merged = True
|
|
||||||
logger.debug(f"Iteration {iteration}: Merged code_block sections by structural analysis")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Both are tables - merge them (common case for broken JSON iterations)
|
|
||||||
if lastContentType == "table" and newContentType == "table":
|
|
||||||
mergedSections[-1] = self._mergeSectionContent(lastSection, newSection, iteration)
|
|
||||||
merged = True
|
|
||||||
logger.debug(f"Iteration {iteration}: Merged table sections by structural analysis")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# No merge strategy matched - add as new section
|
|
||||||
if not merged:
|
|
||||||
mergedSections.append(newSection)
|
|
||||||
logger.debug(f"Iteration {iteration}: Added new section '{newSection.get('id', 'no-id')}' ({newSection.get('content_type', 'unknown')})")
|
|
||||||
|
|
||||||
return mergedSections
|
|
||||||
|
|
||||||
def _isSectionIncomplete(self, section: Dict[str, Any]) -> bool:
|
|
||||||
"""
|
|
||||||
Check if a section is incomplete (broken at the end).
|
|
||||||
|
|
||||||
This detects incomplete sections based on content analysis:
|
|
||||||
- Code blocks: ends mid-line, ends with comma, ends with incomplete structure
|
|
||||||
- Text sections: ends mid-sentence, ends with incomplete structure
|
|
||||||
- Other types: check for incomplete elements
|
|
||||||
"""
|
|
||||||
contentType = section.get("content_type", "")
|
|
||||||
elements = section.get("elements", [])
|
|
||||||
|
|
||||||
if not elements:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Handle list of elements
|
|
||||||
if isinstance(elements, list) and len(elements) > 0:
|
|
||||||
lastElement = elements[-1]
|
|
||||||
else:
|
|
||||||
lastElement = elements
|
|
||||||
|
|
||||||
if not isinstance(lastElement, dict):
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Check code_block for incomplete code
|
|
||||||
if contentType == "code_block":
|
|
||||||
code = lastElement.get("code", "")
|
|
||||||
if code:
|
|
||||||
# Check if code ends incompletely:
|
|
||||||
# - Ends with comma (incomplete CSV line)
|
|
||||||
# - Ends with number but no newline (incomplete line)
|
|
||||||
# - Ends mid-token (e.g., "23431,23" - incomplete number)
|
|
||||||
codeStripped = code.rstrip()
|
|
||||||
if codeStripped:
|
|
||||||
# Check for incomplete patterns
|
|
||||||
if codeStripped.endswith(',') or (',' in codeStripped and not codeStripped.endswith('\n')):
|
|
||||||
# Ends with comma or has comma but no final newline - likely incomplete
|
|
||||||
return True
|
|
||||||
# Check if last line is incomplete (doesn't end with newline and has partial content)
|
|
||||||
if not code.endswith('\n') and codeStripped:
|
|
||||||
# No final newline - might be incomplete
|
|
||||||
# More sophisticated: check if last number is complete
|
|
||||||
lastLine = codeStripped.split('\n')[-1]
|
|
||||||
if lastLine and ',' in lastLine:
|
|
||||||
# Has commas but might be incomplete
|
|
||||||
parts = lastLine.split(',')
|
|
||||||
if parts and len(parts[-1]) < 5: # Last part is very short - might be incomplete
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Check table for incomplete rows
|
|
||||||
if contentType == "table":
|
|
||||||
rows = lastElement.get("rows", [])
|
|
||||||
if rows:
|
|
||||||
# Check if last row is incomplete (ends with incomplete data)
|
|
||||||
lastRow = rows[-1] if isinstance(rows, list) else []
|
|
||||||
if isinstance(lastRow, list) and lastRow:
|
|
||||||
# Check if last row ends with incomplete data (e.g., incomplete string)
|
|
||||||
lastCell = lastRow[-1] if lastRow else ""
|
|
||||||
if isinstance(lastCell, str):
|
|
||||||
# If last cell is incomplete (ends with quote or is very short), section might be incomplete
|
|
||||||
if lastCell.endswith('"') or (len(lastCell) < 3 and lastCell):
|
|
||||||
return True
|
|
||||||
# Also check if last row doesn't have expected number of columns (if headers exist)
|
|
||||||
headers = lastElement.get("headers", [])
|
|
||||||
if headers and isinstance(headers, list):
|
|
||||||
expectedCols = len(headers)
|
|
||||||
if len(lastRow) < expectedCols:
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Check paragraph/text for incomplete sentences
|
|
||||||
if contentType in ["paragraph", "heading"]:
|
|
||||||
text = lastElement.get("text", "")
|
|
||||||
if text:
|
|
||||||
# Simple heuristic: if doesn't end with sentence-ending punctuation
|
|
||||||
textStripped = text.rstrip()
|
|
||||||
if textStripped and not textStripped[-1] in '.!?':
|
|
||||||
# Might be incomplete, but this is less reliable
|
|
||||||
# Only mark as incomplete if very short (likely cut off)
|
|
||||||
if len(textStripped) < 20:
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Check lists for incomplete items
|
|
||||||
if contentType in ["bullet_list", "numbered_list"]:
|
|
||||||
items = lastElement.get("items", [])
|
|
||||||
if items and isinstance(items, list):
|
|
||||||
# Check if last item is incomplete (very short or ends with incomplete string)
|
|
||||||
lastItem = items[-1] if items else None
|
|
||||||
if isinstance(lastItem, str) and len(lastItem) < 3:
|
|
||||||
return True
|
|
||||||
# Check if items array seems incomplete (e.g., expected count not reached)
|
|
||||||
# This is harder to detect without context, so we rely on other heuristics
|
|
||||||
|
|
||||||
# Check image for incomplete base64 data
|
|
||||||
if contentType == "image":
|
|
||||||
imageData = lastElement.get("base64Data", "")
|
|
||||||
if imageData:
|
|
||||||
# Base64 strings should end with padding ('=' or '==')
|
|
||||||
# If it doesn't, it might be incomplete
|
|
||||||
stripped = imageData.rstrip()
|
|
||||||
if stripped and not stripped.endswith(('=', '==')):
|
|
||||||
# Check if it's a valid base64 character sequence that was cut off
|
|
||||||
# Base64 uses A-Z, a-z, 0-9, +, /, and = for padding
|
|
||||||
if len(stripped) > 0 and stripped[-1] not in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=':
|
|
||||||
return True
|
|
||||||
# If length is not a multiple of 4 (base64 requirement), it might be incomplete
|
|
||||||
if len(stripped) % 4 != 0:
|
|
||||||
return True
|
|
||||||
|
|
||||||
# GENERIC CHECK: Look for incomplete structures in any element
|
|
||||||
# Check if element has arrays/lists that might be incomplete
|
|
||||||
for key, value in lastElement.items():
|
|
||||||
if isinstance(value, list) and len(value) > 0:
|
|
||||||
# Check last item in list
|
|
||||||
lastItem = value[-1]
|
|
||||||
if isinstance(lastItem, str):
|
|
||||||
# If last string item is very short, might be incomplete
|
|
||||||
if len(lastItem) < 3:
|
|
||||||
return True
|
|
||||||
elif isinstance(lastItem, dict):
|
|
||||||
# If last dict item has very few keys, might be incomplete
|
|
||||||
if len(lastItem) < 2:
|
|
||||||
return True
|
|
||||||
elif isinstance(value, str):
|
|
||||||
# Check if string ends abruptly (no punctuation, very short)
|
|
||||||
if len(value) > 0 and len(value) < 10 and not value[-1] in '.!?\n':
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
def _mergeSectionContent(
|
|
||||||
self,
|
|
||||||
existingSection: Dict[str, Any],
|
|
||||||
newSection: Dict[str, Any],
|
|
||||||
iteration: int
|
|
||||||
) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Merge content from two sections.
|
|
||||||
|
|
||||||
Handles different content types:
|
|
||||||
- code_block: Append code, handle overlaps, merge incomplete lines
|
|
||||||
- paragraph/heading: Append text
|
|
||||||
- table: Merge rows
|
|
||||||
- list: Merge items
|
|
||||||
- Other: Merge elements
|
|
||||||
"""
|
|
||||||
contentType = existingSection.get("content_type", "")
|
|
||||||
existingElements = existingSection.get("elements", [])
|
|
||||||
newElements = newSection.get("elements", [])
|
|
||||||
|
|
||||||
if not newElements:
|
|
||||||
return existingSection
|
|
||||||
|
|
||||||
# Handle list of elements
|
|
||||||
if isinstance(existingElements, list):
|
|
||||||
existingElem = existingElements[-1] if existingElements else {}
|
|
||||||
else:
|
|
||||||
existingElem = existingElements
|
|
||||||
|
|
||||||
if isinstance(newElements, list):
|
|
||||||
newElem = newElements[0] if newElements else {}
|
|
||||||
else:
|
|
||||||
newElem = newElements
|
|
||||||
|
|
||||||
if not isinstance(existingElem, dict) or not isinstance(newElem, dict):
|
|
||||||
return existingSection
|
|
||||||
|
|
||||||
# Merge based on content type
|
|
||||||
if contentType == "code_block":
|
|
||||||
existingCode = existingElem.get("code", "")
|
|
||||||
newCode = newElem.get("code", "")
|
|
||||||
|
|
||||||
if existingCode and newCode:
|
|
||||||
mergedCode = self._mergeCodeBlocks(existingCode, newCode, iteration)
|
|
||||||
existingElem["code"] = mergedCode
|
|
||||||
# Preserve language from existing or new
|
|
||||||
if "language" not in existingElem and "language" in newElem:
|
|
||||||
existingElem["language"] = newElem["language"]
|
|
||||||
|
|
||||||
elif contentType in ["paragraph", "heading"]:
|
|
||||||
existingText = existingElem.get("text", "")
|
|
||||||
newText = newElem.get("text", "")
|
|
||||||
|
|
||||||
if existingText and newText:
|
|
||||||
# Append text with space if needed
|
|
||||||
if existingText.rstrip() and not existingText.rstrip()[-1] in '.!?\n':
|
|
||||||
mergedText = existingText.rstrip() + " " + newText.lstrip()
|
|
||||||
else:
|
|
||||||
mergedText = existingText.rstrip() + "\n" + newText.lstrip()
|
|
||||||
existingElem["text"] = mergedText
|
|
||||||
|
|
||||||
elif contentType == "table":
|
|
||||||
# Merge table rows with overlap detection
|
|
||||||
existingRows = existingElem.get("rows", [])
|
|
||||||
newRows = newElem.get("rows", [])
|
|
||||||
if existingRows and newRows:
|
|
||||||
# CRITICAL: Detect and remove overlaps before merging
|
|
||||||
# Check if last existing row matches first new row (exact overlap)
|
|
||||||
if len(existingRows) > 0 and len(newRows) > 0:
|
|
||||||
lastExistingRow = existingRows[-1]
|
|
||||||
firstNewRow = newRows[0]
|
|
||||||
# Compare rows (handle both list and tuple formats)
|
|
||||||
if isinstance(lastExistingRow, (list, tuple)) and isinstance(firstNewRow, (list, tuple)):
|
|
||||||
if list(lastExistingRow) == list(firstNewRow):
|
|
||||||
# Exact duplicate - remove first new row
|
|
||||||
newRows = newRows[1:]
|
|
||||||
logger.debug(f"Iteration {iteration}: Removed duplicate table row (exact match)")
|
|
||||||
|
|
||||||
# Combine rows from both sections (after removing overlaps)
|
|
||||||
existingElem["rows"] = existingRows + newRows
|
|
||||||
logger.debug(f"Iteration {iteration}: Merged table rows - existing: {len(existingRows)}, new: {len(newRows)}, total: {len(existingRows) + len(newRows)}")
|
|
||||||
elif newRows:
|
|
||||||
# If existing has no rows but new does, use new rows
|
|
||||||
existingElem["rows"] = newRows
|
|
||||||
# Preserve headers from existing (or use new if existing has none)
|
|
||||||
if not existingElem.get("headers") and newElem.get("headers"):
|
|
||||||
existingElem["headers"] = newElem["headers"]
|
|
||||||
# Preserve caption from existing (or use new if existing has none)
|
|
||||||
if not existingElem.get("caption") and newElem.get("caption"):
|
|
||||||
existingElem["caption"] = newElem["caption"]
|
|
||||||
|
|
||||||
elif contentType in ["bullet_list", "numbered_list"]:
|
|
||||||
# Merge list items
|
|
||||||
existingItems = existingElem.get("items", [])
|
|
||||||
newItems = newElem.get("items", [])
|
|
||||||
if existingItems and newItems:
|
|
||||||
existingElem["items"] = existingItems + newItems
|
|
||||||
|
|
||||||
elif contentType == "image":
|
|
||||||
# Images are typically complete - if new image is provided, replace existing
|
|
||||||
# But check if existing image data is incomplete (e.g., base64 string cut off)
|
|
||||||
existingImageData = existingElem.get("base64Data", "")
|
|
||||||
newImageData = newElem.get("base64Data", "")
|
|
||||||
if existingImageData and newImageData:
|
|
||||||
# If existing image data doesn't end with valid base64 padding, it might be incomplete
|
|
||||||
# Base64 padding is '=' or '==' at the end
|
|
||||||
if not existingImageData.rstrip().endswith(('=', '==')):
|
|
||||||
# Existing image might be incomplete - merge by appending new data
|
|
||||||
# This handles cases where base64 string was cut off
|
|
||||||
existingElem["base64Data"] = existingImageData + newImageData
|
|
||||||
logger.debug(f"Iteration {iteration}: Merged incomplete image base64 data")
|
|
||||||
else:
|
|
||||||
# Existing image is complete - replace with new (or keep existing if new is empty)
|
|
||||||
if newImageData:
|
|
||||||
existingElem["base64Data"] = newImageData
|
|
||||||
elif newImageData:
|
|
||||||
existingElem["base64Data"] = newImageData
|
|
||||||
# Preserve other image metadata
|
|
||||||
if not existingElem.get("altText") and newElem.get("altText"):
|
|
||||||
existingElem["altText"] = newElem["altText"]
|
|
||||||
if not existingElem.get("caption") and newElem.get("caption"):
|
|
||||||
existingElem["caption"] = newElem["caption"]
|
|
||||||
|
|
||||||
else:
|
|
||||||
# GENERIC FALLBACK: Handle any other content types or unknown structures
|
|
||||||
# Try to merge common array/list fields generically
|
|
||||||
for key in ["items", "rows", "columns", "cells", "elements", "data", "content"]:
|
|
||||||
if key in existingElem and key in newElem:
|
|
||||||
existingValue = existingElem[key]
|
|
||||||
newValue = newElem[key]
|
|
||||||
if isinstance(existingValue, list) and isinstance(newValue, list):
|
|
||||||
# Merge lists by concatenation
|
|
||||||
existingElem[key] = existingValue + newValue
|
|
||||||
logger.debug(f"Iteration {iteration}: Merged generic list field '{key}' - existing: {len(existingValue)}, new: {len(newValue)}")
|
|
||||||
break
|
|
||||||
|
|
||||||
# If no common list fields found, try to merge all fields from newElem into existingElem
|
|
||||||
# This handles cases where objects have different structures
|
|
||||||
for key, value in newElem.items():
|
|
||||||
if key not in existingElem:
|
|
||||||
# New field - add it
|
|
||||||
existingElem[key] = value
|
|
||||||
elif isinstance(existingElem[key], list) and isinstance(value, list):
|
|
||||||
# Both are lists - merge them
|
|
||||||
existingElem[key] = existingElem[key] + value
|
|
||||||
elif isinstance(existingElem[key], dict) and isinstance(value, dict):
|
|
||||||
# Both are dicts - recursively merge (shallow merge)
|
|
||||||
existingElem[key].update(value)
|
|
||||||
elif isinstance(existingElem[key], str) and isinstance(value, str):
|
|
||||||
# Both are strings - append new to existing
|
|
||||||
existingElem[key] = existingElem[key] + "\n" + value
|
|
||||||
|
|
||||||
# Update section with merged content
|
|
||||||
mergedSection = existingSection.copy()
|
|
||||||
if isinstance(existingElements, list):
|
|
||||||
# Update the last element in the list with merged content
|
|
||||||
if existingElements:
|
|
||||||
existingElements[-1] = existingElem
|
|
||||||
mergedSection["elements"] = existingElements
|
|
||||||
else:
|
|
||||||
mergedSection["elements"] = existingElem
|
|
||||||
|
|
||||||
# Preserve metadata from new section if missing in existing
|
|
||||||
if "order" not in mergedSection and "order" in newSection:
|
|
||||||
mergedSection["order"] = newSection["order"]
|
|
||||||
|
|
||||||
return mergedSection
|
|
||||||
|
|
||||||
def _mergeCodeBlocks(self, existingCode: str, newCode: str, iteration: int) -> str:
|
|
||||||
"""
|
|
||||||
Merge two code blocks intelligently, handling overlaps and incomplete lines.
|
|
||||||
"""
|
|
||||||
if not existingCode:
|
|
||||||
return newCode
|
|
||||||
if not newCode:
|
|
||||||
return existingCode
|
|
||||||
|
|
||||||
existingLines = existingCode.rstrip().split('\n')
|
|
||||||
newLines = newCode.strip().split('\n')
|
|
||||||
|
|
||||||
if not existingLines or not newLines:
|
|
||||||
return existingCode + "\n" + newCode
|
|
||||||
|
|
||||||
lastExistingLine = existingLines[-1].strip()
|
|
||||||
firstNewLine = newLines[0].strip()
|
|
||||||
|
|
||||||
# Strategy 1: Exact overlap - remove duplicate line
|
|
||||||
if lastExistingLine == firstNewLine:
|
|
||||||
newLines = newLines[1:]
|
|
||||||
logger.debug(f"Iteration {iteration}: Removed exact duplicate line in code merge")
|
|
||||||
|
|
||||||
# Strategy 2: Incomplete line merge
|
|
||||||
# If last existing line ends with comma or is incomplete, merge with first new line
|
|
||||||
elif lastExistingLine.endswith(',') or (',' in lastExistingLine and len(lastExistingLine.split(',')[-1]) < 5):
|
|
||||||
# Last line is incomplete - merge with first new line
|
|
||||||
# Remove trailing comma from existing line
|
|
||||||
mergedLine = lastExistingLine.rstrip(',') + ',' + firstNewLine.lstrip()
|
|
||||||
existingLines[-1] = mergedLine
|
|
||||||
newLines = newLines[1:]
|
|
||||||
logger.debug(f"Iteration {iteration}: Merged incomplete line with continuation")
|
|
||||||
|
|
||||||
# Strategy 3: Partial overlap detection
|
|
||||||
# Check if first new line starts with the end of last existing line
|
|
||||||
elif ',' in lastExistingLine and ',' in firstNewLine:
|
|
||||||
lastExistingParts = lastExistingLine.split(',')
|
|
||||||
firstNewParts = firstNewLine.split(',')
|
|
||||||
|
|
||||||
# Check for overlap: if last part of existing matches first part of new
|
|
||||||
if lastExistingParts and firstNewParts:
|
|
||||||
lastExistingPart = lastExistingParts[-1].strip()
|
|
||||||
firstNewPart = firstNewParts[0].strip()
|
|
||||||
|
|
||||||
# If they match, there's overlap
|
|
||||||
if lastExistingPart == firstNewPart and len(lastExistingParts) > 1:
|
|
||||||
# Remove overlapping part from new line
|
|
||||||
newLines[0] = ','.join(firstNewParts[1:])
|
|
||||||
logger.debug(f"Iteration {iteration}: Removed partial overlap in code merge")
|
|
||||||
|
|
||||||
# Reconstruct merged code
|
|
||||||
mergedCode = '\n'.join(existingLines)
|
|
||||||
if newLines:
|
|
||||||
if mergedCode and not mergedCode.endswith('\n'):
|
|
||||||
mergedCode += '\n'
|
|
||||||
mergedCode += '\n'.join(newLines)
|
|
||||||
|
|
||||||
return mergedCode
|
|
||||||
|
|
||||||
def _extractSectionsFromResponse(
|
def _extractSectionsFromResponse(
|
||||||
self,
|
self,
|
||||||
result: str,
|
result: str,
|
||||||
iteration: int,
|
iteration: int,
|
||||||
debugPrefix: str
|
debugPrefix: str,
|
||||||
|
allSections: List[Dict[str, Any]] = None
|
||||||
) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]]]:
|
) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]]]:
|
||||||
"""
|
"""
|
||||||
Extract sections from AI response, handling both valid and broken JSON.
|
Extract sections from AI response, handling both valid and broken JSON.
|
||||||
Uses repair mechanism for broken JSON.
|
Uses repair mechanism for broken JSON.
|
||||||
|
Handles JSON fragments (continuation content) that need to be merged into existing sections.
|
||||||
Determines completion based on JSON structure (complete JSON = complete, broken/incomplete = incomplete).
|
Determines completion based on JSON structure (complete JSON = complete, broken/incomplete = incomplete).
|
||||||
Returns (sections, wasJsonComplete, parsedResult)
|
Returns (sections, wasJsonComplete, parsedResult)
|
||||||
"""
|
"""
|
||||||
|
if allSections is None:
|
||||||
|
allSections = []
|
||||||
|
|
||||||
# First, try to parse as valid JSON
|
# First, try to parse as valid JSON
|
||||||
# CRITICAL: JSON completeness is determined by parsing, NOT by last character check!
|
# CRITICAL: JSON completeness is determined by parsing, NOT by last character check!
|
||||||
|
|
@ -862,6 +436,20 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
# Extract sections from parsed JSON
|
# Extract sections from parsed JSON
|
||||||
sections = extractSectionsFromDocument(parsed_result)
|
sections = extractSectionsFromDocument(parsed_result)
|
||||||
|
|
||||||
|
# CRITICAL: If no sections extracted but we have existing sections, check if it's a fragment
|
||||||
|
if not sections and allSections:
|
||||||
|
fragment = JsonResponseHandler.detectAndParseJsonFragment(result, allSections)
|
||||||
|
if fragment:
|
||||||
|
logger.info(f"Iteration {iteration}: Detected JSON fragment ({fragment.get('fragment_type')}), merging into existing sections")
|
||||||
|
# Merge fragment into existing sections
|
||||||
|
merged_sections = JsonResponseHandler.mergeFragmentIntoSection(fragment, allSections, iteration)
|
||||||
|
# Update allSections in place (this is a side effect, but necessary for continuation)
|
||||||
|
# Note: This modifies the caller's allSections list
|
||||||
|
allSections[:] = merged_sections
|
||||||
|
# Return empty list to indicate we merged directly (not new sections)
|
||||||
|
# But mark as incomplete so loop continues if needed
|
||||||
|
return [], False, parsed_result
|
||||||
|
|
||||||
# JSON parsed successfully = complete
|
# JSON parsed successfully = complete
|
||||||
logger.info(f"Iteration {iteration}: JSON parsed successfully - marking as complete")
|
logger.info(f"Iteration {iteration}: JSON parsed successfully - marking as complete")
|
||||||
return sections, True, parsed_result
|
return sections, True, parsed_result
|
||||||
|
|
@ -885,7 +473,7 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
# Repair failed - but we should still continue to allow AI to retry
|
# Repair failed - but we should still continue to allow AI to retry
|
||||||
logger.warning(f"Iteration {iteration}: All repair strategies failed, but continuing to allow retry")
|
logger.warning(f"Iteration {iteration}: All repair strategies failed, but continuing to allow retry")
|
||||||
return [], False, None # Mark as incomplete so loop continues
|
return [], False, None # Mark as incomplete so loop continues
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
|
logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
|
||||||
return [], False, None
|
return [], False, None
|
||||||
|
|
@ -1413,8 +1001,3 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
self.services.chat.progressLogFinish(aiOperationId, False)
|
self.services.chat.progressLogFinish(aiOperationId, False)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
# DEPRECATED METHODS REMOVED:
|
|
||||||
# - callAiDocuments() - replaced by callAiContent()
|
|
||||||
# - callAiText() - replaced by callAiContent()
|
|
||||||
# All call sites have been updated to use callAiContent()
|
|
||||||
|
|
||||||
|
|
|
||||||
1022
modules/services/serviceAi/subJsonResponseHandling.py
Normal file
1022
modules/services/serviceAi/subJsonResponseHandling.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -497,13 +497,11 @@ class RendererDocx(BaseRenderer):
|
||||||
# Extract title from prompt if not provided
|
# Extract title from prompt if not provided
|
||||||
if not title or title == "Generated Document":
|
if not title or title == "Generated Document":
|
||||||
# Look for "create a ... document" or "generate a ... report"
|
# Look for "create a ... document" or "generate a ... report"
|
||||||
import re
|
|
||||||
title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', userPrompt.lower())
|
title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', userPrompt.lower())
|
||||||
if title_match:
|
if title_match:
|
||||||
structure['title'] = title_match.group(1).strip().title()
|
structure['title'] = title_match.group(1).strip().title()
|
||||||
|
|
||||||
# Extract sections from numbered lists in prompt
|
# Extract sections from numbered lists in prompt
|
||||||
import re
|
|
||||||
section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
|
section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
|
||||||
sections = re.findall(section_pattern, userPrompt)
|
sections = re.findall(section_pattern, userPrompt)
|
||||||
|
|
||||||
|
|
@ -849,7 +847,6 @@ class RendererDocx(BaseRenderer):
|
||||||
Returns the content with tables replaced by placeholders.
|
Returns the content with tables replaced by placeholders.
|
||||||
"""
|
"""
|
||||||
import csv
|
import csv
|
||||||
import io
|
|
||||||
|
|
||||||
lines = content.split('\n')
|
lines = content.split('\n')
|
||||||
processed_lines = []
|
processed_lines = []
|
||||||
|
|
|
||||||
|
|
@ -95,7 +95,7 @@ class RendererXlsx(BaseRenderer):
|
||||||
# Title
|
# Title
|
||||||
sheet['A1'] = title
|
sheet['A1'] = title
|
||||||
sheet['A1'].font = Font(size=16, bold=True)
|
sheet['A1'].font = Font(size=16, bold=True)
|
||||||
sheet['A1'].alignment = Alignment(horizontal='center')
|
sheet['A1'].alignment = Alignment(horizontal='left')
|
||||||
|
|
||||||
# Generation info
|
# Generation info
|
||||||
sheet['A3'] = "Generated:"
|
sheet['A3'] = "Generated:"
|
||||||
|
|
@ -325,7 +325,7 @@ class RendererXlsx(BaseRenderer):
|
||||||
def _getDefaultStyleSet(self) -> Dict[str, Any]:
|
def _getDefaultStyleSet(self) -> Dict[str, Any]:
|
||||||
"""Default Excel style set - used when no style instructions present."""
|
"""Default Excel style set - used when no style instructions present."""
|
||||||
return {
|
return {
|
||||||
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
|
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "left"},
|
||||||
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
|
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
|
||||||
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
|
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
|
||||||
"table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
|
"table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
|
||||||
|
|
@ -543,8 +543,9 @@ class RendererXlsx(BaseRenderer):
|
||||||
try:
|
try:
|
||||||
# Sheet title
|
# Sheet title
|
||||||
sheet['A1'] = sheetTitle
|
sheet['A1'] = sheetTitle
|
||||||
sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(styles.get("title", {}).get("color", "FF1F4E79")))
|
title_style = styles.get("title", {})
|
||||||
sheet['A1'].alignment = Alignment(horizontal="center")
|
sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(title_style.get("color", "FF1F4E79")))
|
||||||
|
sheet['A1'].alignment = Alignment(horizontal=title_style.get("align", "left"))
|
||||||
|
|
||||||
# Get table data from elements (canonical JSON format)
|
# Get table data from elements (canonical JSON format)
|
||||||
elements = section.get("elements", [])
|
elements = section.get("elements", [])
|
||||||
|
|
@ -592,7 +593,7 @@ class RendererXlsx(BaseRenderer):
|
||||||
sheet['A1'] = documentTitle
|
sheet['A1'] = documentTitle
|
||||||
|
|
||||||
# Safety check for title style
|
# Safety check for title style
|
||||||
title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"})
|
title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "left"})
|
||||||
try:
|
try:
|
||||||
safe_color = self._getSafeColor(title_style["color"])
|
safe_color = self._getSafeColor(title_style["color"])
|
||||||
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
|
sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
|
||||||
|
|
|
||||||
|
|
@ -271,12 +271,6 @@ class UtilsService:
|
||||||
def jsonTryParse(self, text) -> tuple:
|
def jsonTryParse(self, text) -> tuple:
|
||||||
return jsonUtils.tryParseJson(text)
|
return jsonUtils.tryParseJson(text)
|
||||||
|
|
||||||
def jsonParseOrRaise(self, text):
|
|
||||||
return jsonUtils.parseJsonOrRaise(text)
|
|
||||||
|
|
||||||
def jsonMergeRootLists(self, parts):
|
|
||||||
return jsonUtils.mergeRootLists(parts)
|
|
||||||
|
|
||||||
# ===== Enum utility functions =====
|
# ===== Enum utility functions =====
|
||||||
|
|
||||||
def mapToEnum(self, enum_class, value_str, default_value):
|
def mapToEnum(self, enum_class, value_str, default_value):
|
||||||
|
|
|
||||||
|
|
@ -159,7 +159,6 @@ def storeDebugMessageAndDocuments(message, currentUser) -> None:
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
import json
|
import json
|
||||||
from datetime import datetime, UTC
|
|
||||||
|
|
||||||
# Create base debug directory (use base debug dir, not prompts subdirectory)
|
# Create base debug directory (use base debug dir, not prompts subdirectory)
|
||||||
baseDebugDir = _getBaseDebugDir()
|
baseDebugDir = _getBaseDebugDir()
|
||||||
|
|
|
||||||
|
|
@ -97,47 +97,6 @@ def tryParseJson(text: Union[str, bytes]) -> Tuple[Optional[Union[Dict, List]],
|
||||||
return None, e, cleaned
|
return None, e, cleaned
|
||||||
|
|
||||||
|
|
||||||
def parseJsonOrRaise(text: Union[str, bytes]) -> Union[Dict, List]:
|
|
||||||
obj, err, cleaned = tryParseJson(text)
|
|
||||||
if err is not None:
|
|
||||||
logger.error(f"parse_json_or_raise failed: {err}. Cleaned preview: {cleaned[:200]}...")
|
|
||||||
raise err
|
|
||||||
return obj
|
|
||||||
|
|
||||||
|
|
||||||
def mergeRootLists(jsonParts: List[Union[str, Dict, List]]) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Generic merger for root-level lists: take first dict as base; for each subsequent part:
|
|
||||||
- if value is list and same key exists as list, extend it
|
|
||||||
- if key absent, add it
|
|
||||||
- for non-list keys, keep the original (from the first part)
|
|
||||||
Sets continuation=None if present in base.
|
|
||||||
"""
|
|
||||||
base: Optional[Dict[str, Any]] = None
|
|
||||||
parsed: List[Dict[str, Any]] = []
|
|
||||||
for part in jsonParts:
|
|
||||||
if isinstance(part, (dict, list)):
|
|
||||||
obj = part
|
|
||||||
else:
|
|
||||||
obj, err, _ = tryParseJson(part)
|
|
||||||
if err is not None or not isinstance(obj, (dict, list)):
|
|
||||||
continue
|
|
||||||
if isinstance(obj, dict):
|
|
||||||
parsed.append(obj)
|
|
||||||
if not parsed:
|
|
||||||
return {}
|
|
||||||
base = dict(parsed[0])
|
|
||||||
for obj in parsed[1:]:
|
|
||||||
for k, v in obj.items():
|
|
||||||
if isinstance(v, list) and isinstance(base.get(k), list):
|
|
||||||
base[k].extend(v)
|
|
||||||
elif k not in base:
|
|
||||||
base[k] = v
|
|
||||||
if 'continuation' in base:
|
|
||||||
base['continuation'] = None
|
|
||||||
return base
|
|
||||||
|
|
||||||
|
|
||||||
def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
|
def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Attempt to repair broken JSON using multiple strategies.
|
Attempt to repair broken JSON using multiple strategies.
|
||||||
|
|
|
||||||
|
|
@ -271,7 +271,20 @@ class MethodAi(MethodBase):
|
||||||
|
|
||||||
# Prepare extraction options
|
# Prepare extraction options
|
||||||
self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
|
self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
|
||||||
extractionOptions = parameters.extractionOptions
|
extractionOptionsParam = parameters.get("extractionOptions")
|
||||||
|
|
||||||
|
# Convert dict to ExtractionOptions object if needed, or create defaults
|
||||||
|
if extractionOptionsParam:
|
||||||
|
if isinstance(extractionOptionsParam, dict):
|
||||||
|
# Convert dict to ExtractionOptions object
|
||||||
|
extractionOptions = ExtractionOptions(**extractionOptionsParam)
|
||||||
|
elif isinstance(extractionOptionsParam, ExtractionOptions):
|
||||||
|
extractionOptions = extractionOptionsParam
|
||||||
|
else:
|
||||||
|
# Invalid type, use defaults
|
||||||
|
extractionOptions = None
|
||||||
|
else:
|
||||||
|
extractionOptions = None
|
||||||
|
|
||||||
# If extractionOptions not provided, create defaults
|
# If extractionOptions not provided, create defaults
|
||||||
if not extractionOptions:
|
if not extractionOptions:
|
||||||
|
|
@ -297,10 +310,21 @@ class MethodAi(MethodBase):
|
||||||
# Build ActionDocuments from ContentExtracted results
|
# Build ActionDocuments from ContentExtracted results
|
||||||
self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
|
self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
|
||||||
actionDocuments = []
|
actionDocuments = []
|
||||||
for extracted in extractedResults:
|
# Map extracted results back to original documents by index (results are in same order)
|
||||||
|
for i, extracted in enumerate(extractedResults):
|
||||||
|
# Get original document name if available
|
||||||
|
originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
|
||||||
|
if originalDoc and hasattr(originalDoc, 'fileName') and originalDoc.fileName:
|
||||||
|
# Use original filename (extension stripped) followed by an "_extracted_<id>" suffix
|
||||||
|
baseName = originalDoc.fileName.rsplit('.', 1)[0] if '.' in originalDoc.fileName else originalDoc.fileName
|
||||||
|
documentName = f"{baseName}_extracted_{extracted.id}.json"
|
||||||
|
else:
|
||||||
|
# Fallback to generic name with index
|
||||||
|
documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"
|
||||||
|
|
||||||
# Store ContentExtracted object in ActionDocument.documentData
|
# Store ContentExtracted object in ActionDocument.documentData
|
||||||
actionDoc = ActionDocument(
|
actionDoc = ActionDocument(
|
||||||
documentName=f"extracted_{extracted.id}.json",
|
documentName=documentName,
|
||||||
documentData=extracted, # ContentExtracted object
|
documentData=extracted, # ContentExtracted object
|
||||||
mimeType="application/json"
|
mimeType="application/json"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ class ContentValidator:
|
||||||
self.services = services
|
self.services = services
|
||||||
self.learningEngine = learningEngine
|
self.learningEngine = learningEngine
|
||||||
|
|
||||||
async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None) -> Dict[str, Any]:
|
async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
||||||
"""Validates delivered content against user intent using AI (single attempt; parse-or-fail)
|
"""Validates delivered content against user intent using AI (single attempt; parse-or-fail)
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
|
@ -30,8 +30,9 @@ class ContentValidator:
|
||||||
intent: Workflow-level intent dict (for format requirements)
|
intent: Workflow-level intent dict (for format requirements)
|
||||||
taskStep: Optional TaskStep object (preferred source for objective)
|
taskStep: Optional TaskStep object (preferred source for objective)
|
||||||
actionName: Optional action name (e.g., "ai.process", "ai.webResearch") that created the documents
|
actionName: Optional action name (e.g., "ai.process", "ai.webResearch") that created the documents
|
||||||
|
actionParameters: Optional action parameters used during execution (e.g., {"columnsPerRow": 10, "researchDepth": "deep"})
|
||||||
"""
|
"""
|
||||||
return await self._validateWithAI(documents, intent, taskStep, actionName)
|
return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters)
|
||||||
|
|
||||||
def _analyzeDocuments(self, documents: List[Any]) -> List[Dict[str, Any]]:
|
def _analyzeDocuments(self, documents: List[Any]) -> List[Dict[str, Any]]:
|
||||||
"""Generic document analysis - create simple summaries with metadata."""
|
"""Generic document analysis - create simple summaries with metadata."""
|
||||||
|
|
@ -368,7 +369,7 @@ class ContentValidator:
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None) -> Dict[str, Any]:
|
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
||||||
"""AI-based comprehensive validation - generic approach"""
|
"""AI-based comprehensive validation - generic approach"""
|
||||||
try:
|
try:
|
||||||
if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
|
if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
|
||||||
|
|
@ -430,48 +431,91 @@ class ContentValidator:
|
||||||
actionDescription = "Content processing"
|
actionDescription = "Content processing"
|
||||||
actionContext = f"\nDOCUMENTS CREATED BY: {actionDescription} ({actionName})"
|
actionContext = f"\nDOCUMENTS CREATED BY: {actionDescription} ({actionName})"
|
||||||
|
|
||||||
# Format success criteria for display
|
# Build action parameters context
|
||||||
criteriaDisplay = json.dumps(successCriteria, ensure_ascii=False) if successCriteria else "[]"
|
actionParamsContext = ""
|
||||||
|
if actionParameters and isinstance(actionParameters, dict) and len(actionParameters) > 0:
|
||||||
|
# Filter out documentList and other large/redundant parameters for clarity
|
||||||
|
relevantParams = {k: v for k, v in actionParameters.items()
|
||||||
|
if k not in ['documentList', 'connections'] and v is not None}
|
||||||
|
if relevantParams:
|
||||||
|
paramsJson = json.dumps(relevantParams, ensure_ascii=False, indent=2)
|
||||||
|
actionParamsContext = f"\nACTION PARAMETERS USED: {paramsJson}"
|
||||||
|
|
||||||
# Build successCriteriaMet example - show proper array format
|
# Format success criteria for display with index numbers
|
||||||
criteriaMetExample = json.dumps([False] * criteriaCount) if criteriaCount > 0 else "[]"
|
if successCriteria:
|
||||||
|
criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)])
|
||||||
|
else:
|
||||||
|
criteriaDisplay = "[]"
|
||||||
|
|
||||||
promptBase = f"""TASK VALIDATION
|
promptBase = f"""TASK VALIDATION
|
||||||
|
|
||||||
|
=== TASK INFORMATION ===
|
||||||
{objectiveLabel}: '{objectiveText}'
|
{objectiveLabel}: '{objectiveText}'
|
||||||
EXPECTED DATA TYPE: {dataType}
|
EXPECTED DATA TYPE: {dataType}
|
||||||
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}
|
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}
|
||||||
SUCCESS CRITERIA ({criteriaCount} items): {criteriaDisplay}{actionContext}
|
|
||||||
|
=== VALIDATION INSTRUCTIONS ===
|
||||||
|
|
||||||
VALIDATION CONTEXT:
|
VALIDATION CONTEXT:
|
||||||
You have METADATA (filename, format, size, mimeType) and STRUCTURE SUMMARY (if available: sections, tables, captions, IDs, statistics).
|
You have METADATA (filename, format, size, mimeType) and STRUCTURE SUMMARY (if available: sections, tables, captions, IDs, statistics).
|
||||||
|
|
||||||
VALIDATION PRINCIPLES:
|
VALIDATION PRINCIPLES:
|
||||||
1. Format compatibility: Match delivered format to expected format
|
1. EVIDENCE-BASED VALIDATION (CRITICAL): Claims must match structure evidence. If structure shows different values than claimed, trust the structure evidence, not claims.
|
||||||
2. Structure validation: Use structure summary to verify requirements (section count, table captions, IDs, section types, etc.)
|
2. INDEPENDENT CRITERIA EVALUATION (CRITICAL): For criteriaMapping reason field - address ONLY the specific criterion requirement. Do not mention other criteria or other issues.
|
||||||
3. Filename appropriateness: Check if filename suggests correct content type
|
3. PRIORITY: Missing data > Formatting issues. Always prioritize data completeness over format correctness.
|
||||||
4. Document count: Verify number matches expectations
|
4. Structure validation: Use structure summary (statistics, counts, structure metadata) as PRIMARY evidence. Compare with task requirements.
|
||||||
5. Size sanity: Only flag if clearly wrong (<1KB for complex content or suspiciously large)
|
5. Discrepancy detection: If task requires specific quantities/amounts but structure shows different values, classify as missing_data or incomplete_data, not success.
|
||||||
|
6. Format compatibility: Match delivered format to expected format (secondary priority after data completeness)
|
||||||
|
7. Filename appropriateness: Check if filename suggests correct content type
|
||||||
|
8. Document count: Verify number matches expectations
|
||||||
|
|
||||||
LIMITATIONS:
|
LIMITATIONS:
|
||||||
- Cannot validate: Content accuracy, data correctness, formatting details, or requirements requiring full content reading
|
- Cannot validate: Content accuracy, data correctness, formatting details, or requirements requiring full content reading
|
||||||
- If structure summary unavailable, validate only metadata (format, filename, count, size)
|
- If structure summary unavailable, validate only metadata (format, filename, count, size)
|
||||||
|
|
||||||
SCORING GUIDELINES:
|
SCORING GUIDELINES:
|
||||||
- Format matches + reasonable structure → qualityScore: 0.8-1.0
|
- Data complete + format matches + structure matches requirements → qualityScore: 0.9-1.0
|
||||||
- Format matches but structure issues → qualityScore: 0.7-0.8
|
- Data complete but format/structure issues → qualityScore: 0.7-0.9
|
||||||
|
- Missing/incomplete data (even if format correct) → qualityScore: <0.7
|
||||||
|
- Claims don't match structure evidence → qualityScore: <0.6 (trust structure, not claims)
|
||||||
- Format mismatch → qualityScore: <0.7
|
- Format mismatch → qualityScore: <0.7
|
||||||
- Only suggest improvements for CLEAR metadata/structure issues
|
- Only suggest improvements for CLEAR metadata/structure issues
|
||||||
|
|
||||||
OUTPUT FORMAT (JSON only):
|
VALIDATION LOGIC:
|
||||||
|
- If structure shows fewer quantities/amounts than required → gapType: missing_data or incomplete_data
|
||||||
|
- If structure shows wrong organization but correct quantity → gapType: wrong_structure
|
||||||
|
- If structure matches requirements but format wrong → gapType: wrong_format
|
||||||
|
- If claims say "X delivered" but structure shows "Y" (Y < X) → overallSuccess: false, gapType: missing_data
|
||||||
|
- Always trust structure statistics over any claims or descriptions
|
||||||
|
|
||||||
|
IMPROVEMENT SUGGESTIONS PRIORITY (CRITICAL):
|
||||||
|
- Order by CRITERIA PRIORITY first, then gapType priority: missing_data > incomplete_data > wrong_structure > wrong_format
|
||||||
|
- [0] MUST address the HIGHEST PRIORITY unmet criterion (check criteriaMapping for which criteria are unmet)
|
||||||
|
- If multiple criteria are unmet, prioritize by: data completeness > structure > format
|
||||||
|
- gapType indicates the PRIMARY issue, but improvement suggestions must prioritize based on unmet criteria order
|
||||||
|
|
||||||
|
=== OUTPUT FORMAT (JSON TEMPLATE) ===
|
||||||
{{
|
{{
|
||||||
"overallSuccess": false,
|
"overallSuccess": false,
|
||||||
"qualityScore": 0.0,
|
"qualityScore": 0.0,
|
||||||
"dataTypeMatch": false,
|
"dataTypeMatch": false,
|
||||||
"formatMatch": false,
|
"formatMatch": false,
|
||||||
"documentCount": {len(documents)},
|
"documentCount": {len(documents)},
|
||||||
"successCriteriaMet": {criteriaMetExample},
|
"criteriaMapping": [
|
||||||
|
{{
|
||||||
|
"index": 0,
|
||||||
|
"criterion": "exact_criterion_text_from_data_section",
|
||||||
|
"met": false,
|
||||||
|
"reason": "explanation_about_this_criterion_based_on_structure_evidence"
|
||||||
|
}}
|
||||||
|
],
|
||||||
"gapAnalysis": "Brief description of gaps based on metadata/structure only. If validation is limited, state this clearly.",
|
"gapAnalysis": "Brief description of gaps based on metadata/structure only. If validation is limited, state this clearly.",
|
||||||
|
"gapType": "missing_data" | "wrong_structure" | "wrong_format" | "incomplete_data" | "no_gap",
|
||||||
|
"structureComparison": {{
|
||||||
|
"required": {{}},
|
||||||
|
"found": {{}},
|
||||||
|
"gap": {{}}
|
||||||
|
}},
|
||||||
"improvementSuggestions": [],
|
"improvementSuggestions": [],
|
||||||
"validationDetails": [
|
"validationDetails": [
|
||||||
{{
|
{{
|
||||||
|
|
@ -482,6 +526,15 @@ OUTPUT FORMAT (JSON only):
|
||||||
]
|
]
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
OUTPUT FORMAT NOTES:
|
||||||
|
- criteriaMapping reason: Address ONLY the specific criterion requirement.
|
||||||
|
- improvementSuggestions: [0] = highest priority unmet criterion from criteriaMapping. Order: unmet criteria by index first (data completeness > structure > format), then by gapType priority.
|
||||||
|
|
||||||
|
=== DATA ===
|
||||||
|
|
||||||
|
SUCCESS CRITERIA TO VALIDATE in criteriaMapping array:
|
||||||
|
{criteriaDisplay}
|
||||||
|
|
||||||
DELIVERED DOCUMENTS ({len(documents)} items):
|
DELIVERED DOCUMENTS ({len(documents)} items):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
@ -522,7 +575,6 @@ DELIVERED DOCUMENTS ({len(documents)} items):
|
||||||
|
|
||||||
# Proactively fix Python-style booleans (False/True -> false/true) BEFORE parsing
|
# Proactively fix Python-style booleans (False/True -> false/true) BEFORE parsing
|
||||||
# This handles booleans in any context: standalone, in lists, in dicts, etc.
|
# This handles booleans in any context: standalone, in lists, in dicts, etc.
|
||||||
import re
|
|
||||||
# Use word boundaries but also handle cases where booleans are in brackets/arrays
|
# Use word boundaries but also handle cases where booleans are in brackets/arrays
|
||||||
# Replace False/True regardless of context (word boundary handles string matching correctly)
|
# Replace False/True regardless of context (word boundary handles string matching correctly)
|
||||||
normalizedJson = re.sub(r'\bFalse\b', 'false', extractedJson)
|
normalizedJson = re.sub(r'\bFalse\b', 'false', extractedJson)
|
||||||
|
|
@ -544,8 +596,10 @@ DELIVERED DOCUMENTS ({len(documents)} items):
|
||||||
quality = aiResult.get("qualityScore")
|
quality = aiResult.get("qualityScore")
|
||||||
details = aiResult.get("validationDetails")
|
details = aiResult.get("validationDetails")
|
||||||
gap = aiResult.get("gapAnalysis", "")
|
gap = aiResult.get("gapAnalysis", "")
|
||||||
criteria = aiResult.get("successCriteriaMet")
|
|
||||||
improvements = aiResult.get("improvementSuggestions", [])
|
improvements = aiResult.get("improvementSuggestions", [])
|
||||||
|
gap_type = aiResult.get("gapType", "")
|
||||||
|
structure_comp = aiResult.get("structureComparison", {})
|
||||||
|
criteria_mapping = aiResult.get("criteriaMapping", [])
|
||||||
|
|
||||||
# Normalize while keeping failures explicit
|
# Normalize while keeping failures explicit
|
||||||
normalized = {
|
normalized = {
|
||||||
|
|
@ -553,10 +607,12 @@ DELIVERED DOCUMENTS ({len(documents)} items):
|
||||||
"qualityScore": float(quality) if isinstance(quality, (int, float)) else None,
|
"qualityScore": float(quality) if isinstance(quality, (int, float)) else None,
|
||||||
"documentCount": len(documentSummaries),
|
"documentCount": len(documentSummaries),
|
||||||
"gapAnalysis": gap if gap else "",
|
"gapAnalysis": gap if gap else "",
|
||||||
|
"gapType": gap_type if gap_type else "",
|
||||||
|
"structureComparison": structure_comp if structure_comp else {},
|
||||||
|
"criteriaMapping": criteria_mapping if isinstance(criteria_mapping, list) else [],
|
||||||
"validationDetails": details if isinstance(details, list) else [{
|
"validationDetails": details if isinstance(details, list) else [{
|
||||||
"documentName": "AI Validation",
|
"documentName": "AI Validation",
|
||||||
"gapAnalysis": gap,
|
"gapAnalysis": gap
|
||||||
"successCriteriaMet": criteria if isinstance(criteria, list) else []
|
|
||||||
}],
|
}],
|
||||||
"improvementSuggestions": improvements,
|
"improvementSuggestions": improvements,
|
||||||
"schemaCompliant": True,
|
"schemaCompliant": True,
|
||||||
|
|
@ -585,7 +641,7 @@ DELIVERED DOCUMENTS ({len(documents)} items):
|
||||||
"dataTypeMatch": False,
|
"dataTypeMatch": False,
|
||||||
"formatMatch": False,
|
"formatMatch": False,
|
||||||
"documentCount": 0,
|
"documentCount": 0,
|
||||||
"successCriteriaMet": [],
|
"criteriaMapping": [],
|
||||||
"gapAnalysis": errorMessage,
|
"gapAnalysis": errorMessage,
|
||||||
"improvementSuggestions": [],
|
"improvementSuggestions": [],
|
||||||
"validationDetails": [],
|
"validationDetails": [],
|
||||||
|
|
|
||||||
|
|
@ -133,8 +133,10 @@ class DynamicMode(BaseMode):
|
||||||
# Pass ALL documents to validator - validator decides what to validate (generic approach)
|
# Pass ALL documents to validator - validator decides what to validate (generic approach)
|
||||||
# Pass taskStep so validator can use task.objective and format fields
|
# Pass taskStep so validator can use task.objective and format fields
|
||||||
# Pass action name so validator knows which action created the documents
|
# Pass action name so validator knows which action created the documents
|
||||||
|
# Pass action parameters so validator can verify parameter-specific requirements
|
||||||
actionName = selection.get('action', 'unknown')
|
actionName = selection.get('action', 'unknown')
|
||||||
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName)
|
actionParameters = selection.get('parameters', {})
|
||||||
|
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName, actionParameters)
|
||||||
observation.contentValidation = validationResult
|
observation.contentValidation = validationResult
|
||||||
quality_score = validationResult.get('qualityScore', 0.0)
|
quality_score = validationResult.get('qualityScore', 0.0)
|
||||||
if quality_score is None:
|
if quality_score is None:
|
||||||
|
|
@ -807,9 +809,9 @@ class DynamicMode(BaseMode):
|
||||||
'documentsCount': observation.documentsCount,
|
'documentsCount': observation.documentsCount,
|
||||||
'previews': [p.model_dump(exclude_none=True) if hasattr(p, 'model_dump') else p.dict() for p in observation.previews] if observation.previews else [],
|
'previews': [p.model_dump(exclude_none=True) if hasattr(p, 'model_dump') else p.dict() for p in observation.previews] if observation.previews else [],
|
||||||
'notes': observation.notes,
|
'notes': observation.notes,
|
||||||
'contentValidation': observation.contentValidation if observation.contentValidation else {},
|
|
||||||
'contentAnalysis': observation.contentAnalysis if observation.contentAnalysis else {}
|
'contentAnalysis': observation.contentAnalysis if observation.contentAnalysis else {}
|
||||||
}
|
}
|
||||||
|
# Note: contentValidation is shown separately in CONTENT VALIDATION section, not duplicated here
|
||||||
reviewContext = ReviewContext(
|
reviewContext = ReviewContext(
|
||||||
taskStep=context.taskStep,
|
taskStep=context.taskStep,
|
||||||
taskActions=[],
|
taskActions=[],
|
||||||
|
|
@ -822,21 +824,36 @@ class DynamicMode(BaseMode):
|
||||||
baseReviewContent = extractReviewContent(reviewContext)
|
baseReviewContent = extractReviewContent(reviewContext)
|
||||||
placeholders = {"REVIEW_CONTENT": baseReviewContent}
|
placeholders = {"REVIEW_CONTENT": baseReviewContent}
|
||||||
|
|
||||||
# NEW: Add content validation to review content
|
# NEW: Add content validation to review content - extract separately for prominence
|
||||||
enhancedReviewContent = placeholders.get("REVIEW_CONTENT", "")
|
baseReviewContent = placeholders.get("REVIEW_CONTENT", "")
|
||||||
|
# Add observation title if there's content
|
||||||
|
if baseReviewContent.strip():
|
||||||
|
baseReviewContent = f"=== OBSERVATION ===\n{baseReviewContent}"
|
||||||
|
contentValidationSection = ""
|
||||||
if observation.contentValidation:
|
if observation.contentValidation:
|
||||||
validation = observation.contentValidation
|
validation = observation.contentValidation
|
||||||
enhancedReviewContent += f"\n\nCONTENT VALIDATION:\n"
|
contentValidationSection += f"\n=== CONTENT VALIDATION ===\n"
|
||||||
enhancedReviewContent += f"Overall Success: {validation.get('overallSuccess', False)}\n"
|
gap_type = validation.get('gapType', '')
|
||||||
|
if gap_type:
|
||||||
|
contentValidationSection += f"Gap Type: {gap_type}\n"
|
||||||
|
contentValidationSection += f"Overall Success: {validation.get('overallSuccess', False)}\n"
|
||||||
quality_score = validation.get('qualityScore', 0.0)
|
quality_score = validation.get('qualityScore', 0.0)
|
||||||
if quality_score is None:
|
if quality_score is None:
|
||||||
quality_score = 0.0
|
quality_score = 0.0
|
||||||
enhancedReviewContent += f"Quality Score: {quality_score:.2f}\n"
|
contentValidationSection += f"Quality Score: {quality_score:.2f}\n"
|
||||||
gap_analysis = validation.get('gapAnalysis', '')
|
gap_analysis = validation.get('gapAnalysis', '')
|
||||||
if gap_analysis:
|
if gap_analysis:
|
||||||
enhancedReviewContent += f"Gap Analysis: {gap_analysis}\n"
|
contentValidationSection += f"Gap Analysis: {gap_analysis}\n"
|
||||||
|
structure_comparison = validation.get('structureComparison', {})
|
||||||
|
if structure_comparison:
|
||||||
|
contentValidationSection += f"Structure Comparison: {json.dumps(structure_comparison, indent=2, ensure_ascii=False)}\n"
|
||||||
if validation.get('improvementSuggestions'):
|
if validation.get('improvementSuggestions'):
|
||||||
enhancedReviewContent += f"Improvement Suggestions: {', '.join(validation['improvementSuggestions'])}\n"
|
suggestions = validation['improvementSuggestions']
|
||||||
|
contentValidationSection += f"Next Actions (in sequence):\n"
|
||||||
|
for i, suggestion in enumerate(suggestions):
|
||||||
|
contentValidationSection += f" [{i}] {suggestion}\n"
|
||||||
|
|
||||||
|
enhancedReviewContent = baseReviewContent + contentValidationSection
|
||||||
|
|
||||||
# NEW: Add content analysis to review content
|
# NEW: Add content analysis to review content
|
||||||
if observation.contentAnalysis:
|
if observation.contentAnalysis:
|
||||||
|
|
@ -854,9 +871,41 @@ class DynamicMode(BaseMode):
|
||||||
enhancedReviewContent += f"Partial Achievements: {len(progressState['partialAchievements'])}\n"
|
enhancedReviewContent += f"Partial Achievements: {len(progressState['partialAchievements'])}\n"
|
||||||
enhancedReviewContent += f"Failed Attempts: {len(progressState['failedAttempts'])}\n"
|
enhancedReviewContent += f"Failed Attempts: {len(progressState['failedAttempts'])}\n"
|
||||||
enhancedReviewContent += f"Current Phase: {progressState['currentPhase']}\n"
|
enhancedReviewContent += f"Current Phase: {progressState['currentPhase']}\n"
|
||||||
if progressState['nextActionsSuggested']:
|
# Use content validation priorities if available, otherwise fall back to progress tracker suggestions
|
||||||
|
if observation.contentValidation and observation.contentValidation.get('improvementSuggestions'):
|
||||||
|
# Content validation already shown above, no need to repeat
|
||||||
|
pass
|
||||||
|
elif progressState['nextActionsSuggested']:
|
||||||
enhancedReviewContent += f"Next Action Suggestions: {', '.join(progressState['nextActionsSuggested'])}\n"
|
enhancedReviewContent += f"Next Action Suggestions: {', '.join(progressState['nextActionsSuggested'])}\n"
|
||||||
|
|
||||||
|
# NEW: Add action history to review content
|
||||||
|
if hasattr(context, 'previousReviewResult') and context.previousReviewResult:
|
||||||
|
actionHistory = []
|
||||||
|
for i, prevDecision in enumerate(context.previousReviewResult, 1):
|
||||||
|
if prevDecision and hasattr(prevDecision, 'nextAction') and prevDecision.nextAction:
|
||||||
|
action = prevDecision.nextAction
|
||||||
|
params = getattr(prevDecision, 'nextActionParameters', {}) or {}
|
||||||
|
# Filter out documentList for clarity
|
||||||
|
relevantParams = {k: v for k, v in params.items() if k not in ['documentList', 'connections']}
|
||||||
|
paramsStr = json.dumps(relevantParams, ensure_ascii=False) if relevantParams else "{}"
|
||||||
|
quality = getattr(prevDecision, 'qualityScore', None)
|
||||||
|
qualityStr = f" (quality: {quality:.2f})" if quality is not None else ""
|
||||||
|
actionHistory.append(f"Round {i}: {action} {paramsStr}{qualityStr}")
|
||||||
|
|
||||||
|
if actionHistory:
|
||||||
|
enhancedReviewContent += f"\nACTION HISTORY:\n"
|
||||||
|
enhancedReviewContent += "\n".join(f"- {entry}" for entry in actionHistory)
|
||||||
|
# Detect repeated actions
|
||||||
|
actionCounts = {}
|
||||||
|
for entry in actionHistory:
|
||||||
|
# Extract action name (before first space or {)
|
||||||
|
# Each entry has the form "Round <n>: <action> <params>[ (quality: x.xx)]",
# so token 0 is "Round", token 1 is "<n>:" and the action name is token 2.
# Using token 1 would key the counts by round number, which is always unique,
# and the repeated-action warning below would never trigger.
entryTokens = entry.split()
actionName = entryTokens[2] if len(entryTokens) > 2 else "unknown"
|
||||||
|
actionCounts[actionName] = actionCounts.get(actionName, 0) + 1
|
||||||
|
|
||||||
|
repeatedActions = [action for action, count in actionCounts.items() if count >= 2]
|
||||||
|
if repeatedActions:
|
||||||
|
enhancedReviewContent += f"\nWARNING: Repeated actions detected: {', '.join(repeatedActions)}. Consider a fundamentally different approach.\n"
|
||||||
|
|
||||||
# Update placeholders with enhanced review content
|
# Update placeholders with enhanced review content
|
||||||
placeholders["REVIEW_CONTENT"] = enhancedReviewContent
|
placeholders["REVIEW_CONTENT"] = enhancedReviewContent
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -323,21 +323,22 @@ def generateDynamicRefinementPrompt(services, context: Any, reviewContent: str)
|
||||||
ACTIONS: {{KEY:AVAILABLE_METHODS}}
|
ACTIONS: {{KEY:AVAILABLE_METHODS}}
|
||||||
DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
|
DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
|
||||||
|
|
||||||
=== OBSERVATION ===
|
|
||||||
{{KEY:REVIEW_CONTENT}}
|
{{KEY:REVIEW_CONTENT}}
|
||||||
|
|
||||||
|
=== NEXT ACTIONS ===
|
||||||
|
Follow the improvement suggestions from CONTENT VALIDATION in priority order. Each suggestion indicates what action to take next.
|
||||||
|
|
||||||
=== OUTPUT FORMAT ===
|
=== OUTPUT FORMAT ===
|
||||||
{{
|
{{
|
||||||
"status": "continue",
|
"status": "continue",
|
||||||
"reason": "Brief reason",
|
"reason": "Brief reason explaining why continuing",
|
||||||
"nextAction": "ai.convert",
|
"nextAction": "Selected_action_from_ACTIONS",
|
||||||
"nextActionParameters": {{
|
"nextActionParameters": {{
|
||||||
"documentList": ["docItem:..."],
|
"documentList": ["docItem:reference_from_DOCUMENTS"],
|
||||||
"inputFormat": "json",
|
"parameter1": "value1",
|
||||||
"outputFormat": "csv",
|
"parameter2": "value2"
|
||||||
"columnsPerRow": 10
|
|
||||||
}},
|
}},
|
||||||
"nextActionObjective": "Convert JSON to CSV with 10 columns per row"
|
"nextActionObjective": "Clear description of what this action will achieve based on improvement suggestions"
|
||||||
}}
|
}}
|
||||||
|
|
||||||
=== RULES ===
|
=== RULES ===
|
||||||
|
|
@ -345,9 +346,10 @@ DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
|
||||||
- nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent)
|
- nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent)
|
||||||
- nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names)
|
- nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names)
|
||||||
- documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent)
|
- documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent)
|
||||||
- nextActionObjective: describe what this action will achieve
|
- nextActionObjective: describe what this action will achieve based on the FIRST improvement suggestion from CONTENT VALIDATION
|
||||||
- Do NOT repeat failed actions - suggest DIFFERENT approach
|
- Do NOT repeat failed actions - suggest DIFFERENT approach
|
||||||
- Use improvement suggestions from content validation
|
- If ACTION HISTORY shows repeated actions, suggest a fundamentally different approach
|
||||||
|
- nextActionObjective must directly address the highest priority improvement suggestion from CONTENT VALIDATION
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue