gateway/modules/services/serviceAi/mainServiceAi.py
2025-11-28 16:57:53 +01:00

1419 lines
67 KiB
Python

import json
import logging
import re
import time
from typing import Dict, Any, List, Optional, Tuple
from modules.datamodels.datamodelChat import PromptPlaceholder
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.datamodels.datamodelExtraction import ContentPart
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
from modules.interfaces.interfaceAiObjects import AiObjects
from modules.shared.jsonUtils import (
extractJsonString,
repairBrokenJson,
extractSectionsFromDocument,
buildContinuationContext,
parseJsonWithModel
)
logger = logging.getLogger(__name__)
# Rebuild the Pydantic model so that forward references (string annotations)
# inside AiCallRequest are resolved now that all referenced models above
# have been imported.
AiCallRequest.model_rebuild()
class AiService:
"""AI service with core operations integrated."""
def __init__(self, serviceCenter=None) -> None:
"""Initialize AI service with service center access.
Args:
serviceCenter: Service center instance for accessing other services
"""
self.services = serviceCenter
# Only depend on interfaces
self.aiObjects = None # Will be initialized in create() or ensureAiObjectsInitialized()
# Submodules initialized as None - will be set in _initializeSubmodules() after aiObjects is ready
self.extractionService = None
def _initializeSubmodules(self):
"""Initialize all submodules after aiObjects is ready."""
if self.aiObjects is None:
raise RuntimeError("aiObjects must be initialized before initializing submodules")
if self.extractionService is None:
logger.info("Initializing ExtractionService...")
self.extractionService = ExtractionService(self.services)
async def ensureAiObjectsInitialized(self):
"""Ensure aiObjects is initialized and submodules are ready."""
if self.aiObjects is None:
logger.info("Lazy initializing AiObjects...")
self.aiObjects = await AiObjects.create()
logger.info("AiObjects initialization completed")
# Initialize submodules after aiObjects is ready
self._initializeSubmodules()
@classmethod
async def create(cls, serviceCenter=None) -> "AiService":
    """Async factory: build an AiService with connectors and submodules ready.

    Args:
        serviceCenter: Service center instance forwarded to __init__.

    Returns:
        A fully initialized AiService instance.
    """
    logger.info("AiService.create() called")
    service = cls(serviceCenter)
    logger.info("AiService created, about to call AiObjects.create()...")
    # aiObjects must exist before any submodule wiring can happen.
    service.aiObjects = await AiObjects.create()
    logger.info("AiObjects.create() completed")
    service._initializeSubmodules()
    logger.info("AiService submodules initialized")
    return service
# Helper methods
def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
"""
Build full prompt by replacing placeholders with their content.
Uses the new {{KEY:placeholder}} format.
Args:
prompt: The base prompt template
placeholders: Dictionary of placeholder key-value pairs
Returns:
Prompt with placeholders replaced
"""
if not placeholders:
return prompt
full_prompt = prompt
for placeholder, content in placeholders.items():
# Skip if content is None or empty
if content is None:
continue
# Replace {{KEY:placeholder}}
full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content))
return full_prompt
async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions:
    """Use the AI itself to pick suitable AiCallOptions for a prompt.

    Sends a small classification request and parses the answer into an
    AiCallOptions instance. Any failure (call or parsing) falls back to
    conservative defaults.
    """
    try:
        # Pull the allowed values straight from the enums so the instruction
        # text can never drift from the actual models.
        opTypeValues = [e.value for e in OperationTypeEnum]
        priorityValues = [e.value for e in PriorityEnum]
        modeValues = [e.value for e in ProcessingModeEnum]
        analysisPrompt = f"""
You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters.
PROMPT TO ANALYZE:
{self.services.utils.sanitizePromptContent(prompt, 'userinput')}
Based on the prompt content, determine:
1. operationType: Choose the most appropriate from: {', '.join(opTypeValues)}
2. priority: Choose from: {', '.join(priorityValues)}
3. processingMode: Choose from: {', '.join(modeValues)}
4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas)
5. compressContext: true/false (true to summarize context, false to process fully)
Respond with ONLY a JSON object in this exact format:
{{
"operationType": "dataAnalyse",
"priority": "balanced",
"processingMode": "basic",
"compressPrompt": true,
"compressContext": true
}}
"""
        analysisRequest = AiCallRequest(
            prompt=analysisPrompt,
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
                priority=PriorityEnum.SPEED,
                processingMode=ProcessingModeEnum.BASIC,
                compressPrompt=True,
                compressContext=False
            )
        )
        response = await self.aiObjects.call(analysisRequest)
        try:
            # parseJsonWithModel converts enum strings back to enum members.
            return parseJsonWithModel(response.content, AiCallOptions)
        except Exception as e:
            logger.warning(f"Failed to parse AI analysis response: {e}")
    except Exception as e:
        logger.warning(f"Prompt analysis failed: {e}")
    # Fallback: conservative defaults when analysis could not be completed.
    return AiCallOptions(
        operationType=OperationTypeEnum.DATA_ANALYSE,
        priority=PriorityEnum.BALANCED,
        processingMode=ProcessingModeEnum.BASIC
    )
async def _callAiWithLooping(
    self,
    prompt: str,
    options: AiCallOptions,
    debugPrefix: str = "ai_call",
    promptBuilder: Optional[callable] = None,
    promptArgs: Optional[Dict[str, Any]] = None,
    operationId: Optional[str] = None,
    userPrompt: Optional[str] = None
) -> str:
    """
    Shared core function for AI calls with a repair-based looping system.

    Repeatedly calls the AI, repairs/merges broken JSON responses, and
    rebuilds continuation prompts until the JSON document is complete (or an
    error/empty response/iteration cap stops the loop). Text-mode calls
    (no promptBuilder/promptArgs, or debugPrefix == "text") return the first
    successful response without any JSON handling.

    Args:
        prompt: The prompt to send to AI on the first iteration.
        options: AI call configuration options.
        debugPrefix: Prefix for debug file names.
        promptBuilder: Optional async function to rebuild prompts for continuation.
        promptArgs: Optional arguments for the prompt builder.
        operationId: Optional operation ID for progress tracking.
        userPrompt: NOTE(review): not referenced anywhere in this method body -
            presumably consumed via promptArgs by callers; confirm intended use.

    Returns:
        Complete AI response after all iterations (assembled from sections),
        or the raw text for text-mode calls.
    """
    maxIterations = 50  # Hard cap to prevent infinite generation loops
    iteration = 0
    allSections = []  # Sections accumulated across all iterations
    lastRawResponse = None  # Last raw JSON response, kept for continuation prompts
    documentMetadata = None  # Document metadata (title, filename) from first iteration
    # Resolve the parent log entry so per-iteration operations nest under it
    parentLogId = None
    if operationId:
        parentLogId = self.services.chat.getOperationLogId(operationId)
    while iteration < maxIterations:
        iteration += 1
        # Each iteration gets its own progress operation, parented to the main one
        iterationOperationId = None
        if operationId:
            iterationOperationId = f"{operationId}_iter_{iteration}"
            self.services.chat.progressLogStart(
                iterationOperationId,
                "AI Call",
                f"Iteration {iteration}",
                "",
                parentId=parentLogId
            )
        # Build the prompt for this iteration.
        # CRITICAL: a continuation prompt is built when we have sections OR any
        # previous response (even a broken one) - this way continuation still
        # happens when the JSON was too broken to yield sections.
        if (len(allSections) > 0 or lastRawResponse) and promptBuilder and promptArgs:
            # Continuation - build continuation context from the raw JSON and rebuild the prompt
            continuationContext = buildContinuationContext(allSections, lastRawResponse)
            if not lastRawResponse:
                logger.warning(f"Iteration {iteration}: No previous response available for continuation!")
            # Restrict promptArgs to the parameters buildGenerationPrompt accepts:
            # outputFormat, userPrompt, title, extracted_content (continuationContext is added below)
            filteredPromptArgs = {
                k: v for k, v in promptArgs.items()
                if k in ['outputFormat', 'userPrompt', 'title', 'extracted_content']
            }
            iterationPrompt = await promptBuilder(**filteredPromptArgs, continuationContext=continuationContext)
        else:
            # First iteration - use the original prompt unchanged
            iterationPrompt = prompt
        try:
            if iterationOperationId:
                self.services.chat.progressLogUpdate(iterationOperationId, 0.3, "Calling AI model")
            request = AiCallRequest(
                prompt=iterationPrompt,
                context="",
                options=options
            )
            # Persist the ACTUAL prompt sent to the AI for debugging
            if iteration == 1:
                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
            else:
                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
            response = await self.aiObjects.call(request)
            result = response.content
            if iterationOperationId:
                self.services.chat.progressLogUpdate(iterationOperationId, 0.6, "AI response received")
            # Persist the raw AI response for debugging
            if iteration == 1:
                self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
            else:
                self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
            # Record per-iteration stats (only when a workflow with an id exists)
            if self.services.workflow and hasattr(self.services.workflow, 'id') and self.services.workflow.id:
                try:
                    self.services.chat.storeWorkflowStat(
                        self.services.workflow,
                        response,
                        f"ai.call.{debugPrefix}.iteration_{iteration}"
                    )
                except Exception as statError:
                    # Stat storage is best-effort; never break the main loop for it
                    logger.warning(f"Failed to store workflow stat: {str(statError)}")
            # Generic error detection: errorCount > 0 or modelName == "error" aborts the loop
            if hasattr(response, 'errorCount') and response.errorCount > 0:
                errorMsg = f"Iteration {iteration}: Error response detected (errorCount={response.errorCount}), stopping loop: {result[:200] if result else 'empty'}"
                logger.error(errorMsg)
                break
            if hasattr(response, 'modelName') and response.modelName == "error":
                errorMsg = f"Iteration {iteration}: Error response detected (modelName=error), stopping loop: {result[:200] if result else 'empty'}"
                logger.error(errorMsg)
                break
            if not result or not result.strip():
                logger.warning(f"Iteration {iteration}: Empty response, stopping")
                break
            # Text responses (no document generation) need no JSON parsing and
            # are returned immediately after the first successful response
            isTextResponse = (promptBuilder is None and promptArgs is None) or debugPrefix == "text"
            if isTextResponse:
                logger.info(f"Iteration {iteration}: Text response received, returning immediately")
                if iterationOperationId:
                    self.services.chat.progressLogFinish(iterationOperationId, True)
                return result
            # Remember the raw response for continuation, even if it is broken JSON
            lastRawResponse = result
            # Extract sections from the response (handles both valid and broken JSON)
            extractedSections, wasJsonComplete, parsedResult = self._extractSectionsFromResponse(result, iteration, debugPrefix)
            # Capture document metadata once, from the first parseable result
            if iteration == 1 and parsedResult and not documentMetadata:
                documentMetadata = self._extractDocumentMetadata(parsedResult)
            if iterationOperationId:
                if extractedSections:
                    self.services.chat.progressLogUpdate(iterationOperationId, 0.8, f"Extracted {len(extractedSections)} sections")
            if not extractedSections:
                # CRITICAL: broken/incomplete JSON with no sections -> loop again
                # so the AI can retry and complete the broken JSON
                if not wasJsonComplete:
                    logger.warning(f"Iteration {iteration}: No sections extracted from broken JSON, continuing for another attempt")
                    continue
                # Complete JSON but no sections - a real failure, stop here
                logger.warning(f"Iteration {iteration}: No sections extracted from complete JSON, stopping")
                break
            # Merge new sections into the accumulated set. This is the STANDARD
            # broken-JSON case: a generation break can occur in any section, at
            # any depth, and the pieces must be stitched back together.
            allSections = self._mergeSectionsIntelligently(allSections, extractedSections, iteration)
            # Continuation decision: JSON completeness drives whether we loop again
            shouldContinue = self._shouldContinueGeneration(
                allSections,
                iteration,
                wasJsonComplete,
                result
            )
            if shouldContinue:
                # Finish this iteration's operation (next iteration follows)
                if iterationOperationId:
                    self.services.chat.progressLogFinish(iterationOperationId, True)
                continue
            else:
                # Done - close this iteration and update the main operation
                if iterationOperationId:
                    self.services.chat.progressLogFinish(iterationOperationId, True)
                if operationId:
                    self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)")
                logger.info(f"Generation complete after {iteration} iterations: {len(allSections)} sections")
                break
        except Exception as e:
            logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
            if iterationOperationId:
                self.services.chat.progressLogFinish(iterationOperationId, False)
            break
    if iteration >= maxIterations:
        logger.warning(f"AI call stopped after maximum iterations ({maxIterations})")
    # Assemble the final document from everything accumulated
    final_result = self._buildFinalResultFromSections(allSections, documentMetadata)
    self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result")
    return final_result
def _mergeSectionsIntelligently(
self,
existingSections: List[Dict[str, Any]],
newSections: List[Dict[str, Any]],
iteration: int
) -> List[Dict[str, Any]]:
"""
Intelligently merge sections from multiple iterations.
This is a GENERIC merging strategy that handles broken JSON iterations.
The break can occur anywhere - in any section, at any depth.
Merging strategies (in order of priority):
1. Same Section ID: Merge sections with identical IDs
2. Same Content-Type + Position: If last section is incomplete and new section continues it
3. Same Order: Merge sections with same order value
4. Structural Analysis: Detect continuation based on content structure
Args:
existingSections: Sections accumulated from previous iterations
newSections: Sections extracted from current iteration
iteration: Current iteration number
Returns:
Merged list of sections
"""
if not newSections:
return existingSections
if not existingSections:
return newSections
mergedSections = existingSections.copy()
for newSection in newSections:
merged = False
# Strategy 1: Same Section ID - merge directly
newSectionId = newSection.get("id")
if newSectionId:
for i, existingSection in enumerate(mergedSections):
if existingSection.get("id") == newSectionId:
# Merge sections with same ID
mergedSections[i] = self._mergeSectionContent(existingSection, newSection, iteration)
merged = True
logger.debug(f"Iteration {iteration}: Merged section by ID '{newSectionId}'")
break
if merged:
continue
# Strategy 2: Same Content-Type + Position (continuation detection)
# Check if last section is incomplete and new section continues it
if mergedSections:
lastSection = mergedSections[-1]
lastContentType = lastSection.get("content_type")
newContentType = newSection.get("content_type")
if lastContentType == newContentType:
# Same content type - check if last section is incomplete
if self._isSectionIncomplete(lastSection):
# Last section is incomplete, merge with new section
mergedSections[-1] = self._mergeSectionContent(lastSection, newSection, iteration)
merged = True
logger.debug(f"Iteration {iteration}: Merged section by content-type continuation ({lastContentType})")
continue
# Strategy 3: Same Order value
newOrder = newSection.get("order")
if newOrder is not None:
for i, existingSection in enumerate(mergedSections):
existingOrder = existingSection.get("order")
if existingOrder is not None and existingOrder == newOrder:
# Merge sections with same order
mergedSections[i] = self._mergeSectionContent(existingSection, newSection, iteration)
merged = True
logger.debug(f"Iteration {iteration}: Merged section by order {newOrder}")
break
if merged:
continue
# Strategy 4: Structural Analysis - detect continuation
# For code_block and table: if last section matches new section type, merge them
if mergedSections:
lastSection = mergedSections[-1]
lastContentType = lastSection.get("content_type")
newContentType = newSection.get("content_type")
# Both are code blocks - merge them
if lastContentType == "code_block" and newContentType == "code_block":
mergedSections[-1] = self._mergeSectionContent(lastSection, newSection, iteration)
merged = True
logger.debug(f"Iteration {iteration}: Merged code_block sections by structural analysis")
continue
# Both are tables - merge them (common case for broken JSON iterations)
if lastContentType == "table" and newContentType == "table":
mergedSections[-1] = self._mergeSectionContent(lastSection, newSection, iteration)
merged = True
logger.debug(f"Iteration {iteration}: Merged table sections by structural analysis")
continue
# No merge strategy matched - add as new section
if not merged:
mergedSections.append(newSection)
logger.debug(f"Iteration {iteration}: Added new section '{newSection.get('id', 'no-id')}' ({newSection.get('content_type', 'unknown')})")
return mergedSections
def _isSectionIncomplete(self, section: Dict[str, Any]) -> bool:
"""
Check if a section is incomplete (broken at the end).
This detects incomplete sections based on content analysis:
- Code blocks: ends mid-line, ends with comma, ends with incomplete structure
- Text sections: ends mid-sentence, ends with incomplete structure
- Other types: check for incomplete elements
"""
contentType = section.get("content_type", "")
elements = section.get("elements", [])
if not elements:
return False
# Handle list of elements
if isinstance(elements, list) and len(elements) > 0:
lastElement = elements[-1]
else:
lastElement = elements
if not isinstance(lastElement, dict):
return False
# Check code_block for incomplete code
if contentType == "code_block":
code = lastElement.get("code", "")
if code:
# Check if code ends incompletely:
# - Ends with comma (incomplete CSV line)
# - Ends with number but no newline (incomplete line)
# - Ends mid-token (e.g., "23431,23" - incomplete number)
codeStripped = code.rstrip()
if codeStripped:
# Check for incomplete patterns
if codeStripped.endswith(',') or (',' in codeStripped and not codeStripped.endswith('\n')):
# Ends with comma or has comma but no final newline - likely incomplete
return True
# Check if last line is incomplete (doesn't end with newline and has partial content)
if not code.endswith('\n') and codeStripped:
# No final newline - might be incomplete
# More sophisticated: check if last number is complete
lastLine = codeStripped.split('\n')[-1]
if lastLine and ',' in lastLine:
# Has commas but might be incomplete
parts = lastLine.split(',')
if parts and len(parts[-1]) < 5: # Last part is very short - might be incomplete
return True
# Check table for incomplete rows
if contentType == "table":
rows = lastElement.get("rows", [])
if rows:
# Check if last row is incomplete (ends with incomplete data)
lastRow = rows[-1] if isinstance(rows, list) else []
if isinstance(lastRow, list) and lastRow:
# Check if last row ends with incomplete data (e.g., incomplete string)
lastCell = lastRow[-1] if lastRow else ""
if isinstance(lastCell, str):
# If last cell is incomplete (ends with quote or is very short), section might be incomplete
if lastCell.endswith('"') or (len(lastCell) < 3 and lastCell):
return True
# Also check if last row doesn't have expected number of columns (if headers exist)
headers = lastElement.get("headers", [])
if headers and isinstance(headers, list):
expectedCols = len(headers)
if len(lastRow) < expectedCols:
return True
# Check paragraph/text for incomplete sentences
if contentType in ["paragraph", "heading"]:
text = lastElement.get("text", "")
if text:
# Simple heuristic: if doesn't end with sentence-ending punctuation
textStripped = text.rstrip()
if textStripped and not textStripped[-1] in '.!?':
# Might be incomplete, but this is less reliable
# Only mark as incomplete if very short (likely cut off)
if len(textStripped) < 20:
return True
# Check lists for incomplete items
if contentType in ["bullet_list", "numbered_list"]:
items = lastElement.get("items", [])
if items and isinstance(items, list):
# Check if last item is incomplete (very short or ends with incomplete string)
lastItem = items[-1] if items else None
if isinstance(lastItem, str) and len(lastItem) < 3:
return True
# Check if items array seems incomplete (e.g., expected count not reached)
# This is harder to detect without context, so we rely on other heuristics
# Check image for incomplete base64 data
if contentType == "image":
imageData = lastElement.get("base64Data", "")
if imageData:
# Base64 strings should end with padding ('=' or '==')
# If it doesn't, it might be incomplete
stripped = imageData.rstrip()
if stripped and not stripped.endswith(('=', '==')):
# Check if it's a valid base64 character sequence that was cut off
# Base64 uses A-Z, a-z, 0-9, +, /, and = for padding
if len(stripped) > 0 and stripped[-1] not in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=':
return True
# If length is not a multiple of 4 (base64 requirement), it might be incomplete
if len(stripped) % 4 != 0:
return True
# GENERIC CHECK: Look for incomplete structures in any element
# Check if element has arrays/lists that might be incomplete
for key, value in lastElement.items():
if isinstance(value, list) and len(value) > 0:
# Check last item in list
lastItem = value[-1]
if isinstance(lastItem, str):
# If last string item is very short, might be incomplete
if len(lastItem) < 3:
return True
elif isinstance(lastItem, dict):
# If last dict item has very few keys, might be incomplete
if len(lastItem) < 2:
return True
elif isinstance(value, str):
# Check if string ends abruptly (no punctuation, very short)
if len(value) > 0 and len(value) < 10 and not value[-1] in '.!?\n':
return True
return False
def _mergeSectionContent(
    self,
    existingSection: Dict[str, Any],
    newSection: Dict[str, Any],
    iteration: int
) -> Dict[str, Any]:
    """
    Merge the content of two sections that represent the same logical section.

    Mutates the LAST element of existingSection in place (the merge point of a
    generation break) and returns a shallow copy of the section dict carrying
    the merged elements.

    Per content type:
    - code_block: append code via _mergeCodeBlocks (overlap-aware)
    - paragraph/heading: append text, separated by a space or newline
    - table: append rows with exact-duplicate seam detection; keep
      headers/caption from whichever side has them
    - bullet_list/numbered_list: concatenate items
    - image: append base64 data when the existing payload looks truncated,
      otherwise replace it; keep altText/caption from whichever side has them
    - anything else: generic merge of list/dict/str fields

    Args:
        existingSection: Section accumulated so far (its last element is updated).
        newSection: Newly extracted section to fold in (its first element is used).
        iteration: Current iteration number, used only for debug logging.

    Returns:
        The merged section dict.
    """
    contentType = existingSection.get("content_type", "")
    existingElements = existingSection.get("elements", [])
    newElements = newSection.get("elements", [])
    if not newElements:
        # Nothing to merge in
        return existingSection
    # The merge happens between the LAST existing element and the FIRST new
    # one - that is where a generation break splits the content
    if isinstance(existingElements, list):
        existingElem = existingElements[-1] if existingElements else {}
    else:
        existingElem = existingElements
    if isinstance(newElements, list):
        newElem = newElements[0] if newElements else {}
    else:
        newElem = newElements
    if not isinstance(existingElem, dict) or not isinstance(newElem, dict):
        return existingSection
    # Merge based on content type
    if contentType == "code_block":
        existingCode = existingElem.get("code", "")
        newCode = newElem.get("code", "")
        if existingCode and newCode:
            mergedCode = self._mergeCodeBlocks(existingCode, newCode, iteration)
            existingElem["code"] = mergedCode
        # Preserve the language tag from whichever side has it
        if "language" not in existingElem and "language" in newElem:
            existingElem["language"] = newElem["language"]
    elif contentType in ["paragraph", "heading"]:
        existingText = existingElem.get("text", "")
        newText = newElem.get("text", "")
        if existingText and newText:
            # Join with a space if the existing text was cut mid-sentence,
            # otherwise start the new text on its own line
            if existingText.rstrip() and not existingText.rstrip()[-1] in '.!?\n':
                mergedText = existingText.rstrip() + " " + newText.lstrip()
            else:
                mergedText = existingText.rstrip() + "\n" + newText.lstrip()
            existingElem["text"] = mergedText
    elif contentType == "table":
        # Merge table rows with overlap detection
        existingRows = existingElem.get("rows", [])
        newRows = newElem.get("rows", [])
        if existingRows and newRows:
            # CRITICAL: drop an exact-duplicate seam row before concatenating
            if len(existingRows) > 0 and len(newRows) > 0:
                lastExistingRow = existingRows[-1]
                firstNewRow = newRows[0]
                # Compare rows (handle both list and tuple formats)
                if isinstance(lastExistingRow, (list, tuple)) and isinstance(firstNewRow, (list, tuple)):
                    if list(lastExistingRow) == list(firstNewRow):
                        newRows = newRows[1:]
                        logger.debug(f"Iteration {iteration}: Removed duplicate table row (exact match)")
            # Combine rows from both sections (after removing overlaps)
            existingElem["rows"] = existingRows + newRows
            logger.debug(f"Iteration {iteration}: Merged table rows - existing: {len(existingRows)}, new: {len(newRows)}, total: {len(existingRows) + len(newRows)}")
        elif newRows:
            # Existing has no rows but new does - take the new rows
            existingElem["rows"] = newRows
        # Keep headers/caption from the existing side, falling back to the new one
        if not existingElem.get("headers") and newElem.get("headers"):
            existingElem["headers"] = newElem["headers"]
        if not existingElem.get("caption") and newElem.get("caption"):
            existingElem["caption"] = newElem["caption"]
    elif contentType in ["bullet_list", "numbered_list"]:
        # Merge list items by concatenation
        existingItems = existingElem.get("items", [])
        newItems = newElem.get("items", [])
        if existingItems and newItems:
            existingElem["items"] = existingItems + newItems
    elif contentType == "image":
        existingImageData = existingElem.get("base64Data", "")
        newImageData = newElem.get("base64Data", "")
        if existingImageData and newImageData:
            # Missing '='/'==' padding suggests the existing base64 payload was
            # cut off, so the new chunk is appended rather than replacing it
            if not existingImageData.rstrip().endswith(('=', '==')):
                existingElem["base64Data"] = existingImageData + newImageData
                logger.debug(f"Iteration {iteration}: Merged incomplete image base64 data")
            else:
                # Existing image is complete - replace with the new payload
                if newImageData:
                    existingElem["base64Data"] = newImageData
        elif newImageData:
            existingElem["base64Data"] = newImageData
        # Keep altText/caption from the existing side, falling back to the new one
        if not existingElem.get("altText") and newElem.get("altText"):
            existingElem["altText"] = newElem["altText"]
        if not existingElem.get("caption") and newElem.get("caption"):
            existingElem["caption"] = newElem["caption"]
    else:
        # GENERIC FALLBACK for unknown content types:
        # first try to concatenate one common list-valued field...
        for key in ["items", "rows", "columns", "cells", "elements", "data", "content"]:
            if key in existingElem and key in newElem:
                existingValue = existingElem[key]
                newValue = newElem[key]
                if isinstance(existingValue, list) and isinstance(newValue, list):
                    existingElem[key] = existingValue + newValue
                    logger.debug(f"Iteration {iteration}: Merged generic list field '{key}' - existing: {len(existingValue)}, new: {len(newValue)}")
                    break
        # ...then fold any remaining fields from the new element into the
        # existing one (lists concatenate, dicts shallow-merge, strings append)
        for key, value in newElem.items():
            if key not in existingElem:
                # New field - add it
                existingElem[key] = value
            elif isinstance(existingElem[key], list) and isinstance(value, list):
                existingElem[key] = existingElem[key] + value
            elif isinstance(existingElem[key], dict) and isinstance(value, dict):
                # Shallow merge only
                existingElem[key].update(value)
            elif isinstance(existingElem[key], str) and isinstance(value, str):
                existingElem[key] = existingElem[key] + "\n" + value
    # Write the merged element back into a copy of the section
    mergedSection = existingSection.copy()
    if isinstance(existingElements, list):
        # Update the last element in the list with the merged content
        if existingElements:
            existingElements[-1] = existingElem
        mergedSection["elements"] = existingElements
    else:
        mergedSection["elements"] = existingElem
    # Preserve the order value from the new section if the existing one lacks it
    if "order" not in mergedSection and "order" in newSection:
        mergedSection["order"] = newSection["order"]
    return mergedSection
def _mergeCodeBlocks(self, existingCode: str, newCode: str, iteration: int) -> str:
"""
Merge two code blocks intelligently, handling overlaps and incomplete lines.
"""
if not existingCode:
return newCode
if not newCode:
return existingCode
existingLines = existingCode.rstrip().split('\n')
newLines = newCode.strip().split('\n')
if not existingLines or not newLines:
return existingCode + "\n" + newCode
lastExistingLine = existingLines[-1].strip()
firstNewLine = newLines[0].strip()
# Strategy 1: Exact overlap - remove duplicate line
if lastExistingLine == firstNewLine:
newLines = newLines[1:]
logger.debug(f"Iteration {iteration}: Removed exact duplicate line in code merge")
# Strategy 2: Incomplete line merge
# If last existing line ends with comma or is incomplete, merge with first new line
elif lastExistingLine.endswith(',') or (',' in lastExistingLine and len(lastExistingLine.split(',')[-1]) < 5):
# Last line is incomplete - merge with first new line
# Remove trailing comma from existing line
mergedLine = lastExistingLine.rstrip(',') + ',' + firstNewLine.lstrip()
existingLines[-1] = mergedLine
newLines = newLines[1:]
logger.debug(f"Iteration {iteration}: Merged incomplete line with continuation")
# Strategy 3: Partial overlap detection
# Check if first new line starts with the end of last existing line
elif ',' in lastExistingLine and ',' in firstNewLine:
lastExistingParts = lastExistingLine.split(',')
firstNewParts = firstNewLine.split(',')
# Check for overlap: if last part of existing matches first part of new
if lastExistingParts and firstNewParts:
lastExistingPart = lastExistingParts[-1].strip()
firstNewPart = firstNewParts[0].strip()
# If they match, there's overlap
if lastExistingPart == firstNewPart and len(lastExistingParts) > 1:
# Remove overlapping part from new line
newLines[0] = ','.join(firstNewParts[1:])
logger.debug(f"Iteration {iteration}: Removed partial overlap in code merge")
# Reconstruct merged code
mergedCode = '\n'.join(existingLines)
if newLines:
if mergedCode and not mergedCode.endswith('\n'):
mergedCode += '\n'
mergedCode += '\n'.join(newLines)
return mergedCode
def _extractSectionsFromResponse(
    self,
    result: str,
    iteration: int,
    debugPrefix: str
) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]]]:
    """Parse an AI response into sections, repairing broken JSON if needed.

    Completion is decided purely by whether the JSON parses - never by
    looking at the last character: a successful parse means the document is
    complete; a parse failure (even when repair recovers sections) means
    generation must continue.

    Returns:
        Tuple of (sections, wasJsonComplete, parsedResult).
    """
    try:
        candidate = extractJsonString(result)
        document = json.loads(candidate)  # success here == complete JSON
        sections = extractSectionsFromDocument(document)
        logger.info(f"Iteration {iteration}: JSON parsed successfully - marking as complete")
        return sections, True, document
    except json.JSONDecodeError:
        # Broken JSON is normal during iterative generation - try repair
        self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}")
        logger.info(f"Iteration {iteration}: JSON parsing failed (broken JSON), attempting repair")
        repairedDocument = repairBrokenJson(result)
        if not repairedDocument:
            # Repair failed entirely - return nothing but keep the loop alive
            logger.warning(f"Iteration {iteration}: All repair strategies failed, but continuing to allow retry")
            return [], False, None
        recovered = extractSectionsFromDocument(repairedDocument)
        # CRITICAL: the source JSON was broken, so flag as incomplete to
        # force the loop to continue fetching the rest of the content
        logger.info(f"Iteration {iteration}: JSON repaired, extracted {len(recovered)} sections, marking as incomplete to continue")
        return recovered, False, repairedDocument
    except Exception as e:
        logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
        return [], False, None
def _shouldContinueGeneration(
self,
allSections: List[Dict[str, Any]],
iteration: int,
wasJsonComplete: bool,
rawResponse: str = None
) -> bool:
"""
Determine if AI generation loop should continue.
CRITICAL: This is ONLY about AI Loop Completion, NOT Action DoD!
Action DoD is checked AFTER the AI Loop completes in _refineDecide.
Simple logic:
- If JSON parsing failed or incomplete → continue (needs more content)
- If JSON parses successfully and is complete → stop (all content delivered)
- Loop detection prevents infinite loops
CRITICAL: JSON completeness is determined by parsing, NOT by last character check!
Returns True if we should continue, False if AI Loop is done.
"""
if len(allSections) == 0:
return True # No sections yet, continue
# CRITERION 1: If JSON was incomplete/broken (parsing failed or incomplete) - continue to repair/complete
if not wasJsonComplete:
logger.info(f"Iteration {iteration}: JSON incomplete/broken - continuing to complete")
return True
# CRITERION 2: JSON is complete (parsed successfully) - check for loop detection
if self._isStuckInLoop(allSections, iteration):
logger.warning(f"Iteration {iteration}: Detected potential infinite loop - stopping AI loop")
return False
# JSON is complete and not stuck in loop - done
logger.info(f"Iteration {iteration}: JSON complete - AI loop done")
return False
def _isStuckInLoop(
self,
allSections: List[Dict[str, Any]],
iteration: int
) -> bool:
"""
Detect if we're stuck in a loop (same content being repeated).
Generic approach: Check if recent iterations are adding minimal or duplicate content.
"""
if iteration < 3:
return False # Need at least 3 iterations to detect a loop
if len(allSections) == 0:
return False
# Check if last section is very small (might be stuck)
lastSection = allSections[-1]
elements = lastSection.get("elements", [])
if isinstance(elements, list) and elements:
lastElem = elements[-1] if elements else {}
else:
lastElem = elements if isinstance(elements, dict) else {}
# Check content size of last section
lastSectionSize = 0
if isinstance(lastElem, dict):
for key, value in lastElem.items():
if isinstance(value, str):
lastSectionSize += len(value)
elif isinstance(value, list):
lastSectionSize += len(str(value))
# If last section is very small and we've done many iterations, might be stuck
if lastSectionSize < 100 and iteration > 10:
logger.warning(f"Potential loop detected: iteration {iteration}, last section size {lastSectionSize}")
return True
return False
def _extractDocumentMetadata(
self,
parsedResult: Dict[str, Any]
) -> Optional[Dict[str, Any]]:
"""
Extract document metadata (title, filename) from parsed AI response.
Returns dict with 'title' and 'filename' keys if found, None otherwise.
"""
if not isinstance(parsedResult, dict):
return None
# Try to get from documents array (preferred structure)
if "documents" in parsedResult and isinstance(parsedResult["documents"], list) and len(parsedResult["documents"]) > 0:
firstDoc = parsedResult["documents"][0]
if isinstance(firstDoc, dict):
title = firstDoc.get("title")
filename = firstDoc.get("filename")
if title or filename:
return {
"title": title,
"filename": filename
}
return None
def _buildFinalResultFromSections(
self,
allSections: List[Dict[str, Any]],
documentMetadata: Optional[Dict[str, Any]] = None
) -> str:
"""
Build final JSON result from accumulated sections.
Uses AI-provided metadata (title, filename) if available.
"""
if not allSections:
return ""
# Extract metadata from AI response if available
title = "Generated Document"
filename = "document.json"
if documentMetadata:
if documentMetadata.get("title"):
title = documentMetadata["title"]
if documentMetadata.get("filename"):
filename = documentMetadata["filename"]
# Build documents structure
# Assuming single document for now
documents = [{
"id": "doc_1",
"title": title,
"filename": filename,
"sections": allSections
}]
result = {
"metadata": {
"split_strategy": "single_document",
"source_documents": [],
"extraction_method": "ai_generation"
},
"documents": documents
}
return json.dumps(result, indent=2)
# Public API Methods
# Planning AI Call
async def callAiPlanning(
    self,
    prompt: str,
    placeholders: Optional[List[PromptPlaceholder]] = None,
    debugType: Optional[str] = None
) -> str:
    """
    Planning AI call for task planning, action planning, action selection, etc.
    Always uses static parameters optimized for planning tasks.

    Args:
        prompt: The planning prompt
        placeholders: Optional list of placeholder replacements
        debugType: Optional debug file type identifier (e.g., 'taskplan',
            'dynamic', 'intentanalysis'). If not provided, defaults to 'plan'.

    Returns:
        Planning JSON response
    """
    await self.ensureAiObjectsInitialized()
    # Planning calls always use static parameters
    planningOptions = AiCallOptions(
        operationType=OperationTypeEnum.PLAN,
        priority=PriorityEnum.QUALITY,
        processingMode=ProcessingModeEnum.DETAILED,
        compressPrompt=False,
        compressContext=False
    )
    # Substitute placeholder content into the prompt when provided
    fullPrompt = prompt
    if placeholders:
        replacements = {p.label: p.content for p in placeholders}
        fullPrompt = self._buildPromptWithPlaceholders(prompt, replacements)
    # Root-cause fix: planning must return raw single-shot JSON, not section-based output
    request = AiCallRequest(
        prompt=fullPrompt,
        context="",
        options=planningOptions
    )
    # Debug: persist prompt/response for analysis with context-specific naming
    debugPrefix = debugType or "plan"
    self.services.utils.writeDebugFile(fullPrompt, f"{debugPrefix}_prompt")
    response = await self.aiObjects.call(request)
    result = response.content or ""
    self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
    return result
async def callAiContent(
    self,
    prompt: str,
    options: AiCallOptions,
    contentParts: Optional[List[ContentPart]] = None,
    outputFormat: Optional[str] = None,
    title: Optional[str] = None,
    parentOperationId: Optional[str] = None  # Parent operation ID for hierarchical logging
) -> AiResponse:
    """
    Unified AI content processing method (replaces callAiDocuments and callAiText).

    Dispatches on options.operationType (inferring it from the prompt when
    missing) and on outputFormat:
    - IMAGE_GENERATE: single AI call whose content is wrapped as a PNG document
    - WEB_SEARCH / WEB_CRAWL: single AI call, raw content returned
    - outputFormat set: looping JSON generation, then rendering to the format
    - otherwise: plain text processing (contentParts, if given, are appended
      to the prompt as labeled text blocks)

    Progress is tracked under a dedicated operation ID so callers can observe
    sub-steps; the operation is always finished (success or failure) before
    returning or raising.

    Args:
        prompt: The main prompt for the AI call
        contentParts: Optional list of already-extracted content parts (preferred)
        options: AI call configuration options (REQUIRED - operationType must be set)
        outputFormat: Optional output format for document generation (e.g., 'pdf', 'docx', 'xlsx')
        title: Optional title for generated documents
        parentOperationId: Optional parent operation ID for hierarchical logging

    Returns:
        AiResponse with content, metadata, and optional documents

    Raises:
        ValueError: When the AI returns no content, generated JSON cannot be
            parsed, or rendering to the requested format fails.
    """
    await self.ensureAiObjectsInitialized()
    # Create separate operationId for detailed progress tracking
    workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
    aiOperationId = f"ai_content_{workflowId}_{int(time.time())}"
    # Get parent log ID if parent operation exists
    parentLogId = None
    if parentOperationId:
        parentLogId = self.services.chat.getOperationLogId(parentOperationId)
    # Start progress tracking with parent reference
    self.services.chat.progressLogStart(
        aiOperationId,
        "AI content processing",
        "Content Processing",
        f"Format: {outputFormat or 'text'}",
        parentId=parentLogId
    )
    try:
        # Extraction is now separate - contentParts must be extracted before calling
        # Require operationType to be set before calling
        opType = getattr(options, "operationType", None)
        if not opType:
            # If outputFormat is specified, default to DATA_GENERATE
            if outputFormat:
                options.operationType = OperationTypeEnum.DATA_GENERATE
                opType = OperationTypeEnum.DATA_GENERATE
            else:
                self.services.chat.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
                # Ask the analyzer to infer suitable options from the prompt text
                analyzedOptions = await self._analyzePromptAndCreateOptions(prompt)
                if analyzedOptions and hasattr(analyzedOptions, "operationType") and analyzedOptions.operationType:
                    options.operationType = analyzedOptions.operationType
                    # Merge other analyzed options
                    if hasattr(analyzedOptions, "priority"):
                        options.priority = analyzedOptions.priority
                    if hasattr(analyzedOptions, "processingMode"):
                        options.processingMode = analyzedOptions.processingMode
                    if hasattr(analyzedOptions, "compressPrompt"):
                        options.compressPrompt = analyzedOptions.compressPrompt
                    if hasattr(analyzedOptions, "compressContext"):
                        options.compressContext = analyzedOptions.compressContext
                else:
                    # Default to DATA_ANALYSE if analysis fails
                    options.operationType = OperationTypeEnum.DATA_ANALYSE
            # Re-sync the local after the options object was mutated above
            opType = options.operationType
        # Handle IMAGE_GENERATE operations
        if opType == OperationTypeEnum.IMAGE_GENERATE:
            self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation")
            request = AiCallRequest(
                prompt=prompt,
                context="",
                options=options
            )
            response = await self.aiObjects.call(request)
            if response.content:
                # Build document data for image
                # NOTE(review): response.content is assumed to be PNG image data
                # suitable for DocumentData - confirm against the connector.
                imageDoc = DocumentData(
                    documentName="generated_image.png",
                    documentData=response.content,
                    mimeType="image/png"
                )
                metadata = AiResponseMetadata(
                    title=title or "Generated Image",
                    operationType=opType.value
                )
                self.services.chat.storeWorkflowStat(
                    self.services.workflow,
                    response,
                    "ai.generate.image"
                )
                self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Image generated")
                self.services.chat.progressLogFinish(aiOperationId, True)
                return AiResponse(
                    content=response.content,
                    metadata=metadata,
                    documents=[imageDoc]
                )
            else:
                errorMsg = f"No image data returned: {response.content}"
                logger.error(f"Error in AI image generation: {errorMsg}")
                self.services.chat.progressLogFinish(aiOperationId, False)
                raise ValueError(errorMsg)
        # Handle WEB_SEARCH and WEB_CRAWL operations
        if opType == OperationTypeEnum.WEB_SEARCH or opType == OperationTypeEnum.WEB_CRAWL:
            self.services.chat.progressLogUpdate(aiOperationId, 0.4, f"Calling AI for {opType.name}")
            request = AiCallRequest(
                prompt=prompt,  # Raw JSON prompt - connector will parse it
                context="",
                options=options
            )
            response = await self.aiObjects.call(request)
            if response.content:
                metadata = AiResponseMetadata(
                    operationType=opType.value
                )
                self.services.chat.storeWorkflowStat(
                    self.services.workflow,
                    response,
                    f"ai.{opType.name.lower()}"
                )
                self.services.chat.progressLogUpdate(aiOperationId, 0.9, f"{opType.name} completed")
                self.services.chat.progressLogFinish(aiOperationId, True)
                return AiResponse(
                    content=response.content,
                    metadata=metadata
                )
            else:
                errorMsg = f"No content returned from {opType.name}: {response.content}"
                logger.error(f"Error in {opType.name}: {errorMsg}")
                self.services.chat.progressLogFinish(aiOperationId, False)
                raise ValueError(errorMsg)
        # Handle document generation (outputFormat specified)
        if outputFormat:
            # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
            options.compressPrompt = False
            options.compressContext = False
            # Convert contentParts to text for generation prompt (if provided)
            if contentParts:
                # Convert contentParts to text for generation prompt
                content_for_generation = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
            else:
                content_for_generation = None
            self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
            # Local import avoids a circular dependency with the generation service
            from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
            generation_prompt = await buildGenerationPrompt(
                outputFormat, prompt, title, content_for_generation, None
            )
            # promptArgs lets the looping helper rebuild the prompt on continuation
            promptArgs = {
                "outputFormat": outputFormat,
                "userPrompt": prompt,
                "title": title,
                "extracted_content": content_for_generation
            }
            self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
            # Extract user prompt from promptArgs for task completion analysis
            userPrompt = None
            if promptArgs:
                userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
            generated_json = await self._callAiWithLooping(
                generation_prompt,
                options,
                "document_generation",
                buildGenerationPrompt,
                promptArgs,
                aiOperationId,
                userPrompt=userPrompt
            )
            self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
            try:
                extracted_json = self.services.utils.jsonExtractString(generated_json)
                generated_data = json.loads(extracted_json)
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse generated JSON: {str(e)}")
                self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
                self.services.chat.progressLogFinish(aiOperationId, False)
                raise ValueError(f"Generated content is not valid JSON: {str(e)}")
            # Extract title and filename from generated document structure
            extractedTitle = title
            extractedFilename = None
            if isinstance(generated_data, dict) and "documents" in generated_data:
                docs = generated_data["documents"]
                if isinstance(docs, list) and len(docs) > 0:
                    firstDoc = docs[0]
                    if isinstance(firstDoc, dict):
                        if firstDoc.get("title"):
                            extractedTitle = firstDoc["title"]
                        if firstDoc.get("filename"):
                            extractedFilename = firstDoc["filename"]
            # Ensure metadata contains the extracted title
            if "metadata" not in generated_data:
                generated_data["metadata"] = {}
            if extractedTitle:
                generated_data["metadata"]["title"] = extractedTitle
            # Create separate operation for content rendering
            renderOperationId = f"{aiOperationId}_render"
            renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
            self.services.chat.progressLogStart(
                renderOperationId,
                "Content Rendering",
                "Rendering",
                f"Format: {outputFormat}",
                parentId=renderParentLogId
            )
            try:
                # Local import avoids a circular dependency with the generation service
                from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                generationService = GenerationService(self.services)
                self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
                rendered_content, mime_type = await generationService.renderReport(
                    generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
                )
                self.services.chat.progressLogFinish(renderOperationId, True)
                # Determine document name: explicit filename wins, then a
                # sanitized title, then a generic fallback
                if extractedFilename:
                    documentName = extractedFilename
                elif extractedTitle and extractedTitle != "Generated Document":
                    sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle)
                    sanitized = re.sub(r"_+", "_", sanitized).strip("_")
                    if sanitized:
                        if not sanitized.lower().endswith(f".{outputFormat}"):
                            documentName = f"{sanitized}.{outputFormat}"
                        else:
                            documentName = sanitized
                    else:
                        documentName = f"generated.{outputFormat}"
                else:
                    documentName = f"generated.{outputFormat}"
                # Build document data
                docData = DocumentData(
                    documentName=documentName,
                    documentData=rendered_content,
                    mimeType=mime_type
                )
                metadata = AiResponseMetadata(
                    title=extractedTitle or title or "Generated Document",
                    filename=extractedFilename,
                    operationType=opType.value if opType else None
                )
                self.services.utils.writeDebugFile(str(generated_data), "document_generation_response")
                self.services.chat.progressLogFinish(aiOperationId, True)
                return AiResponse(
                    content=json.dumps(generated_data),
                    metadata=metadata,
                    documents=[docData]
                )
            except Exception as e:
                logger.error(f"Error rendering document: {str(e)}")
                # NOTE(review): renderOperationId is always truthy here (assigned
                # above before the try) - the guard is redundant but harmless.
                if renderOperationId:
                    self.services.chat.progressLogFinish(renderOperationId, False)
                self.services.chat.progressLogFinish(aiOperationId, False)
                raise ValueError(f"Rendering failed: {str(e)}")
        # Handle text processing (no outputFormat)
        self.services.chat.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
        if contentParts:
            # Process contentParts through AI
            # Convert contentParts to text for prompt
            contentText = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
            fullPrompt = f"{prompt}\n\n{contentText}" if contentText else prompt
            result_content = await self._callAiWithLooping(
                fullPrompt, options, "text", None, None, aiOperationId
            )
        else:
            # Direct text call (no documents to process)
            result_content = await self._callAiWithLooping(
                prompt, options, "text", None, None, aiOperationId
            )
        metadata = AiResponseMetadata(
            operationType=opType.value if opType else None
        )
        self.services.chat.progressLogFinish(aiOperationId, True)
        return AiResponse(
            content=result_content,
            metadata=metadata
        )
    except Exception as e:
        logger.error(f"Error in callAiContent: {str(e)}")
        self.services.chat.progressLogFinish(aiOperationId, False)
        raise
# DEPRECATED METHODS REMOVED:
# - callAiDocuments() - replaced by callAiContent()
# - callAiText() - replaced by callAiContent()
# All call sites have been updated to use callAiContent()