import json
|
|
import logging
|
|
import re
|
|
import time
|
|
from typing import Dict, Any, List, Optional, Tuple
|
|
from modules.datamodels.datamodelChat import PromptPlaceholder
|
|
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
|
from modules.datamodels.datamodelExtraction import ContentPart
|
|
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
|
|
from modules.interfaces.interfaceAiObjects import AiObjects
|
|
from modules.shared.jsonUtils import (
|
|
extractJsonString,
|
|
repairBrokenJson,
|
|
extractSectionsFromDocument,
|
|
buildContinuationContext,
|
|
parseJsonWithModel
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Rebuild the model to resolve forward references
|
|
AiCallRequest.model_rebuild()
|
|
|
|
class AiService:
|
|
"""AI service with core operations integrated."""
|
|
|
|
def __init__(self, serviceCenter=None) -> None:
|
|
"""Initialize AI service with service center access.
|
|
|
|
Args:
|
|
serviceCenter: Service center instance for accessing other services
|
|
"""
|
|
self.services = serviceCenter
|
|
# Only depend on interfaces
|
|
self.aiObjects = None # Will be initialized in create() or ensureAiObjectsInitialized()
|
|
# Submodules initialized as None - will be set in _initializeSubmodules() after aiObjects is ready
|
|
self.extractionService = None
|
|
|
|
def _initializeSubmodules(self):
    """Create dependent submodules; requires aiObjects to exist already.

    Raises:
        RuntimeError: If called before aiObjects has been initialized.
    """
    if self.aiObjects is None:
        raise RuntimeError("aiObjects must be initialized before initializing submodules")

    # Idempotent: the extraction service is only built on the first call.
    if self.extractionService is not None:
        return
    logger.info("Initializing ExtractionService...")
    self.extractionService = ExtractionService(self.services)
async def ensureAiObjectsInitialized(self):
    """Lazily create AiObjects (and dependent submodules) on first use."""
    if self.aiObjects is not None:
        # Already initialized - nothing to do.
        return
    logger.info("Lazy initializing AiObjects...")
    self.aiObjects = await AiObjects.create()
    logger.info("AiObjects initialization completed")
    # Submodules can only be built once aiObjects exists.
    self._initializeSubmodules()
@classmethod
async def create(cls, serviceCenter=None) -> "AiService":
    """Async factory: build an AiService with connectors and submodules ready.

    Args:
        serviceCenter: Service center instance passed through to __init__.

    Returns:
        A fully initialized AiService instance.
    """
    logger.info("AiService.create() called")
    service = cls(serviceCenter)
    logger.info("AiService created, about to call AiObjects.create()...")
    service.aiObjects = await AiObjects.create()
    logger.info("AiObjects.create() completed")
    # Submodules depend on aiObjects and must be wired afterwards.
    service._initializeSubmodules()
    logger.info("AiService submodules initialized")
    return service
# Helper methods
|
|
|
|
def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
|
|
"""
|
|
Build full prompt by replacing placeholders with their content.
|
|
Uses the new {{KEY:placeholder}} format.
|
|
|
|
Args:
|
|
prompt: The base prompt template
|
|
placeholders: Dictionary of placeholder key-value pairs
|
|
|
|
Returns:
|
|
Prompt with placeholders replaced
|
|
"""
|
|
if not placeholders:
|
|
return prompt
|
|
|
|
full_prompt = prompt
|
|
for placeholder, content in placeholders.items():
|
|
# Skip if content is None or empty
|
|
if content is None:
|
|
continue
|
|
# Replace {{KEY:placeholder}}
|
|
full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content))
|
|
|
|
return full_prompt
|
|
|
|
async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions:
    """Analyze a prompt with a fast AI call and derive AiCallOptions for it.

    Args:
        prompt: The user prompt whose operation type / priority /
            processing mode should be classified.

    Returns:
        AiCallOptions parsed from the classifier response, or default
        options (DATA_ANALYSE / BALANCED / BASIC) when the analysis call
        or response parsing fails.
    """
    try:
        # Enumerate the currently valid choices straight from the enums
        # so the classifier prompt never drifts out of sync with the models.
        operationTypes = [e.value for e in OperationTypeEnum]
        priorities = [e.value for e in PriorityEnum]
        processingModes = [e.value for e in ProcessingModeEnum]

        # Classifier prompt; the user prompt is sanitized before embedding.
        analysisPrompt = f"""
You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters.

PROMPT TO ANALYZE:
{self.services.utils.sanitizePromptContent(prompt, 'userinput')}

Based on the prompt content, determine:
1. operationType: Choose the most appropriate from: {', '.join(operationTypes)}
2. priority: Choose from: {', '.join(priorities)}
3. processingMode: Choose from: {', '.join(processingModes)}
4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas)
5. compressContext: true/false (true to summarize context, false to process fully)

Respond with ONLY a JSON object in this exact format:
{{
"operationType": "dataAnalyse",
"priority": "balanced",
"processingMode": "basic",
"compressPrompt": true,
"compressContext": true
}}
"""

        # The analysis call itself is cheap: speed priority, basic mode.
        request = AiCallRequest(
            prompt=analysisPrompt,
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
                priority=PriorityEnum.SPEED,
                processingMode=ProcessingModeEnum.BASIC,
                compressPrompt=True,
                compressContext=False
            )
        )

        response = await self.aiObjects.call(request)

        # Parse AI response using structured parsing with the
        # AiCallOptions model (handles enum conversion automatically).
        try:
            analysis = parseJsonWithModel(response.content, AiCallOptions)
            return analysis
        except Exception as e:
            # Fall through to the default options below.
            logger.warning(f"Failed to parse AI analysis response: {e}")

    except Exception as e:
        logger.warning(f"Prompt analysis failed: {e}")

    # Fallback to default options
    return AiCallOptions(
        operationType=OperationTypeEnum.DATA_ANALYSE,
        priority=PriorityEnum.BALANCED,
        processingMode=ProcessingModeEnum.BASIC
    )
async def _callAiWithLooping(
    self,
    prompt: str,
    options: AiCallOptions,
    debugPrefix: str = "ai_call",
    promptBuilder: Optional[callable] = None,
    promptArgs: Optional[Dict[str, Any]] = None,
    operationId: Optional[str] = None
) -> str:
    """
    Shared core function for AI calls with repair-based looping.

    Calls the model repeatedly, repairing broken JSON responses and
    stitching the extracted sections together until the model signals
    completion (or iteration limits are hit).

    Args:
        prompt: The prompt to send to AI on the first iteration.
        options: AI call configuration options.
        debugPrefix: Prefix for debug file names.
        promptBuilder: Optional async callable used to rebuild the prompt
            for continuation iterations; called with ``promptArgs`` plus a
            ``continuationContext`` keyword argument.
        promptArgs: Optional arguments forwarded to ``promptBuilder``.
        operationId: Optional operation ID for progress tracking.

    Returns:
        Complete JSON result assembled from all iterations (empty string
        when no sections were produced).
    """
    maxIterations = 50  # hard cap against infinite continuation loops
    iteration = 0
    allSections = []         # sections accumulated across iterations
    lastRawResponse = None   # last raw response, used to build continuations
    documentMetadata = None  # title/filename captured from first iteration

    # Resolve the parent log entry so iteration logs nest underneath it.
    parentLogId = None
    if operationId:
        parentLogId = self.services.chat.getOperationLogId(operationId)

    while iteration < maxIterations:
        iteration += 1

        # Each iteration gets its own progress operation under the parent.
        iterationOperationId = None
        if operationId:
            iterationOperationId = f"{operationId}_iter_{iteration}"
            self.services.chat.progressLogStart(
                iterationOperationId,
                "AI Call",
                f"Iteration {iteration}",
                "",
                parentId=parentLogId
            )

        # Build the prompt: the first iteration uses the original prompt;
        # later iterations rebuild it with a continuation context.
        # NOTE(review): an empty promptArgs dict disables continuation
        # rebuilding because of the truthiness check - confirm intended.
        if len(allSections) > 0 and promptBuilder and promptArgs:
            continuationContext = buildContinuationContext(allSections, lastRawResponse)
            if not lastRawResponse:
                logger.warning(f"Iteration {iteration}: No previous response available for continuation!")
            iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
        else:
            iterationPrompt = prompt

        try:
            if iterationOperationId:
                self.services.chat.progressLogUpdate(iterationOperationId, 0.3, "Calling AI model")
            request = AiCallRequest(
                prompt=iterationPrompt,
                context="",
                options=options
            )

            # Persist the ACTUAL prompt sent to the model for debugging.
            if iteration == 1:
                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
            else:
                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")

            response = await self.aiObjects.call(request)
            result = response.content

            if iterationOperationId:
                self.services.chat.progressLogUpdate(iterationOperationId, 0.6, "AI response received")

            # Persist the raw AI response as well.
            if iteration == 1:
                self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
            else:
                self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")

            # Record per-iteration stats when a workflow with an id exists;
            # stat failures must never break the generation loop.
            if self.services.workflow and hasattr(self.services.workflow, 'id') and self.services.workflow.id:
                try:
                    self.services.chat.storeWorkflowStat(
                        self.services.workflow,
                        response,
                        f"ai.call.{debugPrefix}.iteration_{iteration}"
                    )
                except Exception as statError:
                    logger.warning(f"Failed to store workflow stat: {str(statError)}")

            # NOTE(review): the break/continue paths below leave the
            # iteration progress log unfinished - confirm intended.
            if not result or not result.strip():
                logger.warning(f"Iteration {iteration}: Empty response, stopping")
                break

            # Keep the raw response (even broken JSON) for continuation.
            lastRawResponse = result

            # Extract sections; handles both valid and broken JSON and
            # reports whether the JSON parsed cleanly. (The former local
            # re-import and unused "complete_response" pre-check were dead
            # code - the flag is handled in _shouldContinueGeneration.)
            extractedSections, wasJsonComplete, parsedResult = self._extractSectionsFromResponse(result, iteration, debugPrefix)

            # Capture document metadata (title/filename) once, from the
            # first successfully parsed response.
            if iteration == 1 and parsedResult and not documentMetadata:
                documentMetadata = self._extractDocumentMetadata(parsedResult)

            if iterationOperationId:
                if extractedSections:
                    self.services.chat.progressLogUpdate(iterationOperationId, 0.8, f"Extracted {len(extractedSections)} sections")

            if not extractedSections:
                # Continuation fragments may legitimately contain no
                # extractable sections - retry in that case.
                if iteration > 1 and not wasJsonComplete:
                    logger.warning(f"Iteration {iteration}: No sections extracted from continuation fragment, continuing for another attempt")
                    continue
                logger.warning(f"Iteration {iteration}: No sections extracted, stopping")
                break

            # STANDARD CASE for broken-JSON iterations: merge fragments;
            # the break can occur in any section, at any depth.
            allSections = self._mergeSectionsIntelligently(allSections, extractedSections, iteration)

            # Completion detection: decide whether another pass is needed.
            if self._shouldContinueGeneration(allSections, iteration, wasJsonComplete, result):
                if iterationOperationId:
                    self.services.chat.progressLogFinish(iterationOperationId, True)
                continue
            else:
                if iterationOperationId:
                    self.services.chat.progressLogFinish(iterationOperationId, True)
                if operationId:
                    self.services.chat.progressLogUpdate(operationId, 0.95, f"Generation complete ({iteration} iterations, {len(allSections)} sections)")
                logger.info(f"Generation complete after {iteration} iterations: {len(allSections)} sections")
                break

        except Exception as e:
            logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
            if iterationOperationId:
                self.services.chat.progressLogFinish(iterationOperationId, False)
            break

    if iteration >= maxIterations:
        logger.warning(f"AI call stopped after maximum iterations ({maxIterations})")

    # Assemble the final JSON document from everything gathered.
    final_result = self._buildFinalResultFromSections(allSections, documentMetadata)
    self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result")

    return final_result
def _mergeSectionsIntelligently(
|
|
self,
|
|
existingSections: List[Dict[str, Any]],
|
|
newSections: List[Dict[str, Any]],
|
|
iteration: int
|
|
) -> List[Dict[str, Any]]:
|
|
"""
|
|
Intelligently merge sections from multiple iterations.
|
|
|
|
This is a GENERIC merging strategy that handles broken JSON iterations.
|
|
The break can occur anywhere - in any section, at any depth.
|
|
|
|
Merging strategies (in order of priority):
|
|
1. Same Section ID: Merge sections with identical IDs
|
|
2. Same Content-Type + Position: If last section is incomplete and new section continues it
|
|
3. Same Order: Merge sections with same order value
|
|
4. Structural Analysis: Detect continuation based on content structure
|
|
|
|
Args:
|
|
existingSections: Sections accumulated from previous iterations
|
|
newSections: Sections extracted from current iteration
|
|
iteration: Current iteration number
|
|
|
|
Returns:
|
|
Merged list of sections
|
|
"""
|
|
if not newSections:
|
|
return existingSections
|
|
|
|
if not existingSections:
|
|
return newSections
|
|
|
|
mergedSections = existingSections.copy()
|
|
|
|
for newSection in newSections:
|
|
merged = False
|
|
|
|
# Strategy 1: Same Section ID - merge directly
|
|
newSectionId = newSection.get("id")
|
|
if newSectionId:
|
|
for i, existingSection in enumerate(mergedSections):
|
|
if existingSection.get("id") == newSectionId:
|
|
# Merge sections with same ID
|
|
mergedSections[i] = self._mergeSectionContent(existingSection, newSection, iteration)
|
|
merged = True
|
|
logger.debug(f"Iteration {iteration}: Merged section by ID '{newSectionId}'")
|
|
break
|
|
|
|
if merged:
|
|
continue
|
|
|
|
# Strategy 2: Same Content-Type + Position (continuation detection)
|
|
# Check if last section is incomplete and new section continues it
|
|
if mergedSections:
|
|
lastSection = mergedSections[-1]
|
|
lastContentType = lastSection.get("content_type")
|
|
newContentType = newSection.get("content_type")
|
|
|
|
if lastContentType == newContentType:
|
|
# Same content type - check if last section is incomplete
|
|
if self._isSectionIncomplete(lastSection):
|
|
# Last section is incomplete, merge with new section
|
|
mergedSections[-1] = self._mergeSectionContent(lastSection, newSection, iteration)
|
|
merged = True
|
|
logger.debug(f"Iteration {iteration}: Merged section by content-type continuation ({lastContentType})")
|
|
continue
|
|
|
|
# Strategy 3: Same Order value
|
|
newOrder = newSection.get("order")
|
|
if newOrder is not None:
|
|
for i, existingSection in enumerate(mergedSections):
|
|
existingOrder = existingSection.get("order")
|
|
if existingOrder is not None and existingOrder == newOrder:
|
|
# Merge sections with same order
|
|
mergedSections[i] = self._mergeSectionContent(existingSection, newSection, iteration)
|
|
merged = True
|
|
logger.debug(f"Iteration {iteration}: Merged section by order {newOrder}")
|
|
break
|
|
|
|
if merged:
|
|
continue
|
|
|
|
# Strategy 4: Structural Analysis - detect continuation
|
|
# For code_block: if last section is code_block and new section is also code_block, merge
|
|
if mergedSections:
|
|
lastSection = mergedSections[-1]
|
|
if (lastSection.get("content_type") == "code_block" and
|
|
newSection.get("content_type") == "code_block"):
|
|
# Both are code blocks - merge them
|
|
mergedSections[-1] = self._mergeSectionContent(lastSection, newSection, iteration)
|
|
merged = True
|
|
logger.debug(f"Iteration {iteration}: Merged code_block sections by structural analysis")
|
|
continue
|
|
|
|
# No merge strategy matched - add as new section
|
|
if not merged:
|
|
mergedSections.append(newSection)
|
|
logger.debug(f"Iteration {iteration}: Added new section '{newSection.get('id', 'no-id')}' ({newSection.get('content_type', 'unknown')})")
|
|
|
|
return mergedSections
|
|
|
|
def _isSectionIncomplete(self, section: Dict[str, Any]) -> bool:
|
|
"""
|
|
Check if a section is incomplete (broken at the end).
|
|
|
|
This detects incomplete sections based on content analysis:
|
|
- Code blocks: ends mid-line, ends with comma, ends with incomplete structure
|
|
- Text sections: ends mid-sentence, ends with incomplete structure
|
|
- Other types: check for incomplete elements
|
|
"""
|
|
contentType = section.get("content_type", "")
|
|
elements = section.get("elements", [])
|
|
|
|
if not elements:
|
|
return False
|
|
|
|
# Handle list of elements
|
|
if isinstance(elements, list) and len(elements) > 0:
|
|
lastElement = elements[-1]
|
|
else:
|
|
lastElement = elements
|
|
|
|
if not isinstance(lastElement, dict):
|
|
return False
|
|
|
|
# Check code_block for incomplete code
|
|
if contentType == "code_block":
|
|
code = lastElement.get("code", "")
|
|
if code:
|
|
# Check if code ends incompletely:
|
|
# - Ends with comma (incomplete CSV line)
|
|
# - Ends with number but no newline (incomplete line)
|
|
# - Ends mid-token (e.g., "23431,23" - incomplete number)
|
|
codeStripped = code.rstrip()
|
|
if codeStripped:
|
|
# Check for incomplete patterns
|
|
if codeStripped.endswith(',') or (',' in codeStripped and not codeStripped.endswith('\n')):
|
|
# Ends with comma or has comma but no final newline - likely incomplete
|
|
return True
|
|
# Check if last line is incomplete (doesn't end with newline and has partial content)
|
|
if not code.endswith('\n') and codeStripped:
|
|
# No final newline - might be incomplete
|
|
# More sophisticated: check if last number is complete
|
|
lastLine = codeStripped.split('\n')[-1]
|
|
if lastLine and ',' in lastLine:
|
|
# Has commas but might be incomplete
|
|
parts = lastLine.split(',')
|
|
if parts and len(parts[-1]) < 5: # Last part is very short - might be incomplete
|
|
return True
|
|
|
|
# Check paragraph/text for incomplete sentences
|
|
if contentType in ["paragraph", "heading"]:
|
|
text = lastElement.get("text", "")
|
|
if text:
|
|
# Simple heuristic: if doesn't end with sentence-ending punctuation
|
|
textStripped = text.rstrip()
|
|
if textStripped and not textStripped[-1] in '.!?':
|
|
# Might be incomplete, but this is less reliable
|
|
# Only mark as incomplete if very short (likely cut off)
|
|
if len(textStripped) < 20:
|
|
return True
|
|
|
|
return False
|
|
|
|
def _mergeSectionContent(
|
|
self,
|
|
existingSection: Dict[str, Any],
|
|
newSection: Dict[str, Any],
|
|
iteration: int
|
|
) -> Dict[str, Any]:
|
|
"""
|
|
Merge content from two sections.
|
|
|
|
Handles different content types:
|
|
- code_block: Append code, handle overlaps, merge incomplete lines
|
|
- paragraph/heading: Append text
|
|
- table: Merge rows
|
|
- list: Merge items
|
|
- Other: Merge elements
|
|
"""
|
|
contentType = existingSection.get("content_type", "")
|
|
existingElements = existingSection.get("elements", [])
|
|
newElements = newSection.get("elements", [])
|
|
|
|
if not newElements:
|
|
return existingSection
|
|
|
|
# Handle list of elements
|
|
if isinstance(existingElements, list):
|
|
existingElem = existingElements[-1] if existingElements else {}
|
|
else:
|
|
existingElem = existingElements
|
|
|
|
if isinstance(newElements, list):
|
|
newElem = newElements[0] if newElements else {}
|
|
else:
|
|
newElem = newElements
|
|
|
|
if not isinstance(existingElem, dict) or not isinstance(newElem, dict):
|
|
return existingSection
|
|
|
|
# Merge based on content type
|
|
if contentType == "code_block":
|
|
existingCode = existingElem.get("code", "")
|
|
newCode = newElem.get("code", "")
|
|
|
|
if existingCode and newCode:
|
|
mergedCode = self._mergeCodeBlocks(existingCode, newCode, iteration)
|
|
existingElem["code"] = mergedCode
|
|
# Preserve language from existing or new
|
|
if "language" not in existingElem and "language" in newElem:
|
|
existingElem["language"] = newElem["language"]
|
|
|
|
elif contentType in ["paragraph", "heading"]:
|
|
existingText = existingElem.get("text", "")
|
|
newText = newElem.get("text", "")
|
|
|
|
if existingText and newText:
|
|
# Append text with space if needed
|
|
if existingText.rstrip() and not existingText.rstrip()[-1] in '.!?\n':
|
|
mergedText = existingText.rstrip() + " " + newText.lstrip()
|
|
else:
|
|
mergedText = existingText.rstrip() + "\n" + newText.lstrip()
|
|
existingElem["text"] = mergedText
|
|
|
|
elif contentType == "table":
|
|
# Merge table rows
|
|
existingRows = existingElem.get("rows", [])
|
|
newRows = newElem.get("rows", [])
|
|
if existingRows and newRows:
|
|
existingElem["rows"] = existingRows + newRows
|
|
|
|
elif contentType in ["bullet_list", "numbered_list"]:
|
|
# Merge list items
|
|
existingItems = existingElem.get("items", [])
|
|
newItems = newElem.get("items", [])
|
|
if existingItems and newItems:
|
|
existingElem["items"] = existingItems + newItems
|
|
|
|
# Update section with merged content
|
|
mergedSection = existingSection.copy()
|
|
if isinstance(existingElements, list):
|
|
mergedSection["elements"] = existingElements
|
|
else:
|
|
mergedSection["elements"] = existingElem
|
|
|
|
# Preserve metadata from new section if missing in existing
|
|
if "order" not in mergedSection and "order" in newSection:
|
|
mergedSection["order"] = newSection["order"]
|
|
|
|
return mergedSection
|
|
|
|
def _mergeCodeBlocks(self, existingCode: str, newCode: str, iteration: int) -> str:
|
|
"""
|
|
Merge two code blocks intelligently, handling overlaps and incomplete lines.
|
|
"""
|
|
if not existingCode:
|
|
return newCode
|
|
if not newCode:
|
|
return existingCode
|
|
|
|
existingLines = existingCode.rstrip().split('\n')
|
|
newLines = newCode.strip().split('\n')
|
|
|
|
if not existingLines or not newLines:
|
|
return existingCode + "\n" + newCode
|
|
|
|
lastExistingLine = existingLines[-1].strip()
|
|
firstNewLine = newLines[0].strip()
|
|
|
|
# Strategy 1: Exact overlap - remove duplicate line
|
|
if lastExistingLine == firstNewLine:
|
|
newLines = newLines[1:]
|
|
logger.debug(f"Iteration {iteration}: Removed exact duplicate line in code merge")
|
|
|
|
# Strategy 2: Incomplete line merge
|
|
# If last existing line ends with comma or is incomplete, merge with first new line
|
|
elif lastExistingLine.endswith(',') or (',' in lastExistingLine and len(lastExistingLine.split(',')[-1]) < 5):
|
|
# Last line is incomplete - merge with first new line
|
|
# Remove trailing comma from existing line
|
|
mergedLine = lastExistingLine.rstrip(',') + ',' + firstNewLine.lstrip()
|
|
existingLines[-1] = mergedLine
|
|
newLines = newLines[1:]
|
|
logger.debug(f"Iteration {iteration}: Merged incomplete line with continuation")
|
|
|
|
# Strategy 3: Partial overlap detection
|
|
# Check if first new line starts with the end of last existing line
|
|
elif ',' in lastExistingLine and ',' in firstNewLine:
|
|
lastExistingParts = lastExistingLine.split(',')
|
|
firstNewParts = firstNewLine.split(',')
|
|
|
|
# Check for overlap: if last part of existing matches first part of new
|
|
if lastExistingParts and firstNewParts:
|
|
lastExistingPart = lastExistingParts[-1].strip()
|
|
firstNewPart = firstNewParts[0].strip()
|
|
|
|
# If they match, there's overlap
|
|
if lastExistingPart == firstNewPart and len(lastExistingParts) > 1:
|
|
# Remove overlapping part from new line
|
|
newLines[0] = ','.join(firstNewParts[1:])
|
|
logger.debug(f"Iteration {iteration}: Removed partial overlap in code merge")
|
|
|
|
# Reconstruct merged code
|
|
mergedCode = '\n'.join(existingLines)
|
|
if newLines:
|
|
if mergedCode and not mergedCode.endswith('\n'):
|
|
mergedCode += '\n'
|
|
mergedCode += '\n'.join(newLines)
|
|
|
|
return mergedCode
|
|
|
|
def _extractSectionsFromResponse(
    self,
    result: str,
    iteration: int,
    debugPrefix: str
) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]]]:
    """Parse an AI response into sections, repairing broken JSON if needed.

    Honors the "complete_response": true flag when the JSON parses, and
    falls back to the repair mechanism on a decode error.

    Returns:
        (sections, wasJsonComplete, parsedResult)
    """
    try:
        # Happy path: the response contains valid JSON.
        parsed = json.loads(extractJsonString(result))

        # Did the AI explicitly mark the response as final?
        aiSaysComplete = parsed.get("complete_response", False) == True

        sections = extractSectionsFromDocument(parsed)

        if aiSaysComplete:
            return sections, True, parsed
        if sections:
            return sections, True, parsed
        # Zero sections: during continuation (iteration > 1) that means an
        # incomplete fragment and the loop must keep going; on the first
        # pass it simply means an empty-but-complete response.
        return sections, iteration <= 1, parsed

    except json.JSONDecodeError:
        # Broken JSON is normal during iterative generation; keep the raw
        # payload for debugging and attempt a repair.
        self.services.utils.writeDebugFile(result, f"{debugPrefix}_broken_json_iteration_{iteration}")
        repaired = repairBrokenJson(result)
        if not repaired:
            logger.error(f"Iteration {iteration}: All repair strategies failed")
            return [], False, None
        return extractSectionsFromDocument(repaired), False, repaired

    except Exception as e:
        logger.error(f"Iteration {iteration}: Unexpected error during parsing: {str(e)}")
        return [], False, None
def _shouldContinueGeneration(
|
|
self,
|
|
allSections: List[Dict[str, Any]],
|
|
iteration: int,
|
|
wasJsonComplete: bool,
|
|
rawResponse: str = None
|
|
) -> bool:
|
|
"""
|
|
Determine if generation should continue based on JSON completeness, complete_response flag, and task completion.
|
|
Returns True if we should continue, False if done.
|
|
"""
|
|
if len(allSections) == 0:
|
|
return True # No sections yet, continue
|
|
|
|
# Check for complete_response flag in raw response
|
|
if rawResponse:
|
|
import re
|
|
if re.search(r'"complete_response"\s*:\s*true', rawResponse, re.IGNORECASE):
|
|
logger.info(f"Iteration {iteration}: AI marked response as complete (complete_response flag detected)")
|
|
return False
|
|
|
|
# If JSON was complete, stop (AI should have set complete_response if task is done)
|
|
# For continuation iterations (iteration > 1), if JSON is complete but no flag was set,
|
|
# stop to prevent infinite loops - AI had a chance to set the flag
|
|
if wasJsonComplete:
|
|
if iteration > 1:
|
|
# Continuation mode: JSON complete without flag means we're likely done
|
|
# Stop to prevent infinite loops
|
|
logger.info(f"Iteration {iteration}: JSON complete without complete_response flag - stopping")
|
|
return False
|
|
# First iteration with complete JSON - done
|
|
return False
|
|
else:
|
|
# JSON was incomplete/broken - continue
|
|
return True
|
|
|
|
def _extractDocumentMetadata(
|
|
self,
|
|
parsedResult: Dict[str, Any]
|
|
) -> Optional[Dict[str, Any]]:
|
|
"""
|
|
Extract document metadata (title, filename) from parsed AI response.
|
|
Returns dict with 'title' and 'filename' keys if found, None otherwise.
|
|
"""
|
|
if not isinstance(parsedResult, dict):
|
|
return None
|
|
|
|
# Try to get from documents array (preferred structure)
|
|
if "documents" in parsedResult and isinstance(parsedResult["documents"], list) and len(parsedResult["documents"]) > 0:
|
|
firstDoc = parsedResult["documents"][0]
|
|
if isinstance(firstDoc, dict):
|
|
title = firstDoc.get("title")
|
|
filename = firstDoc.get("filename")
|
|
if title or filename:
|
|
return {
|
|
"title": title,
|
|
"filename": filename
|
|
}
|
|
|
|
return None
|
|
|
|
def _buildFinalResultFromSections(
|
|
self,
|
|
allSections: List[Dict[str, Any]],
|
|
documentMetadata: Optional[Dict[str, Any]] = None
|
|
) -> str:
|
|
"""
|
|
Build final JSON result from accumulated sections.
|
|
Uses AI-provided metadata (title, filename) if available.
|
|
"""
|
|
if not allSections:
|
|
return ""
|
|
|
|
# Extract metadata from AI response if available
|
|
title = "Generated Document"
|
|
filename = "document.json"
|
|
if documentMetadata:
|
|
if documentMetadata.get("title"):
|
|
title = documentMetadata["title"]
|
|
if documentMetadata.get("filename"):
|
|
filename = documentMetadata["filename"]
|
|
|
|
# Build documents structure
|
|
# Assuming single document for now
|
|
documents = [{
|
|
"id": "doc_1",
|
|
"title": title,
|
|
"filename": filename,
|
|
"sections": allSections
|
|
}]
|
|
|
|
result = {
|
|
"metadata": {
|
|
"split_strategy": "single_document",
|
|
"source_documents": [],
|
|
"extraction_method": "ai_generation"
|
|
},
|
|
"documents": documents
|
|
}
|
|
|
|
return json.dumps(result, indent=2)
|
|
|
|
# Public API Methods
|
|
|
|
# Planning AI Call
|
|
async def callAiPlanning(
    self,
    prompt: str,
    placeholders: Optional[List[PromptPlaceholder]] = None,
    debugType: Optional[str] = None
) -> str:
    """Run a planning-oriented AI call (task plans, action selection, ...).

    Planning always uses fixed options - PLAN / QUALITY / DETAILED with no
    prompt or context compression - and returns the raw single-shot JSON
    response (no section-based looping).

    Args:
        prompt: The planning prompt template.
        placeholders: Optional placeholder substitutions for the prompt.
        debugType: Debug-file name prefix (e.g. 'taskplan', 'dynamic',
            'intentanalysis'); defaults to 'plan'.

    Returns:
        Planning JSON response as a string.
    """
    await self.ensureAiObjectsInitialized()

    # Static parameters optimized for planning tasks.
    planningOptions = AiCallOptions(
        operationType=OperationTypeEnum.PLAN,
        priority=PriorityEnum.QUALITY,
        processingMode=ProcessingModeEnum.DETAILED,
        compressPrompt=False,
        compressContext=False
    )

    # Resolve {{KEY:...}} placeholders when provided.
    if placeholders:
        mapping = {p.label: p.content for p in placeholders}
        resolvedPrompt = self._buildPromptWithPlaceholders(prompt, mapping)
    else:
        resolvedPrompt = prompt

    request = AiCallRequest(
        prompt=resolvedPrompt,
        context="",
        options=planningOptions
    )

    # Persist prompt and response for offline analysis, named by debug type.
    prefix = debugType if debugType else "plan"
    self.services.utils.writeDebugFile(resolvedPrompt, f"{prefix}_prompt")
    response = await self.aiObjects.call(request)
    planningResult = response.content or ""
    self.services.utils.writeDebugFile(planningResult, f"{prefix}_response")
    return planningResult
async def callAiContent(
    self,
    prompt: str,
    options: AiCallOptions,
    contentParts: Optional[List[ContentPart]] = None,
    outputFormat: Optional[str] = None,
    title: Optional[str] = None,
    parentOperationId: Optional[str] = None  # Parent operation ID for hierarchical logging
) -> AiResponse:
    """
    Unified AI content processing method (replaces callAiDocuments and callAiText).

    Dispatches on options.operationType / outputFormat in this order:
    IMAGE_GENERATE -> WEB_SEARCH/WEB_CRAWL -> document generation (when
    outputFormat is set) -> plain text processing. Progress is reported
    through services.chat via a fresh operation id, optionally parented
    under parentOperationId.

    Args:
        prompt: The main prompt for the AI call
        contentParts: Optional list of already-extracted content parts (preferred)
        options: AI call configuration options (REQUIRED - operationType must be set;
            if missing, it is inferred from outputFormat or prompt analysis,
            mutating the caller-supplied options object in place)
        outputFormat: Optional output format for document generation (e.g., 'pdf', 'docx', 'xlsx')
        title: Optional title for generated documents
        parentOperationId: Optional parent operation ID for hierarchical logging

    Returns:
        AiResponse with content, metadata, and optional documents

    Raises:
        ValueError: when the AI returns no content, generated JSON cannot be
            parsed, or document rendering fails.
        Exception: any other error from the underlying AI call is logged and
            re-raised after the progress log is marked failed.
    """
    await self.ensureAiObjectsInitialized()

    # Create separate operationId for detailed progress tracking.
    # NOTE(review): int(time.time()) has 1-second resolution, so two calls in
    # the same second for the same workflow share an id — confirm acceptable.
    workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
    aiOperationId = f"ai_content_{workflowId}_{int(time.time())}"

    # Get parent log ID if parent operation exists
    parentLogId = None
    if parentOperationId:
        parentLogId = self.services.chat.getOperationLogId(parentOperationId)

    # Start progress tracking with parent reference
    self.services.chat.progressLogStart(
        aiOperationId,
        "AI content processing",
        "Content Processing",
        f"Format: {outputFormat or 'text'}",
        parentId=parentLogId
    )

    try:
        # Extraction is now separate - contentParts must be extracted before calling
        # Require operationType to be set before calling
        opType = getattr(options, "operationType", None)
        if not opType:
            # If outputFormat is specified, default to DATA_GENERATE
            if outputFormat:
                options.operationType = OperationTypeEnum.DATA_GENERATE
                opType = OperationTypeEnum.DATA_GENERATE
            else:
                # No operation type and no output format: ask the analyzer to
                # derive call options from the prompt itself.
                self.services.chat.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
                analyzedOptions = await self._analyzePromptAndCreateOptions(prompt)
                if analyzedOptions and hasattr(analyzedOptions, "operationType") and analyzedOptions.operationType:
                    options.operationType = analyzedOptions.operationType
                    # Merge other analyzed options (overwrites caller-supplied
                    # values field by field when the analyzer provides them).
                    if hasattr(analyzedOptions, "priority"):
                        options.priority = analyzedOptions.priority
                    if hasattr(analyzedOptions, "processingMode"):
                        options.processingMode = analyzedOptions.processingMode
                    if hasattr(analyzedOptions, "compressPrompt"):
                        options.compressPrompt = analyzedOptions.compressPrompt
                    if hasattr(analyzedOptions, "compressContext"):
                        options.compressContext = analyzedOptions.compressContext
                else:
                    # Default to DATA_ANALYSE if analysis fails
                    options.operationType = OperationTypeEnum.DATA_ANALYSE
                opType = options.operationType

        # Handle IMAGE_GENERATE operations
        if opType == OperationTypeEnum.IMAGE_GENERATE:
            self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation")

            request = AiCallRequest(
                prompt=prompt,
                context="",
                options=options
            )

            response = await self.aiObjects.call(request)

            if response.content:
                # Build document data for image.
                # NOTE(review): documentData is response.content as returned by the
                # connector — presumably base64 or raw PNG bytes; verify against
                # the image connector's contract.
                imageDoc = DocumentData(
                    documentName="generated_image.png",
                    documentData=response.content,
                    mimeType="image/png"
                )

                metadata = AiResponseMetadata(
                    title=title or "Generated Image",
                    operationType=opType.value
                )

                self.services.chat.storeWorkflowStat(
                    self.services.workflow,
                    response,
                    "ai.generate.image"
                )

                self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Image generated")
                self.services.chat.progressLogFinish(aiOperationId, True)

                return AiResponse(
                    content=response.content,
                    metadata=metadata,
                    documents=[imageDoc]
                )
            else:
                # NOTE(review): response.content is falsy here, so the
                # interpolated value adds little information to the message.
                errorMsg = f"No image data returned: {response.content}"
                logger.error(f"Error in AI image generation: {errorMsg}")
                self.services.chat.progressLogFinish(aiOperationId, False)
                # NOTE(review): this ValueError is re-caught by the outer
                # except below, which calls progressLogFinish(False) a second
                # time — confirm the chat service tolerates a double finish.
                raise ValueError(errorMsg)

        # Handle WEB_SEARCH and WEB_CRAWL operations
        if opType == OperationTypeEnum.WEB_SEARCH or opType == OperationTypeEnum.WEB_CRAWL:
            self.services.chat.progressLogUpdate(aiOperationId, 0.4, f"Calling AI for {opType.name}")

            request = AiCallRequest(
                prompt=prompt,  # Raw JSON prompt - connector will parse it
                context="",
                options=options
            )

            response = await self.aiObjects.call(request)

            if response.content:
                metadata = AiResponseMetadata(
                    operationType=opType.value
                )

                # Stat key becomes e.g. "ai.web_search" / "ai.web_crawl".
                self.services.chat.storeWorkflowStat(
                    self.services.workflow,
                    response,
                    f"ai.{opType.name.lower()}"
                )

                self.services.chat.progressLogUpdate(aiOperationId, 0.9, f"{opType.name} completed")
                self.services.chat.progressLogFinish(aiOperationId, True)

                return AiResponse(
                    content=response.content,
                    metadata=metadata
                )
            else:
                errorMsg = f"No content returned from {opType.name}: {response.content}"
                logger.error(f"Error in {opType.name}: {errorMsg}")
                self.services.chat.progressLogFinish(aiOperationId, False)
                raise ValueError(errorMsg)

        # Handle document generation (outputFormat specified)
        if outputFormat:
            # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
            options.compressPrompt = False
            options.compressContext = False

            # Convert contentParts to text for generation prompt (if provided)
            if contentParts:
                # Convert contentParts to text for generation prompt;
                # parts with empty data are skipped.
                content_for_generation = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
            else:
                content_for_generation = None

            self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
            # Local import — presumably to avoid a circular dependency with
            # the generation service; confirm before hoisting to module level.
            from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt

            generation_prompt = await buildGenerationPrompt(
                outputFormat, prompt, title, content_for_generation, None
            )

            # Arguments kept separately so the looping helper can rebuild the
            # prompt for continuation calls.
            promptArgs = {
                "outputFormat": outputFormat,
                "userPrompt": prompt,
                "title": title,
                "extracted_content": content_for_generation
            }

            self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
            generated_json = await self._callAiWithLooping(
                generation_prompt,
                options,
                "document_generation",
                buildGenerationPrompt,
                promptArgs,
                aiOperationId
            )

            self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
            try:
                # Strip any surrounding text/markdown fences before parsing.
                extracted_json = self.services.utils.jsonExtractString(generated_json)
                generated_data = json.loads(extracted_json)
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse generated JSON: {str(e)}")
                # Keep the raw response on disk for post-mortem analysis.
                self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
                self.services.chat.progressLogFinish(aiOperationId, False)
                # NOTE(review): consider `raise ... from e` to preserve the chain.
                raise ValueError(f"Generated content is not valid JSON: {str(e)}")

            # Extract title and filename from generated document structure
            # (only the first document in the list is inspected).
            extractedTitle = title
            extractedFilename = None
            if isinstance(generated_data, dict) and "documents" in generated_data:
                docs = generated_data["documents"]
                if isinstance(docs, list) and len(docs) > 0:
                    firstDoc = docs[0]
                    if isinstance(firstDoc, dict):
                        if firstDoc.get("title"):
                            extractedTitle = firstDoc["title"]
                        if firstDoc.get("filename"):
                            extractedFilename = firstDoc["filename"]

            # Ensure metadata contains the extracted title
            if "metadata" not in generated_data:
                generated_data["metadata"] = {}
            if extractedTitle:
                generated_data["metadata"]["title"] = extractedTitle

            # Create separate operation for content rendering, parented under
            # this call's operation so it nests in the progress log.
            renderOperationId = f"{aiOperationId}_render"
            renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
            self.services.chat.progressLogStart(
                renderOperationId,
                "Content Rendering",
                "Rendering",
                f"Format: {outputFormat}",
                parentId=renderParentLogId
            )

            try:
                from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                generationService = GenerationService(self.services)
                self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
                rendered_content, mime_type = await generationService.renderReport(
                    generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
                )
                self.services.chat.progressLogFinish(renderOperationId, True)

                # Determine document name: explicit filename wins, then a
                # filesystem-sanitized title, then a generic fallback.
                if extractedFilename:
                    documentName = extractedFilename
                elif extractedTitle and extractedTitle != "Generated Document":
                    # Collapse anything outside [a-zA-Z0-9._-] to underscores.
                    sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle)
                    sanitized = re.sub(r"_+", "_", sanitized).strip("_")
                    if sanitized:
                        if not sanitized.lower().endswith(f".{outputFormat}"):
                            documentName = f"{sanitized}.{outputFormat}"
                        else:
                            documentName = sanitized
                    else:
                        documentName = f"generated.{outputFormat}"
                else:
                    documentName = f"generated.{outputFormat}"

                # Build document data
                docData = DocumentData(
                    documentName=documentName,
                    documentData=rendered_content,
                    mimeType=mime_type
                )

                metadata = AiResponseMetadata(
                    title=extractedTitle or title or "Generated Document",
                    filename=extractedFilename,
                    operationType=opType.value if opType else None
                )

                self.services.utils.writeDebugFile(str(generated_data), "document_generation_response")
                self.services.chat.progressLogFinish(aiOperationId, True)

                return AiResponse(
                    content=json.dumps(generated_data),
                    metadata=metadata,
                    documents=[docData]
                )

            except Exception as e:
                logger.error(f"Error rendering document: {str(e)}")
                # renderOperationId is always set at this point; the guard is
                # defensive only.
                if renderOperationId:
                    self.services.chat.progressLogFinish(renderOperationId, False)
                self.services.chat.progressLogFinish(aiOperationId, False)
                # NOTE(review): re-caught by the outer except (second log +
                # finish); consider `raise ... from e` to keep the cause.
                raise ValueError(f"Rendering failed: {str(e)}")

        # Handle text processing (no outputFormat)
        self.services.chat.progressLogUpdate(aiOperationId, 0.5, "Processing text call")

        if contentParts:
            # Process contentParts through AI
            # Convert contentParts to text for prompt (empty parts skipped)
            contentText = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
            fullPrompt = f"{prompt}\n\n{contentText}" if contentText else prompt
            result_content = await self._callAiWithLooping(
                fullPrompt, options, "text", None, None, aiOperationId
            )
        else:
            # Direct text call (no documents to process)
            result_content = await self._callAiWithLooping(
                prompt, options, "text", None, None, aiOperationId
            )

        metadata = AiResponseMetadata(
            operationType=opType.value if opType else None
        )

        self.services.chat.progressLogFinish(aiOperationId, True)

        return AiResponse(
            content=result_content,
            metadata=metadata
        )

    except Exception as e:
        # Top-level boundary: mark the operation failed and propagate.
        logger.error(f"Error in callAiContent: {str(e)}")
        self.services.chat.progressLogFinish(aiOperationId, False)
        raise
|
|
|
|
# DEPRECATED METHODS REMOVED:
|
|
# - callAiDocuments() - replaced by callAiContent()
|
|
# - callAiText() - replaced by callAiContent()
|
|
# All call sites have been updated to use callAiContent()
|
|
|