gateway/modules/services/serviceAi/subCoreAi.py
2025-10-28 00:14:24 +01:00

616 lines
27 KiB
Python

import json
import logging
from typing import Dict, Any, List, Optional, Tuple, Union
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.services.serviceAi.subSharedAiUtils import (
buildPromptWithPlaceholders,
extractTextFromContentParts,
reduceText,
determineCallType
)
logger = logging.getLogger(__name__)
# Generic continuation instruction for all prompts with JSON responses.
# _callAiWithLooping() substitutes this text for the literal marker
# "LOOP_INSTRUCTION" inside a prompt; the AI's JSON reply must then carry
# a "continuation" field that drives the looping protocol.
# NOTE: this string is injected verbatim into prompts - do not reformat.
LOOP_INSTRUCTION_TEXT = """
MANDATORY RULE:
Return ONLY raw JSON (no ```json blocks, no text before/after)
CONTINUATION REQUIREMENT:
Your response must be a valid JSON object with a "continuation" field.
- If you can complete the FULL request: Set {"continuation": null}
- If you MUST stop early (due to token limits): Set {"continuation": {"last_data_items": "brief summary of what was delivered for context", "next_instruction": "what to deliver next to complete the request"}}
The "continuation" field controls whether this AI call continues in a loop or stops.
Refer to the json template below to see where to set the "continuation" information.
"""
# Rebuild the model to resolve forward references (pydantic v2 API);
# must run after all referenced datamodels are imported above.
AiCallRequest.model_rebuild()
class SubCoreAi:
"""Core AI operations including image analysis, text generation, and planning calls."""
def __init__(self, services, aiObjects):
    """Create the core AI operations handler.

    Args:
        services: Service center instance for accessing other services
        aiObjects: Initialized AiObjects instance
    """
    # Keep direct references; all calls are delegated to these two.
    self.aiObjects = aiObjects
    self.services = services
async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions:
    """Ask the AI to classify a prompt and derive suitable AiCallOptions.

    Sends a meta-prompt listing the currently valid enum values and parses
    the model's JSON answer (operation type, priority, processing mode,
    compression flags) into an AiCallOptions instance.

    Args:
        prompt: The user prompt to classify (sanitized before embedding).

    Returns:
        AiCallOptions from the AI analysis, or conservative defaults
        (DATA_ANALYSE / BALANCED / BASIC) when analysis or parsing fails.
    """
    try:
        # Enumerate allowed values dynamically so the meta-prompt stays
        # in sync with the Pydantic enums.
        operation_types = [e.value for e in OperationTypeEnum]
        priorities = [e.value for e in PriorityEnum]
        processing_modes = [e.value for e in ProcessingModeEnum]
        # Create analysis prompt for AI to determine operation type and parameters
        analysisPrompt = f"""
You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters.
PROMPT TO ANALYZE:
{self.services.ai.sanitizePromptContent(prompt, 'userinput')}
Based on the prompt content, determine:
1. operationType: Choose the most appropriate from: {', '.join(operation_types)}
2. priority: Choose from: {', '.join(priorities)}
3. processingMode: Choose from: {', '.join(processing_modes)}
4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas)
5. compressContext: true/false (true to summarize context, false to process fully)
Respond with ONLY a JSON object in this exact format:
{{
"operationType": "dataAnalyse",
"priority": "balanced",
"processingMode": "basic",
"compressPrompt": true,
"compressContext": true
}}
"""
        # The classification itself uses a fast, cheap configuration.
        request = AiCallRequest(
            prompt=analysisPrompt,
            options=AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
                priority=PriorityEnum.SPEED,
                processingMode=ProcessingModeEnum.BASIC,
                compressPrompt=True,
                compressContext=False
            )
        )
        response = await self.aiObjects.call(request)
        # Parse AI response: take the first '{' ... last '}' span and map
        # the string values back onto the enums. (Uses the module-level
        # json import; the previous function-local import shadowed it.)
        try:
            json_start = response.content.find('{')
            json_end = response.content.rfind('}') + 1
            if json_start != -1 and json_end > json_start:
                analysis = json.loads(response.content[json_start:json_end])
                operation_type = OperationTypeEnum(analysis.get('operationType', 'dataAnalyse'))
                priority = PriorityEnum(analysis.get('priority', 'balanced'))
                processing_mode = ProcessingModeEnum(analysis.get('processingMode', 'basic'))
                return AiCallOptions(
                    operationType=operation_type,
                    priority=priority,
                    processingMode=processing_mode,
                    compressPrompt=analysis.get('compressPrompt', True),
                    compressContext=analysis.get('compressContext', True)
                )
        except Exception as e:
            # Invalid JSON or an unknown enum value: fall through to defaults.
            logger.warning(f"Failed to parse AI analysis response: {e}")
    except Exception as e:
        logger.warning(f"Prompt analysis failed: {e}")
    # Fallback to conservative default options.
    return AiCallOptions(
        operationType=OperationTypeEnum.DATA_ANALYSE,
        priority=PriorityEnum.BALANCED,
        processingMode=ProcessingModeEnum.BASIC
    )
# Shared Core Function for AI Calls with Looping
async def _callAiWithLooping(
    self,
    prompt: str,
    options: AiCallOptions,
    debugPrefix: str = "ai_call"
) -> str:
    """
    Shared core function for AI calls with the continuation looping system.

    When the prompt contains the literal marker "LOOP_INSTRUCTION", it is
    replaced with LOOP_INSTRUCTION_TEXT and the AI is expected to answer
    with a JSON object carrying a "continuation" field; a non-null
    continuation triggers another iteration. Every prompt and raw response
    is mirrored to debug files, and per-iteration workflow stats are emitted.

    Args:
        prompt: The prompt to send to AI (may contain "LOOP_INSTRUCTION")
        options: AI call configuration options
        debugPrefix: Prefix for debug file names

    Returns:
        Complete AI response after all iterations (JSON parts merged)
    """
    max_iterations = 100  # Hard cap to prevent infinite continuation loops
    iteration = 0
    accumulatedContent = []
    lastContinuationData = None
    logger.debug(f"Starting AI call with looping (debug prefix: {debugPrefix})")
    # Looping is only active when the prompt opted in via the marker.
    loopInstruction = LOOP_INSTRUCTION_TEXT if ("LOOP_INSTRUCTION" in prompt) else ""
    while iteration < max_iterations:
        iteration += 1
        logger.debug(f"AI call iteration {iteration}/{max_iterations}")
        # Build this iteration's prompt. On follow-up rounds with usable
        # continuation data, that context is prepended to the instruction;
        # otherwise the plain instruction is substituted.
        if "LOOP_INSTRUCTION" in prompt:
            if iteration > 1 and lastContinuationData and isinstance(lastContinuationData, dict):
                continuationPrompt = self._buildContinuationPrompt(lastContinuationData, iteration)
                replacement = f"{continuationPrompt}\n\n{loopInstruction}"
            else:
                replacement = loopInstruction
            iterationPrompt = prompt.replace("LOOP_INSTRUCTION", replacement)
        else:
            iterationPrompt = prompt
        # Make AI call (AiCallRequest is imported at module level).
        try:
            request = AiCallRequest(
                prompt=iterationPrompt,
                context="",
                options=options
            )
            # Write the ACTUAL prompt sent to AI (including continuation
            # context). First iteration keeps the historic file name,
            # later iterations are numbered.
            if iteration == 1:
                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
            else:
                self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
            response = await self.aiObjects.call(request)
            result = response.content
            # Mirror the raw AI response with the same naming scheme.
            if iteration == 1:
                self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
            else:
                self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
            # Emit stats for this iteration
            self.services.workflow.storeWorkflowStat(
                self.services.currentWorkflow,
                response,
                f"ai.call.{debugPrefix}.iteration_{iteration}"
            )
            if not result or not result.strip():
                logger.warning(f"Iteration {iteration}: Empty response, stopping")
                break
            accumulatedContent.append(result)
            # Continuation check only applies when LOOP_INSTRUCTION was used.
            if loopInstruction:
                try:
                    # Extract JSON substring if wrapped (e.g. ```json ... ```)
                    extracted = self.services.utils.jsonExtractString(result)
                    parsed_result = json.loads(extracted)
                    if isinstance(parsed_result, dict):
                        continuation = parsed_result.get("continuation")
                        if continuation is None:
                            # Final response - break loop
                            logger.debug(f"Iteration {iteration}: Final response received (continuation: null)")
                            break
                        # Continuation requested: keep its data (if it is a
                        # dict) for the next round's context prompt.
                        lastContinuationData = continuation if isinstance(continuation, dict) else None
                        logger.debug(f"Iteration {iteration}: Continuation detected, continuing...")
                        continue
                    # Non-dict JSON (e.g. a bare list): fall through and loop
                    # again, matching the pre-existing control flow.
                except json.JSONDecodeError:
                    # Not JSON, treat as final response
                    logger.warning(f"Iteration {iteration}: Non-JSON response - treating as final")
                    self.services.utils.writeDebugFile(result, f"{debugPrefix}_error_non_json_response_iteration_{iteration}")
                    break
            else:
                # No loop instruction format - treat as final response
                logger.debug(f"Iteration {iteration}: Final response received (no loop format)")
                break
        except Exception as e:
            logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
            break
    if iteration >= max_iterations:
        logger.warning(f"AI call stopped after maximum iterations ({max_iterations})")
    # Intelligently merge JSON content from all iterations.
    final_result = self._mergeJsonContent(accumulatedContent) if accumulatedContent else ""
    # Write final result to debug file
    self.services.utils.writeDebugFile(final_result, f"{debugPrefix}_final_result")
    logger.info(f"AI call completed: {len(accumulatedContent)} parts from {iteration} iterations")
    return final_result
def _buildContinuationPrompt(
    self,
    continuationData: dict,
    iteration: int
) -> str:
    """
    Render the standardized continuation prompt for a follow-up iteration.

    Args:
        continuationData: Dict with optional "last_data_items" and
            "next_instruction" entries from the previous AI response
        iteration: Current iteration number

    Returns:
        Formatted continuation prompt string
    """
    delivered = continuationData.get("last_data_items", "")
    pending = continuationData.get("next_instruction", "")
    # Substitute fixed fallback sentences when either field is empty.
    delivered_line = f"Already delivered data: {delivered}" if delivered else "No previous data specified"
    pending_line = f"Your task to deliver: {pending}" if pending else "No specific task provided"
    return "\n".join([
        f"CONTINUATION REQUEST (Iteration {iteration}):",
        "You are continuing a previous response. DO NOT repeat any previous content.",
        delivered_line,
        pending_line,
        "CRITICAL REQUIREMENTS:",
        "- Start from the exact point specified above",
        "- DO NOT repeat any previous content",
    ])
def _mergeJsonContent(self, accumulatedContent: List[str]) -> str:
    """
    Generic JSON merger that combines all lists from multiple iterations.

    The first parsed JSON object acts as the base; list-valued keys from
    later objects are appended to the base lists, unknown keys are added
    as-is, and the "continuation" marker is cleared in the result.

    Args:
        accumulatedContent: Raw AI responses, one per loop iteration.

    Returns:
        Merged JSON string, or the first raw response when merging is not
        possible (nothing parseable, or an unexpected merge error).
    """
    if not accumulatedContent:
        return ""
    if len(accumulatedContent) == 1:
        return accumulatedContent[0]
    try:
        # Parse every response; keep only JSON *objects*. Previously a
        # non-dict part (e.g. a bare JSON list) raised inside the merge
        # loop and silently discarded all merged data via the broad except.
        parsed_responses = []
        for content in accumulatedContent:
            try:
                extracted = self.services.utils.jsonExtractString(content)
                parsed = json.loads(extracted)
            except json.JSONDecodeError as e:
                logger.warning(f"Failed to parse JSON content: {str(e)}")
                continue
            if isinstance(parsed, dict):
                parsed_responses.append(parsed)
            else:
                logger.warning("Skipping non-object JSON part during merge")
        if not parsed_responses:
            return accumulatedContent[0]  # Return first response if all parsing failed
        # Start with first response as base (shallow copy; its lists are
        # extended in place, which is safe since parsed_responses is local).
        merged = parsed_responses[0].copy()
        # Merge all lists from all responses
        for response in parsed_responses[1:]:
            for key, value in response.items():
                if isinstance(value, list) and key in merged and isinstance(merged[key], list):
                    # Merge lists by extending
                    merged[key].extend(value)
                elif key not in merged:
                    # Add new fields
                    merged[key] = value
        # Mark as complete (no further continuation expected by consumers)
        merged["continuation"] = None
        return json.dumps(merged, indent=2)
    except Exception as e:
        logger.error(f"Error merging JSON content: {str(e)}")
        return accumulatedContent[0]  # Return first response on error
# Planning AI Call
async def callAiPlanning(
    self,
    prompt: str,
    placeholders: Optional[List[PromptPlaceholder]] = None
) -> str:
    """
    Planning AI call for task planning, action planning, action selection, etc.

    Always runs with fixed options tuned for planning (PLAN / QUALITY /
    DETAILED, no prompt or context compression).

    Args:
        prompt: The planning prompt
        placeholders: Optional list of placeholder replacements

    Returns:
        Planning JSON response
    """
    # Planning calls never use dynamic option analysis.
    logger.debug("Using static parameters for planning call")
    planningOptions = AiCallOptions(
        operationType=OperationTypeEnum.PLAN,
        priority=PriorityEnum.QUALITY,
        processingMode=ProcessingModeEnum.DETAILED,
        compressPrompt=False,
        compressContext=False
    )
    # Resolve placeholder labels into the prompt text, if any were given.
    resolvedPrompt = prompt
    if placeholders:
        labelToContent = {entry.label: entry.content for entry in placeholders}
        resolvedPrompt = buildPromptWithPlaceholders(prompt, labelToContent)
    # Delegate to the shared looping core with the planning debug prefix.
    return await self._callAiWithLooping(resolvedPrompt, planningOptions, "plan")
# Document Generation AI Call
async def callAiDocuments(
    self,
    prompt: str,
    documents: Optional[List[ChatDocument]] = None,
    options: Optional[AiCallOptions] = None,
    outputFormat: Optional[str] = None,
    title: Optional[str] = None
) -> Union[str, Dict[str, Any]]:
    """
    Document generation AI call for all non-planning calls.

    Resolves call options (via AI analysis only when the caller left them
    unset), then dispatches: formatted document generation when an
    outputFormat is given, otherwise a plain text call.

    Args:
        prompt: The main prompt for the AI call
        documents: Optional list of documents to process
        options: AI call configuration options
        outputFormat: Optional output format for document generation
        title: Optional title for generated documents

    Returns:
        AI response as string, or dict with documents if outputFormat is specified
    """
    # Use AI analysis ONLY when truly needed: no options given at all, or
    # options present but operationType explicitly unset.
    if options is None or (hasattr(options, 'operationType') and options.operationType is None):
        logger.debug("Analyzing prompt to determine optimal parameters")
        options = await self._analyzePromptAndCreateOptions(prompt)
    else:
        logger.debug(f"Using provided options: operationType={options.operationType}, priority={options.priority}")
    # Document generation with a specific output format takes its own path.
    if outputFormat:
        return await self._generateFormattedDocument(prompt, documents, options, outputFormat, title)
    # Handle text calls (no output format specified)
    if documents:
        # Use document processing for text calls with documents
        return await self.services.ai.documentProcessor.callAiText(prompt, documents, options)
    # Use shared core function for direct text calls
    return await self._callAiWithLooping(prompt, options, "text")

async def _generateFormattedDocument(
    self,
    prompt: str,
    documents: Optional[List[ChatDocument]],
    options: AiCallOptions,
    outputFormat: str,
    title: Optional[str]
) -> Dict[str, Any]:
    """Generate and render a document in the requested output format.

    Optionally extracts content from the given documents first, builds a
    generation prompt, runs the looping AI call, parses the JSON result,
    and renders it via GenerationService.

    Returns:
        Result dict with success flag, parsed content, and the rendered
        document entry; {"success": False, "error": ...} on failure.
    """
    # Use unified generation method for all document generation.
    if documents and len(documents) > 0:
        logger.debug(f"Extracting content from {len(documents)} documents")
        extracted_content = await self.services.ai.documentProcessor.callAiText(prompt, documents, options)
    else:
        logger.debug("No documents provided - using direct generation")
        extracted_content = None
    logger.debug(f"[DEBUG] title value: {title}, type: {type(title)}")
    from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
    generation_prompt = await buildGenerationPrompt(outputFormat, prompt, title, extracted_content)
    generated_json = await self._callAiWithLooping(generation_prompt, options, "document_generation")
    # Parse the generated JSON (extract fenced/embedded JSON first)
    try:
        extracted_json = self.services.utils.jsonExtractString(generated_json)
        generated_data = json.loads(extracted_json)
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse generated JSON: {str(e)}")
        logger.error(f"JSON content length: {len(generated_json)}")
        logger.error(f"JSON content preview (last 200 chars): ...{generated_json[-200:]}")
        logger.error(f"JSON content around error position: {generated_json[max(0, e.pos-50):e.pos+50]}")
        # Write the problematic JSON to debug file
        self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
        return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"}
    # Render to final format using the existing renderer
    try:
        from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
        generationService = GenerationService(self.services)
        rendered_content, mime_type = await generationService.renderReport(
            generated_data, outputFormat, title or "Generated Document", prompt, self
        )
        # Build result in the expected format
        result = {
            "success": True,
            "content": generated_data,
            "documents": [{
                "documentName": f"generated.{outputFormat}",
                "documentData": rendered_content,
                "mimeType": mime_type,
                "title": title or "Generated Document"
            }],
            "is_multi_file": False,
            "format": outputFormat,
            "title": title,
            "split_strategy": "single",
            "total_documents": 1,
            "processed_documents": 1
        }
        # Log AI response for debugging
        self.services.utils.writeDebugFile(str(result), "document_generation_response", documents)
        return result
    except Exception as e:
        logger.error(f"Error rendering document: {str(e)}")
        return {"success": False, "error": f"Rendering failed: {str(e)}"}
# AI Image Analysis
async def readImage(
    self,
    prompt: str,
    imageData: Union[str, bytes],
    mimeType: Optional[str] = None,
    options: Optional[AiCallOptions] = None,
) -> str:
    """Call AI for image analysis using interface.call() with contentParts.

    Args:
        prompt: Analysis instruction for the model.
        imageData: Image payload, raw bytes or an already base64-encoded string.
        mimeType: MIME type of the image (defaults to "image/jpeg").
        options: Optional call options; operationType is forced to
            IMAGE_ANALYSE on a copy, leaving the caller's object untouched.

    Returns:
        The model's text response, or an "Error: ..." string on failure.
    """
    try:
        # Check if imageData is valid
        if not imageData:
            error_msg = "No image data provided"
            self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE")
            logger.error(f"Error in AI image analysis: {error_msg}")
            return f"Error: {error_msg}"
        self.services.utils.debugLogToFile(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}", "AI_SERVICE")
        logger.info(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}")
        # Always use IMAGE_ANALYSE operation type for image processing.
        # Copy instead of mutating so the caller's options object is not
        # silently changed as a side effect of this call.
        if options is None:
            options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE)
        else:
            options = options.model_copy(update={"operationType": OperationTypeEnum.IMAGE_ANALYSE})
        # Create content parts with image data
        from modules.datamodels.datamodelExtraction import ContentPart
        import base64
        # ContentPart.data must be a string - convert bytes to base64 if needed
        if isinstance(imageData, bytes):
            imageDataStr = base64.b64encode(imageData).decode('utf-8')
        else:
            # Already a base64 string
            imageDataStr = imageData
        imagePart = ContentPart(
            id="image_0",
            parentId=None,
            label="Image",
            typeGroup="image",
            mimeType=mimeType or "image/jpeg",
            data=imageDataStr,  # Must be a string (base64 encoded)
            metadata={"imageAnalysis": True}
        )
        # Create request with content parts (AiCallRequest is imported at
        # module level; the previous local re-import was redundant).
        request = AiCallRequest(
            prompt=prompt,
            context="",
            options=options,
            contentParts=[imagePart]
        )
        self.services.utils.debugLogToFile(f"Calling aiObjects.call() with operationType: {options.operationType}", "AI_SERVICE")
        logger.info(f"Calling aiObjects.call() with operationType: {options.operationType}")
        # Write image analysis prompt to debug file
        self.services.utils.writeDebugFile(prompt, "image_analysis_prompt")
        response = await self.aiObjects.call(request)
        # response is an AiCallResponse object; mirror its content to debug.
        result = response.content
        self.services.utils.writeDebugFile(result, "image_analysis_response")
        # Debug the result
        self.services.utils.debugLogToFile(f"AI image analysis result type: {type(response)}, content length: {len(result)}", "AI_SERVICE")
        # Check if result is valid
        if not result or (isinstance(result, str) and not result.strip()):
            error_msg = f"No response from AI image analysis (result: {repr(result)})"
            self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE")
            logger.error(f"Error in AI image analysis: {error_msg}")
            return f"Error: {error_msg}"
        self.services.utils.debugLogToFile(f"callImage returned: {result[:200]}..." if len(result) > 200 else result, "AI_SERVICE")
        logger.info(f"callImage returned: {result[:200]}..." if len(result) > 200 else result)
        return result
    except Exception as e:
        self.services.utils.debugLogToFile(f"Error in AI image analysis: {str(e)}", "AI_SERVICE")
        logger.error(f"Error in AI image analysis: {str(e)}")
        return f"Error: {str(e)}"
# AI Image Generation
async def generateImage(
    self,
    prompt: str,
    size: str = "1024x1024",
    quality: str = "standard",
    style: str = "vivid",
    options: Optional[AiCallOptions] = None,
) -> Dict[str, Any]:
    """Generate an image using AI via interface.generateImage().

    Args:
        prompt: Description of the image to generate.
        size: Image dimensions string, e.g. "1024x1024" (forwarded as-is).
        quality: Quality setting (forwarded as-is).
        style: Style setting (forwarded as-is).
        options: Optional AI call configuration.

    Returns:
        On success, a dict with success/content/modelName/priceUsd/
        processingTime (or the raw response if it has no content attr);
        on failure, {"success": False, "error": ...}.
    """
    try:
        response = await self.aiObjects.generateImage(prompt, size, quality, style, options)
        # Emit stats for image generation (plain string: the previous
        # f-string had no placeholders).
        self.services.workflow.storeWorkflowStat(
            self.services.currentWorkflow,
            response,
            "ai.generate.image"
        )
        # Convert response to dict format for backward compatibility
        if hasattr(response, 'content'):
            return {
                "success": True,
                "content": response.content,
                "modelName": response.modelName,
                "priceUsd": response.priceUsd,
                "processingTime": response.processingTime
            }
        # No content attribute: pass the response through unchanged.
        return response
    except Exception as e:
        logger.error(f"Error in AI image generation: {str(e)}")
        return {"success": False, "error": str(e)}