# gateway/modules/services/serviceAi/subCoreAi.py

import logging
from typing import Dict, Any, List, Optional, Tuple, Union
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, ModelCapabilities, OperationType, Priority
from modules.shared.debugLogger import writeDebugFile
logger = logging.getLogger(__name__)
class SubCoreAi:
"""Core AI operations including image analysis, text generation, and planning calls."""
def __init__(self, services, aiObjects):
"""Initialize core AI operations.
Args:
services: Service center instance for accessing other services
aiObjects: Initialized AiObjects instance
"""
self.services = services
self.aiObjects = aiObjects
# Shared Core Function for AI Calls with Looping
async def _callAiWithLooping(
self,
prompt: str,
options: AiCallOptions,
debugPrefix: str = "ai_call",
        loopInstruction: Optional[str] = None
) -> str:
"""
Shared core function for AI calls with looping system.
        Handles continuation logic when a response needs multiple rounds.
        Writes each prompt and response to the debug file log.
Args:
prompt: The prompt to send to AI
options: AI call configuration options
debugPrefix: Prefix for debug file names
            loopInstruction: If provided, replaces the LOOP_INSTRUCTION placeholder and is included in continuation prompts
Returns:
Complete AI response after all iterations
"""
max_iterations = 100 # Prevent infinite loops
iteration = 0
accumulatedContent = []
logger.debug(f"Starting AI call with looping (debug prefix: {debugPrefix}, loopInstruction: {loopInstruction is not None})")
        # Store the original prompt so continuation prompts can rebuild from it
        originalPrompt = prompt
        # Replace the LOOP_INSTRUCTION placeholder before the first iteration
        if loopInstruction:
            if "LOOP_INSTRUCTION" not in prompt:
                raise ValueError("LOOP_INSTRUCTION placeholder not found in prompt when loopInstruction provided")
            prompt = prompt.replace("LOOP_INSTRUCTION", loopInstruction)
            logger.debug("Replaced LOOP_INSTRUCTION placeholder with provided instruction")
while iteration < max_iterations:
iteration += 1
logger.debug(f"AI call iteration {iteration}/{max_iterations}")
# Build iteration prompt
if iteration == 1:
iterationPrompt = prompt
            elif loopInstruction:
                # iteration > 1 here; use continuation logic only when loopInstruction is provided
iterationPrompt = self._buildContinuationPrompt(originalPrompt, accumulatedContent, iteration, loopInstruction)
else:
# No looping - use original prompt
iterationPrompt = prompt
# Make AI call
try:
request = AiCallRequest(
prompt=iterationPrompt,
context="",
options=options
)
# Write the ACTUAL prompt sent to AI (including continuation context)
writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}", None)
response = await self.aiObjects.call(request)
result = response.content
# Write raw AI response to debug file
writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}", None)
# Emit stats for this iteration
self.services.workflow.storeWorkflowStat(
self.services.currentWorkflow,
response,
f"ai.call.{debugPrefix}.iteration_{iteration}"
)
if not result or not result.strip():
logger.warning(f"Iteration {iteration}: Empty response, stopping")
break
# Check if this is a continuation response (only if loopInstruction is provided)
if loopInstruction and "[CONTINUE:" in result:
# Extract the content before the continuation marker
contentPart = result.split("[CONTINUE:")[0].strip()
if contentPart:
accumulatedContent.append(contentPart)
logger.debug(f"Iteration {iteration}: Continuation detected, continuing...")
continue
else:
# This is the final response
accumulatedContent.append(result)
logger.debug(f"Iteration {iteration}: Final response received")
break
except Exception as e:
logger.error(f"Error in AI call iteration {iteration}: {str(e)}")
break
if iteration >= max_iterations:
logger.warning(f"AI call stopped after maximum iterations ({max_iterations})")
# Combine all accumulated content
final_result = "\n\n".join(accumulatedContent) if accumulatedContent else ""
# Write final result to debug file
writeDebugFile(final_result, f"{debugPrefix}_final_result", None)
logger.info(f"AI call completed: {len(accumulatedContent)} parts from {iteration} iterations")
return final_result
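    # Usage sketch (illustrative only; assumes `subCore` is an initialized
    # SubCoreAi with a wired-up service center and AiObjects instance):
    #
    #     options = AiCallOptions()
    #     result = await subCore._callAiWithLooping(
    #         "Write the full report. LOOP_INSTRUCTION",
    #         options,
    #         debugPrefix="report",
    #         loopInstruction=(
    #             "If you cannot complete the full response, end with: "
    #             "[CONTINUE: brief description of what still needs to be generated]"
    #         ),
    #     )
    #
    # Each iteration's prompt/response pair is written to the debug log as
    # report_prompt_iteration_N / report_response_iteration_N.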
def _buildContinuationPrompt(
self,
base_prompt: str,
accumulatedContent: List[str],
iteration: int,
        loopInstruction: Optional[str] = None
) -> str:
"""
Build a prompt for continuation iterations.
"""
continuation_instructions = f"""
CONTINUATION REQUEST (Iteration {iteration}):
You are continuing from a previous response. Please continue generating content from where you left off.
IMPORTANT:
- Continue from the exact point where you stopped
- Maintain the same format and structure
- {loopInstruction if loopInstruction else "If you cannot complete the full response, end with: [CONTINUE: brief description of what still needs to be generated]"}
- Only stop when the response is completely generated
Previous content generated:
{accumulatedContent[-1] if accumulatedContent else "None"}
Continue generating content now:
"""
return f"{base_prompt}{continuation_instructions}"
def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
"""
Build full prompt by replacing placeholders with their content.
        Supports both the legacy {{placeholder}} and the new {{KEY:placeholder}} formats.
"""
if not placeholders:
return prompt
full_prompt = prompt
for placeholder, content in placeholders.items():
# Replace both old format {{placeholder}} and new format {{KEY:placeholder}}
full_prompt = full_prompt.replace(f"{{{{{placeholder}}}}}", content)
full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", content)
return full_prompt
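    # Example (both placeholder formats resolve to the same content):
    #
    #     prompt = "Task: {{KEY:task}} for {{user}}"
    #     placeholders = {"task": "summarize", "user": "Alice"}
    #     # _buildPromptWithPlaceholders(prompt, placeholders)
    #     # -> "Task: summarize for Alice"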
async def _buildGenerationPrompt(
self,
prompt: str,
extracted_content: Optional[str],
outputFormat: str,
title: str
) -> str:
"""
Build generation prompt for document generation.
"""
from modules.services.serviceGeneration.subPromptBuilder import buildGenerationPrompt
# Build the generation prompt using the existing system
generation_prompt = await buildGenerationPrompt(
outputFormat=outputFormat,
userPrompt=prompt,
title=title,
aiService=self,
services=self.services
)
# If we have extracted content, prepend it to the prompt
if extracted_content:
generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}
{generation_prompt}"""
return generation_prompt
# Planning AI Call
async def callAiPlanning(
self,
prompt: str,
placeholders: Optional[List[PromptPlaceholder]] = None,
options: Optional[AiCallOptions] = None,
loopInstruction: Optional[str] = None
) -> str:
"""
Planning AI call for task planning, action planning, action selection, etc.
        Args:
            prompt: The planning prompt
            placeholders: Optional list of placeholder replacements
            options: AI call configuration options
            loopInstruction: Optional instruction that enables the continuation loop
        Returns:
            Planning JSON response
"""
if options is None:
options = AiCallOptions()
# Build full prompt with placeholders
if placeholders:
placeholders_dict = {p.label: p.content for p in placeholders}
full_prompt = self._buildPromptWithPlaceholders(prompt, placeholders_dict)
else:
full_prompt = prompt
# Use shared core function with planning-specific debug prefix
return await self._callAiWithLooping(full_prompt, options, "planning", loopInstruction=loopInstruction)
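    # Usage sketch (illustrative; per the call above, PromptPlaceholder carries
    # `label` and `content` fields):
    #
    #     placeholders = [PromptPlaceholder(label="task", content="Plan the data migration")]
    #     plan_json = await subCore.callAiPlanning(
    #         "Produce an action plan for {{KEY:task}}",
    #         placeholders=placeholders,
    #     )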
# Document Generation AI Call
async def callAiDocuments(
self,
prompt: str,
documents: Optional[List[ChatDocument]] = None,
options: Optional[AiCallOptions] = None,
outputFormat: Optional[str] = None,
title: Optional[str] = None
) -> Union[str, Dict[str, Any]]:
"""
Document generation AI call for all non-planning calls.
Uses the current unified path with extraction and generation.
Args:
prompt: The main prompt for the AI call
documents: Optional list of documents to process
options: AI call configuration options
outputFormat: Optional output format for document generation
title: Optional title for generated documents
Returns:
AI response as string, or dict with documents if outputFormat is specified
"""
if options is None:
options = AiCallOptions()
# Handle document generation with specific output format using unified approach
if outputFormat:
# Use unified generation method for all document generation
if documents and len(documents) > 0:
logger.info(f"Extracting content from {len(documents)} documents")
extracted_content = await self.services.ai.documentProcessor.callAiText(prompt, documents, options)
else:
logger.info("No documents provided - using direct generation")
extracted_content = None
generation_prompt = await self._buildGenerationPrompt(prompt, extracted_content, outputFormat, title)
            generated_json = await self._callAiWithLooping(
                generation_prompt,
                options,
                "document_generation",
                loopInstruction=(
                    "If you cannot complete the full response, end with: "
                    "[CONTINUE: brief description of what still needs to be generated]"
                )
            )
# Parse the generated JSON
try:
import json
generated_data = json.loads(generated_json)
except json.JSONDecodeError as e:
logger.error(f"Failed to parse generated JSON: {str(e)}")
logger.error(f"JSON content length: {len(generated_json)}")
logger.error(f"JSON content preview (last 200 chars): ...{generated_json[-200:]}")
logger.error(f"JSON content around error position: {generated_json[max(0, e.pos-50):e.pos+50]}")
# Write the problematic JSON to debug file
writeDebugFile(generated_json, "failed_json_parsing", None)
return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"}
# Render to final format using the existing renderer
try:
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
generationService = GenerationService(self.services)
rendered_content, mime_type = await generationService.renderReport(
generated_data, outputFormat, title or "Generated Document", prompt, self
)
# Build result in the expected format
result = {
"success": True,
"content": generated_data,
"documents": [{
"documentName": f"generated.{outputFormat}",
"documentData": rendered_content,
"mimeType": mime_type,
"title": title or "Generated Document"
}],
"is_multi_file": False,
"format": outputFormat,
"title": title,
"split_strategy": "single",
"total_documents": 1,
"processed_documents": 1
}
# Log AI response for debugging
try:
writeDebugFile(str(result), "documentGenerationResponse", documents)
except Exception:
pass
return result
except Exception as e:
logger.error(f"Error rendering document: {str(e)}")
return {"success": False, "error": f"Rendering failed: {str(e)}"}
# Handle text calls (no output format specified)
if documents:
# Use document processing for text calls with documents
result = await self.services.ai.documentProcessor.callAiText(prompt, documents, options)
else:
# Use shared core function for direct text calls
result = await self._callAiWithLooping(prompt, options, "text", loopInstruction=None)
return result
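    # Usage sketch: with outputFormat set, the call returns the rendered-document
    # dict built above; without it, plain text is returned. ("pdf" is an assumed
    # example format value.)
    #
    #     result = await subCore.callAiDocuments(
    #         "Summarize the attached contracts",
    #         documents=chatDocuments,
    #         outputFormat="pdf",
    #         title="Contract Summary",
    #     )
    #     if result.get("success"):
    #         rendered = result["documents"][0]["documentData"]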
# AI Image Analysis
async def readImage(
self,
prompt: str,
imageData: Union[str, bytes],
        mimeType: Optional[str] = None,
options: Optional[AiCallOptions] = None,
) -> str:
"""Call AI for image analysis using interface.callImage()."""
try:
# Check if imageData is valid
if not imageData:
error_msg = "No image data provided"
self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE")
logger.error(f"Error in AI image analysis: {error_msg}")
return f"Error: {error_msg}"
self.services.utils.debugLogToFile(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}", "AI_SERVICE")
logger.info(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}")
# Always use IMAGE_ANALYSIS operation type for image processing
if options is None:
options = AiCallOptions(operationType=OperationType.IMAGE_ANALYSIS)
else:
# Override the operation type to ensure image analysis
options.operationType = OperationType.IMAGE_ANALYSIS
self.services.utils.debugLogToFile(f"Calling aiObjects.callImage with operationType: {options.operationType}", "AI_SERVICE")
logger.info(f"Calling aiObjects.callImage with operationType: {options.operationType}")
response = await self.aiObjects.callImage(prompt, imageData, mimeType, options)
# Emit stats for image analysis
self.services.workflow.storeWorkflowStat(
self.services.currentWorkflow,
response,
f"ai.image.{options.operationType}"
)
# Debug the result
self.services.utils.debugLogToFile(f"Raw AI result type: {type(response)}, value: {repr(response)}", "AI_SERVICE")
# Extract content from response
result = response.content if hasattr(response, 'content') else str(response)
# Check if result is valid
if not result or (isinstance(result, str) and not result.strip()):
error_msg = f"No response from AI image analysis (result: {repr(result)})"
self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE")
logger.error(f"Error in AI image analysis: {error_msg}")
return f"Error: {error_msg}"
self.services.utils.debugLogToFile(f"callImage returned: {result[:200]}..." if len(result) > 200 else result, "AI_SERVICE")
logger.info(f"callImage returned: {result[:200]}..." if len(result) > 200 else result)
return result
except Exception as e:
self.services.utils.debugLogToFile(f"Error in AI image analysis: {str(e)}", "AI_SERVICE")
logger.error(f"Error in AI image analysis: {str(e)}")
return f"Error: {str(e)}"
# AI Image Generation
async def generateImage(
self,
prompt: str,
size: str = "1024x1024",
quality: str = "standard",
style: str = "vivid",
options: Optional[AiCallOptions] = None,
) -> Dict[str, Any]:
"""Generate an image using AI using interface.generateImage()."""
try:
response = await self.aiObjects.generateImage(prompt, size, quality, style, options)
# Emit stats for image generation
self.services.workflow.storeWorkflowStat(
self.services.currentWorkflow,
response,
f"ai.generate.image"
)
# Convert response to dict format for backward compatibility
if hasattr(response, 'content'):
return {
"success": True,
"content": response.content,
"modelName": response.modelName,
"priceUsd": response.priceUsd,
"processingTime": response.processingTime
}
else:
return response
except Exception as e:
logger.error(f"Error in AI image generation: {str(e)}")
return {"success": False, "error": str(e)}
def _determineCallType(self, documents: Optional[List[ChatDocument]], operation_type: str) -> str:
"""
Determine call type based on documents and operation type.
Criteria: no documents AND operationType is "generate_plan" -> planning
All other cases -> text
"""
has_documents = documents is not None and len(documents) > 0
is_planning_operation = operation_type == OperationType.GENERATE_PLAN
if not has_documents and is_planning_operation:
return "planning"
else:
return "text"
    # TODO: The helper functions below still need review.
def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]:
"""
Get model capabilities for content processing, including appropriate size limits for chunking.
"""
# Estimate total content size
prompt_size = len(prompt.encode('utf-8'))
document_size = 0
if documents:
# Rough estimate of document content size
for doc in documents:
document_size += doc.fileSize or 0
total_size = prompt_size + document_size
        # Approximate AiObjects' model selection by scanning the available model registry
from modules.interfaces.interfaceAiObjects import aiModels
# Find the best model for this content size and operation
best_model = None
best_context_length = 0
for model_name, model_info in aiModels.items():
context_length = model_info.get("contextLength", 0)
# Skip models with no context length or too small for content
if context_length == 0:
continue
# Check if model supports the operation type
capabilities = model_info.get("capabilities", [])
if options.operationType == OperationType.IMAGE_ANALYSIS and "image_analysis" not in capabilities:
continue
elif options.operationType == OperationType.IMAGE_GENERATION and "image_generation" not in capabilities:
continue
elif options.operationType == OperationType.WEB_RESEARCH and "web_search" not in capabilities:
continue
elif "text_generation" not in capabilities:
continue
            # Prefer models that can handle the content without chunking; note this
            # heuristic compares context length (tokens) against 80% of content size (bytes)
            if context_length >= total_size * 0.8:
if context_length > best_context_length:
best_model = model_info
best_context_length = context_length
elif best_model is None: # Fallback to largest available model
if context_length > best_context_length:
best_model = model_info
best_context_length = context_length
# Fallback to a reasonable default if no model found
if best_model is None:
best_model = {
"contextLength": 128000, # GPT-4o default
"llmName": "gpt-4o"
}
# Calculate appropriate sizes
# Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters)
context_length_bytes = int(best_model["contextLength"] * 4)
max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length
text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks
image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks
logger.debug(f"Selected model: {best_model.get('llmName', 'unknown')} with context length: {best_model['contextLength']}")
logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes")
logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes")
return {
"maxContextBytes": max_context_bytes,
"textChunkSize": text_chunk_size,
"imageChunkSize": image_chunk_size
}
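    # Worked example of the size math for the GPT-4o fallback (128000 tokens):
    #
    #     context_length_bytes = 128000 * 4         # 512000
    #     max_context_bytes    = 512000 * 0.9       # 460800
    #     text_chunk_size      = 460800 * 0.7       # 322560
    #     image_chunk_size     = 460800 * 0.8       # 368640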
def _getModelsForOperation(self, operation_type: str, options: AiCallOptions) -> List[ModelCapabilities]:
"""
Get models capable of handling the specific operation with capability filtering.
"""
# Use the actual AI objects model selection instead of hardcoded default
if hasattr(self, 'aiObjects') and self.aiObjects:
# Let AiObjects handle the model selection
return []
else:
# Fallback to default model if AiObjects not available
default_model = ModelCapabilities(
name="default",
maxTokens=4000,
capabilities=["text", "reasoning"] if operation_type == "planning" else ["text"],
costPerToken=0.001,
processingTime=1.0,
isAvailable=True
)
return [default_model]
def _exceedsTokenLimit(self, text: str, model: ModelCapabilities, safety_margin: float) -> bool:
"""
Check if text exceeds model token limit with safety margin.
"""
# Simple character-based estimation (4 chars per token)
estimated_tokens = len(text) // 4
max_tokens = int(model.maxTokens * (1 - safety_margin))
return estimated_tokens > max_tokens
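    # Worked example: with maxTokens=4000 and safety_margin=0.1 the budget is
    # int(4000 * 0.9) = 3600 tokens, so any text longer than roughly
    # 3600 * 4 = 14400 characters is flagged as exceeding the limit.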
def _reducePlanningPrompt(
self,
full_prompt: str,
placeholders: Optional[Dict[str, str]],
model: ModelCapabilities,
options: AiCallOptions
) -> str:
"""
Reduce planning prompt size by summarizing placeholders while preserving prompt structure.
"""
if not placeholders:
return self._reduceText(full_prompt, 0.7)
# Reduce placeholders while preserving prompt
reduced_placeholders = {}
for placeholder, content in placeholders.items():
if len(content) > 1000: # Only reduce long content
reduction_factor = 0.7
reduced_content = self._reduceText(content, reduction_factor)
reduced_placeholders[placeholder] = reduced_content
else:
reduced_placeholders[placeholder] = content
return self._buildPromptWithPlaceholders(full_prompt, reduced_placeholders)
def _reduceTextPrompt(
self,
prompt: str,
context: str,
model: ModelCapabilities,
options: AiCallOptions
) -> str:
"""
Reduce text prompt size using typeGroup-aware chunking and merging.
"""
max_size = int(model.maxTokens * (1 - options.safetyMargin))
if options.compressPrompt:
# Reduce both prompt and context
            target_size = max_size
            current_size = len(prompt) + len(context)
            # Guard against empty input to avoid division by zero
            if current_size == 0:
                return prompt
            reduction_factor = (target_size * 0.7) / current_size
if reduction_factor < 1.0:
prompt = self._reduceText(prompt, reduction_factor)
context = self._reduceText(context, reduction_factor)
else:
# Only reduce context, preserve prompt integrity
max_context_size = max_size - len(prompt)
if len(context) > max_context_size:
reduction_factor = max_context_size / len(context)
context = self._reduceText(context, reduction_factor)
return prompt + "\n\n" + context if context else prompt
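    # Worked example with compressPrompt=True: for maxTokens=4000 and
    # safetyMargin=0.1, max_size is 3600; if prompt and context total 10000
    # characters, reduction_factor = (3600 * 0.7) / 10000 = 0.252, so both
    # parts are truncated to roughly a quarter of their length.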
def _extractTextFromContentParts(self, extracted_content) -> str:
"""
Extract text content from ExtractionService ContentPart objects.
"""
if not extracted_content or not hasattr(extracted_content, 'parts'):
return ""
text_parts = []
for part in extracted_content.parts:
if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']:
if hasattr(part, 'data') and part.data:
text_parts.append(part.data)
return "\n\n".join(text_parts)
def _reduceText(self, text: str, reduction_factor: float) -> str:
"""
        Reduce text size by the specified factor; truncates and appends a "[reduced]" marker.
"""
if reduction_factor >= 1.0:
return text
target_length = int(len(text) * reduction_factor)
return text[:target_length] + "... [reduced]"
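    # Example: _reduceText("abcdefghij", 0.5) returns "abcde... [reduced]";
    # because of the marker, the reduced text can end up slightly longer than
    # the raw target length.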