247 lines
11 KiB
Python
247 lines
11 KiB
Python
import logging
|
|
import re
|
|
from typing import Dict, Any, List, Optional, Tuple, Union
|
|
from modules.datamodels.datamodelChat import PromptPlaceholder
|
|
|
|
from modules.datamodels.datamodelChat import ChatDocument
|
|
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, ModelCapabilities, OperationType, Priority
|
|
from modules.datamodels.datamodelExtraction import ChunkResult, ContentExtracted
|
|
from modules.datamodels.datamodelWeb import (
|
|
WebResearchRequest,
|
|
WebResearchActionResult,
|
|
WebResearchDocumentData,
|
|
WebResearchActionDocument,
|
|
WebSearchResultItem,
|
|
)
|
|
from modules.interfaces.interfaceAiObjects import AiObjects
|
|
from modules.shared.configuration import APP_CONFIG
|
|
from modules.services.serviceAi.subCoreAi import SubCoreAi
|
|
from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
|
|
from modules.services.serviceAi.subWebResearch import SubWebResearch
|
|
from modules.services.serviceAi.subDocumentGeneration import SubDocumentGeneration
|
|
from modules.services.serviceAi.subUtilities import SubUtilities
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class AiService:
|
|
"""Lightweight AI service orchestrator that delegates to specialized sub-modules.
|
|
|
|
Manager delegates to specialized sub-modules:
|
|
- SubCoreAi: Core AI operations (readImage, generateImage, callAi, planning, text calls)
|
|
- SubDocumentProcessing: Document chunking, processing, and merging logic
|
|
- SubWebResearch: Web research and crawling functionality
|
|
- SubDocumentGeneration: Single-file and multi-file document generation
|
|
- SubUtilities: Helper functions, text processing, and debugging utilities
|
|
|
|
The main service acts as a coordinator:
|
|
1. Manages lazy initialization of sub-modules
|
|
2. Delegates operations to appropriate sub-modules
|
|
3. Maintains the same public API for backward compatibility
|
|
"""
|
|
|
|
def __init__(self, serviceCenter=None) -> None:
|
|
"""Initialize AI service with service center access.
|
|
|
|
Args:
|
|
serviceCenter: Service center instance for accessing other services
|
|
"""
|
|
self.services = serviceCenter
|
|
# Only depend on interfaces
|
|
self.aiObjects = None # Will be initialized in create()
|
|
self._extractionService = None # Lazy initialization
|
|
self._coreAi = None # Lazy initialization
|
|
self._documentProcessor = None # Lazy initialization
|
|
self._webResearch = None # Lazy initialization
|
|
self._documentGenerator = None # Lazy initialization
|
|
self._utilities = None # Lazy initialization
|
|
|
|
@property
|
|
def extractionService(self):
|
|
"""Lazy initialization of extraction service."""
|
|
if self._extractionService is None:
|
|
logger.info("Lazy initializing ExtractionService...")
|
|
self._extractionService = ExtractionService(self.services)
|
|
return self._extractionService
|
|
|
|
@property
|
|
def coreAi(self):
|
|
"""Lazy initialization of core AI service."""
|
|
if self._coreAi is None:
|
|
if self.aiObjects is None:
|
|
raise RuntimeError("AiService.aiObjects must be initialized before accessing coreAi. Use await AiService.create() or await service._ensureAiObjectsInitialized()")
|
|
logger.info("Lazy initializing SubCoreAi...")
|
|
self._coreAi = SubCoreAi(self.services, self.aiObjects)
|
|
return self._coreAi
|
|
|
|
@property
|
|
def documentProcessor(self):
|
|
"""Lazy initialization of document processing service."""
|
|
if self._documentProcessor is None:
|
|
logger.info("Lazy initializing SubDocumentProcessing...")
|
|
self._documentProcessor = SubDocumentProcessing(self.services, self.aiObjects)
|
|
return self._documentProcessor
|
|
|
|
@property
|
|
def webResearchService(self):
|
|
"""Lazy initialization of web research service."""
|
|
if self._webResearch is None:
|
|
logger.info("Lazy initializing SubWebResearch...")
|
|
self._webResearch = SubWebResearch(self.services, self.aiObjects)
|
|
return self._webResearch
|
|
|
|
@property
|
|
def documentGenerator(self):
|
|
"""Lazy initialization of document generation service."""
|
|
if self._documentGenerator is None:
|
|
logger.info("Lazy initializing SubDocumentGeneration...")
|
|
self._documentGenerator = SubDocumentGeneration(self.services, self.aiObjects, self.documentProcessor)
|
|
return self._documentGenerator
|
|
|
|
@property
|
|
def utilities(self):
|
|
"""Lazy initialization of utilities service."""
|
|
if self._utilities is None:
|
|
logger.info("Lazy initializing SubUtilities...")
|
|
self._utilities = SubUtilities(self.services)
|
|
return self._utilities
|
|
|
|
async def _ensureAiObjectsInitialized(self):
|
|
"""Ensure aiObjects is initialized."""
|
|
if self.aiObjects is None:
|
|
logger.info("Lazy initializing AiObjects...")
|
|
self.aiObjects = await AiObjects.create()
|
|
logger.info("AiObjects initialization completed")
|
|
|
|
@classmethod
async def create(cls, serviceCenter=None) -> "AiService":
    """Build an instance and eagerly initialize its ``aiObjects``.

    Args:
        serviceCenter: Service center instance passed to the constructor.

    Returns:
        The new instance with ``aiObjects`` set to the result of
        ``AiObjects.create()``.
    """
    logger.info("AiService.create() called")
    service = cls(serviceCenter)

    logger.info("AiService created, about to call AiObjects.create()...")
    connector = await AiObjects.create()
    service.aiObjects = connector
    logger.info("AiObjects.create() completed")

    return service
|
|
|
|
# AI Image Analysis
|
|
async def readImage(
    self,
    prompt: str,
    imageData: Union[str, bytes],
    mimeType: str = None,
    options: Optional[AiCallOptions] = None,
) -> str:
    """Analyze an image by delegating to ``coreAi.readImage``.

    Makes sure ``aiObjects`` is initialized, then forwards all arguments
    positionally and returns whatever the core module returns.
    """
    await self._ensureAiObjectsInitialized()
    core = self.coreAi
    analysis = await core.readImage(prompt, imageData, mimeType, options)
    return analysis
|
|
|
|
# AI Image Generation
|
|
async def generateImage(
    self,
    prompt: str,
    size: str = "1024x1024",
    quality: str = "standard",
    style: str = "vivid",
    options: Optional[AiCallOptions] = None,
) -> Dict[str, Any]:
    """Generate an image by delegating to ``coreAi.generateImage``.

    Makes sure ``aiObjects`` is initialized, then forwards all arguments
    positionally and returns whatever the core module returns.
    """
    await self._ensureAiObjectsInitialized()
    core = self.coreAi
    generated = await core.generateImage(prompt, size, quality, style, options)
    return generated
|
|
|
|
# Web Research
|
|
async def webResearch(self, request: WebResearchRequest) -> WebResearchActionResult:
    """Run web research by delegating to ``webResearchService.webResearch``.

    Makes sure ``aiObjects`` is initialized before the web research
    sub-module is accessed, then forwards the request unchanged.
    """
    await self._ensureAiObjectsInitialized()
    research = self.webResearchService
    outcome = await research.webResearch(request)
    return outcome
|
|
|
|
# Core AI Methods - Delegating to SubCoreAi
|
|
async def callAiPlanning(
    self,
    prompt: str,
    placeholders: Optional[List[PromptPlaceholder]] = None,
    options: Optional[AiCallOptions] = None,
    loopInstruction: Optional[str] = None
) -> str:
    """Run a planning AI call by delegating to ``coreAi.callAiPlanning``.

    Makes sure ``aiObjects`` is initialized, then forwards all arguments
    positionally and returns whatever the core module returns.
    """
    await self._ensureAiObjectsInitialized()
    core = self.coreAi
    planned = await core.callAiPlanning(prompt, placeholders, options, loopInstruction)
    return planned
|
|
|
|
async def callAiDocuments(
    self,
    prompt: str,
    documents: Optional[List[ChatDocument]] = None,
    options: Optional[AiCallOptions] = None,
    outputFormat: Optional[str] = None,
    title: Optional[str] = None
) -> Union[str, Dict[str, Any]]:
    """Run a document AI call by delegating to ``coreAi.callAiDocuments``.

    Makes sure ``aiObjects`` is initialized, then forwards all arguments
    positionally and returns whatever the core module returns.
    """
    await self._ensureAiObjectsInitialized()
    core = self.coreAi
    produced = await core.callAiDocuments(prompt, documents, options, outputFormat, title)
    return produced
|
|
|
|
|
|
def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
    """Sanitize content before it is inserted into an AI prompt.

    Control characters other than tab, line feed and carriage return are
    removed first. The remaining text is then escaped according to
    ``contentType``:

    - ``"userinput"``: backslashes, curly braces and both quote styles are
      escaped and the result is wrapped in single quotes. Whitespace
      characters are left as they are.
    - ``"json"``: backslashes, double quotes, ``\\n``, ``\\r`` and ``\\t``
      are escaped; single quotes are left untouched.
    - ``"document"``, ``"text"`` and any other value: same as ``"json"``
      plus escaped single quotes.

    Args:
        content: The content to sanitize. Non-string values are converted
            with ``str()``; falsy values yield an empty string.
        contentType: One of "text", "userinput", "json", "document".

    Returns:
        The sanitized string, or a fixed error marker string if
        sanitization raised an exception.
    """
    if not content:
        return ""

    try:
        text = str(content)

        # Strip NUL and other control characters; \t, \n and \r survive.
        cleaned = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', text)

        if contentType == "userinput":
            escapes = {
                # Backslashes must be escaped as well: otherwise an input
                # such as \' turns into \\' - an escaped backslash followed
                # by a bare quote that terminates the wrapper early.
                '\\': '\\\\',
                # Doubled braces prevent placeholder injection.
                '{': '{{',
                '}': '}}',
                '"': '\\"',
                "'": "\\'",
            }
            return "'" + cleaned.translate(str.maketrans(escapes)) + "'"

        escapes = {
            '\\': '\\\\',
            '"': '\\"',
            '\n': '\\n',
            '\r': '\\r',
            '\t': '\\t',
        }
        if contentType != "json":
            # "document", "text" and unknown types also escape single quotes.
            escapes["'"] = "\\'"

        # A single translate pass never re-escapes the backslashes it has
        # just inserted, so it matches escaping the backslash first.
        return cleaned.translate(str.maketrans(escapes))

    except Exception as e:
        # Deliberate best effort: never let sanitization break the caller.
        logger.error(f"Error sanitizing prompt content: {str(e)}")
        return "[ERROR: Content could not be safely sanitized]"
|