import logging
import re
from typing import Dict, Any, List, Optional, Tuple, Union

from modules.datamodels.datamodelChat import PromptPlaceholder
from modules.datamodels.datamodelChat import ChatDocument
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, ModelCapabilities, OperationType, Priority
from modules.datamodels.datamodelExtraction import ChunkResult, ContentExtracted
from modules.datamodels.datamodelWeb import (
    WebResearchRequest,
    WebResearchActionResult,
    WebResearchDocumentData,
    WebResearchActionDocument,
    WebSearchResultItem,
)
from modules.interfaces.interfaceAiObjects import AiObjects
from modules.shared.configuration import APP_CONFIG
from modules.services.serviceAi.subCoreAi import SubCoreAi
from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
from modules.services.serviceAi.subWebResearch import SubWebResearch
from modules.services.serviceAi.subDocumentGeneration import SubDocumentGeneration
from modules.services.serviceAi.subUtilities import SubUtilities

logger = logging.getLogger(__name__)


class AiService:
    """Lightweight AI service orchestrator that delegates to specialized sub-modules.

    Sub-modules the service delegates to:
    - SubCoreAi: Core AI operations (readImage, generateImage, callAi, planning, text calls)
    - SubDocumentProcessing: Document chunking, processing, and merging logic
    - SubWebResearch: Web research and crawling functionality
    - SubDocumentGeneration: Single-file and multi-file document generation
    - SubUtilities: Helper functions, text processing, and debugging utilities

    The main service acts as a coordinator:
    1. Manages lazy initialization of sub-modules
    2. Delegates operations to appropriate sub-modules
    3. Maintains the same public API for backward compatibility

    Note: each sub-module is constructed with whatever ``self.aiObjects`` holds
    at the moment of first property access. The public async methods therefore
    await ``_ensureAiObjectsInitialized()`` before touching a sub-module.
    """

    def __init__(self, serviceCenter=None) -> None:
        """Initialize AI service with service center access.

        Args:
            serviceCenter: Service center instance for accessing other services
        """
        self.services = serviceCenter
        # Only depend on interfaces
        self.aiObjects = None  # Will be initialized in create()
        # Sub-modules are created lazily by the properties below.
        self._extractionService = None
        self._coreAi = None
        self._documentProcessor = None
        self._webResearch = None
        self._documentGenerator = None
        self._utilities = None

    @property
    def extractionService(self):
        """Return the ExtractionService, creating it on first access."""
        if self._extractionService is None:
            logger.info("Lazy initializing ExtractionService...")
            self._extractionService = ExtractionService(self.services)
        return self._extractionService

    @property
    def coreAi(self):
        """Return the SubCoreAi sub-module, creating it on first access."""
        if self._coreAi is None:
            logger.info("Lazy initializing SubCoreAi...")
            self._coreAi = SubCoreAi(self.services, self.aiObjects)
        return self._coreAi

    @property
    def documentProcessor(self):
        """Return the SubDocumentProcessing sub-module, creating it on first access."""
        if self._documentProcessor is None:
            logger.info("Lazy initializing SubDocumentProcessing...")
            self._documentProcessor = SubDocumentProcessing(self.services, self.aiObjects)
        return self._documentProcessor

    @property
    def webResearchService(self):
        """Return the SubWebResearch sub-module, creating it on first access."""
        if self._webResearch is None:
            logger.info("Lazy initializing SubWebResearch...")
            self._webResearch = SubWebResearch(self.services, self.aiObjects)
        return self._webResearch

    @property
    def documentGenerator(self):
        """Return the SubDocumentGeneration sub-module, creating it on first access.

        The generator receives the document processor, so first access here
        also creates the document processor if it does not exist yet.
        """
        if self._documentGenerator is None:
            logger.info("Lazy initializing SubDocumentGeneration...")
            self._documentGenerator = SubDocumentGeneration(
                self.services, self.aiObjects, self.documentProcessor
            )
        return self._documentGenerator

    @property
    def utilities(self):
        """Return the SubUtilities sub-module, creating it on first access."""
        if self._utilities is None:
            logger.info("Lazy initializing SubUtilities...")
            self._utilities = SubUtilities(self.services)
        return self._utilities

    async def _ensureAiObjectsInitialized(self):
        """Create ``self.aiObjects`` via ``AiObjects.create()`` if it is still None."""
        if self.aiObjects is None:
            logger.info("Lazy initializing AiObjects...")
            self.aiObjects = await AiObjects.create()
            logger.info("AiObjects initialization completed")

    @classmethod
    async def create(cls, serviceCenter=None) -> "AiService":
        """Create AiService instance with all connectors initialized.

        Args:
            serviceCenter: Service center instance passed through to ``__init__``

        Returns:
            A new instance whose ``aiObjects`` has already been created.
        """
        logger.info("AiService.create() called")
        instance = cls(serviceCenter)
        logger.info("AiService created, about to call AiObjects.create()...")
        instance.aiObjects = await AiObjects.create()
        logger.info("AiObjects.create() completed")
        return instance

    # AI Image Analysis
    async def readImage(
        self,
        prompt: str,
        imageData: Union[str, bytes],
        mimeType: Optional[str] = None,
        options: Optional[AiCallOptions] = None,
    ) -> str:
        """Call AI for image analysis; delegates to ``coreAi.readImage``."""
        await self._ensureAiObjectsInitialized()
        return await self.coreAi.readImage(prompt, imageData, mimeType, options)

    # AI Image Generation
    async def generateImage(
        self,
        prompt: str,
        size: str = "1024x1024",
        quality: str = "standard",
        style: str = "vivid",
        options: Optional[AiCallOptions] = None,
    ) -> Dict[str, Any]:
        """Generate an image using AI; delegates to ``coreAi.generateImage``."""
        await self._ensureAiObjectsInitialized()
        return await self.coreAi.generateImage(prompt, size, quality, style, options)

    # Web Research
    async def webResearch(self, request: WebResearchRequest) -> WebResearchActionResult:
        """Perform web research; delegates to ``webResearchService.webResearch``."""
        await self._ensureAiObjectsInitialized()
        return await self.webResearchService.webResearch(request)

    # Master AI Call (process user prompt with optional unlimited count of input documents
    # delivering one or many output documents, no size limitations)
    async def callAi(
        self,
        prompt: str,
        documents: Optional[List[ChatDocument]] = None,
        placeholders: Optional[List[PromptPlaceholder]] = None,
        options: Optional[AiCallOptions] = None,
        outputFormat: Optional[str] = None,
        title: Optional[str] = None
    ) -> Union[str, Dict[str, Any]]:
        """
        Unified AI call interface that automatically routes to appropriate handler.

        Args:
            prompt: The main prompt for the AI call
            documents: Optional list of documents to process
            placeholders: Optional list of placeholder replacements for planning calls
            options: AI call configuration options
            outputFormat: Optional output format (html, pdf, docx, txt, md, json, csv, xlsx)
                for document generation
            title: Optional title for generated documents

        Returns:
            AI response as string, or dict with documents if outputFormat is specified

        Raises:
            Exception: If all available models fail
        """
        await self._ensureAiObjectsInitialized()

        # Resolve the processor and generator first so they exist before the
        # core module receives them.
        documentProcessor = self.documentProcessor
        documentGenerator = self.documentGenerator

        return await self.coreAi.callAi(
            prompt,
            documents,
            placeholders,
            options,
            outputFormat,
            title,
            documentProcessor,
            documentGenerator,
        )

    def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
        """
        Centralized prompt content sanitization to prevent injection attacks and ensure safe presentation.

        This is the single source of truth for all prompt sanitization across the system.

        Processing steps:
        1. Falsy content yields an empty string.
        2. Control characters other than tab, newline and carriage return are removed.
        3. Backslashes are escaped first for every content type, so that a
           backslash already present in the input cannot neutralise the escape
           added in front of a following quote.
        4. Type-specific escaping:
           - "userinput": curly braces are doubled, double and single quotes are
             escaped, and the result is wrapped in single quotes.
           - "json": double quotes, newlines, carriage returns and tabs are escaped.
           - "document", "text" and any other value: as "json", plus single quotes.

        Args:
            content: The content to sanitize
            contentType: Type of content ("text", "userinput", "json", "document")

        Returns:
            Safely sanitized content ready for AI prompt insertion. If sanitization
            itself raises, a fixed error placeholder string is returned instead.
        """
        if not content:
            return ""

        try:
            # Convert to string if not already, then strip null bytes and control
            # characters (newline, carriage return and tab are kept).
            sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', str(content))

            # Backslashes must be escaped before any quote escaping is applied;
            # otherwise an input such as \' turns into \\' and the quote is live again.
            sanitized = sanitized.replace('\\', '\\\\')

            if contentType == "userinput":
                # Extra security for user-controlled content:
                # escape curly braces to prevent placeholder injection
                sanitized = sanitized.replace('{', '{{').replace('}', '}}')
                # Escape quotes and wrap in single quotes
                sanitized = sanitized.replace('"', '\\"').replace("'", "\\'")
                return f"'{sanitized}'"

            sanitized = sanitized.replace('"', '\\"')
            if contentType != "json":
                # "document", "text" and unknown types also escape single quotes.
                sanitized = sanitized.replace("'", "\\'")
            sanitized = sanitized.replace('\n', '\\n')
            sanitized = sanitized.replace('\r', '\\r')
            sanitized = sanitized.replace('\t', '\\t')
            return sanitized

        except Exception as e:
            logger.error(f"Error sanitizing prompt content: {str(e)}")
            # Return a safe fallback
            return "[ERROR: Content could not be safely sanitized]"