# gateway/modules/services/serviceAi/mainServiceAi.py

import logging
import re
from typing import Dict, Any, List, Optional, Tuple, Union
from modules.datamodels.datamodelChat import PromptPlaceholder

from modules.datamodels.datamodelChat import ChatDocument
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, ModelCapabilities, OperationType, Priority
from modules.datamodels.datamodelExtraction import ChunkResult, ContentExtracted
from modules.datamodels.datamodelWeb import (
    WebResearchRequest,
    WebResearchActionResult,
    WebResearchDocumentData,
    WebResearchActionDocument,
    WebSearchResultItem,
)
from modules.interfaces.interfaceAiObjects import AiObjects
from modules.shared.configuration import APP_CONFIG
from modules.services.serviceAi.subCoreAi import SubCoreAi
from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
from modules.services.serviceAi.subWebResearch import SubWebResearch
from modules.services.serviceAi.subDocumentGeneration import SubDocumentGeneration
from modules.services.serviceAi.subUtilities import SubUtilities


# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

class AiService:
    """Lightweight AI service orchestrator that delegates to specialized sub-modules.

    Manager delegates to specialized sub-modules:
    - SubCoreAi: Core AI operations (readImage, generateImage, callAi, planning, text calls)
    - SubDocumentProcessing: Document chunking, processing, and merging logic
    - SubWebResearch: Web research and crawling functionality
    - SubDocumentGeneration: Single-file and multi-file document generation
    - SubUtilities: Helper functions, text processing, and debugging utilities

    The main service acts as a coordinator:
    1. Manages lazy initialization of sub-modules
    2. Delegates operations to appropriate sub-modules
    3. Maintains the same public API for backward compatibility
    """

    def __init__(self, serviceCenter=None) -> None:
        """Initialize AI service with service center access.

        Args:
            serviceCenter: Service center instance for accessing other services
        """
        self.services = serviceCenter
        # Only depend on interfaces
        self.aiObjects = None  # Will be initialized in create()
        self._extractionService = None  # Lazy initialization
        self._coreAi = None  # Lazy initialization
        self._documentProcessor = None  # Lazy initialization
        self._webResearch = None  # Lazy initialization
        self._documentGenerator = None  # Lazy initialization
        self._utilities = None  # Lazy initialization

    @property
    def extractionService(self):
        """Lazy initialization of extraction service."""
        if self._extractionService is None:
            logger.info("Lazy initializing ExtractionService...")
            self._extractionService = ExtractionService(self.services)
        return self._extractionService

    @property
    def coreAi(self):
        """Lazy initialization of core AI service."""
        if self._coreAi is None:
            if self.aiObjects is None:
                raise RuntimeError("AiService.aiObjects must be initialized before accessing coreAi. Use await AiService.create() or await service._ensureAiObjectsInitialized()")
            logger.info("Lazy initializing SubCoreAi...")
            self._coreAi = SubCoreAi(self.services, self.aiObjects)
        return self._coreAi

    @property
    def documentProcessor(self):
        """Lazy initialization of document processing service."""
        if self._documentProcessor is None:
            logger.info("Lazy initializing SubDocumentProcessing...")
            self._documentProcessor = SubDocumentProcessing(self.services, self.aiObjects)
        return self._documentProcessor

    @property
    def webResearchService(self):
        """Lazy initialization of web research service."""
        if self._webResearch is None:
            logger.info("Lazy initializing SubWebResearch...")
            self._webResearch = SubWebResearch(self.services, self.aiObjects)
        return self._webResearch

    @property
    def documentGenerator(self):
        """Lazy initialization of document generation service."""
        if self._documentGenerator is None:
            logger.info("Lazy initializing SubDocumentGeneration...")
            self._documentGenerator = SubDocumentGeneration(self.services, self.aiObjects, self.documentProcessor)
        return self._documentGenerator

    @property
    def utilities(self):
        """Lazy initialization of utilities service."""
        if self._utilities is None:
            logger.info("Lazy initializing SubUtilities...")
            self._utilities = SubUtilities(self.services)
        return self._utilities

    async def _ensureAiObjectsInitialized(self):
        """Ensure aiObjects is initialized."""
        if self.aiObjects is None:
            logger.info("Lazy initializing AiObjects...")
            self.aiObjects = await AiObjects.create()
            logger.info("AiObjects initialization completed")

    @classmethod
    async def create(cls, serviceCenter=None) -> "AiService":
        """Create AiService instance with all connectors initialized."""
        logger.info("AiService.create() called")
        instance = cls(serviceCenter)
        logger.info("AiService created, about to call AiObjects.create()...")
        instance.aiObjects = await AiObjects.create()
        logger.info("AiObjects.create() completed")
        return instance

    # AI Image Analysis
    async def readImage(
        self,
        prompt: str,
        imageData: Union[str, bytes],
        mimeType: str = None,
        options: Optional[AiCallOptions] = None,
        ) -> str:
        """Call AI for image analysis using interface.callImage()."""
        await self._ensureAiObjectsInitialized()
        return await self.coreAi.readImage(prompt, imageData, mimeType, options)

    # AI Image Generation
    async def generateImage(
        self,
        prompt: str,
        size: str = "1024x1024",
        quality: str = "standard",
        style: str = "vivid",
        options: Optional[AiCallOptions] = None,
        ) -> Dict[str, Any]:
        """Generate an image using AI using interface.generateImage()."""
        await self._ensureAiObjectsInitialized()
        return await self.coreAi.generateImage(prompt, size, quality, style, options)

    # Web Research
    async def webResearch(self, request: WebResearchRequest) -> WebResearchActionResult:
        """Perform web research using interface functions."""
        await self._ensureAiObjectsInitialized()
        return await self.webResearchService.webResearch(request)

    # Core AI Methods - Delegating to SubCoreAi
    async def callAiPlanning(
        self,
        prompt: str,
        placeholders: Optional[List[PromptPlaceholder]] = None,
        options: Optional[AiCallOptions] = None,
        loopInstruction: Optional[str] = None
        ) -> str:
        """Planning AI call for task planning, action planning, action selection, etc."""
        await self._ensureAiObjectsInitialized()
        return await self.coreAi.callAiPlanning(prompt, placeholders, options, loopInstruction)

    async def callAiDocuments(
        self,
        prompt: str,
        documents: Optional[List[ChatDocument]] = None,
        options: Optional[AiCallOptions] = None,
        outputFormat: Optional[str] = None,
        title: Optional[str] = None
        ) -> Union[str, Dict[str, Any]]:
        """Document generation AI call for all non-planning calls."""
        await self._ensureAiObjectsInitialized()
        return await self.coreAi.callAiDocuments(prompt, documents, options, outputFormat, title)


    def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
        """
        Centralized prompt content sanitization to prevent injection attacks and ensure safe presentation.

        This is the single source of truth for all prompt sanitization across the system.
        Replaces all scattered sanitization functions with a unified approach.

        Args:
            content: The content to sanitize
            contentType: Type of content ("text", "userinput", "json", "document")

        Returns:
            Safely sanitized content ready for AI prompt insertion
        """
        if not content:
            return ""

        try:
            # Convert to string if not already
            content_str = str(content)

            # Remove null bytes and control characters (except newlines and tabs)
            sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', content_str)

            # Handle different content types with appropriate sanitization
            if contentType == "userinput":
                # Extra security for user-controlled content
                # Escape curly braces to prevent placeholder injection
                sanitized = sanitized.replace('{', '{{').replace('}', '}}')
                # Escape quotes and wrap in single quotes
                sanitized = sanitized.replace('"', '\\"').replace("'", "\\'")
                return f"'{sanitized}'"

            elif contentType == "json":
                # For JSON content, escape quotes and backslashes
                sanitized = sanitized.replace('\\', '\\\\')
                sanitized = sanitized.replace('"', '\\"')
                sanitized = sanitized.replace('\n', '\\n')
                sanitized = sanitized.replace('\r', '\\r')
                sanitized = sanitized.replace('\t', '\\t')

            elif contentType == "document":
                # For document content, escape special characters
                sanitized = sanitized.replace('\\', '\\\\')
                sanitized = sanitized.replace('"', '\\"')
                sanitized = sanitized.replace("'", "\\'")
                sanitized = sanitized.replace('\n', '\\n')
                sanitized = sanitized.replace('\r', '\\r')
                sanitized = sanitized.replace('\t', '\\t')

            else:  # contentType == "text" or default
                # Basic text sanitization
                sanitized = sanitized.replace('\\', '\\\\')
                sanitized = sanitized.replace('"', '\\"')
                sanitized = sanitized.replace("'", "\\'")
                sanitized = sanitized.replace('\n', '\\n')
                sanitized = sanitized.replace('\r', '\\r')
                sanitized = sanitized.replace('\t', '\\t')

            return sanitized

        except Exception as e:
            logger.error(f"Error sanitizing prompt content: {str(e)}")
            # Return a safe fallback
            return "[ERROR: Content could not be safely sanitized]"