# gateway/modules/workflows/methods/methodContext/methodContext.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

import logging
from modules.workflows.methods.methodBase import MethodBase
from modules.datamodels.datamodelWorkflowActions import WorkflowActionDefinition, WorkflowActionParameter
from modules.shared.frontendTypes import FrontendType

# Import helpers
from .helpers.documentIndex import DocumentIndexHelper
from .helpers.formatting import FormattingHelper

# Import actions
from .actions.getDocumentIndex import getDocumentIndex
from .actions.extractContent import extractContent
from .actions.neutralizeData import neutralizeData
from .actions.triggerPreprocessingServer import triggerPreprocessingServer

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

class MethodContext(MethodBase):
    """Workflow method group registered under the name "context".

    Declares four actions -- getDocumentIndex, extractContent, neutralizeData
    and triggerPreprocessingServer -- as WorkflowActionDefinition entries in
    ``self._actions`` and additionally exposes each one as a bound attribute
    on the instance.
    """

    def __init__(self, services):
        """Create helpers, declare the action catalogue and bind the actions.

        Args:
            services: Forwarded unchanged to ``MethodBase.__init__``.
        """
        super().__init__(services)
        self.name = "context"
        self.description = "Context and workflow information methods"

        # Helper objects are handed this instance on construction.
        self.documentIndex = DocumentIndexHelper(self)
        self.formatting = FormattingHelper(self)

        def bindToSelf(actionFn):
            """Bind an imported action function to this instance."""
            return actionFn.__get__(self, self.__class__)

        # RBAC integration: every action definition carries an actionId.
        documentIndexAction = WorkflowActionDefinition(
            actionId="context.getDocumentIndex",
            description="Generate a comprehensive index of all documents available in the current workflow",
            dynamicMode=True,
            outputType="DocumentList",
            parameters={
                "resultType": WorkflowActionParameter(
                    name="resultType",
                    type="str",
                    frontendType=FrontendType.SELECT,
                    frontendOptions=["json", "txt", "md"],
                    required=False,
                    default="json",
                    description="Output format",
                ),
            },
            execute=bindToSelf(getDocumentIndex),
        )

        extractContentAction = WorkflowActionDefinition(
            actionId="context.extractContent",
            description="Extract raw content parts from documents without AI processing. Returns ContentParts with different typeGroups (text, image, table, structure, container). Images are returned as base64 data, not as extracted text. Text content is extracted from text-based formats (PDF text layers, Word docs, etc.) but NOT from images (no OCR). Use this action to prepare documents for subsequent AI processing actions.",
            dynamicMode=True,
            outputType="UdmDocument",
            parameters={
                "documentList": WorkflowActionParameter(
                    name="documentList",
                    type="DocumentList",
                    frontendType=FrontendType.DOCUMENT_REFERENCE,
                    required=True,
                    description="Document reference(s) to extract content from",
                ),
                "extractionOptions": WorkflowActionParameter(
                    name="extractionOptions",
                    type="Dict[str,Any]",
                    frontendType=FrontendType.JSON,
                    required=False,
                    description="Extraction options (if not provided, defaults are used). Note: This action does NOT use AI - it performs pure content extraction. Images are preserved as base64 data, not converted to text.",
                ),
            },
            execute=bindToSelf(extractContent),
        )

        neutralizeDataAction = WorkflowActionDefinition(
            actionId="context.neutralizeData",
            description="Neutralize extracted data from ContentExtracted documents (for use after extractContent)",
            outputType="DocumentList",
            parameters={
                "documentList": WorkflowActionParameter(
                    name="documentList",
                    type="DocumentList",
                    frontendType=FrontendType.DOCUMENT_REFERENCE,
                    required=True,
                    description="Document reference(s) containing ContentExtracted objects to neutralize",
                ),
            },
            execute=bindToSelf(neutralizeData),
        )

        preprocessingAction = WorkflowActionDefinition(
            actionId="context.triggerPreprocessingServer",
            description="Trigger preprocessing server at customer tenant to update database with configuration",
            outputType="ActionResult",
            parameters={
                "endpoint": WorkflowActionParameter(
                    name="endpoint",
                    type="str",
                    frontendType=FrontendType.TEXT,
                    required=True,
                    description="The full URL endpoint for the preprocessing server API",
                ),
                "configJson": WorkflowActionParameter(
                    name="configJson",
                    type="str",
                    frontendType=FrontendType.JSON,
                    required=True,
                    description="Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string",
                ),
                "authSecretConfigKey": WorkflowActionParameter(
                    name="authSecretConfigKey",
                    type="str",
                    frontendType=FrontendType.TEXT,
                    required=True,
                    description="The APP_CONFIG key name to retrieve the authorization secret from",
                ),
            },
            execute=bindToSelf(triggerPreprocessingServer),
        )

        # Catalogue keyed by the short action name.
        self._actions = {
            "getDocumentIndex": documentIndexAction,
            "extractContent": extractContentAction,
            "neutralizeData": neutralizeDataAction,
            "triggerPreprocessingServer": preprocessingAction,
        }

        # Validate the catalogue once it is fully defined.
        self._validateActions()

        # Also register the actions as instance attributes (optional, for
        # direct access); done only after validation has run.
        directAccess = (
            ("getDocumentIndex", getDocumentIndex),
            ("extractContent", extractContent),
            ("neutralizeData", neutralizeData),
            ("triggerPreprocessingServer", triggerPreprocessingServer),
        )
        for attrName, actionFn in directAccess:
            setattr(self, attrName, bindToSelf(actionFn))