130 lines
6.4 KiB
Python
130 lines
6.4 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
|
|
import logging
|
|
from modules.workflows.methods.methodBase import MethodBase
|
|
from modules.datamodels.datamodelWorkflowActions import WorkflowActionDefinition, WorkflowActionParameter
|
|
from modules.shared.frontendTypes import FrontendType
|
|
|
|
# Import helpers
|
|
from .helpers.documentIndex import DocumentIndexHelper
|
|
from .helpers.formatting import FormattingHelper
|
|
|
|
# Import actions
|
|
from .actions.getDocumentIndex import getDocumentIndex
|
|
from .actions.extractContent import extractContent
|
|
from .actions.neutralizeData import neutralizeData
|
|
from .actions.triggerPreprocessingServer import triggerPreprocessingServer
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
class MethodContext(MethodBase):
    """Context and workflow information methods.

    Registers four workflow actions under the "context" method name:
    getDocumentIndex, extractContent, neutralizeData and
    triggerPreprocessingServer. The action implementations are module-level
    functions imported from ``.actions`` and bound to the instance here.
    """

    def __init__(self, services):
        """Set up helpers, declare the action definitions and bind the actions.

        Args:
            services: Forwarded unchanged to ``MethodBase.__init__``.
        """
        super().__init__(services)
        self.name = "context"
        self.description = "Context and workflow information methods"

        # Helper modules; each one receives this method instance.
        self.documentIndex = DocumentIndexHelper(self)
        self.formatting = FormattingHelper(self)

        def bindAction(actionFunc):
            """Bind an imported action function to this instance as a method."""
            return actionFunc.__get__(self, self.__class__)

        # RBAC integration: every action definition carries an actionId.
        getDocumentIndexDefinition = WorkflowActionDefinition(
            actionId="context.getDocumentIndex",
            description="Generate a comprehensive index of all documents available in the current workflow",
            dynamicMode=True,
            outputType="DocumentList",
            parameters={
                "resultType": WorkflowActionParameter(
                    name="resultType",
                    type="str",
                    frontendType=FrontendType.SELECT,
                    frontendOptions=["json", "txt", "md"],
                    required=False,
                    default="json",
                    description="Output format",
                ),
            },
            execute=bindAction(getDocumentIndex),
        )

        extractContentDefinition = WorkflowActionDefinition(
            actionId="context.extractContent",
            description="Extract raw content parts from documents without AI processing. Returns ContentParts with different typeGroups (text, image, table, structure, container). Images are returned as base64 data, not as extracted text. Text content is extracted from text-based formats (PDF text layers, Word docs, etc.) but NOT from images (no OCR). Use this action to prepare documents for subsequent AI processing actions.",
            dynamicMode=True,
            outputType="UdmDocument",
            parameters={
                "documentList": WorkflowActionParameter(
                    name="documentList",
                    type="DocumentList",
                    frontendType=FrontendType.DOCUMENT_REFERENCE,
                    required=True,
                    description="Document reference(s) to extract content from",
                ),
                "extractionOptions": WorkflowActionParameter(
                    name="extractionOptions",
                    type="Dict[str,Any]",
                    frontendType=FrontendType.JSON,
                    required=False,
                    description="Extraction options (if not provided, defaults are used). Note: This action does NOT use AI - it performs pure content extraction. Images are preserved as base64 data, not converted to text.",
                ),
            },
            execute=bindAction(extractContent),
        )

        # Unlike the two definitions above, no dynamicMode is passed here.
        neutralizeDataDefinition = WorkflowActionDefinition(
            actionId="context.neutralizeData",
            description="Neutralize extracted data from ContentExtracted documents (for use after extractContent)",
            outputType="DocumentList",
            parameters={
                "documentList": WorkflowActionParameter(
                    name="documentList",
                    type="DocumentList",
                    frontendType=FrontendType.DOCUMENT_REFERENCE,
                    required=True,
                    description="Document reference(s) containing ContentExtracted objects to neutralize",
                ),
            },
            execute=bindAction(neutralizeData),
        )

        # No dynamicMode is passed here either.
        triggerPreprocessingServerDefinition = WorkflowActionDefinition(
            actionId="context.triggerPreprocessingServer",
            description="Trigger preprocessing server at customer tenant to update database with configuration",
            outputType="ActionResult",
            parameters={
                "endpoint": WorkflowActionParameter(
                    name="endpoint",
                    type="str",
                    frontendType=FrontendType.TEXT,
                    required=True,
                    description="The full URL endpoint for the preprocessing server API",
                ),
                # NOTE(review): declared as "str" while the description also
                # allows a dict - confirm which one the validation expects.
                "configJson": WorkflowActionParameter(
                    name="configJson",
                    type="str",
                    frontendType=FrontendType.JSON,
                    required=True,
                    description="Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string",
                ),
                "authSecretConfigKey": WorkflowActionParameter(
                    name="authSecretConfigKey",
                    type="str",
                    frontendType=FrontendType.TEXT,
                    required=True,
                    description="The APP_CONFIG key name to retrieve the authorization secret from",
                ),
            },
            execute=bindAction(triggerPreprocessingServer),
        )

        self._actions = {
            "getDocumentIndex": getDocumentIndexDefinition,
            "extractContent": extractContentDefinition,
            "neutralizeData": neutralizeDataDefinition,
            "triggerPreprocessingServer": triggerPreprocessingServerDefinition,
        }

        # Validate the definitions once they are all in place.
        # (_validateActions is not defined in this class; presumably inherited
        # from MethodBase.)
        self._validateActions()

        # Also expose the actions as instance attributes (optional, for direct
        # access). Done after validation, as in the original ordering.
        directAccessActions = (
            ("getDocumentIndex", getDocumentIndex),
            ("extractContent", extractContent),
            ("neutralizeData", neutralizeData),
            ("triggerPreprocessingServer", triggerPreprocessingServer),
        )
        for attributeName, actionFunc in directAccessActions:
            setattr(self, attributeName, bindAction(actionFunc))
|
|
|