gateway/modules/workflows/methods/methodAi/methodAi.py
2025-12-17 10:45:09 +01:00

383 lines
18 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import logging
from datetime import datetime, UTC
from modules.workflows.methods.methodBase import MethodBase
from modules.datamodels.datamodelWorkflowActions import WorkflowActionDefinition, WorkflowActionParameter
from modules.shared.frontendTypes import FrontendType
# Import helpers
from .helpers.csvProcessing import CsvProcessingHelper
# Import actions
from .actions.process import process
from .actions.webResearch import webResearch
from .actions.summarizeDocument import summarizeDocument
from .actions.translateDocument import translateDocument
from .actions.convert import convert
from .actions.convertDocument import convertDocument
from .actions.extractData import extractData
from .actions.generateDocument import generateDocument
logger = logging.getLogger(__name__)
class MethodAi(MethodBase):
    """AI processing methods.

    Declares the AI workflow actions (``process``, ``webResearch``,
    ``summarizeDocument``, ``translateDocument``, ``convert``,
    ``convertDocument``, ``extractData`` and ``generateDocument``) in
    ``self._actions`` and additionally exposes each of them as an attribute
    of the instance for direct calls.
    """

    def __init__(self, services):
        """Set up method metadata, helper modules and action definitions.

        Args:
            services: Passed through unchanged to ``MethodBase.__init__``.
        """
        super().__init__(services)
        self.name = "ai"
        self.description = "AI processing methods"

        # Initialize helper modules
        self.csvProcessing = CsvProcessingHelper(self)

        # The action implementations are imported from the ``actions``
        # package; bind each one to this instance exactly once so that the
        # ``execute`` callable of a definition and the attribute of the same
        # name registered below are guaranteed to be the same bound callable.
        # NOTE(review): presumably the imported actions are plain functions
        # taking ``self`` as first argument - confirm in ``actions/*``.
        boundActions = {
            "process": process.__get__(self, self.__class__),
            "webResearch": webResearch.__get__(self, self.__class__),
            "summarizeDocument": summarizeDocument.__get__(self, self.__class__),
            "translateDocument": translateDocument.__get__(self, self.__class__),
            "convert": convert.__get__(self, self.__class__),
            "convertDocument": convertDocument.__get__(self, self.__class__),
            "extractData": extractData.__get__(self, self.__class__),
            "generateDocument": generateDocument.__get__(self, self.__class__),
        }

        # RBAC integration: action definitions carrying an actionId
        self._actions = {
            "process": WorkflowActionDefinition(
                actionId="ai.process",
                description="Universal AI document processing action - accepts multiple input documents in any format and processes them together with a prompt",
                parameters={
                    "aiPrompt": WorkflowActionParameter(
                        name="aiPrompt",
                        type="str",
                        frontendType=FrontendType.TEXTAREA,
                        required=True,
                        description="Instruction for the AI describing what processing to perform"
                    ),
                    "documentList": WorkflowActionParameter(
                        name="documentList",
                        type="List[str]",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
                        required=False,
                        description="Document reference(s) in any format to use as input/context"
                    ),
                    "resultType": WorkflowActionParameter(
                        name="resultType",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["txt", "json", "md", "csv", "xml", "html", "pdf", "docx", "xlsx", "pptx", "png", "jpg"],
                        required=False,
                        default="txt",
                        description="Output file extension. All output documents will use this format"
                    )
                },
                execute=boundActions["process"]
            ),
            "webResearch": WorkflowActionDefinition(
                actionId="ai.webResearch",
                description="Web research with two-step process: search for URLs, then crawl content",
                parameters={
                    "prompt": WorkflowActionParameter(
                        name="prompt",
                        type="str",
                        frontendType=FrontendType.TEXTAREA,
                        required=True,
                        description="Natural language research instruction"
                    ),
                    "urlList": WorkflowActionParameter(
                        name="urlList",
                        type="List[str]",
                        frontendType=FrontendType.MULTISELECT,
                        required=False,
                        description="Specific URLs to crawl, if needed"
                    ),
                    "country": WorkflowActionParameter(
                        name="country",
                        type="str",
                        frontendType=FrontendType.TEXT,
                        required=False,
                        # The examples are letters, so "two-letter" (not
                        # "two-digit") is the accurate wording.
                        description="Two-letter country code (lowercase, e.g., ch, us, de)"
                    ),
                    "language": WorkflowActionParameter(
                        name="language",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["de", "en", "fr", "it", "es"],
                        required=False,
                        description="Language code (lowercase, e.g., de, en, fr)"
                    ),
                    "researchDepth": WorkflowActionParameter(
                        name="researchDepth",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["fast", "general", "deep"],
                        required=False,
                        default="general",
                        description="Research depth"
                    )
                },
                execute=boundActions["webResearch"]
            ),
            "summarizeDocument": WorkflowActionDefinition(
                actionId="ai.summarizeDocument",
                description="Summarize one or more documents, extracting key points and main ideas",
                parameters={
                    "documentList": WorkflowActionParameter(
                        name="documentList",
                        type="List[str]",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
                        required=True,
                        description="Document reference(s) to summarize"
                    ),
                    "summaryLength": WorkflowActionParameter(
                        name="summaryLength",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["brief", "medium", "detailed"],
                        required=False,
                        default="medium",
                        description="Desired summary length"
                    ),
                    "focus": WorkflowActionParameter(
                        name="focus",
                        type="str",
                        frontendType=FrontendType.TEXT,
                        required=False,
                        description="Specific aspect to focus on in the summary (e.g., financial data, key decisions)"
                    ),
                    "resultType": WorkflowActionParameter(
                        name="resultType",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["txt", "md", "docx"],
                        required=False,
                        default="txt",
                        description="Output file extension"
                    )
                },
                execute=boundActions["summarizeDocument"]
            ),
            "translateDocument": WorkflowActionDefinition(
                actionId="ai.translateDocument",
                description="Translate documents to a target language while preserving formatting and structure",
                parameters={
                    "documentList": WorkflowActionParameter(
                        name="documentList",
                        type="List[str]",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
                        required=True,
                        description="Document reference(s) to translate"
                    ),
                    "targetLanguage": WorkflowActionParameter(
                        name="targetLanguage",
                        type="str",
                        frontendType=FrontendType.TEXT,
                        required=True,
                        description="Target language code or name (e.g., de, German, French, es)"
                    ),
                    "sourceLanguage": WorkflowActionParameter(
                        name="sourceLanguage",
                        type="str",
                        frontendType=FrontendType.TEXT,
                        required=False,
                        description="Source language if known (e.g., en, English). If not provided, AI will detect"
                    ),
                    "preserveFormatting": WorkflowActionParameter(
                        name="preserveFormatting",
                        type="bool",
                        frontendType=FrontendType.CHECKBOX,
                        required=False,
                        default=True,
                        description="Whether to preserve original formatting"
                    ),
                    "resultType": WorkflowActionParameter(
                        name="resultType",
                        type="str",
                        frontendType=FrontendType.TEXT,
                        required=False,
                        description="Output file extension. If not specified, uses same format as input"
                    )
                },
                execute=boundActions["translateDocument"]
            ),
            "convert": WorkflowActionDefinition(
                actionId="ai.convert",
                description="Convert documents/data between different formats with specific formatting options",
                parameters={
                    "documentList": WorkflowActionParameter(
                        name="documentList",
                        type="List[str]",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
                        required=True,
                        description="Document reference(s) to convert"
                    ),
                    "inputFormat": WorkflowActionParameter(
                        name="inputFormat",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["json", "csv", "xlsx", "txt"],
                        required=True,
                        description="Source format"
                    ),
                    "outputFormat": WorkflowActionParameter(
                        name="outputFormat",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["csv", "json", "xlsx", "txt"],
                        required=True,
                        description="Target format"
                    ),
                    "columnsPerRow": WorkflowActionParameter(
                        name="columnsPerRow",
                        type="int",
                        frontendType=FrontendType.NUMBER,
                        required=False,
                        description="For CSV output, number of columns per row. Default: auto-detect",
                        validation={"min": 1, "max": 100}
                    ),
                    "delimiter": WorkflowActionParameter(
                        name="delimiter",
                        type="str",
                        frontendType=FrontendType.TEXT,
                        required=False,
                        default=",",
                        description="For CSV output, delimiter character"
                    ),
                    "includeHeader": WorkflowActionParameter(
                        name="includeHeader",
                        type="bool",
                        frontendType=FrontendType.CHECKBOX,
                        required=False,
                        default=True,
                        description="For CSV output, whether to include header row"
                    ),
                    "language": WorkflowActionParameter(
                        name="language",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["de", "en", "fr"],
                        required=False,
                        default="en",
                        description="Language for output"
                    )
                },
                execute=boundActions["convert"]
            ),
            "convertDocument": WorkflowActionDefinition(
                actionId="ai.convertDocument",
                description="Convert documents between different formats (PDF→Word, Excel→CSV, etc.)",
                parameters={
                    "documentList": WorkflowActionParameter(
                        name="documentList",
                        type="List[str]",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
                        required=True,
                        description="Document reference(s) to convert"
                    ),
                    "targetFormat": WorkflowActionParameter(
                        name="targetFormat",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["docx", "pdf", "xlsx", "csv", "txt", "html", "json", "md"],
                        required=True,
                        description="Target format extension"
                    ),
                    "preserveStructure": WorkflowActionParameter(
                        name="preserveStructure",
                        type="bool",
                        frontendType=FrontendType.CHECKBOX,
                        required=False,
                        default=True,
                        description="Whether to preserve document structure (headings, tables, etc.)"
                    )
                },
                execute=boundActions["convertDocument"]
            ),
            "extractData": WorkflowActionDefinition(
                actionId="ai.extractData",
                description="Extract structured data from documents (key-value pairs, entities, facts, etc.)",
                parameters={
                    "documentList": WorkflowActionParameter(
                        name="documentList",
                        type="List[str]",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
                        required=True,
                        description="Document reference(s) to extract data from"
                    ),
                    "dataStructure": WorkflowActionParameter(
                        name="dataStructure",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["flat", "nested", "list"],
                        required=False,
                        default="nested",
                        description="Desired data structure"
                    ),
                    "fields": WorkflowActionParameter(
                        name="fields",
                        type="List[str]",
                        frontendType=FrontendType.MULTISELECT,
                        required=False,
                        description="Specific fields/properties to extract (e.g., [name, date, amount])"
                    ),
                    "resultType": WorkflowActionParameter(
                        name="resultType",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["json", "csv", "xlsx"],
                        required=False,
                        default="json",
                        description="Output format"
                    )
                },
                execute=boundActions["extractData"]
            ),
            "generateDocument": WorkflowActionDefinition(
                actionId="ai.generateDocument",
                description="Generate documents from scratch or based on templates/inputs",
                parameters={
                    "prompt": WorkflowActionParameter(
                        name="prompt",
                        type="str",
                        frontendType=FrontendType.TEXTAREA,
                        required=True,
                        description="Description of the document to generate"
                    ),
                    "documentList": WorkflowActionParameter(
                        name="documentList",
                        type="List[str]",
                        frontendType=FrontendType.DOCUMENT_REFERENCE,
                        required=False,
                        description="Template documents or reference documents to use as a guide"
                    ),
                    "documentType": WorkflowActionParameter(
                        name="documentType",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["letter", "memo", "proposal", "contract", "report", "email"],
                        required=False,
                        description="Type of document"
                    ),
                    "resultType": WorkflowActionParameter(
                        name="resultType",
                        type="str",
                        frontendType=FrontendType.SELECT,
                        frontendOptions=["docx", "pdf", "txt", "md"],
                        required=False,
                        default="docx",
                        description="Output format"
                    )
                },
                execute=boundActions["generateDocument"]
            )
        }

        # Validate actions after definition
        self._validateActions()

        # Register actions as instance attributes (optional, for direct
        # access). Done after validation, exactly as before, and reusing the
        # callables already stored in the definitions above.
        self.process = boundActions["process"]
        self.webResearch = boundActions["webResearch"]
        self.summarizeDocument = boundActions["summarizeDocument"]
        self.translateDocument = boundActions["translateDocument"]
        self.convert = boundActions["convert"]
        self.convertDocument = boundActions["convertDocument"]
        self.extractData = boundActions["extractData"]
        self.generateDocument = boundActions["generateDocument"]

    def _format_timestamp_for_filename(self) -> str:
        """Format the current UTC time as YYYYMMDD-hhmmss for filenames.

        Returns:
            The current time in UTC rendered with ``"%Y%m%d-%H%M%S"``.
        """
        return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")