# gateway/modules/workflows/methods/methodAi/actions/process.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""
Process action for AI operations.
Universal AI document processing action.
"""

import logging
import time
from typing import Dict, Any, List, Optional
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelAi import AiCallOptions
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentPart

logger = logging.getLogger(__name__)

@action
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
    - Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
    - Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
    - Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.

    Parameters:
    - aiPrompt (str, required): Instruction for the AI describing what processing to perform.
    - documentList (list, optional): Document reference(s) in any format to use as input/context.
    - resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
    """
    # NOTE(review): the docstring above follows a structured layout that is
    # presumably consumed by the @action framework - confirm before reformatting it.
    #
    # Additional keys read from `parameters` that are not listed in the docstring:
    # - parentOperationId: forwarded to progressLogStart as the parent operation.
    # - contentParts: pre-extracted content (a list, or an object exposing `.parts`);
    #   when non-empty, extraction from documentList is skipped.
    # - extractionOptions: used instead of the default ExtractionOptions when truthy.
    #
    # Returns ActionResult.isSuccess(documents=[...]) on success and
    # ActionResult.isFailure(error=...) on a missing prompt or any exception;
    # exceptions are not propagated to the caller.

    # Bound before the try block so the failure paths can tell whether there is
    # a progress operation to close (the id is built inside the try).
    operationId: Optional[str] = None

    def _closeProgressAsFailed() -> None:
        """Best-effort: mark the progress operation as failed without raising."""
        if operationId is None:
            return
        try:
            self.services.chat.progressLogFinish(operationId, False)
        except Exception as finishError:
            # Progress logging must never mask the real outcome of the action
            logger.warning(f"Could not finish progress log for {operationId}: {finishError}")

    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"ai_process_{workflowId}_{int(time.time())}"

        # `or "txt"` also covers an explicit None / empty value, which would
        # otherwise be stringified into a bogus ".none" extension
        resultType = parameters.get("resultType") or "txt"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Generate",
            "AI Processing",
            f"Format: {resultType}",
            parentOperationId=parentOperationId
        )

        aiPrompt = parameters.get("aiPrompt")
        logger.info(f"aiPrompt extracted: '{aiPrompt}' (type: {type(aiPrompt)})")

        if not aiPrompt:
            logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
            # The progress operation was already started; close it so it is
            # not left open when we bail out early
            _closeProgressAsFailed()
            return ActionResult.isFailure(
                error="AI prompt is required"
            )

        # Update progress - preparing parameters
        self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")

        from modules.datamodels.datamodelDocref import DocumentReferenceList

        documentListParam = parameters.get("documentList")
        # Convert to DocumentReferenceList if needed
        if documentListParam is None:
            documentList = DocumentReferenceList(references=[])
        elif isinstance(documentListParam, DocumentReferenceList):
            documentList = documentListParam
        elif isinstance(documentListParam, str):
            documentList = DocumentReferenceList.from_string_list([documentListParam])
        elif isinstance(documentListParam, list):
            documentList = DocumentReferenceList.from_string_list(documentListParam)
        else:
            # Unsupported type: log and continue without input documents
            logger.error(f"Invalid documentList type: {type(documentListParam)}")
            documentList = DocumentReferenceList(references=[])

        # Determine output extension and default MIME type without duplicating service logic
        normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
        output_extension = f".{normalized_result_type}"
        output_mime_type = "application/octet-stream"  # Prefer service-provided mimeType when available
        logger.info(f"Using result type: {resultType} -> {output_extension}")

        # Phase 7.3: Extract content first if documents provided, then use contentParts
        # Check if contentParts are already provided (preferred path)
        contentParts: Optional[List[ContentPart]] = None
        if "contentParts" in parameters:
            contentParts = parameters.get("contentParts")
            if contentParts and not isinstance(contentParts, list):
                # Accept a container object that exposes its parts via `.parts`
                if hasattr(contentParts, 'parts'):
                    contentParts = contentParts.parts
                else:
                    logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
                    contentParts = None

        # If contentParts not provided but documentList is, extract content first
        if not contentParts and documentList.references:
            self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")

            # Get ChatDocuments
            chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
            if not chatDocuments:
                logger.warning("No documents found in documentList")
            else:
                logger.info(f"Extracting content from {len(chatDocuments)} documents")

                # Prepare extraction options (use defaults if not provided)
                extractionOptions = parameters.get("extractionOptions")
                if not extractionOptions:
                    extractionOptions = ExtractionOptions(
                        prompt="Extract all content from the document",
                        mergeStrategy=MergeStrategy(
                            mergeType="concatenate",
                            groupBy="typeGroup",
                            orderBy="id"
                        ),
                        processDocumentsIndividually=True
                    )

                # Extract content using extraction service with hierarchical progress logging
                # Pass operationId for per-document progress tracking
                extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)

                # Combine all ContentParts from all extracted results
                contentParts = []
                for extracted in extractedResults:
                    if extracted.parts:
                        contentParts.extend(extracted.parts)

                logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")

        # Update progress - preparing AI call
        self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")

        # Build options with only resultFormat - let service layer handle all other parameters
        output_format = output_extension.replace('.', '') or 'txt'
        options = AiCallOptions(
            resultFormat=output_format
        )

        # Update progress - calling AI
        self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")

        # Use unified callAiContent method with contentParts (extraction is now separate)
        aiResponse = await self.services.ai.callAiContent(
            prompt=aiPrompt,
            options=options,
            contentParts=contentParts,  # Already extracted (or None if no documents)
            outputFormat=output_format,
            parentOperationId=operationId
        )

        # Update progress - processing result
        self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")

        # Extract documents from AiResponse
        if aiResponse.documents and len(aiResponse.documents) > 0:
            action_documents = []
            for doc in aiResponse.documents:
                # A fresh dict per document so the documents do not share metadata
                validationMetadata = {
                    "actionType": "ai.process",
                    "resultType": normalized_result_type,
                    "outputFormat": output_format,
                    "hasDocuments": True,
                    "documentCount": len(aiResponse.documents)
                }
                action_documents.append(ActionDocument(
                    documentName=doc.documentName,
                    documentData=doc.documentData,
                    mimeType=doc.mimeType or output_mime_type,
                    sourceJson=getattr(doc, 'sourceJson', None),  # Preserve source JSON for structure validation
                    validationMetadata=validationMetadata
                ))

            final_documents = action_documents
        else:
            # Text response - create document from content
            extension = output_extension.lstrip('.')
            meaningful_name = self._generateMeaningfulFileName(
                base_name="ai",
                extension=extension,
                action_name="result"
            )
            validationMetadata = {
                "actionType": "ai.process",
                "resultType": normalized_result_type,
                "outputFormat": output_format,
                "hasDocuments": False,
                "contentType": "text"
            }
            action_document = ActionDocument(
                documentName=meaningful_name,
                documentData=aiResponse.content,
                mimeType=output_mime_type,
                validationMetadata=validationMetadata
            )
            final_documents = [action_document]

        # Complete progress tracking
        self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(documents=final_documents)

    except Exception as e:
        # logger.exception keeps the ERROR level and message but adds the traceback
        logger.exception(f"Error in AI processing: {str(e)}")

        # Complete progress tracking with failure (best-effort, never raises)
        _closeProgressAsFailed()

        return ActionResult.isFailure(
            error=str(e)
        )