gateway/modules/workflows/methods/methodAi/actions/process.py
2026-01-05 06:40:09 +01:00

264 lines
13 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Process action for AI operations.
Universal AI document processing action.
"""
import logging
import time
import json
from typing import Dict, Any, List, Optional
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelAi import AiCallOptions
from modules.datamodels.datamodelExtraction import ContentPart
logger = logging.getLogger(__name__)
@action
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
- Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
- Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
- Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.
Parameters:
- aiPrompt (str, required): Instruction for the AI describing what processing to perform.
- documentList (list, optional): Document reference(s) in any format to use as input/context.
- resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
"""
try:
# Init progress logger
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
operationId = f"ai_process_{workflowId}_{int(time.time())}"
# Start progress tracking
parentOperationId = parameters.get('parentOperationId')
if not parentOperationId:
logger.warning(f"ai.process: No parentOperationId provided in parameters. Operation '{operationId}' will appear at root level. Available parameters: {list(parameters.keys())}")
else:
logger.debug(f"ai.process: Using parentOperationId '{parentOperationId}' for operation '{operationId}'")
self.services.chat.progressLogStart(
operationId,
"Generate",
"AI Processing",
f"Format: {parameters.get('resultType', 'txt')}",
parentOperationId=parentOperationId
)
aiPrompt = parameters.get("aiPrompt")
logger.info(f"aiPrompt extracted: '{aiPrompt}' (type: {type(aiPrompt)})")
# Update progress - preparing parameters
self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")
from modules.datamodels.datamodelDocref import DocumentReferenceList
documentListParam = parameters.get("documentList")
# Convert to DocumentReferenceList if needed
if documentListParam is None:
documentList = DocumentReferenceList(references=[])
elif isinstance(documentListParam, DocumentReferenceList):
documentList = documentListParam
elif isinstance(documentListParam, str):
documentList = DocumentReferenceList.from_string_list([documentListParam])
elif isinstance(documentListParam, list):
documentList = DocumentReferenceList.from_string_list(documentListParam)
else:
logger.error(f"Invalid documentList type: {type(documentListParam)}")
documentList = DocumentReferenceList(references=[])
resultType = parameters.get("resultType", "txt")
simpleMode = parameters.get("simpleMode", False)
if not aiPrompt:
logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
return ActionResult.isFailure(
error="AI prompt is required"
)
# Determine output extension and default MIME type without duplicating service logic
normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
output_extension = f".{normalized_result_type}"
output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
logger.info(f"Using result type: {resultType} -> {output_extension}, simpleMode: {simpleMode}")
# Check if contentParts are already provided (from context.extractContent or other sources)
contentParts: Optional[List[ContentPart]] = None
if "contentParts" in parameters:
contentParts = parameters.get("contentParts")
if contentParts and not isinstance(contentParts, list):
# Try to extract from ContentExtracted if it's an ActionDocument
if hasattr(contentParts, 'parts'):
contentParts = contentParts.parts
else:
logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
contentParts = None
# Update progress - preparing AI call
self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
# Build options
output_format = output_extension.replace('.', '') or 'txt'
# Simple mode: fast path without document generation pipeline
if simpleMode:
# Update progress - calling AI (simple mode)
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI (simple mode)")
# Extract context from documents if provided
context_text = ""
if documentList and len(documentList.references) > 0:
try:
# Get documents from workflow
documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
context_parts = []
for doc in documents:
if hasattr(doc, 'fileId') and doc.fileId:
# Get file data
fileData = self.services.interfaceDbComponent.getFileData(doc.fileId)
if fileData:
if isinstance(fileData, bytes):
doc_text = fileData.decode('utf-8', errors='ignore')
else:
doc_text = str(fileData)
context_parts.append(doc_text)
if context_parts:
context_text = "\n\n".join(context_parts)
except Exception as e:
logger.warning(f"Error extracting context from documents in simple mode: {e}")
# Use direct AI call without document generation pipeline
from modules.datamodels.datamodelAi import AiCallRequest, OperationTypeEnum, ProcessingModeEnum
request = AiCallRequest(
prompt=aiPrompt,
context=context_text if context_text else None,
options=AiCallOptions(
resultFormat=output_format,
operationType=OperationTypeEnum.DATA_ANALYSE,
processingMode=ProcessingModeEnum.BASIC
)
)
aiResponse_obj = await self.services.ai.callAi(request)
# Convert AiCallResponse to AiResponse format
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata
aiResponse = AiResponse(
content=aiResponse_obj.content,
metadata=AiResponseMetadata(
additionalData={
"modelName": aiResponse_obj.modelName,
"priceUsd": aiResponse_obj.priceUsd,
"processingTime": aiResponse_obj.processingTime,
"bytesSent": aiResponse_obj.bytesSent,
"bytesReceived": aiResponse_obj.bytesReceived,
"errorCount": aiResponse_obj.errorCount
}
),
documents=[] # Simple mode doesn't generate documents
)
else:
# Full mode: use unified callAiContent method
options = AiCallOptions(
resultFormat=output_format
)
# Update progress - calling AI
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
# Use unified callAiContent method
# If contentParts provided (pre-extracted), use them directly
# Otherwise, pass documentList and let callAiContent handle Phases 5A-5E internally
# Note: ContentExtracted documents (from context.extractContent) are now handled
# automatically in _extractAndPrepareContent() (Phase 5B)
if contentParts:
# Pre-extracted ContentParts - use them directly
aiResponse = await self.services.ai.callAiContent(
prompt=aiPrompt,
options=options,
contentParts=contentParts, # Pre-extracted ContentParts
outputFormat=output_format,
parentOperationId=operationId
)
else:
# Pass documentList - callAiContent handles Phases 5A-5E internally
# This includes automatic detection of ContentExtracted documents
aiResponse = await self.services.ai.callAiContent(
prompt=aiPrompt,
options=options,
documentList=documentList, # callAiContent macht Phasen 5A-5E
outputFormat=output_format,
parentOperationId=operationId
)
# Update progress - processing result
self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")
# Extract documents from AiResponse
if aiResponse.documents and len(aiResponse.documents) > 0:
action_documents = []
for doc in aiResponse.documents:
validationMetadata = {
"actionType": "ai.process",
"resultType": normalized_result_type,
"outputFormat": output_format,
"hasDocuments": True,
"documentCount": len(aiResponse.documents)
}
action_documents.append(ActionDocument(
documentName=doc.documentName,
documentData=doc.documentData,
mimeType=doc.mimeType or output_mime_type,
sourceJson=getattr(doc, 'sourceJson', None), # Preserve source JSON for structure validation
validationMetadata=validationMetadata
))
final_documents = action_documents
else:
# Text response - create document from content
extension = output_extension.lstrip('.')
meaningful_name = self._generateMeaningfulFileName(
base_name="ai",
extension=extension,
action_name="result"
)
validationMetadata = {
"actionType": "ai.process",
"resultType": normalized_result_type,
"outputFormat": output_format,
"hasDocuments": False,
"contentType": "text"
}
action_document = ActionDocument(
documentName=meaningful_name,
documentData=aiResponse.content,
mimeType=output_mime_type,
validationMetadata=validationMetadata
)
final_documents = [action_document]
# Complete progress tracking
self.services.chat.progressLogFinish(operationId, True)
return ActionResult.isSuccess(documents=final_documents)
except Exception as e:
logger.error(f"Error in AI processing: {str(e)}")
# Complete progress tracking with failure
try:
self.services.chat.progressLogFinish(operationId, False)
except:
pass # Don't fail on progress logging errors
return ActionResult.isFailure(
error=str(e)
)