198 lines
9.3 KiB
Python
198 lines
9.3 KiB
Python
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""
Process action for AI operations.
Universal AI document processing action.
"""
|
|
|
|
import logging
|
|
import time
|
|
import json
|
|
from typing import Dict, Any, List, Optional
|
|
from modules.workflows.methods.methodBase import action
|
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
|
from modules.datamodels.datamodelAi import AiCallOptions
|
|
from modules.datamodels.datamodelExtraction import ContentPart
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
@action
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
    - Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
    - Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
    - Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.

    Parameters:
    - aiPrompt (str, required): Instruction for the AI describing what processing to perform.
    - documentList (list, optional): Document reference(s) in any format to use as input/context.
    - resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
    """
    # NOTE(review): the docstring above looks like it is structured for consumption by
    # the action framework, so its text is kept verbatim - confirm before rewording it.
    #
    # Besides the documented parameters, this function also reads two optional keys:
    #   - "parentOperationId": forwarded to progressLogStart to nest this operation.
    #   - "contentParts": pre-extracted content; when non-empty it is passed to the AI
    #     service instead of documentList.
    #
    # Returns ActionResult.isSuccess(documents=[...]) on success and
    # ActionResult.isFailure(error=...) on any failure; exceptions are not propagated.

    # Bound before the try block so every failure path can tell whether progress
    # tracking was actually started.
    operationId: Optional[str] = None

    def _finishProgress(success: bool) -> None:
        """Close the progress entry (best effort); never raises."""
        if operationId is None:
            return  # Progress tracking was never started - nothing to close
        try:
            self.services.chat.progressLogFinish(operationId, success)
        except Exception:
            # Progress logging must not mask the real outcome of the action
            logger.warning("Failed to finish progress log for %s", operationId, exc_info=True)

    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"ai_process_{workflowId}_{int(time.time())}"

        # `or` (instead of a .get default) also covers an explicit None / empty value,
        # which would otherwise be stringified into a bogus ".none" extension.
        resultType = parameters.get("resultType") or "txt"

        # Start progress tracking
        self.services.chat.progressLogStart(
            operationId,
            "Generate",
            "AI Processing",
            f"Format: {resultType}",
            parentOperationId=parameters.get('parentOperationId'),
        )

        aiPrompt = parameters.get("aiPrompt")
        logger.info(f"aiPrompt extracted: '{aiPrompt}' (type: {type(aiPrompt)})")

        # Fail fast on a missing prompt, and close the progress entry that was just
        # opened so it is not left dangling.
        if not aiPrompt:
            logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
            _finishProgress(False)
            return ActionResult.isFailure(
                error="AI prompt is required"
            )

        # Update progress - preparing parameters
        self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")

        # Imported at call time, as in the original code
        from modules.datamodels.datamodelDocref import DocumentReferenceList

        # Convert documentList to a DocumentReferenceList if needed
        documentListParam = parameters.get("documentList")
        if documentListParam is None:
            documentList = DocumentReferenceList(references=[])
        elif isinstance(documentListParam, DocumentReferenceList):
            documentList = documentListParam
        elif isinstance(documentListParam, str):
            documentList = DocumentReferenceList.from_string_list([documentListParam])
        elif isinstance(documentListParam, list):
            documentList = DocumentReferenceList.from_string_list(documentListParam)
        else:
            # Unsupported type: log it and continue without input documents
            logger.error(f"Invalid documentList type: {type(documentListParam)}")
            documentList = DocumentReferenceList(references=[])

        # Determine output extension and default MIME type without duplicating service logic
        normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
        output_extension = f".{normalized_result_type}"
        output_mime_type = "application/octet-stream"  # Prefer service-provided mimeType when available
        logger.info(f"Using result type: {resultType} -> {output_extension}")

        # Check if contentParts are already provided (from context.extractContent or other sources)
        contentParts: Optional[List[ContentPart]] = parameters.get("contentParts")
        if contentParts and not isinstance(contentParts, list):
            if hasattr(contentParts, 'parts'):
                # Container object exposing its parts - unwrap it
                contentParts = contentParts.parts
            else:
                logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
                contentParts = None

        # Update progress - preparing AI call
        self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")

        # Build options
        output_format = output_extension.replace('.', '') or 'txt'
        options = AiCallOptions(
            resultFormat=output_format
        )

        # Update progress - calling AI
        self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")

        # Use unified callAiContent method. Both input variants share every argument
        # except the content source:
        #   - pre-extracted contentParts are used directly;
        #   - otherwise documentList is passed and callAiContent performs
        #     phases 5A-5E (per the original author's notes) internally.
        callArguments: Dict[str, Any] = {
            "prompt": aiPrompt,
            "options": options,
            "outputFormat": output_format,
            "parentOperationId": operationId,
        }
        if contentParts:
            callArguments["contentParts"] = contentParts
        else:
            callArguments["documentList"] = documentList
        aiResponse = await self.services.ai.callAiContent(**callArguments)

        # Update progress - processing result
        self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")

        responseDocuments = aiResponse.documents
        if responseDocuments:
            # Document response - wrap every returned document in an ActionDocument
            sharedMetadata = {
                "actionType": "ai.process",
                "resultType": normalized_result_type,
                "outputFormat": output_format,
                "hasDocuments": True,
                "documentCount": len(responseDocuments),
            }
            final_documents = [
                ActionDocument(
                    documentName=doc.documentName,
                    documentData=doc.documentData,
                    mimeType=doc.mimeType or output_mime_type,
                    sourceJson=getattr(doc, 'sourceJson', None),  # Preserve source JSON for structure validation
                    validationMetadata=dict(sharedMetadata),  # Fresh dict per document, no aliasing
                )
                for doc in responseDocuments
            ]
        else:
            # Text response - create a single document from the response content
            meaningful_name = self._generateMeaningfulFileName(
                base_name="ai",
                extension=normalized_result_type,
                action_name="result"
            )
            final_documents = [
                ActionDocument(
                    documentName=meaningful_name,
                    documentData=aiResponse.content,
                    mimeType=output_mime_type,
                    validationMetadata={
                        "actionType": "ai.process",
                        "resultType": normalized_result_type,
                        "outputFormat": output_format,
                        "hasDocuments": False,
                        "contentType": "text",
                    },
                )
            ]

        # Complete progress tracking
        self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(documents=final_documents)

    except Exception as e:
        # Top-level boundary of the action: log with traceback and report a failure
        logger.exception(f"Error in AI processing: {str(e)}")

        # Complete progress tracking with failure (best effort)
        _finishProgress(False)

        return ActionResult.isFailure(
            error=str(e)
        )
|
|
|