735 lines
36 KiB
Python
735 lines
36 KiB
Python
"""
|
|
AI processing method module.
|
|
Handles direct AI calls for any type of task.
|
|
"""
|
|
|
|
import time
|
|
import logging
|
|
from typing import Dict, Any, List, Optional
|
|
from datetime import datetime, UTC
|
|
|
|
from modules.workflows.methods.methodBase import MethodBase, action
|
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
|
from modules.datamodels.datamodelWorkflow import ExtractContentParameters
|
|
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentPart
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class MethodAi(MethodBase):
|
|
"""AI processing methods."""
|
|
|
|
def __init__(self, services):
    """Set up the AI method group.

    Args:
        services: Service container, forwarded unchanged to the base-class
            initialiser.
    """
    super().__init__(services)
    # Name and description of this method group.
    self.name, self.description = "ai", "AI processing methods"
|
|
|
|
def _format_timestamp_for_filename(self) -> str:
|
|
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
|
|
return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
|
|
|
|
|
@action
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
    - Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
    - Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
    - Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.

    Parameters:
    - aiPrompt (str, required): Instruction for the AI describing what processing to perform.
    - documentList (list, optional): Document reference(s) in any format to use as input/context.
    - resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
    """
    # NOTE(review): the GENERAL/Parameters docstring layout is kept verbatim; it is
    # presumably consumed by the @action machinery - confirm before rewording it.

    # Stays None until an operation id exists, so the failure handler knows
    # whether there is a progress log it may try to close.
    operationId: Optional[str] = None
    try:
        # A resultType that is present but None/empty falls back to "txt"
        # instead of turning into the literal extension ".none".
        resultType = parameters.get("resultType") or "txt"

        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"ai_process_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            operationId,
            "Generate",
            "AI Processing",
            f"Format: {resultType}"
        )

        aiPrompt = parameters.get("aiPrompt")
        logger.info(f"aiPrompt extracted: '{aiPrompt}' (type: {type(aiPrompt)})")

        if not aiPrompt:
            logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
            # The progress log was already started above; close it so the
            # operation is not left open after this early return.
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as progressError:
                logger.debug(f"Could not close progress log {operationId}: {progressError}")
            return ActionResult.isFailure(
                error="AI prompt is required"
            )

        # Update progress - preparing parameters
        self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")

        from modules.datamodels.datamodelDocref import DocumentReferenceList

        # Normalise documentList into a DocumentReferenceList
        documentListParam = parameters.get("documentList")
        if documentListParam is None:
            documentList = DocumentReferenceList(references=[])
        elif isinstance(documentListParam, DocumentReferenceList):
            documentList = documentListParam
        elif isinstance(documentListParam, str):
            documentList = DocumentReferenceList.from_string_list([documentListParam])
        elif isinstance(documentListParam, list):
            documentList = DocumentReferenceList.from_string_list(documentListParam)
        else:
            logger.error(f"Invalid documentList type: {type(documentListParam)}")
            documentList = DocumentReferenceList(references=[])

        # Determine output extension and default MIME type without duplicating service logic
        normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
        output_extension = f".{normalized_result_type}"
        output_mime_type = "application/octet-stream"  # Prefer service-provided mimeType when available
        logger.info(f"Using result type: {resultType} -> {output_extension}")

        # Prefer contentParts supplied by the caller; otherwise extract them
        # from the referenced documents below.
        contentParts: Optional[List[ContentPart]] = None
        if "contentParts" in parameters:
            contentParts = parameters.get("contentParts")
            if contentParts and not isinstance(contentParts, list):
                # Accept a wrapper object exposing its parts via `.parts`
                if hasattr(contentParts, 'parts'):
                    contentParts = contentParts.parts
                else:
                    logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
                    contentParts = None

        # If contentParts not provided but documentList is, extract content first
        if not contentParts and documentList.references:
            self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")

            chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
            if not chatDocuments:
                logger.warning("No documents found in documentList")
            else:
                logger.info(f"Extracting content from {len(chatDocuments)} documents")

                # Prepare extraction options (use defaults if not provided)
                extractionOptions = parameters.get("extractionOptions")
                if not extractionOptions:
                    extractionOptions = ExtractionOptions(
                        prompt="Extract all content from the document",
                        mergeStrategy=MergeStrategy(
                            mergeType="concatenate",
                            groupBy="typeGroup",
                            orderBy="id"
                        ),
                        processDocumentsIndividually=True
                    )

                extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions)

                # Combine all ContentParts from all extracted results
                contentParts = []
                for extracted in extractedResults:
                    if extracted.parts:
                        contentParts.extend(extracted.parts)

                logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")

        # Update progress - preparing AI call
        self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")

        # Only resultFormat is set here; all other call options are left to the service layer
        output_format = output_extension.replace('.', '') or 'txt'
        options = AiCallOptions(
            resultFormat=output_format
        )

        # Update progress - calling AI
        self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")

        aiResponse = await self.services.ai.callAiContent(
            prompt=aiPrompt,
            options=options,
            contentParts=contentParts,  # Already extracted (or None if no documents)
            outputFormat=output_format,
            parentOperationId=operationId
        )

        # Update progress - processing result
        self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")

        if aiResponse.documents and len(aiResponse.documents) > 0:
            # One ActionDocument per AI document; each gets its own metadata dict
            documentCount = len(aiResponse.documents)
            final_documents = [
                ActionDocument(
                    documentName=doc.documentName,
                    documentData=doc.documentData,
                    mimeType=doc.mimeType or output_mime_type,
                    sourceJson=getattr(doc, 'sourceJson', None),  # Preserve source JSON for structure validation
                    validationMetadata={
                        "actionType": "ai.process",
                        "resultType": normalized_result_type,
                        "outputFormat": output_format,
                        "hasDocuments": True,
                        "documentCount": documentCount
                    }
                )
                for doc in aiResponse.documents
            ]
        else:
            # Text response - create a single document from the response content
            meaningful_name = self._generateMeaningfulFileName(
                base_name="ai",
                extension=output_extension.lstrip('.'),
                action_name="result"
            )
            final_documents = [
                ActionDocument(
                    documentName=meaningful_name,
                    documentData=aiResponse.content,
                    mimeType=output_mime_type,
                    validationMetadata={
                        "actionType": "ai.process",
                        "resultType": normalized_result_type,
                        "outputFormat": output_format,
                        "hasDocuments": False,
                        "contentType": "text"
                    }
                )
            ]

        # Complete progress tracking
        self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(documents=final_documents)

    except Exception as e:
        logger.error(f"Error in AI processing: {str(e)}", exc_info=True)

        # Best effort: a failing progress log must not replace the real error
        if operationId is not None:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as progressError:
                logger.debug(f"Could not close progress log {operationId}: {progressError}")

        return ActionResult.isFailure(
            error=str(e)
        )
|
|
|
|
|
|
@action
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Web research with two-step process: search for URLs, then crawl content.
    - Input requirements: prompt (required); optional list(url), country, language, researchDepth.
    - Output format: JSON with research results including URLs and content.

    Parameters:
    - prompt (str, required): Natural language research instruction.
    - urlList (list, optional): Specific URLs to crawl, if needed.
    - country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
    - language (str, optional): Language code (lowercase, e.g., de, en, fr).
    - researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
    """
    # NOTE(review): the GENERAL/Parameters docstring layout is kept verbatim; it is
    # presumably consumed by the @action machinery - confirm before rewording it.

    # Stays None until an operation id exists, so the failure handler never
    # touches an unbound name.
    operationId: Optional[str] = None
    try:
        prompt = parameters.get("prompt")
        if not prompt:
            return ActionResult.isFailure(error="Research prompt is required")

        # Read the optional parameters once; they feed both the service call
        # and the validation metadata.
        urlList = parameters.get("urlList", [])
        country = parameters.get("country")
        language = parameters.get("language")
        researchDepth = parameters.get("researchDepth", "general")

        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"web_research_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            operationId,
            "Web Research",
            "Searching and Crawling",
            "Extracting URLs and Content"
        )

        result = await self.services.web.performWebResearch(
            prompt=prompt,
            urls=urlList,
            country=country,
            language=language,
            researchDepth=researchDepth,
            operationId=operationId
        )

        # Use the filename suggested in the research result when it is a
        # short, safe string; anything else falls back to a generated name.
        meaningfulName = None
        suggestedFilename = result.get("suggested_filename")
        if suggestedFilename and isinstance(suggestedFilename, str):
            import re
            cleaned = suggestedFilename.strip().strip('"\'')
            cleaned = cleaned.replace('\n', ' ').replace('\r', ' ').strip()
            # Drop an existing extension; ".json" is appended below
            if cleaned.lower().endswith('.json'):
                cleaned = cleaned[:-5]
            # Validate: reasonable length and only safe characters
            if cleaned and len(cleaned) <= 60 and re.match(r'^[a-zA-Z0-9_\-]+$', cleaned):
                meaningfulName = f"{cleaned}.json"

        if meaningfulName is None:
            meaningfulName = self._generateMeaningfulFileName(
                base_name="web_research",
                extension="json",
                action_name="research"
            )

        actionDocument = ActionDocument(
            documentName=meaningfulName,
            documentData=result,
            mimeType="application/json",
            validationMetadata={
                "actionType": "ai.webResearch",
                "prompt": prompt,
                "urlList": urlList,
                "country": country,
                "language": language,
                "researchDepth": researchDepth,
                "resultFormat": "json"
            }
        )

        # Report success only once the result document exists, so a failure
        # while building it is not logged as success followed by failure.
        self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(documents=[actionDocument])

    except Exception as e:
        logger.error(f"Error in web research: {str(e)}", exc_info=True)
        # Best effort: a failing progress log must not replace the real error
        if operationId is not None:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as progressError:
                logger.debug(f"Could not close progress log {operationId}: {progressError}")
        return ActionResult.isFailure(error=str(e))
|
|
|
|
|
|
# ============================================================================
# Document Transformation Wrappers
# ============================================================================
|
|
|
|
@action
async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Summarize one or more documents, extracting key points and main ideas.
    - Input requirements: documentList (required); optional summaryLength, focus.
    - Output format: Text document with summary (default: txt, can be overridden with resultType).

    Parameters:
    - documentList (list, required): Document reference(s) to summarize.
    - summaryLength (str, optional): Desired summary length - brief, medium, or detailed. Default: medium.
    - focus (str, optional): Specific aspect to focus on in the summary (e.g., "financial data", "key decisions").
    - resultType (str, optional): Output file extension (txt, md, docx, etc.). Default: txt.
    """
    # NOTE(review): the GENERAL/Parameters docstring layout is kept verbatim; it is
    # presumably consumed by the @action machinery - confirm before rewording it.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    focus = parameters.get("focus")
    resultType = parameters.get("resultType", "txt")

    # summaryLength may be present but None or not a string; coerce it
    # before lower-casing so such input selects "medium" instead of raising.
    summaryLength = str(parameters.get("summaryLength") or "medium").strip().lower()

    lengthInstructions = {
        "brief": "Create a brief summary (2-3 paragraphs)",
        "medium": "Create a medium-length summary (comprehensive but concise)",
        "detailed": "Create a detailed summary covering all major points"
    }
    # Unknown length keywords fall back to the medium instruction
    lengthInstruction = lengthInstructions.get(summaryLength, lengthInstructions["medium"])

    aiPrompt = f"Summarize the provided document(s). {lengthInstruction}."
    if focus:
        aiPrompt += f" Focus specifically on: {focus}."
    aiPrompt += " Extract and present the key points, main ideas, and important information in a clear, well-structured format."

    # Delegate the actual AI call to the generic process action
    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": resultType
    })
|
|
|
|
|
|
@action
async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Translate documents to a target language while preserving formatting and structure.
    - Input requirements: documentList (required); targetLanguage (required).
    - Output format: Translated document in same format as input (default) or specified resultType.

    Parameters:
    - documentList (list, required): Document reference(s) to translate.
    - targetLanguage (str, required): Target language code or name (e.g., "de", "German", "French", "es").
    - sourceLanguage (str, optional): Source language if known (e.g., "en", "English"). If not provided, AI will detect.
    - preserveFormatting (bool, optional): Whether to preserve original formatting. Default: True.
    - resultType (str, optional): Output file extension. If not specified, uses same format as input.
    """
    # NOTE(review): the docstring promises "same format as input" when resultType
    # is omitted, but this method then forwards no resultType at all to
    # self.process - confirm which output format callers actually receive.
    references = parameters.get("documentList", [])
    if not references:
        return ActionResult.isFailure(error="documentList is required")

    targetLanguage = parameters.get("targetLanguage")
    if not targetLanguage:
        return ActionResult.isFailure(error="targetLanguage is required")

    sourceLanguage = parameters.get("sourceLanguage")
    requestedType = parameters.get("resultType")

    # Assemble the prompt from sentence fragments; every fragment after the
    # first carries its own leading space.
    fragments = [f"Translate the provided document(s) to {targetLanguage}."]
    if sourceLanguage:
        fragments.append(f" The source language is {sourceLanguage}.")
    fragments.append(
        " Preserve all formatting, structure, tables, and layout exactly as they appear in the original document."
        if parameters.get("preserveFormatting", True)
        else " Focus on accurate translation of content."
    )
    fragments.append(" Maintain the same document structure, headings, and organization.")

    request = {
        "aiPrompt": "".join(fragments),
        "documentList": references
    }
    # resultType is forwarded only when the caller supplied a non-empty one
    if requestedType:
        request["resultType"] = requestedType

    return await self.process(request)
|
|
|
|
|
|
@action
async def convert(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Convert documents/data between different formats with specific formatting options (e.g., JSON→CSV with custom columns, delimiters).
    - Input requirements: documentList (required); inputFormat and outputFormat (required).
    - Output format: Document in target format with specified formatting options.
    - CRITICAL: If input is already in standardized JSON format, uses automatic rendering system (no AI call needed).

    Parameters:
    - documentList (list, required): Document reference(s) to convert.
    - inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.).
    - outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.).
    - columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect.
    - delimiter (str, optional): For CSV output, delimiter character. Default: comma (,).
    - includeHeader (bool, optional): For CSV output, whether to include header row. Default: True.
    - language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'.
    """
    # NOTE(review): the GENERAL/Parameters docstring layout is kept verbatim; it is
    # presumably consumed by the @action machinery - confirm before rewording it.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    inputFormat = parameters.get("inputFormat")
    outputFormat = parameters.get("outputFormat")
    if not inputFormat or not outputFormat:
        return ActionResult.isFailure(error="inputFormat and outputFormat are required")

    # Normalize formats (remove leading dot if present); str() guards against
    # non-string values, and a value such as "." that normalises to nothing
    # is rejected like a missing one.
    normalizedInputFormat = str(inputFormat).strip().lstrip('.').lower()
    normalizedOutputFormat = str(outputFormat).strip().lstrip('.').lower()
    if not normalizedInputFormat or not normalizedOutputFormat:
        return ActionResult.isFailure(error="inputFormat and outputFormat are required")

    # Resolve the document references
    from modules.datamodels.datamodelDocref import DocumentReferenceList
    if isinstance(documentList, DocumentReferenceList):
        docRefList = documentList
    elif isinstance(documentList, list):
        docRefList = DocumentReferenceList.from_string_list(documentList)
    else:
        docRefList = DocumentReferenceList.from_string_list([documentList])

    chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
    if not chatDocuments:
        return ActionResult.isFailure(error="No documents found in documentList")

    # A single JSON document in standardized format is rendered directly,
    # without an AI call. Any failure here is deliberately best effort and
    # falls through to the AI-based conversion below.
    if normalizedInputFormat == "json" and len(chatDocuments) == 1:
        try:
            import json
            doc = chatDocuments[0]
            # The document content is loaded through its fileId
            docBytes = self.services.chat.getFileData(doc.fileId)
            if not docBytes:
                raise ValueError(f"No file data found for fileId={doc.fileId}")

            # json.loads accepts str as well as bytes/bytearray and detects a
            # UTF-8 BOM on bytes input, so no manual decode step is needed.
            jsonData = json.loads(docBytes)

            # Standardized JSON format has "documents" or "sections"
            if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData):
                from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                generationService = GenerationService(self.services)

                # Read the title before the payload is wrapped below;
                # afterwards the top-level "metadata" key no longer exists.
                metadata = jsonData.get("metadata") or {}
                title = metadata.get("title", doc.documentName or "Converted Document")

                # Ensure format is "documents" array
                if "documents" not in jsonData:
                    jsonData = {"documents": [{"sections": jsonData.get("sections", []), "metadata": jsonData.get("metadata", {})}]}

                # CSV-specific options, applied after rendering
                renderOptions = {}
                if normalizedOutputFormat == "csv":
                    renderOptions["delimiter"] = parameters.get("delimiter", ",")
                    renderOptions["columnsPerRow"] = parameters.get("columnsPerRow")
                    renderOptions["includeHeader"] = parameters.get("includeHeader", True)

                rendered_content, mime_type = await generationService.renderReport(
                    jsonData, normalizedOutputFormat, title, None, None
                )

                if normalizedOutputFormat == "csv" and renderOptions:
                    rendered_content = self._applyCsvOptions(rendered_content, renderOptions)

                # Swap the extension of the source name; a missing name gets a
                # generic base instead of aborting the direct rendering.
                baseName = (doc.documentName or "converted").rsplit('.', 1)[0]
                actionDoc = ActionDocument(
                    documentName=f"{baseName}.{normalizedOutputFormat}",
                    documentData=rendered_content,
                    mimeType=mime_type,
                    sourceJson=jsonData,  # Preserve source JSON for structure validation
                    validationMetadata={
                        "actionType": "ai.convert",
                        "inputFormat": normalizedInputFormat,
                        "outputFormat": normalizedOutputFormat,
                        "hasSourceJson": True,
                        "conversionType": "direct_rendering"
                    }
                )

                return ActionResult.isSuccess(documents=[actionDoc])

        except Exception as e:
            logger.warning(f"Direct rendering failed, falling back to AI conversion: {str(e)}")
            # Fall through to AI-based conversion

    # Fallback: Use AI for conversion (for non-JSON inputs or complex conversions)
    columnsPerRow = parameters.get("columnsPerRow")
    delimiter = parameters.get("delimiter", ",")
    includeHeader = parameters.get("includeHeader", True)
    language = parameters.get("language", "en")

    aiPrompt = f"Convert the provided document(s) from {normalizedInputFormat.upper()} format to {normalizedOutputFormat.upper()} format."

    if normalizedOutputFormat == "csv":
        aiPrompt += f" Use '{delimiter}' as the delimiter character."
        if columnsPerRow:
            aiPrompt += f" Format the output with {columnsPerRow} columns per row."
        if not includeHeader:
            aiPrompt += " Do not include a header row."
        else:
            aiPrompt += " Include a header row with column names."

    if language and language != "en":
        aiPrompt += f" Use language: {language}."

    aiPrompt += " Preserve all data and ensure accurate conversion. Maintain data integrity and structure."

    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": normalizedOutputFormat
    })
|
|
|
|
def _applyCsvOptions(self, csvContent: str, options: Dict[str, Any]) -> str:
|
|
"""Apply CSV formatting options to rendered CSV content."""
|
|
delimiter = options.get("delimiter", ",")
|
|
columnsPerRow = options.get("columnsPerRow")
|
|
includeHeader = options.get("includeHeader", True)
|
|
|
|
# Check if any options need to be applied
|
|
needsProcessing = (delimiter != ",") or (columnsPerRow is not None) or (not includeHeader)
|
|
|
|
if not needsProcessing:
|
|
return csvContent
|
|
|
|
import csv
|
|
import io
|
|
# Re-read CSV with comma, write with new delimiter
|
|
reader = csv.reader(io.StringIO(csvContent))
|
|
output = io.StringIO()
|
|
writer = csv.writer(output, delimiter=delimiter)
|
|
|
|
rows = list(reader)
|
|
|
|
# Handle header
|
|
if not includeHeader and rows:
|
|
rows = rows[1:] # Skip header
|
|
|
|
# Handle columnsPerRow
|
|
if columnsPerRow:
|
|
newRows = []
|
|
for row in rows:
|
|
# Split row into chunks of columnsPerRow
|
|
for i in range(0, len(row), columnsPerRow):
|
|
chunk = row[i:i+columnsPerRow]
|
|
# Pad to columnsPerRow if needed
|
|
while len(chunk) < columnsPerRow:
|
|
chunk.append("")
|
|
newRows.append(chunk)
|
|
rows = newRows
|
|
|
|
for row in rows:
|
|
writer.writerow(row)
|
|
|
|
return output.getvalue()
|
|
|
|
|
|
@action
async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Convert documents between different formats (PDF→Word, Excel→CSV, etc.).
    - Input requirements: documentList (required); targetFormat (required).
    - Output format: Document in target format.

    Parameters:
    - documentList (list, required): Document reference(s) to convert.
    - targetFormat (str, required): Target format extension (docx, pdf, xlsx, csv, txt, html, json, md, etc.).
    - preserveStructure (bool, optional): Whether to preserve document structure (headings, tables, etc.). Default: True.
    """
    # NOTE(review): the GENERAL/Parameters docstring layout is kept verbatim; it is
    # presumably consumed by the @action machinery - confirm before rewording it.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    targetFormat = parameters.get("targetFormat")
    if not targetFormat:
        return ActionResult.isFailure(error="targetFormat is required")

    # Normalize format (remove leading dot if present); str() guards against
    # non-string values, and a value such as "." or "  " that normalises to
    # nothing is rejected like a missing one.
    normalizedFormat = str(targetFormat).strip().lstrip('.').lower()
    if not normalizedFormat:
        return ActionResult.isFailure(error="targetFormat is required")

    preserveStructure = parameters.get("preserveStructure", True)

    aiPrompt = f"Convert the provided document(s) to {normalizedFormat.upper()} format."
    if preserveStructure:
        aiPrompt += " Preserve all document structure including headings, tables, formatting, lists, and layout."
    aiPrompt += " Ensure the converted document maintains the same content and information as the original."

    # Delegate the actual AI call to the generic process action
    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": normalizedFormat
    })
|
|
|
|
|
|
@action
async def extractData(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Extract structured data from documents (key-value pairs, entities, facts, etc.).
    - Input requirements: documentList (required); optional dataStructure, fields.
    - Output format: JSON by default, or specified resultType.

    Parameters:
    - documentList (list, required): Document reference(s) to extract data from.
    - dataStructure (str, optional): Desired data structure - flat, nested, or list. Default: nested.
    - fields (list, optional): Specific fields/properties to extract (e.g., ["name", "date", "amount"]).
    - resultType (str, optional): Output format (json, csv, xlsx, etc.). Default: json.
    """
    # NOTE(review): the GENERAL/Parameters docstring layout is kept verbatim; it is
    # presumably consumed by the @action machinery - confirm before rewording it.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    resultType = parameters.get("resultType", "json")

    # dataStructure may be present but None or not a string; coerce it so
    # such input selects "nested" instead of raising on .lower().
    dataStructure = str(parameters.get("dataStructure") or "nested").strip().lower()

    # A bare string is one field name, not a sequence of characters, and
    # join() needs string items - normalise both cases.
    fields = parameters.get("fields") or []
    if isinstance(fields, str):
        fields = [fields]
    fieldNames = [str(field) for field in fields]

    aiPrompt = "Extract structured data from the provided document(s)."
    if fieldNames:
        fieldsStr = ", ".join(fieldNames)
        aiPrompt += f" Extract the following specific fields: {fieldsStr}."
    else:
        aiPrompt += " Extract all relevant data including names, dates, amounts, entities, and key information."

    structureInstructions = {
        "flat": "Use a flat key-value structure with simple properties.",
        "nested": "Use a nested JSON structure with logical grouping of related data.",
        "list": "Structure the data as a list/array of objects, one per entity or record."
    }
    # Unknown structure keywords fall back to the nested instruction
    aiPrompt += f" {structureInstructions.get(dataStructure, structureInstructions['nested'])}"

    aiPrompt += " Ensure all extracted data is accurate and complete."

    # Delegate the actual AI call to the generic process action
    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": resultType
    })
|
|
|
|
|
|
# ============================================================================
# Content Generation Wrapper
# ============================================================================
|
|
|
|
@action
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Generate documents from scratch or based on templates/inputs.
    - Input requirements: prompt or description (required); optional documentList (for templates/references).
    - Output format: Document in specified format (default: docx).

    Parameters:
    - prompt (str, required): Description of the document to generate.
    - documentList (list, optional): Template documents or reference documents to use as a guide.
    - documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
    - resultType (str, optional): Output format (docx, pdf, txt, md, etc.). Default: docx.
    """
    # NOTE(review): the GENERAL/Parameters docstring layout is kept verbatim; it is
    # presumably consumed by the @action machinery - confirm before rewording it.
    prompt = parameters.get("prompt")
    if not prompt:
        return ActionResult.isFailure(error="prompt is required")

    templates = parameters.get("documentList", [])
    documentType = parameters.get("documentType")

    # Assemble the prompt from sentence fragments; every fragment after the
    # first carries its own leading space.
    fragments = [f"Generate a document based on the following requirements: {prompt}"]
    if documentType:
        fragments.append(f" Document type: {documentType}.")
    if templates:
        fragments.append(" Use the provided template/reference documents as a guide for structure, format, and style.")
    fragments.append(" Create a professional, well-structured document with appropriate formatting and organization.")

    request = {
        "aiPrompt": "".join(fragments),
        "resultType": parameters.get("resultType", "docx")
    }
    # Template/reference documents are forwarded only when some were given
    if templates:
        request["documentList"] = templates

    return await self.process(request)
|