910 lines
43 KiB
Python
910 lines
43 KiB
Python
"""
|
|
AI processing method module.
|
|
Handles direct AI calls for any type of task.
|
|
"""
|
|
|
|
import time
|
|
import logging
|
|
from typing import Dict, Any, List, Optional
|
|
from datetime import datetime, UTC
|
|
|
|
from modules.workflows.methods.methodBase import MethodBase, action
|
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
|
from modules.datamodels.datamodelWorkflow import ExtractContentParameters
|
|
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentPart
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class MethodAi(MethodBase):
|
|
"""AI processing methods."""
|
|
|
|
def __init__(self, services):
    """Set up the AI method group.

    Args:
        services: Service container handed unchanged to ``MethodBase.__init__``.
    """
    super().__init__(services)
    # Identifier and human-readable description of this method group.
    self.name, self.description = "ai", "AI processing methods"
|
|
|
|
def _format_timestamp_for_filename(self) -> str:
|
|
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
|
|
return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
|
|
|
|
|
@action
|
|
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
    - Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
    - Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
    - Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.

    Parameters:
    - aiPrompt (str, required): Instruction for the AI describing what processing to perform.
    - documentList (list, optional): Document reference(s) in any format to use as input/context.
    - resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
    """
    # NOTE(review): the docstring above is reproduced verbatim; its
    # GENERAL/Parameters layout looks machine-readable (presumably consumed via
    # @action) - confirm before rewording it.
    #
    # Inputs read by the code but not listed in the docstring:
    # - contentParts: already-extracted parts (a list, or an object exposing
    #   `.parts`). When non-empty, document extraction is skipped.
    # - extractionOptions: forwarded to the extraction service; defaults are
    #   built when it is missing/falsy.
    #
    # Returns ActionResult.isSuccess(documents=[...]) on success. Any exception
    # is caught and converted into ActionResult.isFailure(error=str(e)).
    import json  # only used for the structured-content probe below

    # Bound before the try block so the failure handler can tell whether
    # progress tracking was ever started.
    operationId = None
    try:
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"ai_process_{workflowId}_{int(time.time())}"

        # `or` (not a .get default) so an explicit resultType=None also falls
        # back to "txt" instead of becoming the extension ".none".
        resultType = parameters.get("resultType") or "txt"

        # Start progress tracking
        self.services.chat.progressLogStart(
            operationId,
            "Generate",
            "AI Processing",
            f"Format: {resultType}"
        )

        aiPrompt = parameters.get("aiPrompt")
        logger.info(f"aiPrompt extracted: '{aiPrompt}' (type: {type(aiPrompt)})")

        self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")

        # Imported locally, as in the original code.
        from modules.datamodels.datamodelDocref import DocumentReferenceList

        # Normalize documentList into a DocumentReferenceList.
        documentListParam = parameters.get("documentList")
        if documentListParam is None:
            documentList = DocumentReferenceList(references=[])
        elif isinstance(documentListParam, DocumentReferenceList):
            documentList = documentListParam
        elif isinstance(documentListParam, str):
            documentList = DocumentReferenceList.from_string_list([documentListParam])
        elif isinstance(documentListParam, list):
            documentList = DocumentReferenceList.from_string_list(documentListParam)
        else:
            # Unsupported type: log it and continue without input documents.
            logger.error(f"Invalid documentList type: {type(documentListParam)}")
            documentList = DocumentReferenceList(references=[])

        if not aiPrompt:
            logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
            # Close the progress entry opened above so it is not left dangling.
            self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(
                error="AI prompt is required"
            )

        # Determine output extension and default MIME type without duplicating service logic
        normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
        output_extension = f".{normalized_result_type}"
        output_mime_type = "application/octet-stream"  # Prefer service-provided mimeType when available
        logger.info(f"Using result type: {resultType} -> {output_extension}")

        # Preferred path: the caller already supplies extracted content parts.
        contentParts: Optional[List[ContentPart]] = parameters.get("contentParts")
        if contentParts and not isinstance(contentParts, list):
            if hasattr(contentParts, 'parts'):
                # Container object exposing its parts (e.g. an extraction result).
                contentParts = contentParts.parts
            else:
                logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
                contentParts = None

        # Otherwise extract content from the referenced documents first.
        if not contentParts and documentList.references:
            self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")

            chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
            if not chatDocuments:
                logger.warning("No documents found in documentList")
            else:
                logger.info(f"Extracting content from {len(chatDocuments)} documents")

                # Use caller-provided extraction options, or build defaults.
                extractionOptions = parameters.get("extractionOptions")
                if not extractionOptions:
                    extractionOptions = ExtractionOptions(
                        prompt="Extract all content from the document",
                        mergeStrategy=MergeStrategy(
                            mergeType="concatenate",
                            groupBy="typeGroup",
                            orderBy="id"
                        ),
                        processDocumentsIndividually=True
                    )

                extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions)

                # Flatten the parts of every extraction result into one list.
                contentParts = []
                for extracted in extractedResults:
                    if extracted.parts:
                        contentParts.extend(extracted.parts)

                logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")

        self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")

        # Only resultFormat is set here; the service layer decides all other
        # call parameters.
        output_format = output_extension.replace('.', '') or 'txt'
        options = AiCallOptions(
            resultFormat=output_format
        )

        self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")

        aiResponse = await self.services.ai.callAiContent(
            prompt=aiPrompt,
            options=options,
            contentParts=contentParts,  # Already extracted (or None if no documents)
            outputFormat=output_format,
            parentOperationId=operationId
        )

        self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")

        if aiResponse.documents:
            # The AI returned documents: wrap each one as an ActionDocument.
            final_documents = [
                ActionDocument(
                    documentName=doc.documentName,
                    documentData=doc.documentData,
                    mimeType=doc.mimeType or output_mime_type
                )
                for doc in aiResponse.documents
            ]

            # Best effort: if the response content is (or parses to) a JSON
            # object/array, attach it as an extra document for validation.
            try:
                contentData = json.loads(aiResponse.content) if isinstance(aiResponse.content, str) else aiResponse.content
                if isinstance(contentData, (dict, list)):
                    final_documents.append(ActionDocument(
                        documentName="structured_content.json",
                        documentData=contentData,
                        mimeType="application/json"
                    ))
            except Exception:
                # Content is not JSON (or could not be wrapped); skip it.
                logger.debug("AI response content is not structured JSON; skipping structured_content.json")
        else:
            # Plain text response: create a single document from the content.
            meaningful_name = self._generateMeaningfulFileName(
                base_name="ai",
                extension=output_extension.lstrip('.'),
                action_name="result"
            )
            final_documents = [ActionDocument(
                documentName=meaningful_name,
                documentData=aiResponse.content,
                mimeType=output_mime_type
            )]

        # Complete progress tracking
        self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(documents=final_documents)

    except Exception as e:
        # logger.exception keeps the traceback alongside the original message.
        logger.exception(f"Error in AI processing: {str(e)}")

        # Close progress tracking with failure, but only if it was started.
        if operationId is not None:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception:
                # Don't fail on progress logging errors
                logger.debug("Could not finish progress log for failed AI processing", exc_info=True)

        return ActionResult.isFailure(
            error=str(e)
        )
|
|
|
|
|
|
@action
|
|
async def extractContent(self, parameters: ExtractContentParameters) -> ActionResult:
    """
    Extract content from documents (separate from AI calls).

    This action performs pure content extraction without AI processing.
    The extracted ContentParts can then be used by subsequent AI processing actions.

    Parameters:
    - documentList: DocumentReferenceList - Document references to extract content from
    - extractionOptions: Optional[ExtractionOptions] - Extraction options (if not provided, defaults are used)

    Returns:
    - ActionResult with ActionDocument containing ContentExtracted objects
    - ContentExtracted.parts contains List[ContentPart] (already chunked if needed)
    """
    # Failure modes visible in this method:
    # - no chat documents resolved from documentList -> isFailure("No documents found in documentList")
    # - any exception -> caught, logged, returned as isFailure(error=str(e))

    # Bound before the try block so the failure handler can tell whether
    # progress tracking was ever started.
    operationId = None
    try:
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"ai_extract_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            operationId,
            "Extracting content from documents",
            "Content Extraction",
            f"Documents: {len(parameters.documentList.references) if parameters.documentList else 0}"
        )

        # Resolve the document references into chat documents.
        self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents")
        chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(parameters.documentList)

        if not chatDocuments:
            self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error="No documents found in documentList")

        logger.info(f"Extracting content from {len(chatDocuments)} documents")

        # Use caller-provided extraction options, or build defaults.
        self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
        extractionOptions = parameters.extractionOptions
        if not extractionOptions:
            # Default extraction options for pure content extraction (no AI processing)
            extractionOptions = ExtractionOptions(
                prompt="Extract all content from the document",
                mergeStrategy=MergeStrategy(
                    mergeType="concatenate",
                    groupBy="typeGroup",
                    orderBy="id"
                ),
                processDocumentsIndividually=True
            )

        # Call extraction service
        self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
        self.services.chat.progressLogUpdate(operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents")
        extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions)

        # One ActionDocument per extraction result; the extraction result
        # object itself is stored as documentData.
        self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
        actionDocuments = [
            ActionDocument(
                documentName=f"extracted_{extracted.id}.json",
                documentData=extracted,
                mimeType="application/json"
            )
            for extracted in extractedResults
        ]

        self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(documents=actionDocuments)

    except Exception as e:
        # logger.exception keeps the traceback alongside the original message.
        logger.exception(f"Error in content extraction: {str(e)}")

        # Close progress tracking with failure, but only if it was started.
        if operationId is not None:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception:
                # Don't fail on progress logging errors
                logger.debug("Could not finish progress log for failed content extraction", exc_info=True)

        return ActionResult.isFailure(error=str(e))
|
|
|
|
|
|
@action
|
|
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Web research with two-step process: search for URLs, then crawl content.
    - Input requirements: prompt (required); optional list(url), country, language, researchDepth.
    - Output format: JSON with research results including URLs and content.

    Parameters:
    - prompt (str, required): Natural language research instruction.
    - urlList (list, optional): Specific URLs to crawl, if needed.
    - country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
    - language (str, optional): Language code (lowercase, e.g., de, en, fr).
    - researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
    """
    # NOTE(review): the docstring above is reproduced verbatim; its
    # GENERAL/Parameters layout looks machine-readable (presumably consumed via
    # @action) - confirm before rewording it.
    #
    # Returns ActionResult.isSuccess with one JSON ActionDocument holding the
    # research result, or ActionResult.isFailure when the prompt is missing or
    # any exception is raised.
    import re  # local import: only needed for filename validation

    # Bound before the try block so the failure handler can tell whether
    # progress tracking was ever started.
    operationId = None
    try:
        prompt = parameters.get("prompt")
        if not prompt:
            return ActionResult.isFailure(error="Research prompt is required")

        # Same fallback as the other actions: tolerate running without a workflow.
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"web_research_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            operationId,
            "Web Research",
            "Searching and Crawling",
            "Extracting URLs and Content"
        )

        # The web service performs the search/crawl and any intent analysis.
        result = await self.services.web.performWebResearch(
            prompt=prompt,
            urls=parameters.get("urlList") or [],  # explicit None -> empty list
            country=parameters.get("country"),
            language=parameters.get("language"),
            researchDepth=parameters.get("researchDepth", "general"),
            operationId=operationId
        )

        # Prefer the filename suggested in the research result when it is a
        # short string made only of letters, digits, "_" and "-".
        meaningfulName = None
        suggestedFilename = result.get("suggested_filename")
        if suggestedFilename and isinstance(suggestedFilename, str):
            cleaned = suggestedFilename.strip().strip('"\'')
            cleaned = cleaned.replace('\n', ' ').replace('\r', ' ').strip()
            # Drop an existing extension; ".json" is appended below.
            if cleaned.lower().endswith('.json'):
                cleaned = cleaned[:-5]
            if cleaned and len(cleaned) <= 60 and re.fullmatch(r'[a-zA-Z0-9_\-]+', cleaned):
                meaningfulName = f"{cleaned}.json"

        if meaningfulName is None:
            # Fallback to generic meaningful filename
            meaningfulName = self._generateMeaningfulFileName(
                base_name="web_research",
                extension="json",
                action_name="research"
            )

        actionDocument = ActionDocument(
            documentName=meaningfulName,
            documentData=result,
            mimeType="application/json"
        )

        # Mark success only after the result document has been built, so a
        # post-processing error cannot follow a "success" progress entry.
        self.services.chat.progressLogFinish(operationId, True)

        return ActionResult.isSuccess(documents=[actionDocument])

    except Exception as e:
        # logger.exception keeps the traceback alongside the original message.
        logger.exception(f"Error in web research: {str(e)}")
        # Close progress tracking with failure, but only if it was started.
        if operationId is not None:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception:
                # Don't fail on progress logging errors
                logger.debug("Could not finish progress log for failed web research", exc_info=True)
        return ActionResult.isFailure(error=str(e))
|
|
|
|
|
|
# ============================================================================
|
|
# Document Transformation Wrappers
|
|
# ============================================================================
|
|
|
|
@action
|
|
async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Summarize one or more documents, extracting key points and main ideas.
    - Input requirements: documentList (required); optional summaryLength, focus.
    - Output format: Text document with summary (default: txt, can be overridden with resultType).

    Parameters:
    - documentList (list, required): Document reference(s) to summarize.
    - summaryLength (str, optional): Desired summary length - brief, medium, or detailed. Default: medium.
    - focus (str, optional): Specific aspect to focus on in the summary (e.g., "financial data", "key decisions").
    - resultType (str, optional): Output file extension (txt, md, docx, etc.). Default: txt.
    """
    # NOTE(review): the docstring above is reproduced verbatim; its
    # GENERAL/Parameters layout looks machine-readable (presumably consumed via
    # @action) - confirm before rewording it.
    #
    # Thin wrapper: builds a summarization prompt and delegates to process().
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    # `or` instead of a .get default so an explicit None also gets the default;
    # str() guards against non-string values before .lower().
    summaryLength = str(parameters.get("summaryLength") or "medium").strip().lower()
    focus = parameters.get("focus")
    resultType = parameters.get("resultType") or "txt"

    lengthInstructions = {
        "brief": "Create a brief summary (2-3 paragraphs)",
        "medium": "Create a medium-length summary (comprehensive but concise)",
        "detailed": "Create a detailed summary covering all major points"
    }
    # Unknown lengths fall back to the "medium" instruction.
    lengthInstruction = lengthInstructions.get(summaryLength, lengthInstructions["medium"])

    aiPrompt = f"Summarize the provided document(s). {lengthInstruction}."
    if focus:
        aiPrompt += f" Focus specifically on: {focus}."
    aiPrompt += " Extract and present the key points, main ideas, and important information in a clear, well-structured format."

    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": resultType
    })
|
|
|
|
|
|
@action
|
|
async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Translate documents to a target language while preserving formatting and structure.
    - Input requirements: documentList (required); targetLanguage (required).
    - Output format: Translated document in same format as input (default) or specified resultType.

    Parameters:
    - documentList (list, required): Document reference(s) to translate.
    - targetLanguage (str, required): Target language code or name (e.g., "de", "German", "French", "es").
    - sourceLanguage (str, optional): Source language if known (e.g., "en", "English"). If not provided, AI will detect.
    - preserveFormatting (bool, optional): Whether to preserve original formatting. Default: True.
    - resultType (str, optional): Output file extension. If not specified, uses same format as input.
    """
    # NOTE(review): the docstring above is reproduced verbatim; its
    # GENERAL/Parameters layout looks machine-readable (presumably consumed via
    # @action) - confirm before rewording it.
    # NOTE(review): when resultType is omitted this wrapper passes none on, and
    # process() then defaults to "txt" - that does not match the "same format
    # as input" wording above; confirm which one is intended.
    #
    # Thin wrapper: validates the required inputs, builds a translation prompt
    # and delegates to process().
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    targetLanguage = parameters.get("targetLanguage")
    if not targetLanguage:
        return ActionResult.isFailure(error="targetLanguage is required")

    sourceLanguage = parameters.get("sourceLanguage")
    keepFormatting = parameters.get("preserveFormatting", True)
    requestedType = parameters.get("resultType")

    # Collect the prompt sentences in order, then join them once.
    sentences = [f"Translate the provided document(s) to {targetLanguage}."]
    if sourceLanguage:
        sentences.append(f" The source language is {sourceLanguage}.")
    sentences.append(
        " Preserve all formatting, structure, tables, and layout exactly as they appear in the original document."
        if keepFormatting
        else " Focus on accurate translation of content."
    )
    sentences.append(" Maintain the same document structure, headings, and organization.")

    request = {
        "aiPrompt": "".join(sentences),
        "documentList": documentList
    }
    # Forward resultType only when the caller supplied a truthy value.
    if requestedType:
        request["resultType"] = requestedType

    return await self.process(request)
|
|
|
|
|
|
@action
|
|
async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Convert documents between different formats (PDF→Word, Excel→CSV, etc.).
    - Input requirements: documentList (required); targetFormat (required).
    - Output format: Document in target format.

    Parameters:
    - documentList (list, required): Document reference(s) to convert.
    - targetFormat (str, required): Target format extension (docx, pdf, xlsx, csv, txt, html, json, md, etc.).
    - preserveStructure (bool, optional): Whether to preserve document structure (headings, tables, etc.). Default: True.
    """
    # NOTE(review): the docstring above is reproduced verbatim; its
    # GENERAL/Parameters layout looks machine-readable (presumably consumed via
    # @action) - confirm before rewording it.
    #
    # Thin wrapper: validates the inputs, builds a conversion prompt and
    # delegates to process() with resultType set to the normalized format.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    targetFormat = parameters.get("targetFormat")
    # Normalize format: tolerate non-string values, surrounding whitespace and
    # a leading dot.
    normalizedFormat = str(targetFormat).strip().lstrip('.').lower() if targetFormat else ""
    if not normalizedFormat:
        # Also covers values such as "." or "  " that normalize to nothing and
        # would otherwise be converted to txt without any warning.
        return ActionResult.isFailure(error="targetFormat is required")

    preserveStructure = parameters.get("preserveStructure", True)

    aiPrompt = f"Convert the provided document(s) to {normalizedFormat.upper()} format."
    if preserveStructure:
        aiPrompt += " Preserve all document structure including headings, tables, formatting, lists, and layout."
    aiPrompt += " Ensure the converted document maintains the same content and information as the original."

    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": normalizedFormat
    })
|
|
|
|
|
|
@action
|
|
async def extractData(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Extract structured data from documents (key-value pairs, entities, facts, etc.).
    - Input requirements: documentList (required); optional dataStructure, fields.
    - Output format: JSON by default, or specified resultType.

    Parameters:
    - documentList (list, required): Document reference(s) to extract data from.
    - dataStructure (str, optional): Desired data structure - flat, nested, or list. Default: nested.
    - fields (list, optional): Specific fields/properties to extract (e.g., ["name", "date", "amount"]).
    - resultType (str, optional): Output format (json, csv, xlsx, etc.). Default: json.
    """
    # NOTE(review): the docstring above is reproduced verbatim; its
    # GENERAL/Parameters layout looks machine-readable (presumably consumed via
    # @action) - confirm before rewording it.
    #
    # Thin wrapper: builds a data-extraction prompt and delegates to process().
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    # `or` instead of .get defaults so explicit None values get the defaults;
    # str() guards against non-string values before .lower().
    dataStructure = str(parameters.get("dataStructure") or "nested").strip().lower()
    fields = parameters.get("fields") or []
    if isinstance(fields, str):
        # A single field name given as a plain string must not be joined
        # character by character.
        fields = [fields]
    resultType = parameters.get("resultType") or "json"

    aiPrompt = "Extract structured data from the provided document(s)."
    if fields:
        # str() so non-string field names do not break the join.
        fieldsStr = ", ".join(str(field) for field in fields)
        aiPrompt += f" Extract the following specific fields: {fieldsStr}."
    else:
        aiPrompt += " Extract all relevant data including names, dates, amounts, entities, and key information."

    structureInstructions = {
        "flat": "Use a flat key-value structure with simple properties.",
        "nested": "Use a nested JSON structure with logical grouping of related data.",
        "list": "Structure the data as a list/array of objects, one per entity or record."
    }
    # Unknown structures fall back to the "nested" instruction.
    aiPrompt += f" {structureInstructions.get(dataStructure, structureInstructions['nested'])}"

    aiPrompt += " Ensure all extracted data is accurate and complete."

    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": resultType
    })
|
|
|
|
|
|
@action
|
|
async def extractTables(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Extract tables from documents, preserving structure and data.
    - Input requirements: documentList (required); optional tableFormat.
    - Output format: JSON by default (structured table data), or CSV/XLSX if specified.

    Parameters:
    - documentList (list, required): Document reference(s) to extract tables from.
    - tableFormat (str, optional): Output format for tables - json, csv, or xlsx. Default: json.
    - includeHeaders (bool, optional): Include table headers. Default: True.
    """
    # NOTE(review): the docstring above is reproduced verbatim; its
    # GENERAL/Parameters layout looks machine-readable (presumably consumed via
    # @action) - confirm before rewording it.
    #
    # Thin wrapper: builds a table-extraction prompt and delegates to process().
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    # `or` instead of a .get default so an explicit None gets the default;
    # str() guards against non-string values; a leading dot is tolerated.
    tableFormat = str(parameters.get("tableFormat") or "json").strip().lstrip(".").lower()
    includeHeaders = parameters.get("includeHeaders", True)

    # Map tableFormat to resultType; anything unrecognized becomes json.
    formatMap = {
        "json": "json",
        "csv": "csv",
        "xlsx": "xlsx",
        "xls": "xlsx"
    }
    resultType = formatMap.get(tableFormat, "json")

    aiPrompt = "Extract all tables from the provided document(s)."
    if includeHeaders:
        aiPrompt += " Include table headers and preserve the table structure."
    else:
        aiPrompt += " Extract table data without headers."
    aiPrompt += " Maintain accurate data types (numbers as numbers, dates as dates, etc.) and preserve all table relationships."

    # Format-specific output instruction, keyed by the resolved resultType.
    formatInstructions = {
        "json": " Structure each table as a JSON object with headers and rows as arrays.",
        "csv": " Output each table as CSV format with proper comma separation.",
        "xlsx": " Structure the output as an Excel spreadsheet with tables properly formatted."
    }
    aiPrompt += formatInstructions.get(resultType, "")

    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": resultType
    })
|
|
|
|
|
|
# ============================================================================
|
|
# Content Generation Wrappers
|
|
# ============================================================================
|
|
|
|
@action
|
|
async def generateReport(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Generate comprehensive reports from input documents/data with analysis and insights.
    - Input requirements: documentList (optional, can generate from scratch); optional reportType, sections.
    - Output format: Document in specified format (default: docx).

    Parameters:
    - documentList (list, optional): Input documents/data to base the report on.
    - reportType (str, optional): Type of report - summary, analysis, executive, detailed. Default: analysis.
    - sections (list, optional): Specific sections to include (e.g., ["introduction", "findings", "recommendations"]).
    - title (str, optional): Report title.
    - resultType (str, optional): Output format (docx, pdf, md, etc.). Default: docx.
    """
    # NOTE(review): the docstring above is reproduced verbatim; its
    # GENERAL/Parameters layout looks machine-readable (presumably consumed via
    # @action) - confirm before rewording it.
    #
    # Thin wrapper: builds a report prompt and delegates to process().
    documentList = parameters.get("documentList", [])
    # `or` instead of .get defaults so explicit None values get the defaults;
    # str() guards against non-string values before .lower().
    reportType = str(parameters.get("reportType") or "analysis")
    sections = parameters.get("sections") or []
    if isinstance(sections, str):
        # A single section name given as a plain string must not be joined
        # character by character.
        sections = [sections]
    title = parameters.get("title")
    resultType = parameters.get("resultType") or "docx"

    reportTypeInstructions = {
        "summary": "Create a summary report with key highlights and main points.",
        "analysis": "Create an analytical report with insights, findings, and detailed examination.",
        "executive": "Create an executive summary report suitable for senior management with key insights and recommendations.",
        "detailed": "Create a comprehensive detailed report covering all aspects with in-depth analysis."
    }

    aiPrompt = f"Generate a {reportType} report."
    if title:
        aiPrompt += f" Title: {title}."
    # Unknown report types fall back to the "analysis" instruction.
    aiPrompt += f" {reportTypeInstructions.get(reportType.lower(), reportTypeInstructions['analysis'])}"

    if sections:
        # str() so non-string section names do not break the join.
        sectionsStr = ", ".join(str(section) for section in sections)
        aiPrompt += f" Include the following sections: {sectionsStr}."
    else:
        aiPrompt += " Include standard report sections such as introduction, main content, analysis, findings, and conclusions."

    if documentList:
        aiPrompt += " Base the report on the provided input documents, analyzing and synthesizing the information."
    else:
        aiPrompt += " Create a professional, well-structured report."

    processParams = {
        "aiPrompt": aiPrompt,
        "resultType": resultType
    }
    # documentList is only forwarded when input documents were supplied.
    if documentList:
        processParams["documentList"] = documentList

    return await self.process(processParams)
|
|
|
|
|
|
@action
|
|
async def generateChart(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Generate charts/graphs from data in documents or structured data.
    - Input requirements: documentList (required); optional chartType, title, labels.
    - Output format: Image (png or jpg).

    Parameters:
    - documentList (list, required): Documents containing data to visualize (CSV, Excel, JSON, etc.).
    - chartType (str, optional): Type of chart - bar, line, pie, scatter, area, etc. Default: bar.
    - title (str, optional): Chart title.
    - xAxisLabel (str, optional): X-axis label.
    - yAxisLabel (str, optional): Y-axis label.
    - resultType (str, optional): Image format (png or jpg). Default: png.
    """
    # NOTE(review): the docstring above is reproduced verbatim; its
    # GENERAL/Parameters layout looks machine-readable (presumably consumed via
    # @action) - confirm before rewording it.
    #
    # Thin wrapper: builds a chart prompt and delegates to process().
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    # `or` instead of .get defaults so explicit None values get the defaults.
    chartType = parameters.get("chartType") or "bar"
    title = parameters.get("title")
    xAxisLabel = parameters.get("xAxisLabel")
    yAxisLabel = parameters.get("yAxisLabel")
    # str() guards against non-string values before .lower().
    resultType = str(parameters.get("resultType") or "png")

    # Ensure resultType is an image format; anything else becomes png.
    if resultType.lower() not in ("png", "jpg", "jpeg"):
        resultType = "png"

    aiPrompt = f"Generate a {chartType} chart from the provided data."
    if title:
        aiPrompt += f" Chart title: {title}."
    if xAxisLabel:
        aiPrompt += f" X-axis label: {xAxisLabel}."
    if yAxisLabel:
        aiPrompt += f" Y-axis label: {yAxisLabel}."
    aiPrompt += " Create a clear, professional chart with appropriate labels, legends, and formatting. Ensure the chart is visually appealing and easy to read."

    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": resultType
    })
|
|
|
|
|
|
@action
|
|
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Generate documents from scratch or based on templates/inputs.
    - Input requirements: prompt or description (required); optional documentList (for templates/references).
    - Output format: Document in specified format (default: docx).

    Parameters:
    - prompt (str, required): Description of the document to generate.
    - documentList (list, optional): Template documents or reference documents to use as a guide.
    - documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
    - resultType (str, optional): Output format (docx, pdf, txt, md, etc.). Default: docx.
    """
    # NOTE(review): the docstring above is reproduced verbatim; its
    # GENERAL/Parameters layout looks machine-readable (presumably consumed via
    # @action) - confirm before rewording it.
    #
    # Thin wrapper: validates the prompt, builds a generation prompt and
    # delegates to process().
    prompt = parameters.get("prompt")
    if not prompt:
        return ActionResult.isFailure(error="prompt is required")

    documentList = parameters.get("documentList", [])
    hasReferences = bool(documentList)
    documentType = parameters.get("documentType")

    # Collect the prompt sentences in order, then join them once.
    sentences = [f"Generate a document based on the following requirements: {prompt}"]
    if documentType:
        sentences.append(f" Document type: {documentType}.")
    if hasReferences:
        sentences.append(" Use the provided template/reference documents as a guide for structure, format, and style.")
    sentences.append(" Create a professional, well-structured document with appropriate formatting and organization.")

    request = {
        "aiPrompt": "".join(sentences),
        "resultType": parameters.get("resultType", "docx")
    }
    # documentList is only forwarded when reference documents were supplied.
    if hasReferences:
        request["documentList"] = documentList

    return await self.process(request)
|
|
|
|
|
|
# ============================================================================
|
|
# Analysis & Comparison Wrappers
|
|
# ============================================================================
|
|
|
|
@action
|
|
async def analyzeDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Analyze documents and find insights, patterns, trends, and key information.
    - Input requirements: documentList (required); optional analysisType, focus.
    - Output format: Analysis report in specified format (default: txt).

    Parameters:
    - documentList (list, required): Document(s) to analyze.
    - analysisType (str, optional): Type of analysis - general, financial, technical, sentiment, etc. Default: general.
    - focus (str, optional): Specific aspect to focus on (e.g., "trends", "risks", "opportunities").
    - resultType (str, optional): Output format (txt, md, docx, json, etc.). Default: txt.
    """
    # NOTE(review): the docstring layout is kept verbatim; it follows the same
    # GENERAL/Parameters structure as the sibling actions - confirm whether the
    # @action machinery parses it before reformatting.

    # Thin wrapper: builds an analysis prompt and delegates to self.process().
    # Returns a failure result when no documents are supplied.
    documentList = parameters.get("documentList")
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    # dict.get() only applies its default when the key is absent. Using `or`
    # keeps an explicit None/"" from leaking into the prompt as
    # "Perform a None analysis." or bypassing the documented txt default.
    analysisType = parameters.get("analysisType") or "general"
    focus = parameters.get("focus")
    resultType = parameters.get("resultType") or "txt"

    promptParts = [
        "Analyze the provided document(s) and find insights, patterns, and key information.",
        f" Perform a {analysisType} analysis.",
    ]
    if focus:
        promptParts.append(f" Focus specifically on: {focus}.")
    promptParts.append(
        " Identify trends, important findings, relationships, and provide actionable insights."
        " Present the analysis in a clear, structured format."
    )

    return await self.process({
        "aiPrompt": "".join(promptParts),
        "documentList": documentList,
        "resultType": resultType,
    })
|
|
|
|
|
|
@action
|
|
async def compareDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Compare multiple documents and identify differences, similarities, and changes.
    - Input requirements: documentList (required, should contain 2+ documents); optional comparisonType, focus.
    - Output format: Comparison report in specified format (default: txt).

    Parameters:
    - documentList (list, required): Two or more documents to compare.
    - comparisonType (str, optional): Type of comparison - differences, similarities, changes, full. Default: full.
    - focus (str, optional): Specific aspect to focus on (e.g., "content", "structure", "data", "formatting").
    - resultType (str, optional): Output format (txt, md, docx, json, etc.). Default: txt.
    """
    # NOTE(review): the docstring layout is kept verbatim; it follows the same
    # GENERAL/Parameters structure as the sibling actions - confirm whether the
    # @action machinery parses it before reformatting.

    # Thin wrapper: validates that at least two entries were supplied, builds a
    # comparison prompt and delegates to self.process().
    documentList = parameters.get("documentList")
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    # A bare string is one entry, not a sequence of characters; wrap it so the
    # length check below counts entries.
    if isinstance(documentList, str):
        documentList = [documentList]

    if len(documentList) < 2:
        return ActionResult.isFailure(error="At least 2 documents are required for comparison")

    focus = parameters.get("focus")
    # dict.get() only applies its default when the key is absent; `or` also
    # covers an explicit None/"" so the documented txt default still applies.
    resultType = parameters.get("resultType") or "txt"

    comparisonInstructions = {
        "differences": "Focus on identifying and highlighting all differences between the documents.",
        "similarities": "Focus on identifying commonalities, shared content, and similarities.",
        "changes": "Identify what has changed between versions, what was added, removed, or modified.",
        "full": "Provide a comprehensive comparison including both differences and similarities.",
    }

    # Normalize defensively: an explicit None (or any non-string) used to raise
    # AttributeError on .lower(). Unknown values fall back to the "full"
    # instruction, as before.
    rawComparisonType = parameters.get("comparisonType")
    comparisonKey = str(rawComparisonType).strip().lower() if rawComparisonType else "full"
    instruction = comparisonInstructions.get(comparisonKey, comparisonInstructions["full"])

    promptParts = ["Compare the provided documents.", f" {instruction}"]
    if focus:
        promptParts.append(f" Focus specifically on: {focus}.")
    promptParts.append(
        " Present the comparison in a clear, structured format that makes differences and similarities easy to understand."
    )

    return await self.process({
        "aiPrompt": "".join(promptParts),
        "documentList": documentList,
        "resultType": resultType,
    })
|
|
|
|
|
|
@action
|
|
async def validateData(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Validate data quality, structure, completeness, and correctness in documents/data files.
    - Input requirements: documentList (required); optional validationRules, schema.
    - Output format: Validation report in JSON or text format (default: json).

    Parameters:
    - documentList (list, required): Documents/data files to validate.
    - validationRules (list, optional): Specific validation rules to check (e.g., ["required_fields", "data_types", "ranges"]).
    - schema (dict, optional): Expected data schema/structure to validate against.
    - resultType (str, optional): Output format (json, txt, md, etc.). Default: json.
    """
    # NOTE(review): the docstring layout is kept verbatim; it follows the same
    # GENERAL/Parameters structure as the sibling actions - confirm whether the
    # @action machinery parses it before reformatting.

    # Thin wrapper: builds a validation prompt from the optional rules/schema
    # and delegates to self.process(). Returns a failure result when no
    # documents are supplied.
    documentList = parameters.get("documentList")
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    # dict.get() only applies its default when the key is absent; `or` also
    # covers explicit None/"" values.
    validationRules = parameters.get("validationRules") or []
    schema = parameters.get("schema")
    resultType = parameters.get("resultType") or "json"

    # A single rule passed as a plain string must stay one rule; joining a
    # string directly would interleave ", " between its characters.
    if isinstance(validationRules, str):
        validationRules = [validationRules]

    aiPrompt = "Validate the data quality, structure, completeness, and correctness in the provided documents."

    if validationRules:
        # str() each item: str.join raises TypeError on non-string entries.
        rulesStr = ", ".join(str(rule) for rule in validationRules)
        aiPrompt += f" Apply the following validation rules: {rulesStr}."
    else:
        aiPrompt += " Check for data completeness, correct data types, required fields, data consistency, and any anomalies or errors."

    if schema:
        import json

        if isinstance(schema, str):
            # Already textual (e.g. a JSON document); dumping it again would
            # only wrap it in quotes and escape its contents.
            schemaStr = schema
        else:
            try:
                schemaStr = json.dumps(schema, indent=2)
            except (TypeError, ValueError):
                # Non-serializable or circular structures: the schema is only
                # prompt text, so a plain repr is better than failing the action.
                schemaStr = str(schema)
        aiPrompt += f" Validate against the following expected schema: {schemaStr}."

    # Case-insensitive so "JSON" gets the structured-output instruction too.
    if str(resultType).lower() == "json":
        aiPrompt += " Provide the validation results as structured JSON with validation status, errors, warnings, and details for each check."
    else:
        aiPrompt += " Provide a detailed validation report listing all findings, errors, warnings, and pass/fail status for each validation check."

    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": resultType,
    })
|