157 lines
7.4 KiB
Python
157 lines
7.4 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
|
|
"""
|
|
Convert action for AI operations.
|
|
Converts documents/data between different formats with specific formatting options.
|
|
"""
|
|
|
|
import logging
|
|
import json
|
|
from typing import Dict, Any
|
|
from modules.workflows.methods.methodBase import action
|
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
|
from modules.datamodels.datamodelDocref import DocumentReferenceList
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
def _buildConversionPrompt(inputFormat: str, outputFormat: str, parameters: Dict[str, Any]) -> str:
    """Build the instruction text used by the AI-based conversion fallback.

    Args:
        inputFormat: Normalized (lower-case, no leading dot) source format.
        outputFormat: Normalized (lower-case, no leading dot) target format.
        parameters: The action parameters; reads the optional keys
            ``delimiter``, ``columnsPerRow``, ``includeHeader`` and ``language``.

    Returns:
        The complete prompt as a single string.
    """
    promptParts = [
        f"Convert the provided document(s) from {inputFormat.upper()} format "
        f"to {outputFormat.upper()} format."
    ]

    # CSV-specific formatting hints are only meaningful for CSV output.
    if outputFormat == "csv":
        delimiter = parameters.get("delimiter", ",")
        promptParts.append(f" Use '{delimiter}' as the delimiter character.")

        columnsPerRow = parameters.get("columnsPerRow")
        if columnsPerRow:
            promptParts.append(f" Format the output with {columnsPerRow} columns per row.")

        if parameters.get("includeHeader", True):
            promptParts.append(" Include a header row with column names.")
        else:
            promptParts.append(" Do not include a header row.")

    # English is the default, so it is only mentioned when something else is requested.
    language = parameters.get("language", "en")
    if language and language != "en":
        promptParts.append(f" Use language: {language}.")

    promptParts.append(" Preserve all data and ensure accurate conversion. Maintain data integrity and structure.")
    return "".join(promptParts)


# convert(): validates the parameters, resolves the referenced documents and
# then takes one of two paths:
#   1. Direct rendering - a single JSON input whose top level contains
#      "documents" or "sections" is rendered via GenerationService.renderReport.
#   2. AI fallback - everything else (and any failure of path 1) is delegated
#      to self.process() with a generated prompt.
# Returns ActionResult.isFailure(...) for missing/blank required parameters or
# when no documents can be resolved; otherwise the result of the chosen path.
# NOTE(review): the docstring below is kept verbatim - its GENERAL/Parameters
# layout is presumably consumed by the @action framework; confirm before
# restructuring it.
@action
async def convert(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Convert documents/data between different formats with specific formatting options (e.g., JSON→CSV with custom columns, delimiters).
    - Input requirements: documentList (required); inputFormat and outputFormat (required).
    - Output format: Document in target format with specified formatting options.
    - CRITICAL: If input is already in standardized JSON format, uses automatic rendering system (no AI call needed).

    Parameters:
    - documentList (list, required): Document reference(s) to convert.
    - inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.).
    - outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.).
    - columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect.
    - delimiter (str, optional): For CSV output, delimiter character. Default: comma (,).
    - includeHeader (bool, optional): For CSV output, whether to include header row. Default: True.
    - language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'.
    """
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    inputFormat = parameters.get("inputFormat")
    outputFormat = parameters.get("outputFormat")
    if not inputFormat or not outputFormat:
        return ActionResult.isFailure(error="inputFormat and outputFormat are required")

    # Normalize formats (trim whitespace, drop a leading dot, lower-case).
    normalizedInputFormat = inputFormat.strip().lstrip('.').lower()
    normalizedOutputFormat = outputFormat.strip().lstrip('.').lower()
    # Values such as "  " or "." pass the check above but normalize to an empty
    # string, which would otherwise reach the renderer / AI as a blank format.
    if not normalizedInputFormat or not normalizedOutputFormat:
        return ActionResult.isFailure(error="inputFormat and outputFormat are required")

    # Resolve the document references, accepting a ready-made reference list,
    # a plain list, or a single value.
    if isinstance(documentList, DocumentReferenceList):
        docRefList = documentList
    elif isinstance(documentList, list):
        docRefList = DocumentReferenceList.from_string_list(documentList)
    else:
        docRefList = DocumentReferenceList.from_string_list([documentList])

    chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
    if not chatDocuments:
        return ActionResult.isFailure(error="No documents found in documentList")

    # Direct rendering path: a single JSON document in standardized format.
    if normalizedInputFormat == "json" and len(chatDocuments) == 1:
        try:
            doc = chatDocuments[0]
            # ChatDocument doesn't have documentData - load the file content via fileId.
            docBytes = self.services.chat.getFileData(doc.fileId)
            if not docBytes:
                raise ValueError(f"No file data found for fileId={doc.fileId}")

            jsonData = json.loads(docBytes.decode('utf-8'))

            # Standardized JSON is a dict with a top-level "documents" or "sections" key.
            isStandardized = isinstance(jsonData, dict) and (
                "documents" in jsonData or "sections" in jsonData
            )
            if isStandardized:
                # Use direct rendering - no AI call needed.
                from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                generationService = GenerationService(self.services)

                # Fallback name used for both the title and the output file name.
                sourceName = doc.documentName or "Converted Document"

                # Read the title BEFORE the payload is re-wrapped below: the wrapped
                # dict has no top-level "metadata", so reading it afterwards would
                # always discard the document's own title.
                metadata = jsonData.get("metadata")
                metadataTitle = metadata.get("title") if isinstance(metadata, dict) else None
                title = metadataTitle or sourceName

                # Ensure the payload uses the "documents" array layout.
                if "documents" not in jsonData:
                    jsonData = {
                        "documents": [
                            {
                                "sections": jsonData.get("sections", []),
                                "metadata": jsonData.get("metadata", {}),
                            }
                        ]
                    }

                # The two trailing None arguments are passed through unchanged;
                # their meaning is not visible from this file.
                rendered_content, mime_type = await generationService.renderReport(
                    jsonData, normalizedOutputFormat, title, None, None
                )

                # CSV formatting options are applied as a post-processing step
                # on the rendered output.
                if normalizedOutputFormat == "csv":
                    renderOptions = {
                        "delimiter": parameters.get("delimiter", ","),
                        "columnsPerRow": parameters.get("columnsPerRow"),
                        "includeHeader": parameters.get("includeHeader", True),
                    }
                    rendered_content = self.csvProcessing.applyCsvOptions(rendered_content, renderOptions)

                validationMetadata = {
                    "actionType": "ai.convert",
                    "inputFormat": normalizedInputFormat,
                    "outputFormat": normalizedOutputFormat,
                    "hasSourceJson": True,
                    "conversionType": "direct_rendering"
                }

                # Swap the last extension (if any) for the target format;
                # rsplit leaves names without a dot untouched.
                baseName = sourceName.rsplit('.', 1)[0]
                actionDoc = ActionDocument(
                    documentName=f"{baseName}.{normalizedOutputFormat}",
                    documentData=rendered_content,
                    mimeType=mime_type,
                    sourceJson=jsonData,  # Preserve source JSON for structure validation
                    validationMetadata=validationMetadata
                )
                return ActionResult.isSuccess(documents=[actionDoc])

        except Exception as e:
            # Deliberate best-effort: any failure of the direct path (unreadable
            # file, invalid JSON, renderer error) falls through to the AI path.
            logger.warning("Direct rendering failed, falling back to AI conversion: %s", e)

    # Fallback: use AI for conversion (non-JSON inputs, non-standardized JSON,
    # multiple documents, or a failed direct rendering attempt).
    return await self.process({
        "aiPrompt": _buildConversionPrompt(normalizedInputFormat, normalizedOutputFormat, parameters),
        "documentList": documentList,
        "resultType": normalizedOutputFormat
    })
|
|
|