# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Convert action for AI operations.
Converts documents/data between different formats with specific formatting options.
"""

import logging
import json
from typing import Dict, Any, Optional

from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList

logger = logging.getLogger(__name__)


def _normalizeFormat(rawFormat: Any) -> str:
    """Return a format name trimmed, lower-cased and without leading dots (" .CSV" -> "csv")."""
    # str() guards against callers passing a non-string value (which would
    # otherwise raise AttributeError on .strip()).
    return str(rawFormat).strip().lstrip(".").lower()


def _deriveOutputName(sourceName: Optional[str], outputFormat: str) -> str:
    """Return the source file name with its last extension replaced by outputFormat."""
    # The source document may have no name; fall back to a neutral base name
    # instead of failing on `'.' in None`.
    baseName = sourceName or "converted"
    if "." in baseName:
        baseName = baseName.rsplit(".", 1)[0]
    return f"{baseName}.{outputFormat}"


def _buildConversionPrompt(inputFormat: str, outputFormat: str, parameters: Dict[str, Any]) -> str:
    """Build the instruction text for the AI-based conversion fallback."""
    columnsPerRow = parameters.get("columnsPerRow")
    delimiter = parameters.get("delimiter", ",")
    includeHeader = parameters.get("includeHeader", True)
    language = parameters.get("language", "en")

    promptParts = [
        f"Convert the provided document(s) from {inputFormat.upper()} format to {outputFormat.upper()} format."
    ]
    if outputFormat == "csv":
        promptParts.append(f" Use '{delimiter}' as the delimiter character.")
        if columnsPerRow:
            promptParts.append(f" Format the output with {columnsPerRow} columns per row.")
        if not includeHeader:
            promptParts.append(" Do not include a header row.")
        else:
            promptParts.append(" Include a header row with column names.")
    if language and language != "en":
        promptParts.append(f" Use language: {language}.")
    promptParts.append(" Preserve all data and ensure accurate conversion. Maintain data integrity and structure.")
    return "".join(promptParts)


async def _renderStandardizedJson(
    self,
    doc: Any,
    parameters: Dict[str, Any],
    inputFormat: str,
    outputFormat: str,
) -> Optional[ActionResult]:
    """
    Render a standardized-JSON document directly into outputFormat (no AI call).

    Returns the successful ActionResult, or None when the file content is not
    in the standardized format (a dict with a "documents" or "sections" key).
    Raises on missing file data, undecodable/invalid JSON or renderer errors;
    the caller treats any exception as "fall back to AI conversion".
    """
    # ChatDocument doesn't have documentData - need to load file content using fileId
    docBytes = self.services.chat.getFileData(doc.fileId)
    if not docBytes:
        raise ValueError(f"No file data found for fileId={doc.fileId}")

    # utf-8-sig also accepts files starting with a UTF-8 BOM, which json.loads
    # would otherwise reject.
    jsonData = json.loads(docBytes.decode("utf-8-sig"))

    # Only standardized JSON (has "documents" or "sections") can be rendered directly.
    if not isinstance(jsonData, dict) or not ("documents" in jsonData or "sections" in jsonData):
        return None

    # Local import kept as in the original (NOTE(review): presumably avoids a
    # circular import at module load - confirm before moving it to file level).
    from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
    generationService = GenerationService(self.services)

    # Capture the top-level metadata BEFORE wrapping: once a sections-only
    # payload is wrapped into {"documents": [...]}, the top-level "metadata"
    # key no longer exists and the title would be lost.
    topMetadata = jsonData.get("metadata")
    if not isinstance(topMetadata, dict):
        topMetadata = {}

    # Ensure format is "documents" array
    if "documents" not in jsonData:
        jsonData = {
            "documents": [
                {
                    "sections": jsonData.get("sections", []),
                    "metadata": jsonData.get("metadata", {}),
                }
            ]
        }

    title = topMetadata.get("title") or doc.documentName or "Converted Document"

    renderedContent, mimeType, _images = await generationService.renderReport(
        jsonData, outputFormat, title, None, None
    )

    # Apply CSV options to the rendered output.
    if outputFormat == "csv":
        renderOptions = {
            "delimiter": parameters.get("delimiter", ","),
            "columnsPerRow": parameters.get("columnsPerRow"),
            "includeHeader": parameters.get("includeHeader", True),
        }
        # NOTE(review): csvProcessing is expected on the object this action is
        # bound to; it is not defined in this file.
        renderedContent = self.csvProcessing.applyCsvOptions(renderedContent, renderOptions)

    validationMetadata = {
        "actionType": "ai.convert",
        "inputFormat": inputFormat,
        "outputFormat": outputFormat,
        "hasSourceJson": True,
        "conversionType": "direct_rendering"
    }
    actionDoc = ActionDocument(
        documentName=_deriveOutputName(doc.documentName, outputFormat),
        documentData=renderedContent,
        mimeType=mimeType,
        sourceJson=jsonData,  # Preserve source JSON for structure validation
        validationMetadata=validationMetadata
    )
    return ActionResult.isSuccess(documents=[actionDoc])


@action
async def convert(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Convert documents/data between different formats with specific formatting options (e.g., JSON→CSV with custom columns, delimiters).
    - Input requirements: documentList (required); inputFormat and outputFormat (required).
    - Output format: Document in target format with specified formatting options.
    - CRITICAL: If input is already in standardized JSON format, uses automatic rendering system (no AI call needed).

    Parameters:
    - documentList (list, required): Document reference(s) to convert.
    - inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.).
    - outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.).
    - columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect.
    - delimiter (str, optional): For CSV output, delimiter character. Default: comma (,).
    - includeHeader (bool, optional): For CSV output, whether to include header row. Default: True.
    - language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'.
    """
    # NOTE: the docstring above is kept verbatim; it may be consumed by the
    # action framework, so implementation notes live in comments instead.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    inputFormat = parameters.get("inputFormat")
    outputFormat = parameters.get("outputFormat")
    if not inputFormat or not outputFormat:
        return ActionResult.isFailure(error="inputFormat and outputFormat are required")

    # Normalize formats (remove leading dot if present)
    normalizedInputFormat = _normalizeFormat(inputFormat)
    normalizedOutputFormat = _normalizeFormat(outputFormat)
    # A value such as "." or "  " is non-empty but names no format.
    if not normalizedInputFormat or not normalizedOutputFormat:
        return ActionResult.isFailure(error="inputFormat and outputFormat are required")

    # Get documents
    if isinstance(documentList, DocumentReferenceList):
        docRefList = documentList
    elif isinstance(documentList, list):
        docRefList = DocumentReferenceList.from_string_list(documentList)
    else:
        docRefList = DocumentReferenceList.from_string_list([documentList])

    chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
    if not chatDocuments:
        return ActionResult.isFailure(error="No documents found in documentList")

    # Check if input is standardized JSON format - if so, use direct rendering
    if normalizedInputFormat == "json" and len(chatDocuments) == 1:
        try:
            directResult = await _renderStandardizedJson(
                self, chatDocuments[0], parameters, normalizedInputFormat, normalizedOutputFormat
            )
        except Exception as e:
            # Deliberate best-effort: any failure of the direct path falls
            # through to the AI-based conversion below.
            logger.warning("Direct rendering failed, falling back to AI conversion: %s", e)
        else:
            if directResult is not None:
                return directResult

    # Fallback: Use AI for conversion (for non-JSON inputs or complex conversions)
    aiPrompt = _buildConversionPrompt(normalizedInputFormat, normalizedOutputFormat, parameters)
    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": normalizedOutputFormat
    })