# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""``file.create`` workflow action.

Turns upstream context (text/markdown, or a list of image documents) into
output files and stores them in the user's file storage.
"""

from typing import Any, Dict, List, Optional
import base64
import binascii
import io
import json
import logging
import re

from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
from modules.shared.i18nRegistry import normalizePrimaryLanguageTag
from modules.workflows.automation2.executors.actionNodeExecutor import _coerce_document_data_to_bytes
from modules.workflows.methods.methodAi._common import is_image_action_document_list, serialize_context

logger = logging.getLogger(__name__)

# Any character NOT matched by this class is considered safe in a file name.
_SAFE_FILENAME = re.compile(r'[^\w\-.\(\)\s\[\]%@+]')

# MIME types per output format; formats missing here fall back to
# "application/octet-stream".
_MIME_BY_FORMAT: Dict[str, str] = {
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "pdf": "application/pdf",
    "txt": "text/plain",
    "md": "text/markdown",
    "html": "text/html",
    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "csv": "text/csv",
    "json": "application/json",
}


def _doc_field(doc: Any, name: str, default: Any = None) -> Any:
    """Read ``name`` from a document given as an object or as a mapping.

    The attribute is tried first; ``doc.get(name)`` is only consulted when
    the attribute is missing/falsy AND ``doc`` actually offers a ``get``
    method. Falsy results are replaced by ``default``.
    """
    value = getattr(doc, name, None)
    # Guarding on ``get`` avoids an AttributeError for plain objects whose
    # attribute merely happens to be empty.
    if not value and callable(getattr(doc, "get", None)):
        value = doc.get(name)
    return value or default


def _parse_output_format(parameters: Dict[str, Any]) -> str:
    """Return the normalized ``outputFormat`` parameter (default ``docx``).

    Lower-cased, trimmed and without leading dots, so ".PDF" becomes "pdf".
    """
    return str(parameters.get("outputFormat") or "docx").strip().lower().lstrip(".")


def _parse_title(parameters: Dict[str, Any]) -> str:
    """Return the trimmed ``title`` parameter, or "Document" when it is blank."""
    return str(parameters.get("title") or "Document").strip() or "Document"


def _parse_folder_id(parameters: Dict[str, Any]) -> Optional[str]:
    """Return the trimmed ``folderId`` parameter, or None when absent/blank."""
    raw_folder = parameters.get("folderId")
    if raw_folder is None:
        return None
    return str(raw_folder).strip() or None


def _persistDocumentsToUserFiles(
    action_documents: list,
    services,
    folder_id: Optional[str] = None,
) -> None:
    """Persist file.create output documents to the user's file storage (like upload).

    Adds ``fileId`` to each document's ``validationMetadata`` for download
    links in the UI. Persistence is best effort: a failure for one document
    is logged and the remaining documents are still processed.

    Args:
        action_documents: Documents as objects or dicts exposing
            ``documentData``, ``documentName``, ``mimeType`` and
            ``validationMetadata``.
        services: Service container used to resolve the management interface.
        folder_id: Optional target folder passed on to ``createFile``.
    """
    mgmt = _get_management_interface(
        services,
        failure_log="file.create: could not get management interface for persistence: %s",
    )
    if not mgmt:
        return
    logger.info(
        "file.create persist: mgmt=%s id(mgmt)=%s has_createFileData=%s",
        type(mgmt).__name__,
        id(mgmt),
        hasattr(mgmt, "createFileData"),
    )
    for doc in action_documents:
        try:
            doc_data = _doc_field(doc, "documentData")
            if not doc_data:
                continue
            if isinstance(doc_data, str):
                # Strings are treated as base64 when they validate as such,
                # otherwise as plain UTF-8 text.
                # NOTE(review): short plain text that happens to be valid
                # base64 would be decoded as binary here - confirm callers
                # always pass base64 for string payloads.
                try:
                    content = base64.b64decode(doc_data, validate=True)
                except (TypeError, ValueError, binascii.Error):
                    content = doc_data.encode("utf-8")
            else:
                content = doc_data
            doc_name = _doc_field(doc, "documentName", "output.pdf")
            mime = _doc_field(doc, "mimeType", "application/octet-stream")
            logger.info(
                "file.create persist: calling createFile name=%s bytes=%s",
                doc_name,
                len(content),
            )
            file_item = mgmt.createFile(doc_name, mime, content, folderId=folder_id)
            logger.info("file.create persist: createFile returned id=%s", file_item.id)
            ok = mgmt.createFileData(file_item.id, content)
            logger.info("file.create persist: createFileData returned %s for id=%s", ok, file_item.id)
            meta = _doc_field(doc, "validationMetadata", {})
            if isinstance(meta, dict):
                meta["fileId"] = file_item.id
                # Write back because ``meta`` may be a freshly created dict.
                if hasattr(doc, "validationMetadata"):
                    doc.validationMetadata = meta
                elif isinstance(doc, dict):
                    doc["validationMetadata"] = meta
            logger.info("file.create: persisted %s to user files (id=%s)", doc_name, file_item.id)
        except Exception as e:
            # Deliberately broad: storing a copy must never fail the action.
            dname = _doc_field(doc, "documentName", "?")
            logger.warning("file.create: failed to persist document %s: %s", dname, e)


def _sanitize_output_stem(title: str) -> str:
    """Derive a file-name stem from ``title``.

    Characters outside the safe set become "_", leading/trailing dots and
    underscores are stripped, and the result is capped at 120 characters.
    Returns "Document" when nothing usable remains.
    """
    t = (title or "").strip() or "Document"
    stem = _SAFE_FILENAME.sub("_", t).strip("._")
    return stem[:120] if stem else "Document"


def _get_management_interface(
    services,
    *,
    failure_log: str = "file.create: could not get management interface: %s",
) -> Optional[Any]:
    """Return the file management interface for ``services``, or None.

    Prefers ``services.interfaceDbComponent``; otherwise builds an interface
    for ``services.user`` via ``interfaceDbManagement.getInterface``.

    Args:
        services: Service container (``user``, ``mandateId`` and
            ``featureInstanceId`` are read when present).
        failure_log: ``%``-style warning template (one ``%s`` for the
            exception) used when the interface cannot be created.

    Returns:
        The interface, or None when no user is set or creation failed.
    """
    mgmt = getattr(services, "interfaceDbComponent", None)
    if mgmt:
        return mgmt
    try:
        # Imported lazily, as in the original code.
        import modules.interfaces.interfaceDbManagement as iface

        user = getattr(services, "user", None)
        if not user:
            return None
        return iface.getInterface(
            user,
            mandateId=getattr(services, "mandateId", None) or "",
            featureInstanceId=getattr(services, "featureInstanceId", None) or "",
        )
    except Exception as e:
        logger.warning(failure_log, e)
        return None


def _load_image_bytes_from_action_doc(doc: dict, services) -> Optional[bytes]:
    """Return the image bytes for one action document dict, or None.

    Inline ``documentData`` is used when it converts to non-empty bytes.
    Otherwise the data is fetched through the management interface using
    ``fileId`` (top level, or inside ``validationMetadata``).
    """
    raw = doc.get("documentData")
    blob = _coerce_document_data_to_bytes(raw)
    if blob:
        return blob
    fid = doc.get("fileId")
    if not fid:
        meta = doc.get("validationMetadata")
        if isinstance(meta, dict):
            fid = meta.get("fileId")
    if fid and str(fid).strip():
        mgmt = _get_management_interface(services)
        if mgmt and hasattr(mgmt, "getFileData"):
            try:
                return mgmt.getFileData(str(fid))
            except Exception as e:
                logger.warning("file.create: getFileData(%s) failed: %s", fid, e)
    return None


def _images_list_to_pdf(image_bytes_list: List[bytes]) -> bytes:
    """One PDF page per image; embedded raster data via PyMuPDF.

    Each image is placed on a new default-sized page, scaled to the page
    rectangle with its aspect ratio kept.
    """
    import fitz

    pdf = fitz.open()
    try:
        for blob in image_bytes_list:
            page = pdf.new_page()
            page.insert_image(page.rect, stream=blob, keep_proportion=True)
        return pdf.tobytes()
    finally:
        # Release the document even when an image cannot be inserted.
        pdf.close()


def _images_list_to_docx(image_bytes_list: List[bytes]) -> bytes:
    """Images embedded in the document package (inline shapes), not hyperlinks.

    Each image is added 6.5 inches wide in its own paragraph, followed by an
    empty paragraph.
    """
    from docx import Document
    from docx.shared import Inches

    doc = Document()
    for blob in image_bytes_list:
        p = doc.add_paragraph()
        run = p.add_run()
        run.add_picture(io.BytesIO(blob), width=Inches(6.5))
        doc.add_paragraph()
    out = io.BytesIO()
    doc.save(out)
    return out.getvalue()


async def _create_merged_image_documents(
    self,
    parameters: Dict[str, Any],
    image_docs: List[dict],
) -> ActionResult:
    """Build one PDF or DOCX containing all extracted images (``imageDocumentsOnly``).

    Args:
        parameters: Action parameters; ``outputFormat`` (only "pdf"/"docx"
            are accepted here), ``title`` and ``folderId`` are read.
        image_docs: Image action documents as dicts.

    Returns:
        Success with a single merged document, or a failure result when the
        format is unsupported, an image cannot be loaded, or building the
        output file fails.
    """
    output_format = _parse_output_format(parameters)
    stem = _sanitize_output_stem(_parse_title(parameters))
    folder_id = _parse_folder_id(parameters)

    if output_format not in ("pdf", "docx"):
        return ActionResult.isFailure(
            error=(
                f"Nur-Bilder-Kontext: „{output_format}“ wird nicht unterstützt. "
                "Bitte Ausgabeformat „pdf“ oder „docx“ wählen."
            )
        )

    blobs: List[bytes] = []
    for d in image_docs:
        b = _load_image_bytes_from_action_doc(d, self.services)
        if not b:
            name = d.get("documentName") or "?"
            return ActionResult.isFailure(
                error=f"Bilddaten fehlen oder sind nicht lesbar (Datei: {name})."
            )
        blobs.append(b)

    if output_format == "pdf":
        builder, label = _images_list_to_pdf, "PDF"
    else:
        builder, label = _images_list_to_docx, "DOCX"
    # Both builders are guarded: this function runs outside create()'s own
    # try block, so an unhandled error here would escape the action.
    try:
        combined = builder(blobs)
    except Exception as e:
        logger.warning("file.create: %s merge failed: %s", label, e, exc_info=True)
        return ActionResult.isFailure(error=f"{label} aus Bildern konnte nicht erzeugt werden: {e}")
    if not combined:
        return ActionResult.isFailure(error="Zusammenfügen der Bilder ergab leere Ausgabe")

    out_name = f"{stem}.{output_format}"
    mime = _MIME_BY_FORMAT[output_format]
    doc_b64 = base64.b64encode(combined).decode("ascii")
    action_documents = [
        ActionDocument(
            documentName=out_name,
            documentData=doc_b64,
            mimeType=mime,
            validationMetadata={
                "actionType": "file.create",
                "outputFormat": output_format,
                "source": "mergedImageDocumentsOnly",
            },
        )
    ]
    _persistDocumentsToUserFiles(action_documents, self.services, folder_id=folder_id)
    return ActionResult.isSuccess(documents=action_documents)


async def create(self, parameters: Dict[str, Any]) -> ActionResult:
    """Create a file from context (text/markdown from upstream AI node).

    A context that is a list of image action documents is merged into one
    PDF/DOCX instead. Otherwise the context is converted to structured
    content and rendered through ``self.services.generation.renderReport``
    (docx, pdf, txt, md, html, xlsx, etc.).

    Args:
        parameters: Action parameters. Read keys: ``context`` (or ``text``),
            ``outputFormat`` (default "docx"), ``title`` (default
            "Document"), ``templateName``, ``language`` (default "de"),
            ``folderId`` and ``parentOperationId``.

    Returns:
        Success with the rendered documents (binary data base64-encoded), or
        a failure result carrying the error message.
    """
    raw_context = parameters.get("context", "") or parameters.get("text", "") or ""
    if isinstance(raw_context, list) and is_image_action_document_list(raw_context):
        return await _create_merged_image_documents(self, parameters, raw_context)

    context = serialize_context(raw_context)
    if not context:
        logger.warning(
            "file.create: context empty after resolve — check DataRefs (e.g. Antworttext / "
            "documents[0].documentData from the AI step)."
        )
        return ActionResult.isFailure(error="context is required (connect an AI node or provide text)")

    outputFormat = _parse_output_format(parameters)
    title = _parse_title(parameters)
    templateName = parameters.get("templateName")
    language = normalizePrimaryLanguageTag(
        str(parameters.get("language") or "de"),
        "de",
    )
    folder_id = _parse_folder_id(parameters)

    try:
        structured_content = markdownToDocumentJson(context, title, language)
        if templateName:
            structured_content.setdefault("metadata", {})["templateName"] = templateName

        generation = getattr(self.services, "generation", None)
        if not generation:
            return ActionResult.isFailure(error="Generation service not available")

        ai_service = getattr(self.services, "ai", None)
        rendered_docs = await generation.renderReport(
            extractedContent=structured_content,
            outputFormat=outputFormat,
            language=language,
            title=title,
            userPrompt=None,
            aiService=ai_service,
            parentOperationId=parameters.get("parentOperationId"),
        )
        if not rendered_docs:
            return ActionResult.isFailure(error="Rendering produced no output")

        default_mime = _MIME_BY_FORMAT.get(outputFormat, "application/octet-stream")
        action_documents = []
        for rd in rendered_docs:
            # Rendered documents may use camelCase or snake_case attributes.
            doc_data = rd.documentData if hasattr(rd, "documentData") else getattr(rd, "document_data", None)
            doc_name = (
                getattr(rd, "filename", None)
                or getattr(rd, "documentName", None)
                or getattr(rd, "document_name", None)
                # Applied with ``or`` so a present-but-None attribute still
                # ends up with a usable name.
                or f"output.{outputFormat}"
            )
            mime = getattr(rd, "mimeType", None) or getattr(rd, "mime_type", None) or default_mime
            if isinstance(doc_data, bytes):
                doc_data = base64.b64encode(doc_data).decode("ascii")
            action_documents.append(ActionDocument(
                documentName=doc_name,
                documentData=doc_data,
                mimeType=mime,
                validationMetadata={
                    "actionType": "file.create",
                    "outputFormat": outputFormat,
                    "templateName": templateName,
                },
            ))

        _persistDocumentsToUserFiles(action_documents, self.services, folder_id=folder_id)
        return ActionResult.isSuccess(documents=action_documents)
    except Exception as e:
        # Action boundary: report the failure as a result instead of raising.
        logger.exception("file.create failed: %s", e)
        return ActionResult.isFailure(error=str(e))