# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

from typing import Any, Dict, List, Optional
import asyncio
import base64
import binascii
import io
import json
import logging
import re

from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import (
    enhancePlainTextWithMarkdownTables,
    markdownToDocumentJson,
)
from modules.shared.i18nRegistry import normalizePrimaryLanguageTag
from modules.workflows.automation2.executors.actionNodeExecutor import _coerce_document_data_to_bytes
from modules.workflows.methods.methodAi._common import is_image_action_document_list, serialize_context

logger = logging.getLogger(__name__)

# Characters NOT in this class are replaced when building output filenames.
_SAFE_FILENAME = re.compile(r'[^\w\-.\(\)\s\[\]%@+]')
# Context keys holding binary/base64 payloads that must never be dumped into a report body.
_HEAVY_CONTEXT_KEYS = frozenset({"imageDocumentsOnly", "documents", "inputs"})


def _dig(raw: Any, path: tuple) -> Any:
    """Walk *path* of keys through nested dicts.

    Returns the value at the end of the path, or ``None`` if any intermediate
    value is missing or not a dict.
    """
    cur: Any = raw
    for key in path:
        if not isinstance(cur, dict):
            return None
        cur = cur.get(key)
    return cur


def _doc_field(doc: Any, name: str, default: Any = None) -> Any:
    """Read *name* from an ActionDocument-like object or a plain dict.

    Mirrors the ``getattr(...) or doc.get(...)`` idiom but never calls ``.get``
    on a non-dict (which would raise AttributeError on ActionDocument objects).
    Falsy values fall through to *default*, matching the original ``or`` chain.
    """
    val = getattr(doc, name, None)
    if not val and isinstance(doc, dict):
        val = doc.get(name)
    return val or default


def _collect_image_documents_only(raw: Any) -> List[Any]:
    """Resolve ``imageDocumentsOnly`` whether the context is merged, nested, or surfaced."""
    if not isinstance(raw, dict):
        return []
    paths = (
        ("imageDocumentsOnly",),
        ("merged", "imageDocumentsOnly"),
        ("data", "merged", "imageDocumentsOnly"),
        ("data", "imageDocumentsOnly"),
    )
    for path in paths:
        found = _dig(raw, path)
        if isinstance(found, list) and found:
            return found
    return []


def _context_string_for_report(raw: Any, output_format: str) -> str:
    """Build one narrative string for ``markdownToDocumentJson`` / render.

    Prefer plain ``response`` text (merge node surfaces it; nested
    ``merged.response`` too). Never dump ``inputs`` / binary lists into the
    PDF body — that produced giant JSON + base64 "hash" paragraphs after
    merge + ``contextBuilder``.
    """
    of = (output_format or "docx").strip().lower().lstrip(".")
    if of == "json":
        return serialize_context(raw, prefer_handover_primary=False)
    if isinstance(raw, str):
        return raw.strip().lstrip("\ufeff")
    if isinstance(raw, dict):
        for path in (
            ("response",),
            ("merged", "response"),
            ("data", "response"),
            ("data", "merged", "response"),
        ):
            cur = _dig(raw, path)
            if cur is not None and str(cur).strip():
                return str(cur).strip().lstrip("\ufeff")
        # No plain response text found — serialize a lean copy without the
        # heavy binary/base64 keys.
        lean = {k: v for k, v in raw.items() if k not in _HEAVY_CONTEXT_KEYS}
        try:
            return json.dumps(lean, ensure_ascii=False, indent=2, default=str)
        except Exception:
            return serialize_context(lean, prefer_handover_primary=False)
    return serialize_context(raw, prefer_handover_primary=False)


def _raw_context_preview_for_log(raw: Any, max_len: int = 500) -> str:
    """Return a single-line, length-capped preview of *raw* for log output."""
    if raw is None:
        return "None"
    s = raw if isinstance(raw, str) else repr(raw)
    s = s.replace("\r", "\\r").replace("\n", "\\n")
    if len(s) <= max_len:
        return s
    return s[:max_len] + f"...<{len(s)} chars>"


def _persistDocumentsToUserFiles(
    action_documents: list,
    services,
    folder_id: Optional[str] = None,
) -> None:
    """Persist file.create output documents to user's file storage (like upload).

    Adds fileId to each document's validationMetadata for download links in UI.
    Best-effort: per-document failures are logged and never raised to the caller.
    """
    mgmt = getattr(services, "interfaceDbComponent", None)
    if not mgmt:
        # Same resolution path as the rest of this module.
        mgmt = _get_management_interface(services)
    if not mgmt:
        return
    logger.info(
        "file.create persist: mgmt=%s id(mgmt)=%s has_createFileData=%s",
        type(mgmt).__name__, id(mgmt), hasattr(mgmt, "createFileData"),
    )
    for doc in action_documents:
        try:
            doc_data = doc.documentData if hasattr(doc, "documentData") else doc.get("documentData")
            if not doc_data:
                continue
            if isinstance(doc_data, str):
                try:
                    content = base64.b64decode(doc_data, validate=True)
                except (TypeError, ValueError, binascii.Error):
                    # Not valid base64 — treat the string as a raw text payload.
                    content = doc_data.encode("utf-8")
            else:
                content = doc_data
            doc_name = _doc_field(doc, "documentName", "output.pdf")
            mime = _doc_field(doc, "mimeType", "application/octet-stream")
            logger.info(
                "file.create persist: calling createFile name=%s bytes=%s",
                doc_name, len(content),
            )
            file_item = mgmt.createFile(doc_name, mime, content, folderId=folder_id)
            logger.info("file.create persist: createFile returned id=%s", file_item.id)
            ok = mgmt.createFileData(file_item.id, content)
            logger.info("file.create persist: createFileData returned %s for id=%s", ok, file_item.id)
            meta = _doc_field(doc, "validationMetadata") or {}
            if isinstance(meta, dict):
                meta["fileId"] = file_item.id
            if hasattr(doc, "validationMetadata"):
                doc.validationMetadata = meta
            elif isinstance(doc, dict):
                doc["validationMetadata"] = meta
            logger.info("file.create: persisted %s to user files (id=%s)", doc_name, file_item.id)
        except Exception as e:
            # _doc_field never raises here, so the best-effort loop continues
            # even for object documents with a falsy documentName.
            dname = _doc_field(doc, "documentName", "?")
            logger.warning("file.create: failed to persist document %s: %s", dname, e)


def _sanitize_output_stem(title: str) -> str:
    """Turn *title* into a filesystem-safe filename stem (max 120 chars)."""
    t = (title or "").strip() or "Document"
    stem = _SAFE_FILENAME.sub("_", t).strip("._")
    return stem[:120] if stem else "Document"


def _get_management_interface(services) -> Optional[Any]:
    """Return the DB management interface from *services*, or resolve one lazily.

    Returns ``None`` (after logging) when no interface can be obtained.
    """
    mgmt = getattr(services, "interfaceDbComponent", None)
    if mgmt:
        return mgmt
    try:
        import modules.interfaces.interfaceDbManagement as iface
        user = getattr(services, "user", None)
        if not user:
            return None
        return iface.getInterface(
            user,
            mandateId=getattr(services, "mandateId", None) or "",
            featureInstanceId=getattr(services, "featureInstanceId", None) or "",
        )
    except Exception as e:
        logger.warning("file.create: could not get management interface: %s", e)
        return None


def _load_image_bytes_from_action_doc(doc: dict, services) -> Optional[bytes]:
    """Resolve raw image bytes from inline documentData or, failing that, by fileId."""
    raw = doc.get("documentData")
    blob = _coerce_document_data_to_bytes(raw)
    if blob:
        return blob
    fid = doc.get("fileId")
    if not fid and isinstance(doc.get("validationMetadata"), dict):
        fid = (doc.get("validationMetadata") or {}).get("fileId")
    if fid and str(fid).strip():
        mgmt = _get_management_interface(services)
        if mgmt and hasattr(mgmt, "getFileData"):
            try:
                return mgmt.getFileData(str(fid))
            except Exception as e:
                logger.warning("file.create: getFileData(%s) failed: %s", fid, e)
    return None


# Images larger than this threshold (decoded bytes) are resized before embedding
# to avoid multi-minute PDF rendering of high-res raster scans.
_MAX_IMAGE_EMBED_BYTES = 300_000  # 300 KB decoded ≈ ~400 KB base64
_IMAGE_MAX_DIMENSION = 1200  # longest edge in pixels after resize


def _resize_image_for_document(image_bytes: bytes) -> bytes:
    """Resize image to at most ``_IMAGE_MAX_DIMENSION`` px on the longest edge
    and re-encode as JPEG.

    Falls back to the original bytes on any error.
    """
    try:
        from PIL import Image as PILImage
        import io as _io

        img = PILImage.open(_io.BytesIO(image_bytes))
        # Flatten transparency / palette modes to RGB (required for JPEG)
        if img.mode in ("RGBA", "LA"):
            bg = PILImage.new("RGB", img.size, (255, 255, 255))
            bg.paste(img, mask=img.split()[-1])
            img = bg
        elif img.mode == "P":
            img = img.convert("RGBA")
            bg = PILImage.new("RGB", img.size, (255, 255, 255))
            bg.paste(img, mask=img.split()[-1])
            img = bg
        elif img.mode != "RGB":
            img = img.convert("RGB")
        w, h = img.size
        if max(w, h) > _IMAGE_MAX_DIMENSION:
            # thumbnail() is optimised for downscaling: it uses an intermediate
            # box-filter step before the final filter, making it 3-5× faster
            # than resize() on large images. BILINEAR is fast and sufficient
            # for document thumbnails.
            img.thumbnail((_IMAGE_MAX_DIMENSION, _IMAGE_MAX_DIMENSION), PILImage.BILINEAR)
        out = _io.BytesIO()
        img.save(out, format="JPEG", quality=85, optimize=True)
        return out.getvalue()
    except Exception as e:
        logger.warning("file.create: image resize failed (%s) — using original bytes", e)
        return image_bytes


def _append_images_to_content(structured_content: dict, image_docs: list, services=None) -> dict:
    """Append images from imageDocumentsOnly as native image elements to the structured JSON.

    Each image becomes an ``image`` element with ``base64Data`` in a trailing
    "Bilder" section of the first document. Images larger than
    ``_MAX_IMAGE_EMBED_BYTES`` are automatically resized/compressed so the
    synchronous PDF renderer does not block for minutes on high-res scans.
    The renderers (DOCX / PDF) handle ``content.base64Data`` natively.
    """
    elements = []
    for doc in image_docs:
        b = _load_image_bytes_from_action_doc(doc, services)
        if not b:
            # Last resort: tolerate non-strict base64 strings in documentData.
            raw = doc.get("documentData") if isinstance(doc, dict) else None
            if isinstance(raw, str):
                try:
                    b = base64.b64decode(raw)
                except Exception:
                    pass
        if not b:
            continue
        if len(b) > _MAX_IMAGE_EMBED_BYTES:
            logger.info(
                "file.create: image %s is %d bytes — resizing to max %dpx for embedding",
                (doc.get("documentName") if isinstance(doc, dict) else "?") or "?",
                len(b),
                _IMAGE_MAX_DIMENSION,
            )
            b = _resize_image_for_document(b)
        elements.append({
            "type": "image",
            "content": {
                "base64Data": base64.b64encode(b).decode("ascii"),
                "alt": (doc.get("documentName") if isinstance(doc, dict) else None) or "image",
            },
        })
    if not elements:
        return structured_content
    docs = structured_content.get("documents")
    if isinstance(docs, list) and docs:
        docs[0].setdefault("sections", []).append({"heading": "Bilder", "elements": elements})
    return structured_content


def _images_list_to_pdf(image_bytes_list: List[bytes]) -> bytes:
    """One PDF page per image; embedded raster data via PyMuPDF."""
    import fitz
    pdf = fitz.open()
    try:
        for blob in image_bytes_list:
            page = pdf.new_page()
            page.insert_image(page.rect, stream=blob, keep_proportion=True)
        return pdf.tobytes()
    finally:
        pdf.close()


def _images_list_to_docx(image_bytes_list: List[bytes]) -> bytes:
    """Images embedded in the document package (inline shapes), not hyperlinks."""
    from docx import Document
    from docx.shared import Inches
    doc = Document()
    for blob in image_bytes_list:
        p = doc.add_paragraph()
        run = p.add_run()
        run.add_picture(io.BytesIO(blob), width=Inches(6.5))
        doc.add_paragraph()
    out = io.BytesIO()
    doc.save(out)
    return out.getvalue()


async def _create_merged_image_documents(
    self,
    parameters: Dict[str, Any],
    image_docs: List[dict],
) -> ActionResult:
    """Build one PDF or DOCX containing all extracted images (``imageDocumentsOnly``)."""
    output_format = (parameters.get("outputFormat") or "docx").strip().lower().lstrip(".")
    title = (parameters.get("title") or "Document").strip()
    stem = _sanitize_output_stem(title)
    folder_id: Optional[str] = None
    raw_folder = parameters.get("folderId")
    if raw_folder is not None and str(raw_folder).strip():
        folder_id = str(raw_folder).strip()
    if output_format not in ("pdf", "docx"):
        return ActionResult.isFailure(
            error=(
                f"Nur-Bilder-Kontext: „{output_format}“ wird nicht unterstützt. "
                "Bitte Ausgabeformat „pdf“ oder „docx“ wählen."
            )
        )
    blobs: List[bytes] = []
    for d in image_docs:
        b = _load_image_bytes_from_action_doc(d, self.services)
        if not b:
            name = d.get("documentName") or "?"
            return ActionResult.isFailure(
                error=f"Bilddaten fehlen oder sind nicht lesbar (Datei: {name})."
            )
        blobs.append(b)
    if output_format == "pdf":
        try:
            combined = _images_list_to_pdf(blobs)
        except Exception as e:
            logger.warning("file.create: PDF merge failed: %s", e, exc_info=True)
            return ActionResult.isFailure(error=f"PDF aus Bildern konnte nicht erzeugt werden: {e}")
        out_name = f"{stem}.pdf"
        mime = "application/pdf"
    else:
        combined = _images_list_to_docx(blobs)
        out_name = f"{stem}.docx"
        mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    if not combined:
        return ActionResult.isFailure(error="Zusammenfügen der Bilder ergab leere Ausgabe")
    doc_b64 = base64.b64encode(combined).decode("ascii")
    action_documents = [
        ActionDocument(
            documentName=out_name,
            documentData=doc_b64,
            mimeType=mime,
            validationMetadata={
                "actionType": "file.create",
                "outputFormat": output_format,
                "source": "mergedImageDocumentsOnly",
            },
        )
    ]
    _persistDocumentsToUserFiles(action_documents, self.services, folder_id=folder_id)
    return ActionResult.isSuccess(documents=action_documents)


async def create(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Create a file from context (text/markdown from upstream AI node).
    Uses GenerationService.renderReport to produce docx, pdf, txt, md,
    html, xlsx, etc.
    """
    raw_context = parameters.get("context", "")
    # A pure image-document list bypasses the markdown pipeline entirely.
    if isinstance(raw_context, list) and is_image_action_document_list(raw_context):
        return await _create_merged_image_documents(self, parameters, raw_context)
    outputFormat = (parameters.get("outputFormat") or "docx").strip().lower().lstrip(".")
    context = _context_string_for_report(raw_context, outputFormat)
    if not context:
        logger.warning(
            "file.create: context empty after resolve — raw_context type=%s raw_summary=%r "
            "serialized_len=%s (check ActionNodeExecutor \"file.create context resolution\" log for DataRef / upstream).",
            type(raw_context).__name__,
            _raw_context_preview_for_log(raw_context),
            len(context or ""),
        )
        return ActionResult.isFailure(error="context is required (connect an AI node or provide text)")
    title = (parameters.get("title") or "Document").strip()
    templateName = parameters.get("templateName")
    language = normalizePrimaryLanguageTag(
        str(parameters.get("language") or "de"),
        "de",
    )
    folder_id: Optional[str] = None
    raw_folder = parameters.get("folderId")
    if raw_folder is not None and str(raw_folder).strip():
        folder_id = str(raw_folder).strip()
    try:
        if outputFormat != "json":
            context = enhancePlainTextWithMarkdownTables(context)
        structured_content = markdownToDocumentJson(context, title, language)
        if templateName:
            structured_content.setdefault("metadata", {})["templateName"] = templateName
        img_docs = _collect_image_documents_only(raw_context)
        if img_docs:
            # Image decoding and PIL resizing are CPU-bound; run them in a
            # thread pool so the event loop is not blocked while processing
            # high-res raster images (e.g. 3+ MB PNGs from PDF extraction).
            # get_running_loop() is the non-deprecated form inside a coroutine.
            loop = asyncio.get_running_loop()
            structured_content = await loop.run_in_executor(
                None,
                _append_images_to_content,
                structured_content,
                img_docs,
                self.services,
            )
        generation = getattr(self.services, "generation", None)
        if not generation:
            return ActionResult.isFailure(error="Generation service not available")
        ai_service = getattr(self.services, "ai", None)
        rendered_docs = await generation.renderReport(
            extractedContent=structured_content,
            outputFormat=outputFormat,
            language=language,
            title=title,
            userPrompt=None,
            aiService=ai_service,
            parentOperationId=parameters.get("parentOperationId"),
        )
        if not rendered_docs:
            return ActionResult.isFailure(error="Rendering produced no output")
        action_documents = []
        mime_map = {
            "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "pdf": "application/pdf",
            "txt": "text/plain",
            "md": "text/markdown",
            "html": "text/html",
            "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "csv": "text/csv",
            "json": "application/json",
        }
        for rd in rendered_docs:
            doc_data = rd.documentData if hasattr(rd, "documentData") else getattr(rd, "document_data", None)
            doc_name = (
                getattr(rd, "filename", None)
                or getattr(rd, "documentName", None)
                or getattr(rd, "document_name", f"output.{outputFormat}")
            )
            mime = (
                getattr(rd, "mimeType", None)
                or getattr(rd, "mime_type", None)
                or mime_map.get(outputFormat, "application/octet-stream")
            )
            if isinstance(doc_data, bytes):
                doc_data = base64.b64encode(doc_data).decode("ascii")
            action_documents.append(ActionDocument(
                documentName=doc_name,
                documentData=doc_data,
                mimeType=mime,
                validationMetadata={
                    "actionType": "file.create",
                    "outputFormat": outputFormat,
                    "templateName": templateName,
                },
            ))
        _persistDocumentsToUserFiles(action_documents, self.services, folder_id=folder_id)
        return ActionResult.isSuccess(documents=action_documents)
    except Exception as e:
        logger.error(f"file.create failed: {e}", exc_info=True)
        return ActionResult.isFailure(error=str(e))