327 lines
12 KiB
Python
327 lines
12 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
import base64
|
|
import binascii
|
|
import io
|
|
import json
|
|
import logging
|
|
import re
|
|
|
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
|
from modules.serviceCenter.services.serviceGeneration.subDocumentUtility import markdownToDocumentJson
|
|
from modules.shared.i18nRegistry import normalizePrimaryLanguageTag
|
|
from modules.workflows.automation2.executors.actionNodeExecutor import _coerce_document_data_to_bytes
|
|
from modules.workflows.methods.methodAi._common import is_image_action_document_list, serialize_context
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
# Matches every character that is NOT allowed in an output file name stem.
# Allowed: word characters, "-", ".", "(", ")", whitespace, "[", "]", "%", "@", "+".
# Note that "\s" also whitelists tabs and newlines, not only plain spaces.
_SAFE_FILENAME = re.compile(r'[^\w\-.\(\)\s\[\]%@+]')
|
|
|
|
|
|
def _persist_doc_field(doc: Any, field: str, default: Any = None) -> Any:
    """Read ``field`` from an attribute-style or mapping-style document.

    The attribute is tried first; if it is missing or falsy and the document
    offers a callable ``get`` (e.g. a dict), that is consulted next. Returns
    ``default`` when neither lookup yields a truthy value. Never raises for
    objects that have no ``get`` method.
    """
    value = getattr(doc, field, None)
    if not value:
        getter = getattr(doc, "get", None)
        if callable(getter):
            value = getter(field)
    return value or default


def _persistDocumentsToUserFiles(
    action_documents: list,
    services,
    folder_id: Optional[str] = None,
) -> None:
    """Persist file.create output documents to the user's file storage.

    For every document with non-empty ``documentData`` the management
    interface's ``createFile`` and ``createFileData`` are called, and the new
    file id is written to ``validationMetadata["fileId"]`` of the document.

    Args:
        action_documents: Documents as objects (attribute access) or dicts.
        services: Service container; ``interfaceDbComponent`` is used when
            present, otherwise an interface is requested for ``services.user``.
        folder_id: Optional target folder, forwarded as ``folderId``.

    Persistence is best-effort: a failure for one document is logged as a
    warning and the remaining documents are still processed.
    """
    mgmt = getattr(services, "interfaceDbComponent", None)
    if not mgmt:
        try:
            import modules.interfaces.interfaceDbManagement as iface

            user = getattr(services, "user", None)
            if not user:
                return
            mgmt = iface.getInterface(
                user,
                mandateId=getattr(services, "mandateId", None) or "",
                featureInstanceId=getattr(services, "featureInstanceId", None) or "",
            )
        except Exception as e:
            logger.warning("file.create: could not get management interface for persistence: %s", e)
            return
    if not mgmt:
        return

    logger.info(
        "file.create persist: mgmt=%s id(mgmt)=%s has_createFileData=%s",
        type(mgmt).__name__,
        id(mgmt),
        hasattr(mgmt, "createFileData"),
    )

    for doc in action_documents:
        try:
            doc_data = _persist_doc_field(doc, "documentData")
            if not doc_data:
                continue

            if isinstance(doc_data, str):
                # Strings are treated as base64 when they decode strictly;
                # anything else is stored as UTF-8 text.
                try:
                    content = base64.b64decode(doc_data, validate=True)
                except (TypeError, ValueError, binascii.Error):
                    content = doc_data.encode("utf-8")
            else:
                content = doc_data

            doc_name = _persist_doc_field(doc, "documentName", "output.pdf")
            mime = _persist_doc_field(doc, "mimeType", "application/octet-stream")

            logger.info(
                "file.create persist: calling createFile name=%s bytes=%s",
                doc_name,
                len(content),
            )
            file_item = mgmt.createFile(doc_name, mime, content, folderId=folder_id)
            logger.info("file.create persist: createFile returned id=%s", file_item.id)
            ok = mgmt.createFileData(file_item.id, content)
            logger.info("file.create persist: createFileData returned %s for id=%s", ok, file_item.id)

            meta = _persist_doc_field(doc, "validationMetadata", {})
            if isinstance(meta, dict):
                meta["fileId"] = file_item.id
                if isinstance(doc, dict):
                    doc["validationMetadata"] = meta
                elif hasattr(doc, "validationMetadata"):
                    doc.validationMetadata = meta

            logger.info("file.create: persisted %s to user files (id=%s)", doc_name, file_item.id)
        except Exception as e:
            # The name lookup must not raise here, otherwise the error would
            # escape the handler and abort the remaining documents.
            dname = _persist_doc_field(doc, "documentName", "?")
            logger.warning("file.create: failed to persist document %s: %s", dname, e)
|
|
|
|
|
|
def _sanitize_output_stem(title: str) -> str:
    """Turn a document title into a safe file name stem (no extension).

    Whitespace runs (including tabs and newlines) are collapsed to single
    spaces, every character matched by ``_SAFE_FILENAME`` is replaced with
    ``_``, and leading/trailing dots, underscores and spaces are removed. The
    result is limited to 120 characters and falls back to ``"Document"`` when
    nothing usable remains.
    """
    # split()/join drops control whitespace that the regex whitelist ("\s")
    # would otherwise let through into the file name.
    text = " ".join((title or "").split()) or "Document"
    stem = _SAFE_FILENAME.sub("_", text).strip("._ ")
    # Strip again after truncation so the cut cannot leave a trailing
    # dot/underscore/space in front of the extension.
    stem = stem[:120].rstrip("._ ")
    return stem or "Document"
|
|
|
|
|
|
def _get_management_interface(services) -> Optional[Any]:
    """Return the DB management interface for ``services``, or ``None``.

    A truthy ``services.interfaceDbComponent`` is returned as-is. Otherwise an
    interface is requested from ``interfaceDbManagement.getInterface`` for
    ``services.user``; ``None`` is returned when there is no user or when the
    lookup raises (the error is logged as a warning).
    """
    preconfigured = getattr(services, "interfaceDbComponent", None)
    if not preconfigured:
        try:
            import modules.interfaces.interfaceDbManagement as iface

            current_user = getattr(services, "user", None)
            if current_user:
                mandate = getattr(services, "mandateId", None) or ""
                feature_instance = getattr(services, "featureInstanceId", None) or ""
                return iface.getInterface(
                    current_user,
                    mandateId=mandate,
                    featureInstanceId=feature_instance,
                )
            return None
        except Exception as e:
            logger.warning("file.create: could not get management interface: %s", e)
            return None
    return preconfigured
|
|
|
|
|
|
def _load_image_bytes_from_action_doc(doc: dict, services) -> Optional[bytes]:
    """Resolve the image payload of an action document dict.

    Inline ``documentData`` (converted by ``_coerce_document_data_to_bytes``)
    wins. Otherwise the file id is taken from ``doc["fileId"]`` or, failing
    that, from ``doc["validationMetadata"]["fileId"]`` and passed to the
    management interface's ``getFileData``. Returns ``None`` when no data can
    be obtained; a failing ``getFileData`` call is logged as a warning.
    """
    inline_bytes = _coerce_document_data_to_bytes(doc.get("documentData"))
    if inline_bytes:
        return inline_bytes

    file_id = doc.get("fileId")
    if not file_id:
        metadata = doc.get("validationMetadata")
        if isinstance(metadata, dict):
            file_id = metadata.get("fileId")

    # Nothing to look up when the id is missing or blank.
    if not file_id or not str(file_id).strip():
        return None

    storage = _get_management_interface(services)
    if not storage or not hasattr(storage, "getFileData"):
        return None

    try:
        return storage.getFileData(str(file_id))
    except Exception as e:
        logger.warning("file.create: getFileData(%s) failed: %s", file_id, e)
        return None
|
|
|
|
|
|
def _images_list_to_pdf(image_bytes_list: List[bytes]) -> bytes:
    """Build a PDF with one page per image using PyMuPDF (``fitz``).

    Each image is inserted into the full rectangle of a freshly created page
    with ``keep_proportion=True``. The document is closed in all cases; the
    serialized PDF bytes are returned.
    """
    import fitz

    document = fitz.open()
    try:
        for image_data in image_bytes_list:
            sheet = document.new_page()
            sheet.insert_image(sheet.rect, stream=image_data, keep_proportion=True)
        rendered = document.tobytes()
    finally:
        document.close()
    return rendered
|
|
|
|
|
|
def _images_list_to_docx(image_bytes_list: List[bytes]) -> bytes:
    """Build a DOCX whose body contains every image as an inline picture.

    Each image goes into its own paragraph at a width of 6.5 inches, followed
    by an empty paragraph. The images are embedded in the document itself
    rather than linked. Returns the saved document as bytes.
    """
    from docx import Document
    from docx.shared import Inches

    picture_width = Inches(6.5)
    document = Document()
    for image_data in image_bytes_list:
        picture_run = document.add_paragraph().add_run()
        picture_run.add_picture(io.BytesIO(image_data), width=picture_width)
        # Empty paragraph after every picture, as a spacer.
        document.add_paragraph()

    buffer = io.BytesIO()
    document.save(buffer)
    return buffer.getvalue()
|
|
|
|
|
|
async def _create_merged_image_documents(
    self,
    parameters: Dict[str, Any],
    image_docs: List[dict],
) -> ActionResult:
    """Build one PDF or DOCX containing all extracted images (``imageDocumentsOnly``).

    Args:
        parameters: Action parameters; reads ``outputFormat`` (default
            ``"docx"``), ``title`` (default ``"Document"``) and ``folderId``.
        image_docs: Action document dicts whose image bytes are resolved via
            ``_load_image_bytes_from_action_doc``.

    Returns:
        A success result with a single base64-encoded document that was also
        handed to ``_persistDocumentsToUserFiles``, or a failure result when
        the format is unsupported, image data is missing, or merging fails.
    """
    output_format = (parameters.get("outputFormat") or "docx").strip().lower().lstrip(".")
    title = (parameters.get("title") or "Document").strip()
    stem = _sanitize_output_stem(title)

    folder_id: Optional[str] = None
    raw_folder = parameters.get("folderId")
    if raw_folder is not None and str(raw_folder).strip():
        folder_id = str(raw_folder).strip()

    if output_format not in ("pdf", "docx"):
        return ActionResult.isFailure(
            error=(
                f"Nur-Bilder-Kontext: „{output_format}“ wird nicht unterstützt. "
                "Bitte Ausgabeformat „pdf“ oder „docx“ wählen."
            )
        )

    blobs: List[bytes] = []
    for d in image_docs:
        b = _load_image_bytes_from_action_doc(d, self.services)
        if not b:
            name = d.get("documentName") or "?"
            return ActionResult.isFailure(
                error=f"Bilddaten fehlen oder sind nicht lesbar (Datei: {name})."
            )
        blobs.append(b)

    # Without this guard the DOCX path would report success for a document
    # that contains no image at all.
    if not blobs:
        return ActionResult.isFailure(error="Keine Bilddaten zum Zusammenfügen vorhanden.")

    if output_format == "pdf":
        try:
            combined = _images_list_to_pdf(blobs)
        except Exception as e:
            logger.warning("file.create: PDF merge failed: %s", e, exc_info=True)
            return ActionResult.isFailure(error=f"PDF aus Bildern konnte nicht erzeugt werden: {e}")
        out_name = f"{stem}.pdf"
        mime = "application/pdf"
    else:
        # Same protection as the PDF branch: an unreadable image must yield a
        # failure result instead of an exception escaping the action.
        try:
            combined = _images_list_to_docx(blobs)
        except Exception as e:
            logger.warning("file.create: DOCX merge failed: %s", e, exc_info=True)
            return ActionResult.isFailure(error=f"DOCX aus Bildern konnte nicht erzeugt werden: {e}")
        out_name = f"{stem}.docx"
        mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

    if not combined:
        return ActionResult.isFailure(error="Zusammenfügen der Bilder ergab leere Ausgabe")

    doc_b64 = base64.b64encode(combined).decode("ascii")
    action_documents = [
        ActionDocument(
            documentName=out_name,
            documentData=doc_b64,
            mimeType=mime,
            validationMetadata={
                "actionType": "file.create",
                "outputFormat": output_format,
                "source": "mergedImageDocumentsOnly",
            },
        )
    ]
    _persistDocumentsToUserFiles(action_documents, self.services, folder_id=folder_id)
    return ActionResult.isSuccess(documents=action_documents)
|
|
|
|
|
|
async def create(self, parameters: Dict[str, Any]) -> ActionResult:
    """Create a file from context (text/markdown from an upstream AI node).

    A context that is a list of image action documents is merged into a single
    PDF/DOCX by ``_create_merged_image_documents``. Any other context is
    serialized, converted with ``markdownToDocumentJson`` and rendered through
    ``self.services.generation.renderReport``.

    Args:
        parameters: Action parameters. Reads ``context`` (or ``text``),
            ``outputFormat`` (default ``"docx"``), ``title`` (default
            ``"Document"``), ``templateName``, ``language`` (default ``"de"``),
            ``folderId`` and ``parentOperationId``.

    Returns:
        A success result carrying the rendered documents (binary payloads
        base64-encoded, also handed to ``_persistDocumentsToUserFiles``), or a
        failure result when the context is empty, the generation service is
        missing, rendering yields nothing, or an exception occurs.
    """
    raw_context = parameters.get("context", "") or parameters.get("text", "") or ""

    if isinstance(raw_context, list) and is_image_action_document_list(raw_context):
        return await _create_merged_image_documents(self, parameters, raw_context)

    context = serialize_context(raw_context)

    if not context:
        logger.warning(
            "file.create: context empty after resolve — check DataRefs (e.g. Antworttext / "
            "documents[0].documentData from the AI step)."
        )
        return ActionResult.isFailure(error="context is required (connect an AI node or provide text)")

    # Fall back to the defaults for whitespace-only values too, not just for
    # missing ones.
    outputFormat = (parameters.get("outputFormat") or "").strip().lower().lstrip(".") or "docx"
    title = (parameters.get("title") or "").strip() or "Document"
    templateName = parameters.get("templateName")
    language = normalizePrimaryLanguageTag(
        str(parameters.get("language") or "de"),
        "de",
    )

    folder_id: Optional[str] = None
    raw_folder = parameters.get("folderId")
    if raw_folder is not None and str(raw_folder).strip():
        folder_id = str(raw_folder).strip()

    # Used only when a rendered document does not carry its own MIME type.
    mime_map = {
        "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "pdf": "application/pdf",
        "txt": "text/plain",
        "md": "text/markdown",
        "html": "text/html",
        "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "csv": "text/csv",
        "json": "application/json",
    }

    try:
        structured_content = markdownToDocumentJson(context, title, language)
        if templateName:
            structured_content.setdefault("metadata", {})["templateName"] = templateName

        generation = getattr(self.services, "generation", None)
        if not generation:
            return ActionResult.isFailure(error="Generation service not available")

        ai_service = getattr(self.services, "ai", None)
        rendered_docs = await generation.renderReport(
            extractedContent=structured_content,
            outputFormat=outputFormat,
            language=language,
            title=title,
            userPrompt=None,
            aiService=ai_service,
            parentOperationId=parameters.get("parentOperationId"),
        )

        if not rendered_docs:
            return ActionResult.isFailure(error="Rendering produced no output")

        action_documents = []
        for rd in rendered_docs:
            doc_data = rd.documentData if hasattr(rd, "documentData") else getattr(rd, "document_data", None)
            # Every candidate is tried with "or", so an attribute that exists
            # but is None/empty still ends in the generated fallback name.
            doc_name = (
                getattr(rd, "filename", None)
                or getattr(rd, "documentName", None)
                or getattr(rd, "document_name", None)
                or f"output.{outputFormat}"
            )
            mime = (
                getattr(rd, "mimeType", None)
                or getattr(rd, "mime_type", None)
                or mime_map.get(outputFormat, "application/octet-stream")
            )

            # Binary payloads are transported as base64 text.
            if isinstance(doc_data, (bytes, bytearray, memoryview)):
                doc_data = base64.b64encode(doc_data).decode("ascii")

            action_documents.append(ActionDocument(
                documentName=doc_name,
                documentData=doc_data,
                mimeType=mime,
                validationMetadata={
                    "actionType": "file.create",
                    "outputFormat": outputFormat,
                    "templateName": templateName,
                },
            ))

        _persistDocumentsToUserFiles(action_documents, self.services, folder_id=folder_id)
        return ActionResult.isSuccess(documents=action_documents)

    except Exception as e:
        logger.error("file.create failed: %s", e, exc_info=True)
        return ActionResult.isFailure(error=str(e))
|