diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 30e7cc88..331a3289 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -1202,21 +1202,24 @@ If no trackable items can be identified, return: {{"kpis": []}} return None # Check for ContentExtracted format + # Nur Format 1 (ActionDocument-Format mit validationMetadata) wird unterstützt documentData = None - if "parts" in jsonData and isinstance(jsonData.get("parts"), list): - # Direct ContentExtracted format: {"id": "...", "parts": [...], ...} - documentData = jsonData - else: - validationMetadata = jsonData.get("validationMetadata", {}) - actionType = validationMetadata.get("actionType") - if actionType == "context.extractContent": - # Format: {"validationMetadata": {"actionType": "context.extractContent"}, "documentData": {...}} - documentData = jsonData.get("documentData") + + validationMetadata = jsonData.get("validationMetadata", {}) + actionType = validationMetadata.get("actionType") + if actionType == "context.extractContent": + # Format: {"validationMetadata": {"actionType": "context.extractContent"}, "documentData": {...}} + documentData = jsonData.get("documentData") + logger.debug(f"Found ContentExtracted via validationMetadata for {document.fileName}") if documentData: from modules.datamodels.datamodelExtraction import ContentExtracted try: + # Stelle sicher, dass "id" vorhanden ist + if "id" not in documentData: + documentData["id"] = document.id + contentExtracted = ContentExtracted(**documentData) if contentExtracted.parts: @@ -1235,8 +1238,8 @@ If no trackable items can be identified, return: {{"kpis": []}} if not originalMimeType and part.metadata.get("documentMimeType"): originalMimeType = part.metadata.get("documentMimeType") - # Falls nicht gefunden, verwende documentName aus ContentExtracted - if not originalFileName and hasattr(contentExtracted, 'id'): + # Falls nicht gefunden, versuche aus documentName zu extrahieren + if not originalFileName: # Versuche aus documentName zu extrahieren (z.B. "B2025-02c_28_extracted_...json" -> "B2025-02c_28.pdf") if document.fileName and "_extracted_" in document.fileName: originalFileName = document.fileName.split("_extracted_")[0] + ".pdf" @@ -1252,7 +1255,8 @@ If no trackable items can be identified, return: {{"kpis": []}} "parts": contentExtracted.parts } except Exception as parseError: - logger.debug(f"Could not parse ContentExtracted format: {str(parseError)}") + logger.warning(f"Could not parse ContentExtracted format from {document.fileName}: {str(parseError)}") + logger.debug(f"JSON keys: {list(jsonData.keys())}, has parts: {'parts' in jsonData}") return None return None diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py index cababbeb..e08eaa81 100644 --- a/modules/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/services/serviceGeneration/mainServiceGeneration.py @@ -58,11 +58,35 @@ class GenerationService: # Detect MIME without relying on a service center mime_type = detectMimeTypeFromContent(content, doc.documentName) + # WICHTIG: Für ActionDocuments mit validationMetadata (z.B. context.extractContent) + # müssen wir das gesamte ActionDocument serialisieren, nicht nur documentData + document_data = doc.documentData + if hasattr(doc, 'validationMetadata') and doc.validationMetadata: + # Wenn validationMetadata vorhanden ist, serialisiere das gesamte ActionDocument-Format + if mime_type == "application/json": + # Erstelle ActionDocument-Format mit validationMetadata und documentData + if hasattr(document_data, 'model_dump'): + # Pydantic v2 + document_data_dict = document_data.model_dump() + elif hasattr(document_data, 'dict'): + # Pydantic v1 + document_data_dict = document_data.dict() + elif isinstance(document_data, dict): + document_data_dict = document_data + else: + document_data_dict = {"data": str(document_data)} + + # Erstelle ActionDocument-Format + document_data = { + "validationMetadata": doc.validationMetadata, + "documentData": document_data_dict + } + return { 'fileName': doc.documentName, - 'fileSize': len(str(doc.documentData)), + 'fileSize': len(str(document_data)), 'mimeType': mime_type, - 'content': doc.documentData, + 'content': document_data, 'document': doc } except Exception as e: