From 2255c9009df0ad7b83af9a62552eb0b4615b1809 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 4 Nov 2025 14:30:48 +0100 Subject: [PATCH] fixed critical bug in models to use previous workflow object instead of properly instantiated current object --- modules/services/serviceAi/mainServiceAi.py | 49 +++- .../services/serviceChat/mainServiceChat.py | 226 ++++++++++----- modules/workflows/methods/methodOutlook.py | 153 +++++++++- .../processing/adaptive/VALIDATOR_ANALYSIS.md | 266 ------------------ .../processing/shared/methodDiscovery.py | 68 +++-- modules/workflows/workflowManager.py | 11 + 6 files changed, 402 insertions(+), 371 deletions(-) delete mode 100644 modules/workflows/processing/adaptive/VALIDATOR_ANALYSIS.md diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 95c51779..e03b15cd 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -1,5 +1,6 @@ import json import logging +import re import time from typing import Dict, Any, List, Optional, Tuple, Union from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument @@ -731,28 +732,68 @@ Respond with ONLY a JSON object in this exact format: self.services.chat.progressLogFinish(aiOperationId, False) return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"} + # Extract title and filename from generated document structure + extractedTitle = title # Default to user-provided title + extractedFilename = None + if isinstance(generated_data, dict) and "documents" in generated_data: + documents = generated_data["documents"] + if isinstance(documents, list) and len(documents) > 0: + firstDoc = documents[0] + if isinstance(firstDoc, dict): + # Extract title from document (preferred over user-provided title) + if firstDoc.get("title"): + extractedTitle = firstDoc["title"] + # Extract filename from document + if firstDoc.get("filename"): + extractedFilename = 
firstDoc["filename"] + + # Ensure metadata contains the extracted title for renderers + if "metadata" not in generated_data: + generated_data["metadata"] = {} + if extractedTitle: + generated_data["metadata"]["title"] = extractedTitle + self.services.chat.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format") # Render to final format using the existing renderer try: from modules.services.serviceGeneration.mainServiceGeneration import GenerationService generationService = GenerationService(self.services) + # Pass extracted title to renderer (will use metadata.title if available) rendered_content, mime_type = await generationService.renderReport( - generated_data, outputFormat, title or "Generated Document", prompt, self + generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self ) + # Use extracted filename if available, otherwise generate from title or use generic + if extractedFilename: + documentName = extractedFilename + elif extractedTitle and extractedTitle != "Generated Document": + # Sanitize title for filename + sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle) + sanitized = re.sub(r"_+", "_", sanitized).strip("_") + if sanitized: + # Ensure correct extension + if not sanitized.lower().endswith(f".{outputFormat}"): + documentName = f"{sanitized}.{outputFormat}" + else: + documentName = sanitized + else: + documentName = f"generated.{outputFormat}" + else: + documentName = f"generated.{outputFormat}" + # Build result in the expected format result = { "success": True, "content": generated_data, "documents": [{ - "documentName": f"generated.{outputFormat}", + "documentName": documentName, "documentData": rendered_content, "mimeType": mime_type, - "title": title or "Generated Document" + "title": extractedTitle or "Generated Document" }], "is_multi_file": False, "format": outputFormat, - "title": title, + "title": extractedTitle or title, "split_strategy": "single", "total_documents": 1, 
"processed_documents": 1 diff --git a/modules/services/serviceChat/mainServiceChat.py b/modules/services/serviceChat/mainServiceChat.py index 495985b5..bad3b605 100644 --- a/modules/services/serviceChat/mainServiceChat.py +++ b/modules/services/serviceChat/mainServiceChat.py @@ -29,153 +29,232 @@ class ChatService: logger.error("getChatDocumentsFromDocumentList: No workflow available (self.services.workflow is not set)") return [] - workflow_id = workflow.id if hasattr(workflow, 'id') else 'NO_ID' - workflow_obj_id = id(workflow) + workflowId = workflow.id if hasattr(workflow, 'id') else 'NO_ID' + workflowObjId = id(workflow) logger.debug(f"getChatDocumentsFromDocumentList: input documentList = {documentList}") - logger.debug(f"getChatDocumentsFromDocumentList: using workflow.id = {workflow_id}, workflow object id = {workflow_obj_id}") + logger.debug(f"getChatDocumentsFromDocumentList: using workflow.id = {workflowId}, workflow object id = {workflowObjId}") + + # Root cause analysis: Verify workflow.messages integrity and detect workflow changes + self._verifyWorkflowMessagesIntegrity(workflow, workflowId) - # Debug: list available messages with their labels and document names + # Debug: list available messages with their labels and document names (filtered by workflowId) try: if workflow and hasattr(workflow, 'messages') and workflow.messages: - msg_lines = [] + msgLines = [] + messagesFromOtherWorkflows = [] for message in workflow.messages: + msgWorkflowId = getattr(message, 'workflowId', None) + # Only include messages that belong to this workflow + if msgWorkflowId and msgWorkflowId != workflowId: + messagesFromOtherWorkflows.append(f"id={getattr(message, 'id', None)}, label={getattr(message, 'documentsLabel', None)}, workflowId={msgWorkflowId}") + continue + # Also skip messages without workflowId (shouldn't happen, but be safe) + if not msgWorkflowId: + messagesFromOtherWorkflows.append(f"id={getattr(message, 'id', None)}, label={getattr(message, 
'documentsLabel', None)}, workflowId=Missing") + continue + label = getattr(message, 'documentsLabel', None) - doc_names = [] + docNames = [] if getattr(message, 'documents', None): for doc in message.documents: name = getattr(doc, 'fileName', None) or getattr(doc, 'documentName', None) or 'Unnamed' - doc_names.append(name) - msg_lines.append( - f"- id={getattr(message, 'id', None)}, label={label}, docs={doc_names}" + docNames.append(name) + msgLines.append( + f"- id={getattr(message, 'id', None)}, label={label}, workflowId={msgWorkflowId}, docs={docNames}" ) - if msg_lines: - logger.debug("getChatDocumentsFromDocumentList: available messages:\n" + "\n".join(msg_lines)) + if msgLines: + logger.debug("getChatDocumentsFromDocumentList: available messages (filtered for workflow):\n" + "\n".join(msgLines)) + if messagesFromOtherWorkflows: + logger.warning(f"getChatDocumentsFromDocumentList: Found {len(messagesFromOtherWorkflows)} messages from other workflows in workflow.messages list:\n" + "\n".join(messagesFromOtherWorkflows)) else: logger.debug("getChatDocumentsFromDocumentList: no messages available on current workflow") except Exception as e: logger.debug(f"getChatDocumentsFromDocumentList: unable to enumerate messages for debug: {e}") - all_documents = [] - for doc_ref in documentList: - if doc_ref.startswith("docItem:"): + allDocuments = [] + for docRef in documentList: + if docRef.startswith("docItem:"): # docItem:: - extract ID and find document - parts = doc_ref.split(':') + parts = docRef.split(':') if len(parts) >= 2: - doc_id = parts[1] + docId = parts[1] # Find the document by ID for message in workflow.messages: # Validate message belongs to this workflow - msg_workflow_id = getattr(message, 'workflowId', None) - if msg_workflow_id and msg_workflow_id != workflow_id: + msgWorkflowId = getattr(message, 'workflowId', None) + if not msgWorkflowId or msgWorkflowId != workflowId: continue if message.documents: for doc in message.documents: - if doc.id == 
doc_id: - doc_name = getattr(doc, 'fileName', 'unknown') - all_documents.append(doc) + if doc.id == docId: + docName = getattr(doc, 'fileName', 'unknown') + allDocuments.append(doc) break - elif doc_ref.startswith("docList:"): + elif docRef.startswith("docList:"): # docList::