From 559a216001d04a021127477806cd6b278ea82880 Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Tue, 15 Jul 2025 23:18:04 +0200
Subject: [PATCH] chat workflow end to end testing

---
 modules/chat/managerChat.py                | 41 +++++++++-
 modules/chat/serviceCenter.py              | 27 +++++++
 modules/interfaces/interfaceChatObjects.py |  1 +
 modules/methods/methodDocument.py          | 94 ++++++++++++++++++++++
 4 files changed, 160 insertions(+), 3 deletions(-)
diff --git a/modules/chat/managerChat.py b/modules/chat/managerChat.py
index 32b48eb4..ce450506 100644
--- a/modules/chat/managerChat.py
+++ b/modules/chat/managerChat.py
@@ -1151,10 +1151,16 @@ You are an action generation AI that creates specific actions to accomplish a ta
 
 DOCUMENT REFERENCE TYPES:
 - docItem: Reference to a single document. Format: "docItem:<id>:<filename>"
-- docList: Reference to a group of documents under a label. Format: <label> (e.g., "task1_action2_results" or "docList:msg123:user_uploads").
+- docList: Reference to a group of documents under a label. Format: <label> (e.g., "task1_action2_results").
   - Each docList label maps to a list of docItem references (see AVAILABLE DOCUMENTS).
   - A label like "task1_action2_results" refers to the output of action 2 in task 1.
 
+CRITICAL DOCUMENT REFERENCE RULES:
+- ONLY use the exact labels listed in AVAILABLE DOCUMENTS below
+- NEVER invent new labels or use message IDs
+- NEVER use formats like "msg_xxx:documents" or "task_X_results" (these will fail)
+- ONLY use the exact labels shown in AVAILABLE DOCUMENTS
+
 TASK STEP: {task_step.description} (ID: {task_step.id})
 EXPECTED OUTPUTS: {', '.join(task_step.expected_outputs or [])}
 SUCCESS CRITERIA: {', '.join(task_step.success_criteria or [])}
@@ -1172,7 +1178,17 @@ AVAILABLE CONNECTIONS:
 AVAILABLE DOCUMENTS:
 {chr(10).join(f"- {doc.documentsLabel} contains {', '.join(doc.documents)}" for doc in all_doc_refs)}
 
-(Use the label as a value in documentList to refer to the group)
+DOCUMENT REFERENCE EXAMPLES:
+✅ CORRECT: Use exact labels from AVAILABLE DOCUMENTS above
+- "task2_action1_personnel_search"
+- "task2_action3_personnel_analysis"
+- "docItem:doc_abc:file1.txt"
+- "docList:msg123:user_uploads" (supported format, but use actual labels instead)
+
+❌ INCORRECT: These will cause errors
+- "msg_xxx:documents" (invalid format - missing docList/docItem prefix)
+- "task_2_results" (not a valid label - use exact labels from AVAILABLE DOCUMENTS)
+- Inventing message IDs instead of using actual document labels
 
 PREVIOUS RESULTS: {', '.join(previous_results) if previous_results else 'None'}
 IMPROVEMENTS NEEDED: {improvements if improvements else 'None'}{retry_context}
@@ -1632,6 +1648,12 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
             if message:
                 workflow.messages.append(message)
                 logger.info(f"Created action message for {action.execMethod}.{action.execAction} with {len(message_data.get('documents', []))} documents")
+                # ADDED LOGGING: log the workflow object's identity (Python id()), message count, and each message's label and document count
+                logger.debug(f"WORKFLOW STATE after _createActionMessage: id={id(workflow)}, message_count={len(workflow.messages)}")
+                for idx, msg in enumerate(workflow.messages):
+                    label = getattr(msg, 'documentsLabel', None)
+                    docs = getattr(msg, 'documents', None)
+                    logger.debug(f"  Message {idx}: label='{label}', documents_count={len(docs) if docs else 0}")
             else:
                 logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}")
                 
@@ -2573,7 +2595,17 @@ Please review the task requirements and try again with different input or approa
         
         # Summarize successful actions
         action_summary = []
+        document_delivery_summary = []
         for action in successful_actions:
+            doc_names = []
+            for doc in (action.documents or []):
+                if hasattr(doc, 'filename'):
+                    doc_names.append(doc.filename)
+                elif isinstance(doc, dict):
+                    doc_names.append(doc.get('filename', doc.get('documentName', 'unknown')))
+                else:
+                    doc_names.append(str(doc))
+            document_delivery_summary.append(f"- {action.actionMethod}.{action.actionName} (label: {action.data.get('resultLabel', '')}): {len(doc_names)} document(s): {', '.join(doc_names) if doc_names else 'None'}")
             action_summary.append({
                 'method': action.actionMethod or '',
                 'action': action.actionName or '',
@@ -2581,9 +2613,12 @@ Please review the task requirements and try again with different input or approa
                 'documents_count': len(action.documents or []),
                 'has_text_result': bool(action.data.get('result', '').strip())
             })
-        
+        delivery_summary_str = '\n'.join(document_delivery_summary) if document_delivery_summary else 'No documents delivered by any action.'
         return f"""You are an action completion validator that evaluates if individual actions were successfully completed.
 
+DOCUMENT DELIVERY SUMMARY:
+{delivery_summary_str}
+
 ACTION DETAILS:
 {json.dumps(action_summary, indent=2)}
 
diff --git a/modules/chat/serviceCenter.py b/modules/chat/serviceCenter.py
index ebbac586..9b3a70cc 100644
--- a/modules/chat/serviceCenter.py
+++ b/modules/chat/serviceCenter.py
@@ -384,6 +384,33 @@ class ServiceCenter:
     def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
         """Get ChatDocuments from a list of document references (intent or resolved)."""
         try:
+            # ADDED LOGGING: log the workflow object's identity (Python id()), message count, and each message's label and document count
+            import logging
+            logger = logging.getLogger(__name__)
+            logger.debug(f"WORKFLOW STATE at getChatDocumentsFromDocumentList: id={id(self.workflow)}, message_count={len(self.workflow.messages) if hasattr(self.workflow, 'messages') else 'N/A'}")
+            for idx, message in enumerate(getattr(self.workflow, 'messages', [])):
+                label = getattr(message, 'documentsLabel', None)
+                docs = getattr(message, 'documents', None)
+                logger.debug(f"  Message {idx}: label='{label}', documents_count={len(docs) if docs else 0}")
+            # DEBUG LOGGING: Print all document labels and their documents before extraction
+            import logging
+            logger = logging.getLogger(__name__)
+            logger.info("==== DEBUG: Listing all workflow message document labels and contained documents ====")
+            for message in self.workflow.messages:
+                label = getattr(message, 'documentsLabel', None)
+                docs = getattr(message, 'documents', None)
+                if label is not None:
+                    doc_names = []
+                    if docs:
+                        for doc in docs:
+                            if hasattr(doc, 'filename'):
+                                doc_names.append(doc.filename)
+                            elif isinstance(doc, dict) and 'filename' in doc:
+                                doc_names.append(doc['filename'])
+                            else:
+                                doc_names.append(str(doc))
+                    logger.info(f"Message label: '{label}' | Documents: {doc_names if doc_names else 'None'}")
+            logger.info("==== END DEBUG LIST ====")
             all_documents = []
             for doc_ref in documentList:
                 # Parse reference format
diff --git a/modules/interfaces/interfaceChatObjects.py b/modules/interfaces/interfaceChatObjects.py
index 689b5b6c..7f9f78fc 100644
--- a/modules/interfaces/interfaceChatObjects.py
+++ b/modules/interfaces/interfaceChatObjects.py
@@ -335,6 +335,7 @@ class ChatObjects:
                 parentMessageId=createdMessage.get("parentMessageId"),
                 agentName=createdMessage.get("agentName"),
                 documents=[ChatDocument(**doc) for doc in createdMessage.get("documents", [])],
+                documentsLabel=createdMessage.get("documentsLabel"),  # <-- FIX: ensure label is set
                 message=createdMessage.get("message"),
                 role=createdMessage.get("role", "assistant"),
                 status=createdMessage.get("status", "step"),
diff --git a/modules/methods/methodDocument.py b/modules/methods/methodDocument.py
index fd2a67e7..585d952c 100644
--- a/modules/methods/methodDocument.py
+++ b/modules/methods/methodDocument.py
@@ -185,3 +185,97 @@ class MethodDocument(MethodBase):
                 data={},
                 error=str(e)
             )
+
+    @action
+    async def generateReport(self, parameters: Dict[str, Any]) -> ActionResult:
+        """
+        Generate a basic HTML report from the documents behind a document reference.
+
+        Parameters:
+            documentList (str | List[str]): Document reference(s) to build the report from.
+                A single reference string is wrapped in a list before resolution.
+            title (str, optional): Title for the report (default: "Summary Report")
+            includeMetadata (bool, optional): Whether to include metadata (default: True)
+
+        Returns:
+            ActionResult: on success, data["documents"] holds one "text/html" entry whose
+            documentData carries documentCount, content (the HTML), title and timestamp;
+            on failure, success=False with an error message and empty data.
+        """
+        try:
+            documentList = parameters.get("documentList")
+            title = parameters.get("title", "Summary Report")
+            includeMetadata = parameters.get("includeMetadata", True)
+
+            if not documentList:
+                return self._createResult(success=False, data={}, error="Document list reference is required")
+
+            # The resolver iterates its argument; a bare string would be walked character
+            # by character, so a single reference is wrapped into a one-element list.
+            if isinstance(documentList, str):
+                documentList = [documentList]
+
+            chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)
+            if not chatDocuments:
+                return self._createResult(success=False, data={}, error="No documents found for the provided reference")
+
+            html_content = self._generateHtmlReport(chatDocuments, title, includeMetadata)
+
+            # Take one clock reading so the filename and the stored timestamp cannot disagree
+            # (two separate now() calls could straddle a second boundary).
+            now = datetime.now(UTC)
+            output_filename = f"report_{now.strftime('%Y%m%d_%H%M%S')}.html"
+
+            result_data = {
+                "documentCount": len(chatDocuments),
+                "content": html_content,
+                "title": title,
+                "timestamp": now.isoformat()
+            }
+
+            logger.info(f"Generated HTML report: {output_filename} with {len(html_content)} characters")
+
+            return self._createResult(
+                success=True,
+                data={
+                    "documents": [{
+                        "documentName": output_filename,
+                        "documentData": result_data,
+                        "mimeType": "text/html"
+                    }]
+                }
+            )
+        except Exception as e:
+            # Action boundary: failures are reported through the result, not raised.
+            logger.error(f"Error generating report: {str(e)}")
+            return self._createResult(success=False, data={}, error=str(e))
+
+    def _generateHtmlReport(self, chatDocuments: List[Any], title: str, includeMetadata: bool) -> str:
+        """
+        Build a simple HTML report (one section per document) and return it as a string.
+
+        Every interpolated value (title, ids, filenames, content) is HTML-escaped so that
+        characters such as '<' or '&' cannot break the page or inject markup.
+        """
+        from html import escape  # function-scope import; the local list must not be named `html`
+
+        safe_title = escape(str(title))
+        parts = [f"<html><head><meta charset='utf-8'><title>{safe_title}</title></head><body>",
+                 f"<h1>{safe_title}</h1>",
+                 f"<p><b>Generated:</b> {datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')}</p>",
+                 f"<p><b>Total Documents:</b> {len(chatDocuments)}</p>"]
+        for i, doc in enumerate(chatDocuments, 1):
+            parts.append(f"<h2>Document {i}: {escape(str(doc.filename))}</h2>")
+            if includeMetadata:
+                meta = [("ID", doc.id), ("File ID", doc.fileId), ("Filename", doc.filename)]
+                if hasattr(doc, 'createdAt'):  # createdAt is optional on the document object
+                    meta.append(("Created", doc.createdAt))
+                parts.append("<ul>")
+                parts.extend(f"<li><b>{k}:</b> {escape(str(v))}</li>" for k, v in meta)
+                parts.append("</ul>")
+            content = getattr(doc, 'content', None)
+            if content:
+                parts.append(f"<div style='white-space:pre-wrap; border:1px solid #ccc; padding:0.5em; margin-bottom:1em; background-color:#f9f9f9;'>{escape(str(content))}</div>")
+            else:
+                parts.append("<p><em>No content available</em></p>")
+        return '\n'.join(parts + ["</body></html>"])