From f83786b3a77f21ff5cf2c28ff53a91e148ae3523 Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Tue, 14 Oct 2025 14:47:50 +0200
Subject: [PATCH] e2e test workflow path react

---
 app.py                                        |  5 +++
 modules/services/serviceAi/subCoreAi.py       |  2 +-
 .../serviceAi/subDocumentProcessing.py        | 42 +++++++++++++++++++
 .../serviceWorkflow/mainServiceWorkflow.py    | 42 ++++++-------------
 ...ocument.py => _EXCLUDED_methodDocument.py} |  3 --
 modules/workflows/methods/methodAi.py         |  8 ++--
 .../processing/shared/placeholderFactory.py   |  3 +-
 .../shared/promptGenerationActionsReact.py    |  9 +++-
 modules/workflows/workflowManager.py          |  2 -
 9 files changed, 74 insertions(+), 42 deletions(-)
 rename modules/workflows/methods/{methodDocument.py => _EXCLUDED_methodDocument.py} (99%)

diff --git a/app.py b/app.py
index 30def90e..4f9ddc52 100644
--- a/app.py
+++ b/app.py
@@ -131,6 +131,11 @@ def initLogging():
                 import unicodedata
                 # Remove emoji characters specifically
                 record.msg = ''.join(char for char in record.msg if unicodedata.category(char) != 'So' or not (0x1F600 <= ord(char) <= 0x1F64F or 0x1F300 <= ord(char) <= 0x1F5FF or 0x1F680 <= ord(char) <= 0x1F6FF or 0x1F1E0 <= ord(char) <= 0x1F1FF or 0x2600 <= ord(char) <= 0x26FF or 0x2700 <= ord(char) <= 0x27BF))
+                # Additionally strip characters not representable in Windows cp1252 (e.g., arrows)
+                try:
+                    record.msg.encode('cp1252', errors='strict')
+                except UnicodeEncodeError:
+                    record.msg = record.msg.encode('cp1252', errors='ignore').decode('cp1252', errors='ignore')
             return True
 
     # Configure handlers based on config
diff --git a/modules/services/serviceAi/subCoreAi.py b/modules/services/serviceAi/subCoreAi.py
index 4cd13f8a..52163432 100644
--- a/modules/services/serviceAi/subCoreAi.py
+++ b/modules/services/serviceAi/subCoreAi.py
@@ -128,7 +128,7 @@ class SubCoreAi:
             else:
                 full_prompt = prompt
             
-            if documentProcessor:
+            if documentProcessor and documents:
                 result = await documentProcessor.callAiText(full_prompt, documents, options)
             else:
                 # Fallback to direct AI call if no document processor available
diff --git a/modules/services/serviceAi/subDocumentProcessing.py b/modules/services/serviceAi/subDocumentProcessing.py
index e9e087d2..f86be535 100644
--- a/modules/services/serviceAi/subDocumentProcessing.py
+++ b/modules/services/serviceAi/subDocumentProcessing.py
@@ -104,6 +104,20 @@ class SubDocumentProcessing:
             # FIXED: Merge with preserved chunk relationships
             mergedContent = self._mergeChunkResults(chunkResults, options)
             
+            # Save merged extraction content to debug file - only if debug enabled
+            try:
+                debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
+                if debug_enabled:
+                    import os
+                    from datetime import datetime, UTC
+                    ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
+                    debug_root = "./test-chat/ai"
+                    os.makedirs(debug_root, exist_ok=True)
+                    with open(os.path.join(debug_root, f"{ts}_extraction_merged.txt"), "w", encoding="utf-8") as f:
+                        f.write(mergedContent or "")
+            except Exception:
+                pass
+            
             return mergedContent
             
         except Exception as e:
@@ -162,6 +176,21 @@ class SubDocumentProcessing:
             # Merge with JSON mode
             mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options)
             
+            # Save merged JSON extraction content to debug file - only if debug enabled
+            try:
+                debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
+                if debug_enabled:
+                    import os
+                    import json as _json
+                    from datetime import datetime, UTC
+                    ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
+                    debug_root = "./test-chat/ai"
+                    os.makedirs(debug_root, exist_ok=True)
+                    with open(os.path.join(debug_root, f"{ts}_extraction_merged.json"), "w", encoding="utf-8") as f:
+                        f.write(_json.dumps(mergedJsonDocument, ensure_ascii=False, indent=2))
+            except Exception:
+                pass
+            
             return mergedJsonDocument
             
         except Exception as e:
@@ -356,6 +385,19 @@ class SubDocumentProcessing:
                             )
                             
                             self.services.utils.debugLogToFile(f"Image analysis result for chunk {chunk_index}: length={len(ai_result) if ai_result else 0}, preview={ai_result[:200] if ai_result else 'None'}...", "AI_SERVICE")
+                            # Save image extraction response to debug file - only if debug enabled
+                            debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
+                            if debug_enabled:
+                                try:
+                                    import os
+                                    from datetime import datetime, UTC
+                                    ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
+                                    debug_root = "./test-chat/ai"
+                                    os.makedirs(debug_root, exist_ok=True)
+                                    with open(os.path.join(debug_root, f"{ts}_extraction_image_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f:
+                                        f.write(f"EXTRACTION IMAGE RESPONSE:\n{ai_result if ai_result else 'No response'}\n")
+                                except Exception:
+                                    pass
                             
                             # Check if result is empty or None
                             if not ai_result or not ai_result.strip():
diff --git a/modules/services/serviceWorkflow/mainServiceWorkflow.py b/modules/services/serviceWorkflow/mainServiceWorkflow.py
index 1bb5607c..628eb1d2 100644
--- a/modules/services/serviceWorkflow/mainServiceWorkflow.py
+++ b/modules/services/serviceWorkflow/mainServiceWorkflow.py
@@ -613,12 +613,12 @@ class WorkflowService:
             # Get document reference list using the exact same logic as old system
             document_list = self._getDocumentReferenceList(workflow)
             
-            # Build technical context string for AI action planning (exact copy of old system)
-            context = "AVAILABLE DOCUMENTS:\n\n"
+            # Build index string for AI action planning
+            context = ""
             
-            # Process chat exchanges (current round) - exact copy of old system
+            # Process current round exchanges first
             if document_list["chat"]:
-                context += "CURRENT ROUND DOCUMENTS:\n"
+                context += "\nCurrent round documents:\n"
                 for exchange in document_list["chat"]:
                     # Generate docList reference for the exchange (using message ID and label)
                     # Find the message that corresponds to this exchange
@@ -644,9 +644,9 @@ class WorkflowService:
                             context += f"  - docItem:{doc_ref}\n"
                 context += "\n"
             
-            # Process history exchanges (previous rounds) - exact copy of old system
+            # Process previous rounds after
             if document_list["history"]:
-                context += "WORKFLOW HISTORY DOCUMENTS:\n"
+                context += "\nPast rounds documents:\n"
                 for exchange in document_list["history"]:
                     # Generate docList reference for the exchange (using message ID and label)
                     # Find the message that corresponds to this exchange
@@ -673,7 +673,7 @@ class WorkflowService:
                 context += "\n"
             
             if not document_list["chat"] and not document_list["history"]:
-                context += "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n"
+                context += "\nNO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n"
             
             return context
             
@@ -701,39 +701,23 @@ class WorkflowService:
         for message in reversed(workflow.messages):
             is_first = message.status == "first" if hasattr(message, 'status') else False
             
-            # Build a DocumentExchange if message has documents
+            # Build a DocumentExchange if message has documents and an explicit documentsLabel
             doc_exchange = None
             if message.documents:
-                if message.actionId and message.documentsLabel:
-                    # Validate that we use the same label as in the message
+                existing_label = getattr(message, 'documentsLabel', None)
+                if existing_label:
+                    # Validate and use the message's actual documentsLabel
                     validated_label = self._validateDocumentLabelConsistency(message)
-                    
-                    # Use the message's actual documentsLabel
                     doc_refs = []
                     for doc in message.documents:
                         doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
                         doc_refs.append(doc_ref)
-                    
                     doc_exchange = {
                         'documentsLabel': validated_label,
                         'documents': doc_refs
                     }
-                else:
-                    # Generate new labels for documents without explicit labels
-                    doc_refs = []
-                    for doc in message.documents:
-                        doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
-                        doc_refs.append(doc_ref)
-                    
-                    if doc_refs:
-                        # Create a label based on message context
-                        context_prefix = self._generateWorkflowContextPrefix(message)
-                        context_label = f"{context_prefix}_context"
-                        
-                        doc_exchange = {
-                            'documentsLabel': context_label,
-                            'documents': doc_refs
-                        }
+                # IMPORTANT: Never synthesize new labels here. If a message lacks
+                # a documentsLabel, we skip adding an exchange for it.
             
             # Append to appropriate container based on boundary
             if doc_exchange:
diff --git a/modules/workflows/methods/methodDocument.py b/modules/workflows/methods/_EXCLUDED_methodDocument.py
similarity index 99%
rename from modules/workflows/methods/methodDocument.py
rename to modules/workflows/methods/_EXCLUDED_methodDocument.py
index 89aff4b4..d31631ce 100644
--- a/modules/workflows/methods/methodDocument.py
+++ b/modules/workflows/methods/_EXCLUDED_methodDocument.py
@@ -153,9 +153,6 @@ class MethodDocument(MethodBase):
                 error=str(e)
             )
 
-
-
-
     @action
     async def generate(self, parameters: Dict[str, Any]) -> ActionResult:
         """
diff --git a/modules/workflows/methods/methodAi.py b/modules/workflows/methods/methodAi.py
index 2b4cb677..eba17bcb 100644
--- a/modules/workflows/methods/methodAi.py
+++ b/modules/workflows/methods/methodAi.py
@@ -31,14 +31,14 @@ class MethodAi(MethodBase):
     async def process(self, parameters: Dict[str, Any]) -> ActionResult:
         """
         GENERAL:
-        - Purpose: AI-based analysis and content generation with optional document context.
-        - Input requirements: aiPrompt (required); optional documentList, resultType, processingMode, includeMetadata, operationType, priority, maxCost, maxProcessingTime, requiredTags.
-        - Output format: Single or multiple documents in requested format.
+        - Purpose: Process a user prompt, optionally with any number of input documents, to produce one or more output documents of the SAME format.
+        - Input requirements: aiPrompt (required); optional documentList.
+        - Output format: Exactly one file format per call. To produce multiple output file formats, make separate calls.
 
         Parameters:
         - aiPrompt (str, required): Instruction for the AI.
         - documentList (list, optional): Document reference(s) for context.
-        - resultType (str, optional): Output extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png). Default: txt.
+        - resultType (str, optional): Output file extension - only one extension allowed (e.g. txt, json, md, csv, xml, html, pdf, docx, xlsx, png, ...). Default: txt.
         - processingMode (str, optional): basic | advanced | detailed. Default: basic.
         - includeMetadata (bool, optional): Include metadata when available. Default: True.
         - operationType (str, optional): general | generate_plan | analyse_content | generate_content | web_research | image_analysis | image_generation. Default: general.
diff --git a/modules/workflows/processing/shared/placeholderFactory.py b/modules/workflows/processing/shared/placeholderFactory.py
index 90aa8ba8..225edbe7 100644
--- a/modules/workflows/processing/shared/placeholderFactory.py
+++ b/modules/workflows/processing/shared/placeholderFactory.py
@@ -373,7 +373,8 @@ def extractAvailableDocumentsSummary(service: Any, context: Any) -> str:
     try:
         documents = service.workflow.getAvailableDocuments(context.workflow)
         if documents and documents != "No documents available":
-            doc_count = documents.count("docList:") + documents.count("docItem:")
+            # Count only actual documents, not list labels
+            doc_count = documents.count("docItem:")
             return f"{doc_count} documents available from previous tasks"
         return "No documents available"
     except Exception as e:
diff --git a/modules/workflows/processing/shared/promptGenerationActionsReact.py b/modules/workflows/processing/shared/promptGenerationActionsReact.py
index fc5ca265..10b7f9f6 100644
--- a/modules/workflows/processing/shared/promptGenerationActionsReact.py
+++ b/modules/workflows/processing/shared/promptGenerationActionsReact.py
@@ -32,7 +32,7 @@ def generateReactPlanSelectionPrompt(services, context: Any) -> PromptBundle:
         PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False),
     ]
 
-    template = """Select exactly one action to advance the task.
+    template = """Select exactly one next action to advance the task incrementally.
 
 OBJECTIVE:
 {{KEY:USER_PROMPT}}
@@ -52,7 +52,11 @@ AVAILABLE_DOCUMENTS_INDEX:
 AVAILABLE_CONNECTIONS_INDEX:
 {{KEY:AVAILABLE_CONNECTIONS_INDEX}}
 
-REPLY: Return ONLY a JSON object with the following structure (no comments, no extra text):
+REPLY: Return ONLY a JSON object with the following structure (no comments, no extra text). The chosen action MUST:
+- be the next logical incremental step toward fulfilling the objective
+- not attempt to complete the entire objective in one step
+- if producing files, target exactly one output format for this step
+- reference ONLY existing document IDs/labels from AVAILABLE_DOCUMENTS_INDEX
 {{
     "action": "method.action_name",
     "actionObjective": "...",
@@ -76,6 +80,7 @@ RULES:
    - DO NOT create new references
    - Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX
 6. For requiredConnection, use ONLY an exact label from AVAILABLE_CONNECTIONS_INDEX
+7. Plan incrementally: if the overall intent needs multiple output formats (e.g., CSV and HTML), choose one format in this step and leave the other(s) for subsequent steps
 """
 
     return PromptBundle(prompt=template, placeholders=placeholders)
diff --git a/modules/workflows/workflowManager.py b/modules/workflows/workflowManager.py
index 88ef4023..5a3098b1 100644
--- a/modules/workflows/workflowManager.py
+++ b/modules/workflows/workflowManager.py
@@ -329,8 +329,6 @@ class WorkflowManager:
                             if not message.documents:
                                 message.documents = []
                             message.documents.extend(created_docs)
-                            # Ensure label is user_context for discoverability
-                            message.documentsLabel = context_label
                             self.services.workflow.updateMessage(message.id, {
                                 "documents": [d.to_dict() for d in message.documents],
                                 "documentsLabel": context_label