From aa854f27b734baf9390459a61fb21e5181c3d879 Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Tue, 8 Jul 2025 15:35:03 +0200
Subject: [PATCH] doc ref fixed

---
 modules/workflow/managerChat.py      | 193 ++++++++++-----------------
 modules/workflow/serviceContainer.py |  31 +++--
 2 files changed, 90 insertions(+), 134 deletions(-)
diff --git a/modules/workflow/managerChat.py b/modules/workflow/managerChat.py
index a5e31b92..d8674429 100644
--- a/modules/workflow/managerChat.py
+++ b/modules/workflow/managerChat.py
@@ -433,8 +433,8 @@ class ChatManager:
                 
                 # Validate result label format
                 result_label = action.get('resultLabel', '')
-                if not result_label.startswith('docList:'):
-                    logger.error(f"Action {i} result label must start with 'docList:': {result_label}")
+                if not result_label.startswith('task'):
+                    logger.error(f"Action {i} result label must start with 'task': {result_label}")
                     return False
                 
                 # Validate parameters
@@ -467,10 +467,10 @@ class ChatManager:
                 "method": "document",
                 "action": "analyze",
                 "parameters": {
-                    "fileId": doc,
-                    "analysis": ["entities", "topics", "sentiment"]
+                    "documentList": ["task1_previous_results"],
+                    "aiPrompt": "Fallback document analysis for " + doc
                 },
-                "resultLabel": f"docList:fallback:{task_step.get('id', 'unknown')}:{i}:analysis",
+                "resultLabel": "task1_fallback:" + doc + ":analysis",
                 "description": f"Fallback document analysis for {doc}"
             })
         
@@ -520,8 +520,14 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
         previous_results = context['previous_results']
         improvements = context.get('improvements', '')
         
-        # Get available methods
+        # Get available methods and actions with signatures
         methodList = self.service.getMethodsList()
+        method_actions = {}
+        for sig in methodList:
+            if '.' in sig:
+                method, rest = sig.split('.', 1)
+                action = rest.split('(')[0]
+                method_actions.setdefault(method, []).append((action, sig))
         
         # Get workflow history
         messageSummary = await self.service.summarizeChat(workflow.messages)
@@ -529,127 +535,80 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
         # Get available documents and connections
         docRefs = self.service.getDocumentReferenceList()
         connRefs = self.service.getConnectionReferenceList()
+        all_doc_refs = docRefs.get('chat', []) + docRefs.get('history', [])
         
-        return f"""You are an action generation AI that creates specific actions to accomplish a task step.
+        # Build AVAILABLE METHODS section
+        available_methods_str = ''
+        for method, actions in method_actions.items():
+            available_methods_str += f"- {method}:\n"
+            for action, sig in actions:
+                available_methods_str += f"    - {action}: {sig}\n"
+        
+        return f"""
+You are an action generation AI that creates specific actions to accomplish a task step.
 
-TASK STEP: {task_step.get('description', 'Unknown')}
-TASK ID: {task_step.get('id', 'Unknown')}
+DOCUMENT REFERENCE TYPES:
+- docItem: Reference to a single document. Format: "docItem:<id>:<filename>"
+- docList: Reference to a group of documents under a label. Format: <label> (e.g., "task1_action2_results" or "docList:msg123:user_uploads").
+  - Each docList label maps to a list of docItem references (see AVAILABLE DOCUMENTS).
+  - A label like "task1_action2_results" refers to the output of action 2 in task 1.
 
-EXPECTED OUTPUTS:
-{', '.join(task_step.get('expected_outputs', []))}
-
-SUCCESS CRITERIA:
-{', '.join(task_step.get('success_criteria', []))}
+TASK STEP: {task_step.get('description', 'Unknown')} (ID: {task_step.get('id', 'Unknown')})
+EXPECTED OUTPUTS: {', '.join(task_step.get('expected_outputs', []))}
+SUCCESS CRITERIA: {', '.join(task_step.get('success_criteria', []))}
 
 CONTEXT - Chat History:
 {messageSummary}
 
-AVAILABLE METHODS
-{chr(10).join(f"- {method}" for method in methodList)}
+AVAILABLE METHODS AND ACTIONS (with signatures):
+{available_methods_str}
 
-AVAILABLE CONNECTIONS
+AVAILABLE CONNECTIONS:
 {chr(10).join(f"- {conn}" for conn in connRefs)}
 
-AVAILABLE DOCUMENTS
-{chr(10).join(f"- {doc.documentsLabel}: {', '.join(doc.documents)}" for doc in docRefs.get('chat', []))}
+AVAILABLE DOCUMENTS:
+{chr(10).join(f"- {doc.documentsLabel} contains {', '.join(doc.documents)}" for doc in all_doc_refs)}
+  (Use the label as a value in documentList to refer to the group)
 
-PREVIOUS RESULTS:
-{', '.join(previous_results) if previous_results else 'None'}
-
-IMPROVEMENTS NEEDED:
-{improvements if improvements else 'None'}
+PREVIOUS RESULTS: {', '.join(previous_results) if previous_results else 'None'}
+IMPROVEMENTS NEEDED: {improvements if improvements else 'None'}
 
 INSTRUCTIONS:
-1. Generate specific actions to accomplish this task step
-2. Use available documents, connections, and previous results
-3. Ensure proper result labels for handover
-4. Follow the exact JSON structure below
-5. ALL fields are REQUIRED: method, action, parameters, resultLabel, description
+- Generate actions to accomplish this task step using available documents, connections, and previous results.
+- Use docItem for single documents and docList labels for groups of documents as shown in AVAILABLE DOCUMENTS.
+- Always pass documentList as a LIST of references (docItem and/or docList).
+- For resultLabel, use the format: "task{{task_id}}_action{{action_number}}_{{short_label}}" where:
+    - {{task_id}} = the current task's id (e.g., 1)
+    - {{action_number}} = the sequence number of the action within the task (e.g., 2)
+    - {{short_label}} = a short, descriptive label for the output (e.g., "analysis_results")
+  Example: "task1_action2_analysis_results"
+- Follow the JSON structure below. All fields are required.
 
 REQUIRED JSON STRUCTURE:
 {{
     "actions": [
-        {{
-            "method": "method_name",
-            "action": "action_name",
+        {{
+            "method": "method_name",  // Use only the method name (e.g., "document")
+            "action": "action_name",  // Use only the action name (e.g., "extract")
             "parameters": {{
-                "param1": "value1",
-                "param2": "value2",
+                "documentList": ["docItem:doc_abc:file1.txt", "task1_action2_results"],
+                "aiPrompt": "Describe what to do"
             }},
-            "resultLabel": "docList:uuid:descriptiveLabel",
+            "resultLabel": "task1_action3_analysis_results",
             "description": "What this action does"
         }}
     ]
 }}
 
 FIELD REQUIREMENTS:
-- "method": Must be one of the available methods listed above
-- "action": Must be a valid action for that method
-- "parameters": Object with method-specific parameters
-- "resultLabel": MUST start with "docList:" followed by unique identifier and descriptive label
-- "description": Clear description of what the action accomplishes
+- "method": Must be from AVAILABLE METHODS
+- "action": Must be valid for the method
+- "parameters": Method-specific, must include documentList as a list if required by the signature
+- "resultLabel": Must follow the format above (e.g., "task1_action3_analysis_results")
+- "description": Clear summary of the action
 
-MANDATORY PARAMETER AND RETURN VALUE RULES:
-
-1. CONNECTION PARAMETERS:
-   - Parameter name: "connectionReference" (NOT "connection", "site", "connectionId", etc.)
-   - Value: Must be a connection reference from "Connections" section above
-   - Format: "connection:authority:user:connectionId"
-   - Example: "connection:msft:testuser@example.com:1234"
-
-2. DOCUMENT PARAMETERS:
-   - Parameter name: "documentList" (NOT "documentReference", "document", "fileId", "documents", etc.)
-   - Value: MUST be a LIST of document references from "Documents" section or previous results
-   - Format: Use the exact format shown in "Documents" section (e.g., ["docItem:id:filename"] or ["docList:actionId:label"])
-   - Document references represent a LIST of documents, not single documents
-   - All document inputs expect documentList as an ARRAY of strings
-   - IMPORTANT: Use the exact document reference format as shown in "Documents" section above
-
-3. RETURN VALUES:
-   - ALL actions must return documentList references in resultLabel
-   - Result labels must start with "docList:"
-   - Each action creates a unique documentList for handover
-   - Document lists can contain 0, 1, or multiple documents
-   - No actions return single documents - always documentLists
-
-4. PARAMETER VALIDATION:
-   - Use only document references from "Documents" section above
-   - Use only connection references from "Connections" section above  
-   - Use result labels from previous results in the sequence
-   - All parameter values must be strings (except documentList which must be an array)
-   - Document references show: label - list of references
-
-5. RESULT USAGE RULES:
-   - Previous results can be referenced as: "docList:uuid:label"
-   - Use result labels from previous actions in the sequence
-   - Example: If previous action created "docList:abc123:salesData", 
-     reference it as "docList:abc123:salesData" in parameters
-   - Results are available in the PREVIOUS RESULTS section above
-   - Each action should create a unique resultLabel for handover to next actions
-   - Result labels should be descriptive and indicate the content type
-
-6. DOCUMENT HANDLING RULES:
-   - ALWAYS pass documents as a LIST in documentList parameter
-   - Single documents: ["docItem:id:filename"]
-   - Multiple documents: ["docItem:id1:file1", "docItem:id2:file2"]
-   - Document lists: ["docList:actionId:label"]
-   - Mixed references: ["docItem:id:file", "docList:actionId:label"]
-
-EXAMPLE VALID ACTIONS:
-
-1. SharePoint Search:
-{{
-    "method": "sharepoint",
-    "action": "search",
-    "parameters": {{
-        "connectionReference": "connection:msft:testuser@example.com:1234",
-        "query": "sales quarterly report"
-    }},
-    "resultLabel": "docList:abc123:salesDocuments",
-    "description": "Search SharePoint for sales documents"
-}}
-
-2. Document Analysis using single document:
+EXAMPLES:
+1. Analyze a single document:
 {{
     "method": "document",
     "action": "analyze",
@@ -657,36 +616,24 @@ EXAMPLE VALID ACTIONS:
         "documentList": ["docItem:doc_57520394-6b6d-41c2-b641-bab3fc6d7f4b:candidate_1_profile.txt"],
         "aiPrompt": "Analyze the candidate profile for key insights"
     }},
-    "resultLabel": "docList:ghi789:candidateAnalysis",
+    "resultLabel": "task1_action2_candidate_analysis",
     "description": "Analyze candidate profile for insights"
 }}
 
-3. Document Analysis using multiple documents:
+2. Analyze a group of documents (docList):
 {{
     "method": "document",
     "action": "analyze",
     "parameters": {{
-        "documentList": ["docItem:doc_123:profile.txt", "docItem:doc_456:resume.pdf"],
-        "aiPrompt": "Compare the profile and resume for consistency"
+        "documentList": ["task1_action1_extract_results"],
+        "aiPrompt": "Analyze all extracted results"
     }},
-    "resultLabel": "docList:jkl012:comparisonAnalysis",
-    "description": "Compare multiple documents for consistency"
+    "resultLabel": "task1_action2_analysis_results",
+    "description": "Analyze all extracted results"
 }}
 
-4. Document Extraction using document list:
-{{
-    "method": "document",
-    "action": "extract",
-    "parameters": {{
-        "documentList": ["docList:abc123:salesData"],
-        "aiPrompt": "Extract key information from all sales documents",
-        "format": "json"
-    }},
-    "resultLabel": "docList:mno345:extractedData",
-    "description": "Extract key information from document list"
-}}
-
-NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
+NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.
+"""
 
 
     def _createResultReviewPrompt(self, review_context: Dict[str, Any]) -> str:
@@ -724,7 +671,7 @@ TASK STEP: {task_step.get('description', 'Unknown')}
 EXPECTED OUTPUTS: {', '.join(task_step.get('expected_outputs', []))}
 SUCCESS CRITERIA: {', '.join(task_step.get('success_criteria', []))}
 
-STEP RESULT: {json.dumps(step_result_serializable, indent=2)}
+STEP RESULT: {json.dumps(step_result_serializable, indent=2, ensure_ascii=False)}
 
 INSTRUCTIONS:
 1. Evaluate if the task step was completed successfully
@@ -839,7 +786,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
                 "actionId": action.id,
                 "actionMethod": action.execMethod,
                 "actionName": action.execAction,
-                "documentsLabel": result_label,  # Always use execResultLabel
+                "documentsLabel": result_label,  # Use intent label from action definition
                 "documents": []
             }
             
diff --git a/modules/workflow/serviceContainer.py b/modules/workflow/serviceContainer.py
index 53261b7e..e95888e0 100644
--- a/modules/workflow/serviceContainer.py
+++ b/modules/workflow/serviceContainer.py
@@ -230,8 +230,15 @@ class ServiceContainer:
         # Otherwise construct the reference using the message ID and documents label
         return f"docList:{message.id}:{message.documentsLabel}"
     
+    def resolveDocumentReference(self, intent_label: str) -> str:
+        """Resolve an intent label (e.g., 'task1_action2_results') to a 'docList:<message.id>:<label>' reference; returns None when no message with documents carries that label."""
+        for message in self.workflow.messages:
+            if message.documentsLabel == intent_label and message.documents:
+                return f"docList:{message.id}:{intent_label}"
+        return None
+
     def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
-        """Get ChatDocuments from a list of document references"""
+        """Get ChatDocuments from a list of document references (intent or resolved)."""
         try:
             all_documents = []
             for doc_ref in documentList:
@@ -239,14 +246,12 @@ class ServiceContainer:
                 parts = doc_ref.split(':', 2)  # Split into max 3 parts
                 if len(parts) < 3:
                     continue
-                    
                 ref_type = parts[0]
                 ref_id = parts[1]
-                ref_label = parts[2]  # Keep the full label
-                
+                ref_label = parts[2]
+
                 if ref_type == "docItem":
                     # Handle ChatDocument reference: docItem:<id>:<filename>
-                    # Find document in workflow messages
                     for message in self.workflow.messages:
                         if message.documents:
                             for doc in message.documents:
@@ -255,17 +260,21 @@ class ServiceContainer:
                                     break
                             if any(doc.id == ref_id for doc in message.documents):
                                 break
-                                
                 elif ref_type == "docList":
-                    # Handle document list reference: docList:<message.id>:<label>
-                    # Find message by ID
+                    # Look up the message by its documents label (ref_label); ref_id is not compared in this branch
+                    found = False
                     for message in self.workflow.messages:
-                        if str(message.id) == ref_id and message.documents:
+                        if message.documentsLabel == ref_label and message.documents:
                             all_documents.extend(message.documents)
+                            found = True
                             break
-                                
+                    if not found:
+                        # Try to resolve intent label to message ID
+                        resolved_ref = self.resolveDocumentReference(ref_label)
+                        if resolved_ref:
+                            # Recursively resolve the resolved reference
+                            all_documents.extend(self.getChatDocumentsFromDocumentList([resolved_ref]))
             return all_documents
-            
         except Exception as e:
             logger.error(f"Error getting documents from document list: {str(e)}")
             return []