doc ref fixed

2025-07-08 15:35:03 +02:00 · 2025-07-08 15:35:03 +02:00 · aa854f27b7
commit aa854f27b7
parent 171e18b0d7
2 changed files with 90 additions and 134 deletions
--- a/modules/workflow/managerChat.py
+++ b/modules/workflow/managerChat.py
@ -433,8 +433,8 @@ class ChatManager:
                
                # Validate result label format
                result_label = action.get('resultLabel', '')
-                if not result_label.startswith('docList:'):
-                    logger.error(f"Action {i} result label must start with 'docList:': {result_label}")
+                if not result_label.startswith('task'):
+                    logger.error(f"Action {i} result label must start with 'task': {result_label}")
                    return False
                
                # Validate parameters
@ -467,10 +467,10 @@ class ChatManager:
                "method": "document",
                "action": "analyze",
                "parameters": {
-                    "fileId": doc,
-                    "analysis": ["entities", "topics", "sentiment"]
+                    "documentList": ["task1_previous_results"],
+                    "aiPrompt": "Fallback document analysis for " + doc
                },
-                "resultLabel": f"docList:fallback:{task_step.get('id', 'unknown')}:{i}:analysis",
+                "resultLabel": "task1_fallback:" + doc + ":analysis",
                "description": f"Fallback document analysis for {doc}"
            })
        
@ -520,8 +520,14 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
        previous_results = context['previous_results']
        improvements = context.get('improvements', '')
        
-        # Get available methods
+        # Get available methods and actions with signatures
        methodList = self.service.getMethodsList()
+        method_actions = {}
+        for sig in methodList:
+            if '.' in sig:
+                method, rest = sig.split('.', 1)
+                action = rest.split('(')[0]
+                method_actions.setdefault(method, []).append((action, sig))
        
        # Get workflow history
        messageSummary = await self.service.summarizeChat(workflow.messages)
@ -529,127 +535,80 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
        # Get available documents and connections
        docRefs = self.service.getDocumentReferenceList()
        connRefs = self.service.getConnectionReferenceList()
+        all_doc_refs = docRefs.get('chat', []) + docRefs.get('history', [])
        
-        return f"""You are an action generation AI that creates specific actions to accomplish a task step.
+        # Build AVAILABLE METHODS section
+        available_methods_str = ''
+        for method, actions in method_actions.items():
+            available_methods_str += f"- {method}:\n"
+            for action, sig in actions:
+                available_methods_str += f"    - {action}: {sig}\n"
+        
+        return f"""
+You are an action generation AI that creates specific actions to accomplish a task step.

-TASK STEP: {task_step.get('description', 'Unknown')}
-TASK ID: {task_step.get('id', 'Unknown')}
+DOCUMENT REFERENCE TYPES:
+- docItem: Reference to a single document. Format: "docItem:<id>:<filename>"
+- docList: Reference to a group of documents under a label. Format: <label> (e.g., "task1_action2_results" or "docList:msg123:user_uploads").
+  - Each docList label maps to a list of docItem references (see AVAILABLE DOCUMENTS).
+  - A label like "task1_action2_results" refers to the output of action 2 in task 1.

-EXPECTED OUTPUTS:
-{', '.join(task_step.get('expected_outputs', []))}
-
-SUCCESS CRITERIA:
-{', '.join(task_step.get('success_criteria', []))}
+TASK STEP: {task_step.get('description', 'Unknown')} (ID: {task_step.get('id', 'Unknown')})
+EXPECTED OUTPUTS: {', '.join(task_step.get('expected_outputs', []))}
+SUCCESS CRITERIA: {', '.join(task_step.get('success_criteria', []))}

 CONTEXT - Chat History:
 {messageSummary}

-AVAILABLE METHODS
-{chr(10).join(f"- {method}" for method in methodList)}
+AVAILABLE METHODS AND ACTIONS (with signatures):
+{available_methods_str}

-AVAILABLE CONNECTIONS
+AVAILABLE CONNECTIONS:
 {chr(10).join(f"- {conn}" for conn in connRefs)}

-AVAILABLE DOCUMENTS
-{chr(10).join(f"- {doc.documentsLabel}: {', '.join(doc.documents)}" for doc in docRefs.get('chat', []))}
+AVAILABLE DOCUMENTS:
+{chr(10).join(f"- {doc.documentsLabel} contains {', '.join(doc.documents)}" for doc in all_doc_refs)}
+  (Use the label as a value in documentList to refer to the group)

-PREVIOUS RESULTS:
-{', '.join(previous_results) if previous_results else 'None'}
-
-IMPROVEMENTS NEEDED:
-{improvements if improvements else 'None'}
+PREVIOUS RESULTS: {', '.join(previous_results) if previous_results else 'None'}
+IMPROVEMENTS NEEDED: {improvements if improvements else 'None'}

 INSTRUCTIONS:
-1. Generate specific actions to accomplish this task step
-2. Use available documents, connections, and previous results
-3. Ensure proper result labels for handover
-4. Follow the exact JSON structure below
-5. ALL fields are REQUIRED: method, action, parameters, resultLabel, description
+- Generate actions to accomplish this task step using available documents, connections, and previous results.
+- Use docItem for single documents and docList labels for groups of documents as shown in AVAILABLE DOCUMENTS.
+- Always pass documentList as a LIST of references (docItem and/or docList).
+- For resultLabel, use the format: "task{{task_id}}_action{{action_number}}_{{short_label}}" where:
+    - {{task_id}} = the current task's id (e.g., 1)
+    - {{action_number}} = the sequence number of the action within the task (e.g., 2)
+    - {{short_label}} = a short, descriptive label for the output (e.g., "analysis_results")
+  Example: "task1_action2_analysis_results"
+- Follow the JSON structure below. All fields are required.

 REQUIRED JSON STRUCTURE:
 {{
    "actions": [
-        {{
-            "method": "method_name",
-            "action": "action_name",
+        
+            "method": "method_name",  // Use only the method name (e.g., "document")
+            "action": "action_name",  // Use only the action name (e.g., "extract")
            "parameters": {{
-                "param1": "value1",
-                "param2": "value2",
+                "documentList": ["docItem:doc_abc:file1.txt", "task1_action2_results"],
+                "aiPrompt": "Describe what to do"
            }},
-            "resultLabel": "docList:uuid:descriptiveLabel",
+            "resultLabel": "task1_action3_analysis_results",
            "description": "What this action does"
        }}
    ]
 }}

 FIELD REQUIREMENTS:
- "method": Must be one of the available methods listed above
- "action": Must be a valid action for that method
- "parameters": Object with method-specific parameters
- "resultLabel": MUST start with "docList:" followed by unique identifier and descriptive label
- "description": Clear description of what the action accomplishes
+- "method": Must be from AVAILABLE METHODS
+- "action": Must be valid for the method
+- "parameters": Method-specific, must include documentList as a list if required by the signature
+- "resultLabel": Must follow the format above (e.g., "task1_action3_analysis_results")
+- "description": Clear summary of the action

-MANDATORY PARAMETER AND RETURN VALUE RULES:
-
-1. CONNECTION PARAMETERS:
-   - Parameter name: "connectionReference" (NOT "connection", "site", "connectionId", etc.)
-   - Value: Must be a connection reference from "Connections" section above
-   - Format: "connection:authority:user:connectionId"
-   - Example: "connection:msft:testuser@example.com:1234"
-
-2. DOCUMENT PARAMETERS:
-   - Parameter name: "documentList" (NOT "documentReference", "document", "fileId", "documents", etc.)
-   - Value: MUST be a LIST of document references from "Documents" section or previous results
-   - Format: Use the exact format shown in "Documents" section (e.g., ["docItem:id:filename"] or ["docList:actionId:label"])
-   - Document references represent a LIST of documents, not single documents
-   - All document inputs expect documentList as an ARRAY of strings
-   - IMPORTANT: Use the exact document reference format as shown in "Documents" section above
-
-3. RETURN VALUES:
-   - ALL actions must return documentList references in resultLabel
-   - Result labels must start with "docList:"
-   - Each action creates a unique documentList for handover
-   - Document lists can contain 0, 1, or multiple documents
-   - No actions return single documents - always documentLists
-
-4. PARAMETER VALIDATION:
-   - Use only document references from "Documents" section above
-   - Use only connection references from "Connections" section above  
-   - Use result labels from previous results in the sequence
-   - All parameter values must be strings (except documentList which must be an array)
-   - Document references show: label - list of references
-
-5. RESULT USAGE RULES:
-   - Previous results can be referenced as: "docList:uuid:label"
-   - Use result labels from previous actions in the sequence
-   - Example: If previous action created "docList:abc123:salesData", 
-     reference it as "docList:abc123:salesData" in parameters
-   - Results are available in the PREVIOUS RESULTS section above
-   - Each action should create a unique resultLabel for handover to next actions
-   - Result labels should be descriptive and indicate the content type
-
-6. DOCUMENT HANDLING RULES:
-   - ALWAYS pass documents as a LIST in documentList parameter
-   - Single documents: ["docItem:id:filename"]
-   - Multiple documents: ["docItem:id1:file1", "docItem:id2:file2"]
-   - Document lists: ["docList:actionId:label"]
-   - Mixed references: ["docItem:id:file", "docList:actionId:label"]
-
-EXAMPLE VALID ACTIONS:
-
-1. SharePoint Search:
-{{
-    "method": "sharepoint",
-    "action": "search",
-    "parameters": {{
-        "connectionReference": "connection:msft:testuser@example.com:1234",
-        "query": "sales quarterly report"
-    }},
-    "resultLabel": "docList:abc123:salesDocuments",
-    "description": "Search SharePoint for sales documents"
-}}
-
-2. Document Analysis using single document:
+EXAMPLES:
+1. Analyze a single document:
 {{
    "method": "document",
    "action": "analyze",
@ -657,36 +616,24 @@ EXAMPLE VALID ACTIONS:
        "documentList": ["docItem:doc_57520394-6b6d-41c2-b641-bab3fc6d7f4b:candidate_1_profile.txt"],
        "aiPrompt": "Analyze the candidate profile for key insights"
    }},
-    "resultLabel": "docList:ghi789:candidateAnalysis",
+    "resultLabel": "task1_action2_candidate_analysis",
    "description": "Analyze candidate profile for insights"
 }}

-3. Document Analysis using multiple documents:
+2. Analyze a group of documents (docList):
 {{
    "method": "document",
    "action": "analyze",
    "parameters": {{
-        "documentList": ["docItem:doc_123:profile.txt", "docItem:doc_456:resume.pdf"],
-        "aiPrompt": "Compare the profile and resume for consistency"
+        "documentList": ["task1_action1_extract_results"],
+        "aiPrompt": "Analyze all extracted results"
    }},
-    "resultLabel": "docList:jkl012:comparisonAnalysis",
-    "description": "Compare multiple documents for consistency"
+    "resultLabel": "task1_action2_analysis_results",
+    "description": "Analyze all extracted results"
 }}

-4. Document Extraction using document list:
-{{
-    "method": "document",
-    "action": "extract",
-    "parameters": {{
-        "documentList": ["docList:abc123:salesData"],
-        "aiPrompt": "Extract key information from all sales documents",
-        "format": "json"
-    }},
-    "resultLabel": "docList:mno345:extractedData",
-    "description": "Extract key information from document list"
-}}
-
-NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
+NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.
+"""


    def _createResultReviewPrompt(self, review_context: Dict[str, Any]) -> str:
@ -724,7 +671,7 @@ TASK STEP: {task_step.get('description', 'Unknown')}
 EXPECTED OUTPUTS: {', '.join(task_step.get('expected_outputs', []))}
 SUCCESS CRITERIA: {', '.join(task_step.get('success_criteria', []))}

-STEP RESULT: {json.dumps(step_result_serializable, indent=2)}
+STEP RESULT: {json.dumps(step_result_serializable, indent=2, ensure_ascii=False)}

 INSTRUCTIONS:
 1. Evaluate if the task step was completed successfully
@ -839,7 +786,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
-                "documentsLabel": result_label,  # Always use execResultLabel
+                "documentsLabel": result_label,  # Use intent label from action definition
                "documents": []
            }
            
--- a/modules/workflow/serviceContainer.py
+++ b/modules/workflow/serviceContainer.py
@ -230,8 +230,15 @@ class ServiceContainer:
        # Otherwise construct the reference using the message ID and documents label
        return f"docList:{message.id}:{message.documentsLabel}"
    
+    def resolveDocumentReference(self, intent_label: str) -> str:
+        """Resolve an intent label (e.g., 'task1_extract_results') to a docList reference with message ID; returns None if no message with documents has that label."""
+        for message in self.workflow.messages:
+            if message.documentsLabel == intent_label and message.documents:
+                return f"docList:{message.id}:{intent_label}"
+        return None
+
    def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
-        """Get ChatDocuments from a list of document references"""
+        """Get ChatDocuments from a list of document references (intent or resolved)."""
        try:
            all_documents = []
            for doc_ref in documentList:
@ -239,14 +246,12 @@ class ServiceContainer:
                parts = doc_ref.split(':', 2)  # Split into max 3 parts
                if len(parts) < 3:
                    continue
-                    
                ref_type = parts[0]
                ref_id = parts[1]
-                ref_label = parts[2]  # Keep the full label
-                
+                ref_label = parts[2]
+
                if ref_type == "docItem":
                    # Handle ChatDocument reference: docItem:<id>:<filename>
-                    # Find document in workflow messages
                    for message in self.workflow.messages:
                        if message.documents:
                            for doc in message.documents:
@ -255,17 +260,21 @@ class ServiceContainer:
                                    break
                            if any(doc.id == ref_id for doc in message.documents):
                                break
-                                
                elif ref_type == "docList":
-                    # Handle document list reference: docList:<message.id>:<label>
-                    # Find message by ID
+                    # Look up the message by documentsLabel (ref_label); ref_id is not compared, so the reference need not carry a real message ID
+                    found = False
                    for message in self.workflow.messages:
-                        if str(message.id) == ref_id and message.documents:
+                        if message.documentsLabel == ref_label and message.documents:
                            all_documents.extend(message.documents)
+                            found = True
                            break
-                                
+                    if not found:
+                        # Try to resolve intent label to message ID
+                        resolved_ref = self.resolveDocumentReference(ref_label)
+                        if resolved_ref:
+                            # Recursively resolve the resolved reference
+                            all_documents.extend(self.getChatDocumentsFromDocumentList([resolved_ref]))
            return all_documents
-            
        except Exception as e:
            logger.error(f"Error getting documents from document list: {str(e)}")
            return []