doc ref fixed

2025-07-08 15:35:03 +02:00 · 2025-07-08 15:35:03 +02:00 · aa854f27b7
commit aa854f27b7
parent 171e18b0d7
2 changed files with 90 additions and 134 deletions
--- a/modules/workflow/managerChat.py
+++ b/modules/workflow/managerChat.py
@ -433,8 +433,8 @@ class ChatManager:
                # Validate result label format
                result_label = action.get('resultLabel', '')
-                if not result_label.startswith('docList:'):
+                if not result_label.startswith('task'):
-                    logger.error(f"Action {i} result label must start with 'docList:': {result_label}")
+                    logger.error(f"Action {i} result label must start with 'task': {result_label}")
                    return False
                # Validate parameters
@ -467,10 +467,10 @@ class ChatManager:
                "method": "document",
                "action": "analyze",
                "parameters": {
-                    "fileId": doc,
+                    "documentList": ["task1_previous_results"],
-                    "analysis": ["entities", "topics", "sentiment"]
+                    "aiPrompt": "Fallback document analysis for " + doc
                },
-                "resultLabel": f"docList:fallback:{task_step.get('id', 'unknown')}:{i}:analysis",
+                "resultLabel": "task1_fallback:" + doc + ":analysis",
                "description": f"Fallback document analysis for {doc}"
            })
@ -520,8 +520,14 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
        previous_results = context['previous_results']
        improvements = context.get('improvements', '')
-        # Get available methods
+        # Get available methods and actions with signatures
        methodList = self.service.getMethodsList()
        method_actions = {}
        for sig in methodList:
            if '.' in sig:
                method, rest = sig.split('.', 1)
                action = rest.split('(')[0]
                method_actions.setdefault(method, []).append((action, sig))
        # Get workflow history
        messageSummary = await self.service.summarizeChat(workflow.messages)
@ -529,127 +535,80 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
        # Get available documents and connections
        docRefs = self.service.getDocumentReferenceList()
        connRefs = self.service.getConnectionReferenceList()
        all_doc_refs = docRefs.get('chat', []) + docRefs.get('history', [])
-        return f"""You are an action generation AI that creates specific actions to accomplish a task step.
+        # Build AVAILABLE METHODS section
        available_methods_str = ''
        for method, actions in method_actions.items():
            available_methods_str += f"- {method}:\n"
            for action, sig in actions:
                available_methods_str += f"    - {action}: {sig}\n"
-TASK STEP: {task_step.get('description', 'Unknown')}
+        return f"""
-TASK ID: {task_step.get('id', 'Unknown')}
+You are an action generation AI that creates specific actions to accomplish a task step.
-EXPECTED OUTPUTS:
+DOCUMENT REFERENCE TYPES:
-{', '.join(task_step.get('expected_outputs', []))}
+- docItem: Reference to a single document. Format: "docItem:<id>:<filename>"
 - docList: Reference to a group of documents under a label. Format: <label> (e.g., "task1_action2_results" or "docList:msg123:user_uploads").
  - Each docList label maps to a list of docItem references (see AVAILABLE DOCUMENTS).
  - A label like "task1_action2_results" refers to the output of action 2 in task 1.
-SUCCESS CRITERIA:
+TASK STEP: {task_step.get('description', 'Unknown')} (ID: {task_step.get('id', 'Unknown')})
-{', '.join(task_step.get('success_criteria', []))}
+EXPECTED OUTPUTS: {', '.join(task_step.get('expected_outputs', []))}
 SUCCESS CRITERIA: {', '.join(task_step.get('success_criteria', []))}
 CONTEXT - Chat History:
 {messageSummary}
-AVAILABLE METHODS
+AVAILABLE METHODS AND ACTIONS (with signatures):
-{chr(10).join(f"- {method}" for method in methodList)}
+{available_methods_str}
-AVAILABLE CONNECTIONS
+AVAILABLE CONNECTIONS:
 {chr(10).join(f"- {conn}" for conn in connRefs)}
-AVAILABLE DOCUMENTS
+AVAILABLE DOCUMENTS:
-{chr(10).join(f"- {doc.documentsLabel}: {', '.join(doc.documents)}" for doc in docRefs.get('chat', []))}
+{chr(10).join(f"- {doc.documentsLabel} contains {', '.join(doc.documents)}" for doc in all_doc_refs)}
  (Use the label as a value in documentList to refer to the group)
-PREVIOUS RESULTS:
+PREVIOUS RESULTS: {', '.join(previous_results) if previous_results else 'None'}
-{', '.join(previous_results) if previous_results else 'None'}
+IMPROVEMENTS NEEDED: {improvements if improvements else 'None'}
 IMPROVEMENTS NEEDED:
 {improvements if improvements else 'None'}
 INSTRUCTIONS:
-1. Generate specific actions to accomplish this task step
+- Generate actions to accomplish this task step using available documents, connections, and previous results.
-2. Use available documents, connections, and previous results
+- Use docItem for single documents and docList labels for groups of documents as shown in AVAILABLE DOCUMENTS.
-3. Ensure proper result labels for handover
+- Always pass documentList as a LIST of references (docItem and/or docList).
-4. Follow the exact JSON structure below
+- For resultLabel, use the format: "task{{task_id}}_action{{action_number}}_{{short_label}}" where:
-5. ALL fields are REQUIRED: method, action, parameters, resultLabel, description
+    - {{task_id}} = the current task's id (e.g., 1)
    - {{action_number}} = the sequence number of the action within the task (e.g., 2)
    - {{short_label}} = a short, descriptive label for the output (e.g., "analysis_results")
  Example: "task1_action2_analysis_results"
 - Follow the JSON structure below. All fields are required.
 REQUIRED JSON STRUCTURE:
 {{
    "actions": [
-        {{
+        
-            "method": "method_name",
+            "method": "method_name",  // Use only the method name (e.g., "document")
-            "action": "action_name",
+            "action": "action_name",  // Use only the action name (e.g., "extract")
            "parameters": {{
-                "param1": "value1",
+                "documentList": ["docItem:doc_abc:file1.txt", "task1_action2_results"],
-                "param2": "value2",
+                "aiPrompt": "Describe what to do"
            }},
-            "resultLabel": "docList:uuid:descriptiveLabel",
+            "resultLabel": "task1_action3_analysis_results",
            "description": "What this action does"
        }}
    ]
 }}
 FIELD REQUIREMENTS:
- "method": Must be one of the available methods listed above
+- "method": Must be from AVAILABLE METHODS
- "action": Must be a valid action for that method
+- "action": Must be valid for the method
- "parameters": Object with method-specific parameters
+- "parameters": Method-specific, must include documentList as a list if required by the signature
- "resultLabel": MUST start with "docList:" followed by unique identifier and descriptive label
+- "resultLabel": Must follow the format above (e.g., "task1_action3_analysis_results")
- "description": Clear description of what the action accomplishes
+- "description": Clear summary of the action
-MANDATORY PARAMETER AND RETURN VALUE RULES:
+EXAMPLES:
-
+1. Analyze a single document:
 1. CONNECTION PARAMETERS:
   - Parameter name: "connectionReference" (NOT "connection", "site", "connectionId", etc.)
   - Value: Must be a connection reference from "Connections" section above
   - Format: "connection:authority:user:connectionId"
   - Example: "connection:msft:testuser@example.com:1234"
 2. DOCUMENT PARAMETERS:
   - Parameter name: "documentList" (NOT "documentReference", "document", "fileId", "documents", etc.)
   - Value: MUST be a LIST of document references from "Documents" section or previous results
   - Format: Use the exact format shown in "Documents" section (e.g., ["docItem:id:filename"] or ["docList:actionId:label"])
   - Document references represent a LIST of documents, not single documents
   - All document inputs expect documentList as an ARRAY of strings
   - IMPORTANT: Use the exact document reference format as shown in "Documents" section above
 3. RETURN VALUES:
   - ALL actions must return documentList references in resultLabel
   - Result labels must start with "docList:"
   - Each action creates a unique documentList for handover
   - Document lists can contain 0, 1, or multiple documents
   - No actions return single documents - always documentLists
 4. PARAMETER VALIDATION:
   - Use only document references from "Documents" section above
   - Use only connection references from "Connections" section above  
   - Use result labels from previous results in the sequence
   - All parameter values must be strings (except documentList which must be an array)
   - Document references show: label - list of references
 5. RESULT USAGE RULES:
   - Previous results can be referenced as: "docList:uuid:label"
   - Use result labels from previous actions in the sequence
   - Example: If previous action created "docList:abc123:salesData", 
     reference it as "docList:abc123:salesData" in parameters
   - Results are available in the PREVIOUS RESULTS section above
   - Each action should create a unique resultLabel for handover to next actions
   - Result labels should be descriptive and indicate the content type
 6. DOCUMENT HANDLING RULES:
   - ALWAYS pass documents as a LIST in documentList parameter
   - Single documents: ["docItem:id:filename"]
   - Multiple documents: ["docItem:id1:file1", "docItem:id2:file2"]
   - Document lists: ["docList:actionId:label"]
   - Mixed references: ["docItem:id:file", "docList:actionId:label"]
 EXAMPLE VALID ACTIONS:
 1. SharePoint Search:
 {{
    "method": "sharepoint",
    "action": "search",
    "parameters": {{
        "connectionReference": "connection:msft:testuser@example.com:1234",
        "query": "sales quarterly report"
    }},
    "resultLabel": "docList:abc123:salesDocuments",
    "description": "Search SharePoint for sales documents"
 }}
 2. Document Analysis using single document:
 {{
    "method": "document",
    "action": "analyze",
@ -657,36 +616,24 @@ EXAMPLE VALID ACTIONS:
        "documentList": ["docItem:doc_57520394-6b6d-41c2-b641-bab3fc6d7f4b:candidate_1_profile.txt"],
        "aiPrompt": "Analyze the candidate profile for key insights"
    }},
-    "resultLabel": "docList:ghi789:candidateAnalysis",
+    "resultLabel": "task1_action2_candidate_analysis",
    "description": "Analyze candidate profile for insights"
 }}
-3. Document Analysis using multiple documents:
+2. Analyze a group of documents (docList):
 {{
    "method": "document",
    "action": "analyze",
    "parameters": {{
-        "documentList": ["docItem:doc_123:profile.txt", "docItem:doc_456:resume.pdf"],
+        "documentList": ["task1_action1_extract_results"],
-        "aiPrompt": "Compare the profile and resume for consistency"
+        "aiPrompt": "Analyze all extracted results"
    }},
-    "resultLabel": "docList:jkl012:comparisonAnalysis",
+    "resultLabel": "task1_action2_analysis_results",
-    "description": "Compare multiple documents for consistency"
+    "description": "Analyze all extracted results"
 }}
-4. Document Extraction using document list:
+NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.
-{{
+"""
    "method": "document",
    "action": "extract",
    "parameters": {{
        "documentList": ["docList:abc123:salesData"],
        "aiPrompt": "Extract key information from all sales documents",
        "format": "json"
    }},
    "resultLabel": "docList:mno345:extractedData",
    "description": "Extract key information from document list"
 }}
 NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
    def _createResultReviewPrompt(self, review_context: Dict[str, Any]) -> str:
@ -724,7 +671,7 @@ TASK STEP: {task_step.get('description', 'Unknown')}
 EXPECTED OUTPUTS: {', '.join(task_step.get('expected_outputs', []))}
 SUCCESS CRITERIA: {', '.join(task_step.get('success_criteria', []))}
-STEP RESULT: {json.dumps(step_result_serializable, indent=2)}
+STEP RESULT: {json.dumps(step_result_serializable, indent=2, ensure_ascii=False)}
 INSTRUCTIONS:
 1. Evaluate if the task step was completed successfully
@ -839,7 +786,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
-                "documentsLabel": result_label,  # Always use execResultLabel
+                "documentsLabel": result_label,  # Use intent label from action definition
                "documents": []
            }
--- a/modules/workflow/serviceContainer.py
+++ b/modules/workflow/serviceContainer.py
@ -230,8 +230,15 @@ class ServiceContainer:
        # Otherwise construct the reference using the message ID and documents label
        return f"docList:{message.id}:{message.documentsLabel}"
    def resolveDocumentReference(self, intent_label: str) -> str:
        """Turn an intent label into a ``docList:<message id>:<label>`` reference.

        Walks ``self.workflow.messages`` in order and picks the first message
        whose ``documentsLabel`` equals ``intent_label`` and whose ``documents``
        attribute is truthy.

        Args:
            intent_label: Label to look up (e.g., 'task1_extract_results').

        Returns:
            The reference string built from the matching message's ``id`` and
            ``intent_label``. When no message qualifies the method returns
            ``None`` (despite the ``str`` annotation), so callers must check.
        """
        owner = next(
            (
                candidate
                for candidate in self.workflow.messages
                if candidate.documentsLabel == intent_label and candidate.documents
            ),
            None,
        )
        if owner is None:
            # No message carries this label together with documents.
            return None
        return f"docList:{owner.id}:{intent_label}"
    def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
-        """Get ChatDocuments from a list of document references"""
+        """Get ChatDocuments from a list of document references (intent or resolved)."""
        try:
            all_documents = []
            for doc_ref in documentList:
@ -239,14 +246,12 @@ class ServiceContainer:
                parts = doc_ref.split(':', 2)  # Split into max 3 parts
                if len(parts) < 3:
                    continue
                ref_type = parts[0]
                ref_id = parts[1]
-                ref_label = parts[2]  # Keep the full label
+                ref_label = parts[2]
                if ref_type == "docItem":
                    # Handle ChatDocument reference: docItem:<id>:<filename>
                    # Find document in workflow messages
                    for message in self.workflow.messages:
                        if message.documents:
                            for doc in message.documents:
@ -255,17 +260,21 @@ class ServiceContainer:
                                    break
                            if any(doc.id == ref_id for doc in message.documents):
                                break
                elif ref_type == "docList":
-                    # Handle document list reference: docList:<message.id>:<label>
+                    # Look the list up by its documentsLabel (ref_label); ref_id is not compared in this branch
-                    # Find message by ID
+                    found = False
                    for message in self.workflow.messages:
-                        if str(message.id) == ref_id and message.documents:
+                        if message.documentsLabel == ref_label and message.documents:
                            all_documents.extend(message.documents)
                            found = True
                            break
-                                
+                    if not found:
                        # Try to resolve intent label to message ID
                        resolved_ref = self.resolveDocumentReference(ref_label)
                        if resolved_ref:
                            # Recursively resolve the resolved reference
                            all_documents.extend(self.getChatDocumentsFromDocumentList([resolved_ref]))
            return all_documents
        except Exception as e:
            logger.error(f"Error getting documents from document list: {str(e)}")
            return []