doc ref fixed

This commit is contained in:
ValueOn AG 2025-07-08 15:35:03 +02:00
parent 171e18b0d7
commit aa854f27b7
2 changed files with 90 additions and 134 deletions

View file

@ -433,8 +433,8 @@ class ChatManager:
# Validate result label format # Validate result label format
result_label = action.get('resultLabel', '') result_label = action.get('resultLabel', '')
if not result_label.startswith('docList:'): if not result_label.startswith('task'):
logger.error(f"Action {i} result label must start with 'docList:': {result_label}") logger.error(f"Action {i} result label must start with 'task': {result_label}")
return False return False
# Validate parameters # Validate parameters
@ -467,10 +467,10 @@ class ChatManager:
"method": "document", "method": "document",
"action": "analyze", "action": "analyze",
"parameters": { "parameters": {
"fileId": doc, "documentList": ["task1_previous_results"],
"analysis": ["entities", "topics", "sentiment"] "aiPrompt": "Fallback document analysis for " + doc
}, },
"resultLabel": f"docList:fallback:{task_step.get('id', 'unknown')}:{i}:analysis", "resultLabel": "task1_fallback:" + doc + ":analysis",
"description": f"Fallback document analysis for {doc}" "description": f"Fallback document analysis for {doc}"
}) })
@ -520,8 +520,14 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
previous_results = context['previous_results'] previous_results = context['previous_results']
improvements = context.get('improvements', '') improvements = context.get('improvements', '')
# Get available methods # Get available methods and actions with signatures
methodList = self.service.getMethodsList() methodList = self.service.getMethodsList()
method_actions = {}
for sig in methodList:
if '.' in sig:
method, rest = sig.split('.', 1)
action = rest.split('(')[0]
method_actions.setdefault(method, []).append((action, sig))
# Get workflow history # Get workflow history
messageSummary = await self.service.summarizeChat(workflow.messages) messageSummary = await self.service.summarizeChat(workflow.messages)
@ -529,127 +535,80 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
# Get available documents and connections # Get available documents and connections
docRefs = self.service.getDocumentReferenceList() docRefs = self.service.getDocumentReferenceList()
connRefs = self.service.getConnectionReferenceList() connRefs = self.service.getConnectionReferenceList()
all_doc_refs = docRefs.get('chat', []) + docRefs.get('history', [])
return f"""You are an action generation AI that creates specific actions to accomplish a task step. # Build AVAILABLE METHODS section
available_methods_str = ''
for method, actions in method_actions.items():
available_methods_str += f"- {method}:\n"
for action, sig in actions:
available_methods_str += f" - {action}: {sig}\n"
return f"""
You are an action generation AI that creates specific actions to accomplish a task step.
TASK STEP: {task_step.get('description', 'Unknown')} DOCUMENT REFERENCE TYPES:
TASK ID: {task_step.get('id', 'Unknown')} - docItem: Reference to a single document. Format: "docItem:<id>:<filename>"
- docList: Reference to a group of documents under a label. Format: <label> (e.g., "task1_action2_results" or "docList:msg123:user_uploads").
- Each docList label maps to a list of docItem references (see AVAILABLE DOCUMENTS).
- A label like "task1_action2_results" refers to the output of action 2 in task 1.
EXPECTED OUTPUTS: TASK STEP: {task_step.get('description', 'Unknown')} (ID: {task_step.get('id', 'Unknown')})
{', '.join(task_step.get('expected_outputs', []))} EXPECTED OUTPUTS: {', '.join(task_step.get('expected_outputs', []))}
SUCCESS CRITERIA: {', '.join(task_step.get('success_criteria', []))}
SUCCESS CRITERIA:
{', '.join(task_step.get('success_criteria', []))}
CONTEXT - Chat History: CONTEXT - Chat History:
{messageSummary} {messageSummary}
AVAILABLE METHODS AVAILABLE METHODS AND ACTIONS (with signatures):
{chr(10).join(f"- {method}" for method in methodList)} {available_methods_str}
AVAILABLE CONNECTIONS AVAILABLE CONNECTIONS:
{chr(10).join(f"- {conn}" for conn in connRefs)} {chr(10).join(f"- {conn}" for conn in connRefs)}
AVAILABLE DOCUMENTS AVAILABLE DOCUMENTS:
{chr(10).join(f"- {doc.documentsLabel}: {', '.join(doc.documents)}" for doc in docRefs.get('chat', []))} {chr(10).join(f"- {doc.documentsLabel} contains {', '.join(doc.documents)}" for doc in all_doc_refs)}
(Use the label as a value in documentList to refer to the group)
PREVIOUS RESULTS: PREVIOUS RESULTS: {', '.join(previous_results) if previous_results else 'None'}
{', '.join(previous_results) if previous_results else 'None'} IMPROVEMENTS NEEDED: {improvements if improvements else 'None'}
IMPROVEMENTS NEEDED:
{improvements if improvements else 'None'}
INSTRUCTIONS: INSTRUCTIONS:
1. Generate specific actions to accomplish this task step - Generate actions to accomplish this task step using available documents, connections, and previous results.
2. Use available documents, connections, and previous results - Use docItem for single documents and docList labels for groups of documents as shown in AVAILABLE DOCUMENTS.
3. Ensure proper result labels for handover - Always pass documentList as a LIST of references (docItem and/or docList).
4. Follow the exact JSON structure below - For resultLabel, use the format: "task{{task_id}}_action{{action_number}}_{{short_label}}" where:
5. ALL fields are REQUIRED: method, action, parameters, resultLabel, description - {{task_id}} = the current task's id (e.g., 1)
- {{action_number}} = the sequence number of the action within the task (e.g., 2)
- {{short_label}} = a short, descriptive label for the output (e.g., "analysis_results")
Example: "task1_action2_analysis_results"
- Follow the JSON structure below. All fields are required.
REQUIRED JSON STRUCTURE: REQUIRED JSON STRUCTURE:
{{ {{
"actions": [ "actions": [
{{
"method": "method_name", "method": "method_name", // Use only the method name (e.g., "document")
"action": "action_name", "action": "action_name", // Use only the action name (e.g., "extract")
"parameters": {{ "parameters": {{
"param1": "value1", "documentList": ["docItem:doc_abc:file1.txt", "task1_action2_results"],
"param2": "value2", "aiPrompt": "Describe what to do"
}}, }},
"resultLabel": "docList:uuid:descriptiveLabel", "resultLabel": "task1_action3_analysis_results",
"description": "What this action does" "description": "What this action does"
}} }}
] ]
}} }}
FIELD REQUIREMENTS: FIELD REQUIREMENTS:
- "method": Must be one of the available methods listed above - "method": Must be from AVAILABLE METHODS
- "action": Must be a valid action for that method - "action": Must be valid for the method
- "parameters": Object with method-specific parameters - "parameters": Method-specific, must include documentList as a list if required by the signature
- "resultLabel": MUST start with "docList:" followed by unique identifier and descriptive label - "resultLabel": Must follow the format above (e.g., "task1_action3_analysis_results")
- "description": Clear description of what the action accomplishes - "description": Clear summary of the action
MANDATORY PARAMETER AND RETURN VALUE RULES: EXAMPLES:
1. Analyze a single document:
1. CONNECTION PARAMETERS:
- Parameter name: "connectionReference" (NOT "connection", "site", "connectionId", etc.)
- Value: Must be a connection reference from "Connections" section above
- Format: "connection:authority:user:connectionId"
- Example: "connection:msft:testuser@example.com:1234"
2. DOCUMENT PARAMETERS:
- Parameter name: "documentList" (NOT "documentReference", "document", "fileId", "documents", etc.)
- Value: MUST be a LIST of document references from "Documents" section or previous results
- Format: Use the exact format shown in "Documents" section (e.g., ["docItem:id:filename"] or ["docList:actionId:label"])
- Document references represent a LIST of documents, not single documents
- All document inputs expect documentList as an ARRAY of strings
- IMPORTANT: Use the exact document reference format as shown in "Documents" section above
3. RETURN VALUES:
- ALL actions must return documentList references in resultLabel
- Result labels must start with "docList:"
- Each action creates a unique documentList for handover
- Document lists can contain 0, 1, or multiple documents
- No actions return single documents - always documentLists
4. PARAMETER VALIDATION:
- Use only document references from "Documents" section above
- Use only connection references from "Connections" section above
- Use result labels from previous results in the sequence
- All parameter values must be strings (except documentList which must be an array)
- Document references show: label - list of references
5. RESULT USAGE RULES:
- Previous results can be referenced as: "docList:uuid:label"
- Use result labels from previous actions in the sequence
- Example: If previous action created "docList:abc123:salesData",
reference it as "docList:abc123:salesData" in parameters
- Results are available in the PREVIOUS RESULTS section above
- Each action should create a unique resultLabel for handover to next actions
- Result labels should be descriptive and indicate the content type
6. DOCUMENT HANDLING RULES:
- ALWAYS pass documents as a LIST in documentList parameter
- Single documents: ["docItem:id:filename"]
- Multiple documents: ["docItem:id1:file1", "docItem:id2:file2"]
- Document lists: ["docList:actionId:label"]
- Mixed references: ["docItem:id:file", "docList:actionId:label"]
EXAMPLE VALID ACTIONS:
1. SharePoint Search:
{{
"method": "sharepoint",
"action": "search",
"parameters": {{
"connectionReference": "connection:msft:testuser@example.com:1234",
"query": "sales quarterly report"
}},
"resultLabel": "docList:abc123:salesDocuments",
"description": "Search SharePoint for sales documents"
}}
2. Document Analysis using single document:
{{ {{
"method": "document", "method": "document",
"action": "analyze", "action": "analyze",
@ -657,36 +616,24 @@ EXAMPLE VALID ACTIONS:
"documentList": ["docItem:doc_57520394-6b6d-41c2-b641-bab3fc6d7f4b:candidate_1_profile.txt"], "documentList": ["docItem:doc_57520394-6b6d-41c2-b641-bab3fc6d7f4b:candidate_1_profile.txt"],
"aiPrompt": "Analyze the candidate profile for key insights" "aiPrompt": "Analyze the candidate profile for key insights"
}}, }},
"resultLabel": "docList:ghi789:candidateAnalysis", "resultLabel": "task1_action2_candidate_analysis",
"description": "Analyze candidate profile for insights" "description": "Analyze candidate profile for insights"
}} }}
3. Document Analysis using multiple documents: 2. Analyze a group of documents (docList):
{{ {{
"method": "document", "method": "document",
"action": "analyze", "action": "analyze",
"parameters": {{ "parameters": {{
"documentList": ["docItem:doc_123:profile.txt", "docItem:doc_456:resume.pdf"], "documentList": ["task1_action1_extract_results"],
"aiPrompt": "Compare the profile and resume for consistency" "aiPrompt": "Analyze all extracted results"
}}, }},
"resultLabel": "docList:jkl012:comparisonAnalysis", "resultLabel": "task1_action2_analysis_results",
"description": "Compare multiple documents for consistency" "description": "Analyze all extracted results"
}} }}
4. Document Extraction using document list: NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.
{{ """
"method": "document",
"action": "extract",
"parameters": {{
"documentList": ["docList:abc123:salesData"],
"aiPrompt": "Extract key information from all sales documents",
"format": "json"
}},
"resultLabel": "docList:mno345:extractedData",
"description": "Extract key information from document list"
}}
NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
def _createResultReviewPrompt(self, review_context: Dict[str, Any]) -> str: def _createResultReviewPrompt(self, review_context: Dict[str, Any]) -> str:
@ -724,7 +671,7 @@ TASK STEP: {task_step.get('description', 'Unknown')}
EXPECTED OUTPUTS: {', '.join(task_step.get('expected_outputs', []))} EXPECTED OUTPUTS: {', '.join(task_step.get('expected_outputs', []))}
SUCCESS CRITERIA: {', '.join(task_step.get('success_criteria', []))} SUCCESS CRITERIA: {', '.join(task_step.get('success_criteria', []))}
STEP RESULT: {json.dumps(step_result_serializable, indent=2)} STEP RESULT: {json.dumps(step_result_serializable, indent=2, ensure_ascii=False)}
INSTRUCTIONS: INSTRUCTIONS:
1. Evaluate if the task step was completed successfully 1. Evaluate if the task step was completed successfully
@ -839,7 +786,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
"actionId": action.id, "actionId": action.id,
"actionMethod": action.execMethod, "actionMethod": action.execMethod,
"actionName": action.execAction, "actionName": action.execAction,
"documentsLabel": result_label, # Always use execResultLabel "documentsLabel": result_label, # Use intent label from action definition
"documents": [] "documents": []
} }

View file

@ -230,8 +230,15 @@ class ServiceContainer:
# Otherwise construct the reference using the message ID and documents label # Otherwise construct the reference using the message ID and documents label
return f"docList:{message.id}:{message.documentsLabel}" return f"docList:{message.id}:{message.documentsLabel}"
def resolveDocumentReference(self, intent_label: str) -> "Optional[str]":
    """Resolve an intent label to a fully qualified docList reference.

    An intent label is the plain result label of an action (for example
    ``"task1_action2_analysis_results"``). The workflow messages are scanned
    in order, and the first message that carries this label and has at
    least one document determines the reference.

    Args:
        intent_label: Label to look up in ``message.documentsLabel``.

    Returns:
        ``"docList:<message.id>:<intent_label>"`` for the first matching
        message, or ``None`` when no message with documents uses the label.
    """
    # The return annotation is quoted so it is never evaluated at definition
    # time; the method stays importable even if this module does not import
    # ``Optional`` from ``typing``.
    for message in self.workflow.messages:
        # A message with a matching label but no documents cannot back a
        # docList, so keep looking.
        if message.documentsLabel == intent_label and message.documents:
            return f"docList:{message.id}:{intent_label}"
    return None
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]: def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
"""Get ChatDocuments from a list of document references""" """Get ChatDocuments from a list of document references (intent or resolved)."""
try: try:
all_documents = [] all_documents = []
for doc_ref in documentList: for doc_ref in documentList:
@ -239,14 +246,12 @@ class ServiceContainer:
parts = doc_ref.split(':', 2) # Split into max 3 parts parts = doc_ref.split(':', 2) # Split into max 3 parts
if len(parts) < 3: if len(parts) < 3:
continue continue
ref_type = parts[0] ref_type = parts[0]
ref_id = parts[1] ref_id = parts[1]
ref_label = parts[2] # Keep the full label ref_label = parts[2]
if ref_type == "docItem": if ref_type == "docItem":
# Handle ChatDocument reference: docItem:<id>:<filename> # Handle ChatDocument reference: docItem:<id>:<filename>
# Find document in workflow messages
for message in self.workflow.messages: for message in self.workflow.messages:
if message.documents: if message.documents:
for doc in message.documents: for doc in message.documents:
@ -255,17 +260,21 @@ class ServiceContainer:
break break
if any(doc.id == ref_id for doc in message.documents): if any(doc.id == ref_id for doc in message.documents):
break break
elif ref_type == "docList": elif ref_type == "docList":
# Handle document list reference: docList:<message.id>:<label> # If ref_id is not a message ID (i.e., not all digits or not found), treat as intent label
# Find message by ID found = False
for message in self.workflow.messages: for message in self.workflow.messages:
if str(message.id) == ref_id and message.documents: if message.documentsLabel == ref_label and message.documents:
all_documents.extend(message.documents) all_documents.extend(message.documents)
found = True
break break
if not found:
# Try to resolve intent label to message ID
resolved_ref = self.resolveDocumentReference(ref_label)
if resolved_ref:
# Recursively resolve the resolved reference
all_documents.extend(self.getChatDocumentsFromDocumentList([resolved_ref]))
return all_documents return all_documents
except Exception as e: except Exception as e:
logger.error(f"Error getting documents from document list: {str(e)}") logger.error(f"Error getting documents from document list: {str(e)}")
return [] return []