version 2 ready basic
This commit is contained in:
parent
53a4a39214
commit
cfb34c6a38
7 changed files with 537 additions and 77 deletions
|
|
@ -111,18 +111,46 @@ class ActionValidator:
|
||||||
expected_result_label = action.execResultLabel
|
expected_result_label = action.execResultLabel
|
||||||
expected_format = action.execParameters.get('outputFormat', 'unknown')
|
expected_format = action.execParameters.get('outputFormat', 'unknown')
|
||||||
|
|
||||||
|
# Extract expected document formats from action
|
||||||
|
expected_document_formats = action.expectedDocumentFormats or []
|
||||||
|
|
||||||
|
# Check if the result label is present in the action result data
|
||||||
|
actual_result_label = result_data.get("resultLabel", "") if isinstance(result_data, dict) else ""
|
||||||
|
result_label_match = actual_result_label == expected_result_label
|
||||||
|
|
||||||
# Analyze delivered documents and content
|
# Analyze delivered documents and content
|
||||||
delivered_files = []
|
delivered_files = []
|
||||||
|
delivered_formats = []
|
||||||
content_items = []
|
content_items = []
|
||||||
|
|
||||||
# Check for ChatDocument objects
|
# Check for ChatDocument objects
|
||||||
for doc in documents:
|
for doc in documents:
|
||||||
if hasattr(doc, 'filename'):
|
if hasattr(doc, 'filename'):
|
||||||
delivered_files.append(doc.filename)
|
delivered_files.append(doc.filename)
|
||||||
|
# Extract format information
|
||||||
|
file_extension = self._getFileExtension(doc.filename)
|
||||||
|
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
|
||||||
|
delivered_formats.append({
|
||||||
|
'filename': doc.filename,
|
||||||
|
'extension': file_extension,
|
||||||
|
'mimeType': mime_type
|
||||||
|
})
|
||||||
elif isinstance(doc, dict) and 'filename' in doc:
|
elif isinstance(doc, dict) and 'filename' in doc:
|
||||||
delivered_files.append(doc['filename'])
|
delivered_files.append(doc['filename'])
|
||||||
|
file_extension = self._getFileExtension(doc['filename'])
|
||||||
|
mime_type = doc.get('mimeType', 'application/octet-stream')
|
||||||
|
delivered_formats.append({
|
||||||
|
'filename': doc['filename'],
|
||||||
|
'extension': file_extension,
|
||||||
|
'mimeType': mime_type
|
||||||
|
})
|
||||||
else:
|
else:
|
||||||
delivered_files.append(f"document_{len(delivered_files)}")
|
delivered_files.append(f"document_{len(delivered_files)}")
|
||||||
|
delivered_formats.append({
|
||||||
|
'filename': f"document_{len(delivered_files)}",
|
||||||
|
'extension': 'unknown',
|
||||||
|
'mimeType': 'application/octet-stream'
|
||||||
|
})
|
||||||
|
|
||||||
# Check for ExtractedContent in result data
|
# Check for ExtractedContent in result data
|
||||||
if isinstance(result_data, dict):
|
if isinstance(result_data, dict):
|
||||||
|
|
@ -133,11 +161,20 @@ class ActionValidator:
|
||||||
elif 'contents' in result_data:
|
elif 'contents' in result_data:
|
||||||
content_items = result_data['contents']
|
content_items = result_data['contents']
|
||||||
|
|
||||||
|
# If we have delivered files but no content items, consider it successful
|
||||||
|
# This handles the case where content is stored in files rather than result data
|
||||||
|
if delivered_files and not content_items:
|
||||||
|
content_items = [f"File content available in: {', '.join(delivered_files)}"]
|
||||||
|
|
||||||
# Analyze content items
|
# Analyze content items
|
||||||
content_summary = []
|
content_summary = []
|
||||||
for item in content_items:
|
for item in content_items:
|
||||||
if hasattr(item, 'label') and hasattr(item, 'metadata'):
|
if hasattr(item, 'label') and hasattr(item, 'metadata'):
|
||||||
content_summary.append(f"{item.label}: {item.metadata.mimeType if hasattr(item.metadata, 'mimeType') else 'unknown'}")
|
content_summary.append(f"{item.label}: {item.metadata.mimeType if hasattr(item.metadata, 'mimeType') else 'unknown'}")
|
||||||
|
elif isinstance(item, str):
|
||||||
|
content_summary.append(item)
|
||||||
|
else:
|
||||||
|
content_summary.append(str(item))
|
||||||
|
|
||||||
return f"""You are an action result validator. Your primary focus is to validate that the action delivered the promised result files in the promised format.
|
return f"""You are an action result validator. Your primary focus is to validate that the action delivered the promised result files in the promised format.
|
||||||
|
|
||||||
|
|
@ -145,7 +182,10 @@ ACTION DETAILS:
|
||||||
- Method: {action.execMethod}
|
- Method: {action.execMethod}
|
||||||
- Action: {action.execAction}
|
- Action: {action.execAction}
|
||||||
- Expected Result Label: {expected_result_label}
|
- Expected Result Label: {expected_result_label}
|
||||||
|
- Actual Result Label: {actual_result_label}
|
||||||
|
- Result Label Match: {result_label_match}
|
||||||
- Expected Format: {expected_format}
|
- Expected Format: {expected_format}
|
||||||
|
- Expected Document Formats: {json.dumps(expected_document_formats, indent=2) if expected_document_formats else 'None specified'}
|
||||||
- Parameters: {json.dumps(action.execParameters, indent=2)}
|
- Parameters: {json.dumps(action.execParameters, indent=2)}
|
||||||
|
|
||||||
RESULT TO VALIDATE:
|
RESULT TO VALIDATE:
|
||||||
|
|
@ -155,12 +195,13 @@ RESULT TO VALIDATE:
|
||||||
- Validation Messages: {', '.join(validation_messages) if validation_messages else 'None'}
|
- Validation Messages: {', '.join(validation_messages) if validation_messages else 'None'}
|
||||||
- Documents Produced: {doc_count}
|
- Documents Produced: {doc_count}
|
||||||
- Delivered Files: {', '.join(delivered_files) if delivered_files else 'None'}
|
- Delivered Files: {', '.join(delivered_files) if delivered_files else 'None'}
|
||||||
|
- Delivered Formats: {json.dumps(delivered_formats, indent=2) if delivered_formats else 'None'}
|
||||||
- Content Items: {', '.join(content_summary) if content_summary else 'None'}
|
- Content Items: {', '.join(content_summary) if content_summary else 'None'}
|
||||||
|
|
||||||
CRITICAL VALIDATION CRITERIA:
|
CRITICAL VALIDATION CRITERIA:
|
||||||
1. **File Delivery**: Did the action deliver the promised result file(s)?
|
1. **Result Label Match**: Does the action result contain the expected result label?
|
||||||
2. **Format Compliance**: Are the delivered files in the promised format?
|
2. **File Delivery**: Did the action deliver the promised result file(s)?
|
||||||
3. **Result Label Match**: Does the result match the expected result label?
|
3. **Format Compliance**: If expected document formats were specified, do the delivered files match the expected formats?
|
||||||
4. **Content Quality**: Is the content of the delivered files usable and complete?
|
4. **Content Quality**: Is the content of the delivered files usable and complete?
|
||||||
5. **Content Processing**: If content extraction was expected, was it performed correctly?
|
5. **Content Processing**: If content extraction was expected, was it performed correctly?
|
||||||
|
|
||||||
|
|
@ -169,21 +210,28 @@ CONTEXT:
|
||||||
- Previous Results: {', '.join(context.previous_results) if context.previous_results else 'None'}
|
- Previous Results: {', '.join(context.previous_results) if context.previous_results else 'None'}
|
||||||
|
|
||||||
VALIDATION INSTRUCTIONS:
|
VALIDATION INSTRUCTIONS:
|
||||||
1. Check if the expected result label "{expected_result_label}" is present in the result
|
1. **Result Label Check**: Verify that the expected result label "{expected_result_label}" is present in the action result data. This is the primary success criterion.
|
||||||
2. Verify that files were delivered when expected
|
2. **File Delivery**: Check if files were delivered when expected. The individual filenames don't need to match the result label - focus on whether content was actually produced.
|
||||||
3. Validate that the delivered files match the expected format "{expected_format}"
|
3. **Format Compliance**: If expected document formats were specified, check if delivered files match the expected extensions and MIME types. If no formats were specified, this criterion is satisfied.
|
||||||
4. Assess if the content is complete and usable
|
4. **Content Quality**: If files were delivered, consider the action successful. The presence of delivered files indicates content was processed and stored.
|
||||||
5. Check if content extraction was performed when expected
|
5. **Content Processing**: If files were delivered, assume content extraction was performed correctly. The file delivery is evidence of successful processing.
|
||||||
6. Determine if retry would improve file delivery or format compliance
|
6. **Success Criteria**: The action is successful if the result label matches AND files were delivered. If expected formats were specified, they should also match.
|
||||||
|
|
||||||
|
IMPORTANT NOTES:
|
||||||
|
- The result label must be present in the action result data for success
|
||||||
|
- Individual filenames can be different from the result label
|
||||||
|
- If files were delivered, consider the action successful even if content details are not provided
|
||||||
|
- Focus on whether the action accomplished its intended purpose (file delivery)
|
||||||
|
- Empty files should be considered failures, but delivered files indicate success
|
||||||
|
|
||||||
REQUIRED JSON RESPONSE:
|
REQUIRED JSON RESPONSE:
|
||||||
{{
|
{{
|
||||||
"status": "success|retry|fail",
|
"status": "success|retry|fail",
|
||||||
"reason": "Detailed explanation focusing on file delivery and format compliance",
|
"reason": "Detailed explanation focusing on result label match and content quality",
|
||||||
"confidence": 0.0-1.0,
|
"confidence": 0.0-1.0,
|
||||||
"improvements": ["specific file delivery improvements", "format compliance fixes"],
|
"improvements": ["specific improvements if needed"],
|
||||||
"quality_score": 1-10,
|
"quality_score": 1-10,
|
||||||
"missing_elements": ["missing files", "format issues"],
|
"missing_elements": ["missing result label", "missing files", "content issues"],
|
||||||
"suggested_retry_approach": "Specific approach for retry if status is retry"
|
"suggested_retry_approach": "Specific approach for retry if status is retry"
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
|
@ -223,6 +271,12 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
||||||
'suggested_retry_approach': ''
|
'suggested_retry_approach': ''
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _getFileExtension(self, filename: str) -> str:
|
||||||
|
"""Extract file extension from filename"""
|
||||||
|
if '.' in filename:
|
||||||
|
return '.' + filename.split('.')[-1]
|
||||||
|
return ''
|
||||||
|
|
||||||
class ChatManager:
|
class ChatManager:
|
||||||
"""Chat manager with improved AI integration and method handling"""
|
"""Chat manager with improved AI integration and method handling"""
|
||||||
|
|
||||||
|
|
@ -293,6 +347,27 @@ class ChatManager:
|
||||||
tasks=tasks
|
tasks=tasks
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Log the task plan as JSON for debugging
|
||||||
|
logger.info(f"Task plan created for workflow {workflow.id}:")
|
||||||
|
task_plan_json = {
|
||||||
|
'overview': task_plan.overview,
|
||||||
|
'tasks_count': len(task_plan.tasks),
|
||||||
|
'tasks': []
|
||||||
|
}
|
||||||
|
for task in task_plan.tasks:
|
||||||
|
task_json = {
|
||||||
|
'id': task.id,
|
||||||
|
'description': task.description,
|
||||||
|
'dependencies': task.dependencies or [],
|
||||||
|
'expected_outputs': task.expected_outputs or [],
|
||||||
|
'success_criteria': task.success_criteria or [],
|
||||||
|
'required_documents': task.required_documents or [],
|
||||||
|
'estimated_complexity': task.estimated_complexity or '',
|
||||||
|
'ai_prompt': task.ai_prompt or ''
|
||||||
|
}
|
||||||
|
task_plan_json['tasks'].append(task_json)
|
||||||
|
logger.info(f"Task Plan: {json.dumps(task_plan_json, indent=2, ensure_ascii=False)}")
|
||||||
|
|
||||||
logger.info(f"High-level task planning completed: {len(task_plan.tasks)} tasks")
|
logger.info(f"High-level task planning completed: {len(task_plan.tasks)} tasks")
|
||||||
return task_plan
|
return task_plan
|
||||||
|
|
||||||
|
|
@ -330,6 +405,11 @@ class ChatManager:
|
||||||
# Generate actions using AI
|
# Generate actions using AI
|
||||||
actions = await self._generateActionsForTaskStep(context)
|
actions = await self._generateActionsForTaskStep(context)
|
||||||
|
|
||||||
|
# Log the generated actions as JSON for debugging
|
||||||
|
logger.info(f"Generated {len(actions)} actions for task '{task_step.description}':")
|
||||||
|
for i, action in enumerate(actions):
|
||||||
|
logger.info(f"Action {i+1}: {json.dumps(action, indent=2, ensure_ascii=False)}")
|
||||||
|
|
||||||
# Convert to TaskAction objects
|
# Convert to TaskAction objects
|
||||||
task_actions = []
|
task_actions = []
|
||||||
for action_dict in actions:
|
for action_dict in actions:
|
||||||
|
|
@ -338,6 +418,7 @@ class ChatManager:
|
||||||
"execAction": action_dict.get('action', 'unknown'),
|
"execAction": action_dict.get('action', 'unknown'),
|
||||||
"execParameters": action_dict.get('parameters', {}),
|
"execParameters": action_dict.get('parameters', {}),
|
||||||
"execResultLabel": action_dict.get('resultLabel', ''),
|
"execResultLabel": action_dict.get('resultLabel', ''),
|
||||||
|
"expectedDocumentFormats": action_dict.get('expectedDocumentFormats', None),
|
||||||
"status": TaskStatus.PENDING
|
"status": TaskStatus.PENDING
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -352,6 +433,19 @@ class ChatManager:
|
||||||
action_size = self.service.calculateObjectSize(task_actions)
|
action_size = self.service.calculateObjectSize(task_actions)
|
||||||
self.service.updateWorkflowStats(eventLabel="action", bytesSent=action_size)
|
self.service.updateWorkflowStats(eventLabel="action", bytesSent=action_size)
|
||||||
|
|
||||||
|
# Log the final TaskAction objects as JSON
|
||||||
|
logger.info(f"Final TaskAction objects for task '{task_step.description}':")
|
||||||
|
for i, task_action in enumerate(task_actions):
|
||||||
|
action_json = {
|
||||||
|
'id': task_action.id,
|
||||||
|
'execMethod': task_action.execMethod,
|
||||||
|
'execAction': task_action.execAction,
|
||||||
|
'execParameters': task_action.execParameters,
|
||||||
|
'execResultLabel': task_action.execResultLabel,
|
||||||
|
'status': task_action.status.value if hasattr(task_action.status, 'value') else str(task_action.status)
|
||||||
|
}
|
||||||
|
logger.info(f"TaskAction {i+1}: {json.dumps(action_json, indent=2, ensure_ascii=False)}")
|
||||||
|
|
||||||
logger.info(f"Task action definition completed: {len(task_actions)} actions")
|
logger.info(f"Task action definition completed: {len(task_actions)} actions")
|
||||||
return task_actions
|
return task_actions
|
||||||
|
|
||||||
|
|
@ -842,6 +936,7 @@ ACTION GENERATION PRINCIPLES:
|
||||||
- Include validation steps in extraction prompts
|
- Include validation steps in extraction prompts
|
||||||
- If this is a retry, learn from previous failures and improve the approach
|
- If this is a retry, learn from previous failures and improve the approach
|
||||||
- Address specific issues mentioned in previous review feedback
|
- Address specific issues mentioned in previous review feedback
|
||||||
|
- When specifying expectedDocumentFormats, ensure AI prompts explicitly request pure data without markdown formatting
|
||||||
|
|
||||||
INSTRUCTIONS:
|
INSTRUCTIONS:
|
||||||
- Generate actions to accomplish this task step using available documents, connections, and previous results
|
- Generate actions to accomplish this task step using available documents, connections, and previous results
|
||||||
|
|
@ -866,6 +961,13 @@ REQUIRED JSON STRUCTURE:
|
||||||
"aiPrompt": "Comprehensive AI prompt describing what to accomplish"
|
"aiPrompt": "Comprehensive AI prompt describing what to accomplish"
|
||||||
}},
|
}},
|
||||||
"resultLabel": "task1_action3_analysis_results",
|
"resultLabel": "task1_action3_analysis_results",
|
||||||
|
"expectedDocumentFormats": [ // OPTIONAL: Specify expected document formats when needed
|
||||||
|
{{
|
||||||
|
"extension": ".csv",
|
||||||
|
"mimeType": "text/csv",
|
||||||
|
"description": "Structured data output"
|
||||||
|
}}
|
||||||
|
],
|
||||||
"description": "What this action accomplishes (business outcome)"
|
"description": "What this action accomplishes (business outcome)"
|
||||||
}}
|
}}
|
||||||
]
|
]
|
||||||
|
|
@ -876,10 +978,16 @@ FIELD REQUIREMENTS:
|
||||||
- "action": Must be valid for the method
|
- "action": Must be valid for the method
|
||||||
- "parameters": Method-specific, must include documentList as a list if required by the signature
|
- "parameters": Method-specific, must include documentList as a list if required by the signature
|
||||||
- "resultLabel": Must follow the format above (e.g., "task1_action3_analysis_results")
|
- "resultLabel": Must follow the format above (e.g., "task1_action3_analysis_results")
|
||||||
|
- "expectedDocumentFormats": OPTIONAL - Only specify when you need to control output format
|
||||||
|
- Use when you need specific file types (e.g., CSV for data, JSON for structured output)
|
||||||
|
- Omit when format is flexible (e.g., folder queries with mixed file types)
|
||||||
|
- Each format should specify: extension, mimeType, description
|
||||||
|
- When using expectedDocumentFormats, ensure the aiPrompt explicitly requests pure data without markdown formatting
|
||||||
- "description": Clear summary of the business outcome
|
- "description": Clear summary of the business outcome
|
||||||
|
|
||||||
EXAMPLES OF GOOD ACTIONS:
|
EXAMPLES OF GOOD ACTIONS:
|
||||||
1. Comprehensive document analysis:
|
|
||||||
|
1. Document analysis with specific output format (use expectedDocumentFormats):
|
||||||
{{
|
{{
|
||||||
"method": "document",
|
"method": "document",
|
||||||
"action": "extract",
|
"action": "extract",
|
||||||
|
|
@ -888,10 +996,17 @@ EXAMPLES OF GOOD ACTIONS:
|
||||||
"aiPrompt": "Extract and analyze the candidate's qualifications, experience, skills, and suitability for the product designer position. Identify key strengths, relevant experience, technical skills, and any areas of concern. Provide a comprehensive assessment that can be used for evaluation."
|
"aiPrompt": "Extract and analyze the candidate's qualifications, experience, skills, and suitability for the product designer position. Identify key strengths, relevant experience, technical skills, and any areas of concern. Provide a comprehensive assessment that can be used for evaluation."
|
||||||
}},
|
}},
|
||||||
"resultLabel": "task1_action1_candidate_analysis",
|
"resultLabel": "task1_action1_candidate_analysis",
|
||||||
|
"expectedDocumentFormats": [
|
||||||
|
{{
|
||||||
|
"extension": ".json",
|
||||||
|
"mimeType": "application/json",
|
||||||
|
"description": "Structured candidate analysis data"
|
||||||
|
}}
|
||||||
|
],
|
||||||
"description": "Comprehensive analysis of candidate profile for evaluation"
|
"description": "Comprehensive analysis of candidate profile for evaluation"
|
||||||
}}
|
}}
|
||||||
|
|
||||||
2. Multi-document processing:
|
2. Multi-document processing with flexible output (omit expectedDocumentFormats):
|
||||||
{{
|
{{
|
||||||
"method": "document",
|
"method": "document",
|
||||||
"action": "extract",
|
"action": "extract",
|
||||||
|
|
@ -903,6 +1018,25 @@ EXAMPLES OF GOOD ACTIONS:
|
||||||
"description": "Create comprehensive evaluation matrix comparing all candidates"
|
"description": "Create comprehensive evaluation matrix comparing all candidates"
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
3. Data extraction with specific CSV format:
|
||||||
|
{{
|
||||||
|
"method": "document",
|
||||||
|
"action": "extract",
|
||||||
|
"parameters": {{
|
||||||
|
"documentList": ["docItem:doc_abc:table_data.pdf"],
|
||||||
|
"aiPrompt": "Extract all table data and convert to structured CSV format with proper headers and data types. IMPORTANT: Deliver pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows."
|
||||||
|
}},
|
||||||
|
"resultLabel": "task1_action2_structured_data",
|
||||||
|
"expectedDocumentFormats": [
|
||||||
|
{{
|
||||||
|
"extension": ".csv",
|
||||||
|
"mimeType": "text/csv",
|
||||||
|
"description": "Structured table data in CSV format"
|
||||||
|
}}
|
||||||
|
],
|
||||||
|
"description": "Extract and structure table data for analysis"
|
||||||
|
}}
|
||||||
|
|
||||||
NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1022,11 +1156,17 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
||||||
async def _executeSingleAction(self, action: TaskAction, workflow: ChatWorkflow) -> ActionResult:
|
async def _executeSingleAction(self, action: TaskAction, workflow: ChatWorkflow) -> ActionResult:
|
||||||
"""Execute a single action and return ActionResult with enhanced document processing"""
|
"""Execute a single action and return ActionResult with enhanced document processing"""
|
||||||
try:
|
try:
|
||||||
|
# Enhance parameters with expected document formats if specified
|
||||||
|
enhanced_parameters = action.execParameters.copy()
|
||||||
|
if action.expectedDocumentFormats:
|
||||||
|
enhanced_parameters['expectedDocumentFormats'] = action.expectedDocumentFormats
|
||||||
|
logger.info(f"Action {action.execMethod}.{action.execAction} expects formats: {action.expectedDocumentFormats}")
|
||||||
|
|
||||||
# Execute the actual method action using the service center
|
# Execute the actual method action using the service center
|
||||||
result = await self.service.executeAction(
|
result = await self.service.executeAction(
|
||||||
methodName=action.execMethod,
|
methodName=action.execMethod,
|
||||||
actionName=action.execAction,
|
actionName=action.execAction,
|
||||||
parameters=action.execParameters
|
parameters=enhanced_parameters
|
||||||
)
|
)
|
||||||
|
|
||||||
# Always use the execResultLabel from the action definition
|
# Always use the execResultLabel from the action definition
|
||||||
|
|
@ -1348,8 +1488,8 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
||||||
|
|
||||||
# For CSV files, try to extract table data
|
# For CSV files, try to extract table data
|
||||||
elif file_extension == 'csv':
|
elif file_extension == 'csv':
|
||||||
# Look for CSV-specific fields
|
# Look for CSV-specific fields first, then general content fields
|
||||||
csv_fields = ['table_data', 'csv_data', 'rows', 'data']
|
csv_fields = ['table_data', 'csv_data', 'rows', 'data', 'content', 'text']
|
||||||
for field in csv_fields:
|
for field in csv_fields:
|
||||||
if field in document_data:
|
if field in document_data:
|
||||||
content = document_data[field]
|
content = document_data[field]
|
||||||
|
|
@ -1798,7 +1938,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
||||||
# Create final success log
|
# Create final success log
|
||||||
self.chatInterface.createWorkflowLog({
|
self.chatInterface.createWorkflowLog({
|
||||||
"workflowId": workflow.id,
|
"workflowId": workflow.id,
|
||||||
"message": f"🎉 Workflow completed successfully ({len(workflow_results)}/{len(task_plan.tasks)} tasks)",
|
"message": f"🎉 Workflow completed ({len(workflow_results)}/{len(task_plan.tasks)} tasks)",
|
||||||
"type": "success",
|
"type": "success",
|
||||||
"status": "completed",
|
"status": "completed",
|
||||||
"progress": 100
|
"progress": 100
|
||||||
|
|
@ -1814,7 +1954,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f"=== UNIFIED WORKFLOW COMPLETED: {len(workflow_results)}/{len(task_plan.tasks)} tasks successful ===")
|
logger.info(f"=== UNIFIED WORKFLOW COMPLETED: {len(workflow_results)}/{len(task_plan.tasks)} tasks successful ===")
|
||||||
logger.debug(f"FINAL WORKFLOW SUMMARY: {json.dumps(workflow_summary.model_dump(), indent=2, ensure_ascii=False)}")
|
logger.debug(f"FINAL WORKFLOW SUMMARY: {json.dumps(workflow_summary.dict(), indent=2, ensure_ascii=False)}")
|
||||||
return workflow_summary
|
return workflow_summary
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -1989,6 +2129,7 @@ Please review the task requirements and try again with different input or approa
|
||||||
)
|
)
|
||||||
|
|
||||||
# Generate new actions with failure avoidance
|
# Generate new actions with failure avoidance
|
||||||
|
logger.info(f"Regenerating actions for task '{task_step.description}' with failure context (retry {state.retry_count})")
|
||||||
actions = await self.defineTaskActions(task_step, context.workflow, state.getAvailableResults(), enhanced_context)
|
actions = await self.defineTaskActions(task_step, context.workflow, state.getAvailableResults(), enhanced_context)
|
||||||
|
|
||||||
logger.info(f"Regenerated {len(actions)} actions with failure context")
|
logger.info(f"Regenerated {len(actions)} actions with failure context")
|
||||||
|
|
@ -2016,13 +2157,17 @@ Please review the task requirements and try again with different input or approa
|
||||||
prompt = self._createTaskCompletionValidationPrompt(task_result, task_step)
|
prompt = self._createTaskCompletionValidationPrompt(task_result, task_step)
|
||||||
response = await self._callAIWithCircuitBreaker(prompt, "task_completion_validation")
|
response = await self._callAIWithCircuitBreaker(prompt, "task_completion_validation")
|
||||||
|
|
||||||
|
# Log the validation response for debugging
|
||||||
|
logger.debug(f"Task validation AI response: {response}")
|
||||||
|
|
||||||
# Parse validation result
|
# Parse validation result
|
||||||
validation = self._parseTaskValidationResponse(response)
|
validation = self._parseTaskValidationResponse(response)
|
||||||
|
|
||||||
# Add quality metrics
|
# Add quality metrics
|
||||||
validation['quality_metrics'] = self._calculateTaskQualityMetrics(task_step, successful_actions)
|
validation['quality_metrics'] = self._calculateTaskQualityMetrics(task_step, successful_actions)
|
||||||
|
|
||||||
logger.info(f"Task completion validation: {validation.get('status', 'unknown')}")
|
logger.info(f"Task completion validation: {validation.get('status', 'unknown')} - Reason: {validation.get('reason', 'No reason')}")
|
||||||
|
logger.debug(f"Parsed validation result: {json.dumps(validation, indent=2)}")
|
||||||
return ReviewResult(
|
return ReviewResult(
|
||||||
status=validation.get('status', 'unknown'),
|
status=validation.get('status', 'unknown'),
|
||||||
reason=validation.get('reason', 'No reason provided'),
|
reason=validation.get('reason', 'No reason provided'),
|
||||||
|
|
@ -2061,21 +2206,27 @@ Please review the task requirements and try again with different input or approa
|
||||||
'has_text_result': bool(action.data.get('result', '').strip())
|
'has_text_result': bool(action.data.get('result', '').strip())
|
||||||
})
|
})
|
||||||
|
|
||||||
return f"""You are a task completion validator that evaluates if a task was successfully completed.
|
return f"""You are an action completion validator that evaluates if individual actions were successfully completed.
|
||||||
|
|
||||||
TASK DETAILS:
|
ACTION DETAILS:
|
||||||
- Description: {task_step.description}
|
|
||||||
- Expected Outputs: {', '.join(expected_outputs)}
|
|
||||||
- Success Criteria: {', '.join(success_criteria)}
|
|
||||||
|
|
||||||
SUCCESSFUL ACTIONS ({len(successful_actions)}):
|
|
||||||
{json.dumps(action_summary, indent=2)}
|
{json.dumps(action_summary, indent=2)}
|
||||||
|
|
||||||
|
VALIDATION CRITERIA:
|
||||||
|
1. Check if the action's result_label matches what was delivered
|
||||||
|
2. If documents were delivered and result_label is present → SUCCESS
|
||||||
|
3. If no documents but text result with matching result_label or different result_label → RETRY
|
||||||
|
4. If no result_label and no delivery → FAIL
|
||||||
|
|
||||||
|
VALIDATION RULES:
|
||||||
|
- Focus on result_label matching
|
||||||
|
- Check if the action delivered the expected result type
|
||||||
|
- Document delivery with correct result_label = SUCCESS
|
||||||
|
- Text result with correct result_label = SUCCESS
|
||||||
|
|
||||||
VALIDATION QUESTIONS:
|
VALIDATION QUESTIONS:
|
||||||
1. Were all expected outputs produced?
|
1. Does the result_label match what the action was supposed to deliver?
|
||||||
2. Are the success criteria met?
|
2. Were documents or text results delivered with the correct label?
|
||||||
3. Do the action results collectively accomplish the task goal?
|
3. Does the delivery match the action's objective?
|
||||||
4. Is the task ready for handover to the next task?
|
|
||||||
|
|
||||||
REQUIRED JSON RESPONSE:
|
REQUIRED JSON RESPONSE:
|
||||||
{{
|
{{
|
||||||
|
|
@ -2242,6 +2393,21 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
||||||
documents=result.data.get("documents", [])
|
documents=result.data.get("documents", [])
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Log the action execution result as JSON (without document content)
|
||||||
|
action_result_json = {
|
||||||
|
'success': action_result.success,
|
||||||
|
'actionId': action_result.actionId,
|
||||||
|
'actionMethod': action_result.actionMethod,
|
||||||
|
'actionName': action_result.actionName,
|
||||||
|
'validation': action_result.validation,
|
||||||
|
'error': action_result.error,
|
||||||
|
'documents_count': len(action_result.documents),
|
||||||
|
'document_names': [doc.filename if hasattr(doc, 'filename') else str(doc) for doc in action_result.documents],
|
||||||
|
'data_keys': list(action_result.data.keys()) if isinstance(action_result.data, dict) else [],
|
||||||
|
'metadata_keys': list(action_result.metadata.keys()) if isinstance(action_result.metadata, dict) else []
|
||||||
|
}
|
||||||
|
logger.info(f"Action execution result for {action.execMethod}.{action.execAction}: {json.dumps(action_result_json, indent=2, ensure_ascii=False)}")
|
||||||
|
|
||||||
# Update action status based on validation
|
# Update action status based on validation
|
||||||
if validation['status'] == 'success':
|
if validation['status'] == 'success':
|
||||||
action.setSuccess()
|
action.setSuccess()
|
||||||
|
|
@ -2334,6 +2500,24 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
||||||
applied_improvements=improvements
|
applied_improvements=improvements
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Log the retry action execution result as JSON (without document content)
|
||||||
|
retry_result_json = {
|
||||||
|
'success': action_result.success,
|
||||||
|
'actionId': action_result.actionId,
|
||||||
|
'actionMethod': action_result.actionMethod,
|
||||||
|
'actionName': action_result.actionName,
|
||||||
|
'validation': action_result.validation,
|
||||||
|
'error': action_result.error,
|
||||||
|
'is_retry': action_result.is_retry,
|
||||||
|
'previous_error': action_result.previous_error,
|
||||||
|
'applied_improvements': action_result.applied_improvements,
|
||||||
|
'documents_count': len(action_result.documents),
|
||||||
|
'document_names': [doc.filename if hasattr(doc, 'filename') else str(doc) for doc in action_result.documents],
|
||||||
|
'data_keys': list(action_result.data.keys()) if isinstance(action_result.data, dict) else [],
|
||||||
|
'metadata_keys': list(action_result.metadata.keys()) if isinstance(action_result.metadata, dict) else []
|
||||||
|
}
|
||||||
|
logger.info(f"Retry action execution result for {action.execMethod}.{action.execAction}: {json.dumps(retry_result_json, indent=2, ensure_ascii=False)}")
|
||||||
|
|
||||||
# Update action status
|
# Update action status
|
||||||
if validation['status'] == 'success':
|
if validation['status'] == 'success':
|
||||||
enhanced_action.setSuccess()
|
enhanced_action.setSuccess()
|
||||||
|
|
|
||||||
|
|
@ -185,6 +185,8 @@ class TaskAction(BaseModel, ModelMixin):
|
||||||
execAction: str = Field(..., description="Action to perform")
|
execAction: str = Field(..., description="Action to perform")
|
||||||
execParameters: Dict[str, Any] = Field(default_factory=dict, description="Action parameters")
|
execParameters: Dict[str, Any] = Field(default_factory=dict, description="Action parameters")
|
||||||
execResultLabel: Optional[str] = Field(None, description="Label for the set of result documents")
|
execResultLabel: Optional[str] = Field(None, description="Label for the set of result documents")
|
||||||
|
# NEW: Optional document format specification
|
||||||
|
expectedDocumentFormats: Optional[List[Dict[str, str]]] = Field(None, description="Expected document formats (optional)")
|
||||||
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status")
|
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status")
|
||||||
error: Optional[str] = Field(None, description="Error message if action failed")
|
error: Optional[str] = Field(None, description="Error message if action failed")
|
||||||
retryCount: int = Field(default=0, description="Number of retries attempted")
|
retryCount: int = Field(default=0, description="Number of retries attempted")
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ import os
|
||||||
import logging
|
import logging
|
||||||
import uuid
|
import uuid
|
||||||
import time
|
import time
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC, timezone
|
||||||
from typing import Dict, Any, List, Optional, Union
|
from typing import Dict, Any, List, Optional, Union
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
@ -128,8 +128,8 @@ class ChatObjects:
|
||||||
return self.db.getInitialId(table)
|
return self.db.getInitialId(table)
|
||||||
|
|
||||||
def _getCurrentTimestamp(self) -> str:
|
def _getCurrentTimestamp(self) -> str:
|
||||||
"""Returns the current timestamp in ISO format"""
|
"""Returns the current timestamp as Unix timestamp (seconds since epoch)"""
|
||||||
return datetime.now().isoformat()
|
return str(int(time.time()))
|
||||||
|
|
||||||
# Workflow methods
|
# Workflow methods
|
||||||
|
|
||||||
|
|
@ -576,8 +576,45 @@ class ChatObjects:
|
||||||
"processingTime": 0
|
"processingTime": 0
|
||||||
}
|
}
|
||||||
|
|
||||||
# Simple processing time - just use current time
|
# Calculate processing time as duration since workflow start using Unix timestamps
|
||||||
processing_time = time.time()
|
workflow = self.getWorkflow(workflowId)
|
||||||
|
if workflow and workflow.startedAt:
|
||||||
|
try:
|
||||||
|
# Parse start time as Unix timestamp (handle both old ISO format and new Unix format)
|
||||||
|
start_time_str = workflow.startedAt
|
||||||
|
try:
|
||||||
|
# Try to parse as Unix timestamp first
|
||||||
|
start_time = int(float(start_time_str))
|
||||||
|
except ValueError:
|
||||||
|
# If that fails, try to parse as ISO format and convert to Unix
|
||||||
|
try:
|
||||||
|
# Handle ISO format timestamps (for backward compatibility)
|
||||||
|
if start_time_str.endswith('Z'):
|
||||||
|
start_time_str = start_time_str.replace('Z', '+00:00')
|
||||||
|
dt = datetime.fromisoformat(start_time_str)
|
||||||
|
start_time = int(dt.timestamp())
|
||||||
|
except:
|
||||||
|
# If all parsing fails, use current time
|
||||||
|
logger.warning(f"Could not parse start time: {start_time_str}, using current time")
|
||||||
|
start_time = int(time.time())
|
||||||
|
|
||||||
|
current_time = int(time.time())
|
||||||
|
processing_time = current_time - start_time
|
||||||
|
|
||||||
|
# Ensure processing time is reasonable (not negative or extremely large)
|
||||||
|
if processing_time < 0:
|
||||||
|
logger.warning(f"Negative processing time calculated: {processing_time}, using 0")
|
||||||
|
processing_time = 0
|
||||||
|
elif processing_time > 86400 * 365: # More than 1 year
|
||||||
|
logger.warning(f"Unreasonably large processing time: {processing_time}, using 0")
|
||||||
|
processing_time = 0
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error calculating processing time: {str(e)}")
|
||||||
|
processing_time = currentStats.get("processingTime", 0) or 0
|
||||||
|
else:
|
||||||
|
# Fallback to existing processing time or 0
|
||||||
|
processing_time = currentStats.get("processingTime", 0) or 0
|
||||||
|
|
||||||
# Update stats with incremental values - ensure no None values
|
# Update stats with incremental values - ensure no None values
|
||||||
current_bytes_sent = currentStats.get("bytesSent", 0) or 0
|
current_bytes_sent = currentStats.get("bytesSent", 0) or 0
|
||||||
|
|
@ -793,8 +830,8 @@ class ChatObjects:
|
||||||
|
|
||||||
# Load logs
|
# Load logs
|
||||||
logs = self.getWorkflowLogs(workflowId)
|
logs = self.getWorkflowLogs(workflowId)
|
||||||
# Sort by timestamp
|
# Sort by timestamp (Unix timestamps)
|
||||||
logs.sort(key=lambda x: x.get("timestamp", ""))
|
logs.sort(key=lambda x: float(x.get("timestamp", 0)))
|
||||||
|
|
||||||
# Assemble complete workflow object
|
# Assemble complete workflow object
|
||||||
completeWorkflow = workflow.copy()
|
completeWorkflow = workflow.copy()
|
||||||
|
|
@ -1205,12 +1242,13 @@ class ChatObjects:
|
||||||
execAction=createdAction["execAction"],
|
execAction=createdAction["execAction"],
|
||||||
execParameters=createdAction.get("execParameters", {}),
|
execParameters=createdAction.get("execParameters", {}),
|
||||||
execResultLabel=createdAction.get("execResultLabel"),
|
execResultLabel=createdAction.get("execResultLabel"),
|
||||||
|
expectedDocumentFormats=createdAction.get("expectedDocumentFormats"),
|
||||||
status=createdAction.get("status", TaskStatus.PENDING),
|
status=createdAction.get("status", TaskStatus.PENDING),
|
||||||
error=createdAction.get("error"),
|
error=createdAction.get("error"),
|
||||||
retryCount=createdAction.get("retryCount", 0),
|
retryCount=createdAction.get("retryCount", 0),
|
||||||
retryMax=createdAction.get("retryMax", 3),
|
retryMax=createdAction.get("retryMax", 3),
|
||||||
processingTime=createdAction.get("processingTime"),
|
processingTime=createdAction.get("processingTime"),
|
||||||
timestamp=datetime.fromisoformat(createdAction.get("timestamp", datetime.now().isoformat())),
|
timestamp=datetime.fromtimestamp(float(createdAction.get("timestamp", time.time()))),
|
||||||
result=createdAction.get("result"),
|
result=createdAction.get("result"),
|
||||||
resultDocuments=createdAction.get("resultDocuments", [])
|
resultDocuments=createdAction.get("resultDocuments", [])
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -24,17 +24,19 @@ class MethodDocument(MethodBase):
|
||||||
@action
|
@action
|
||||||
async def extract(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def extract(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
"""
|
||||||
Extract specific content from document with ai prompt and return it as a json file
|
Extract specific content from document with ai prompt and return it in the specified format
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
documentList (str): Reference to the document list to extract content from
|
documentList (str): Reference to the document list to extract content from
|
||||||
aiPrompt (str): AI prompt for content extraction
|
aiPrompt (str): AI prompt for content extraction
|
||||||
includeMetadata (bool, optional): Whether to include metadata (default: True)
|
includeMetadata (bool, optional): Whether to include metadata (default: True)
|
||||||
|
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
documentList = parameters.get("documentList")
|
documentList = parameters.get("documentList")
|
||||||
aiPrompt = parameters.get("aiPrompt")
|
aiPrompt = parameters.get("aiPrompt")
|
||||||
includeMetadata = parameters.get("includeMetadata", True)
|
includeMetadata = parameters.get("includeMetadata", True)
|
||||||
|
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||||
|
|
||||||
if not documentList:
|
if not documentList:
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
|
|
@ -58,6 +60,31 @@ class MethodDocument(MethodBase):
|
||||||
error="No documents found for the provided reference"
|
error="No documents found for the provided reference"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Determine output format based on expected formats
|
||||||
|
output_extension = ".txt" # Default
|
||||||
|
output_mime_type = "text/plain" # Default
|
||||||
|
|
||||||
|
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
||||||
|
# Use the first expected format
|
||||||
|
expected_format = expectedDocumentFormats[0]
|
||||||
|
output_extension = expected_format.get("extension", ".txt")
|
||||||
|
output_mime_type = expected_format.get("mimeType", "text/plain")
|
||||||
|
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
||||||
|
logger.info(f"Expected document formats: {expectedDocumentFormats}")
|
||||||
|
else:
|
||||||
|
logger.info("No expected format specified, using default .txt format")
|
||||||
|
|
||||||
|
# Enhance AI prompt to specify output format
|
||||||
|
enhanced_prompt = aiPrompt
|
||||||
|
if output_extension == ".csv":
|
||||||
|
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows. Do not include ```csv or ``` markers."
|
||||||
|
elif output_extension == ".json":
|
||||||
|
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure JSON data without any markdown formatting, code blocks, or additional text. Output only the JSON content. Do not include ```json or ``` markers."
|
||||||
|
elif output_extension == ".xml":
|
||||||
|
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure XML data without any markdown formatting, code blocks, or additional text. Output only the XML content. Do not include ```xml or ``` markers."
|
||||||
|
elif output_extension != ".txt":
|
||||||
|
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure {output_extension.upper()} data without any markdown formatting, code blocks, or additional text. Output only the {output_extension.upper()} content. Do not include any markdown markers."
|
||||||
|
|
||||||
# Extract content from all documents
|
# Extract content from all documents
|
||||||
all_extracted_content = []
|
all_extracted_content = []
|
||||||
file_infos = []
|
file_infos = []
|
||||||
|
|
@ -72,7 +99,7 @@ class MethodDocument(MethodBase):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
extracted_content = await self.service.extractContentFromFileData(
|
extracted_content = await self.service.extractContentFromFileData(
|
||||||
prompt=aiPrompt,
|
prompt=enhanced_prompt, # Use enhanced prompt instead of original
|
||||||
fileData=file_data,
|
fileData=file_data,
|
||||||
filename=file_info.get('name', 'document'),
|
filename=file_info.get('name', 'document'),
|
||||||
mimeType=file_info.get('mimeType', 'application/octet-stream'),
|
mimeType=file_info.get('mimeType', 'application/octet-stream'),
|
||||||
|
|
@ -105,25 +132,50 @@ class MethodDocument(MethodBase):
|
||||||
# Fallback: convert to string representation
|
# Fallback: convert to string representation
|
||||||
text_contents.append(str(content_obj))
|
text_contents.append(str(content_obj))
|
||||||
|
|
||||||
# Combine all extracted text content
|
# Process each document individually and create separate output files
|
||||||
combined_content = "\n\n--- DOCUMENT SEPARATOR ---\n\n".join(text_contents)
|
output_documents = []
|
||||||
|
|
||||||
result_data = {
|
for i, (chatDocument, extracted_content) in enumerate(zip(chatDocuments, all_extracted_content)):
|
||||||
"documentCount": len(chatDocuments),
|
# Extract text content from this document
|
||||||
"content": combined_content,
|
text_content = ""
|
||||||
"fileInfos": file_infos if includeMetadata else None,
|
if hasattr(extracted_content, 'contents') and extracted_content.contents:
|
||||||
"timestamp": datetime.now(UTC).isoformat()
|
# Extract text from ContentItem objects
|
||||||
}
|
for content_item in extracted_content.contents:
|
||||||
|
if hasattr(content_item, 'data') and content_item.data:
|
||||||
|
text_content += content_item.data + "\n"
|
||||||
|
elif isinstance(extracted_content, str):
|
||||||
|
text_content = extracted_content
|
||||||
|
else:
|
||||||
|
# Fallback: convert to string representation
|
||||||
|
text_content = str(extracted_content)
|
||||||
|
|
||||||
|
# Create output filename based on original filename
|
||||||
|
original_filename = chatDocument.filename
|
||||||
|
base_name = original_filename.rsplit('.', 1)[0] if '.' in original_filename else original_filename
|
||||||
|
output_filename = f"{base_name}_extracted_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}"
|
||||||
|
|
||||||
|
# Create result data for this document
|
||||||
|
result_data = {
|
||||||
|
"documentCount": 1,
|
||||||
|
"content": text_content,
|
||||||
|
"originalFilename": original_filename,
|
||||||
|
"fileInfos": [file_infos[i]] if includeMetadata and i < len(file_infos) else None,
|
||||||
|
"timestamp": datetime.now(UTC).isoformat()
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(f"Created output document: {output_filename} with {len(text_content)} characters")
|
||||||
|
logger.info(f"Content preview: {text_content[:200]}...")
|
||||||
|
|
||||||
|
output_documents.append({
|
||||||
|
"documentName": output_filename,
|
||||||
|
"documentData": result_data,
|
||||||
|
"mimeType": output_mime_type
|
||||||
|
})
|
||||||
|
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"documents": [
|
"documents": output_documents
|
||||||
{
|
|
||||||
"documentName": f"extracted_content_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.txt",
|
|
||||||
"documentData": result_data
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -55,12 +55,14 @@ class MethodOutlook(MethodBase):
|
||||||
folder (str, optional): Email folder to read from (default: "Inbox")
|
folder (str, optional): Email folder to read from (default: "Inbox")
|
||||||
limit (int, optional): Maximum number of emails to read (default: 10)
|
limit (int, optional): Maximum number of emails to read (default: 10)
|
||||||
filter (str, optional): Filter criteria for emails
|
filter (str, optional): Filter criteria for emails
|
||||||
|
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
folder = parameters.get("folder", "Inbox")
|
folder = parameters.get("folder", "Inbox")
|
||||||
limit = parameters.get("limit", 10)
|
limit = parameters.get("limit", 10)
|
||||||
filter = parameters.get("filter")
|
filter = parameters.get("filter")
|
||||||
|
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||||
|
|
||||||
if not connectionReference:
|
if not connectionReference:
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
|
|
@ -112,13 +114,27 @@ class MethodOutlook(MethodBase):
|
||||||
"timestamp": datetime.now(UTC).isoformat()
|
"timestamp": datetime.now(UTC).isoformat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Determine output format based on expected formats
|
||||||
|
output_extension = ".json" # Default
|
||||||
|
output_mime_type = "application/json" # Default
|
||||||
|
|
||||||
|
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
||||||
|
# Use the first expected format
|
||||||
|
expected_format = expectedDocumentFormats[0]
|
||||||
|
output_extension = expected_format.get("extension", ".json")
|
||||||
|
output_mime_type = expected_format.get("mimeType", "application/json")
|
||||||
|
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
||||||
|
else:
|
||||||
|
logger.info("No expected format specified, using default .json format")
|
||||||
|
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"documents": [
|
"documents": [
|
||||||
{
|
{
|
||||||
"documentName": f"outlook_emails_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
|
"documentName": f"outlook_emails_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
|
||||||
"documentData": result_data
|
"documentData": result_data,
|
||||||
|
"mimeType": output_mime_type
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
@ -144,6 +160,7 @@ class MethodOutlook(MethodBase):
|
||||||
body (str): Email body content
|
body (str): Email body content
|
||||||
cc (List[str], optional): CC recipients
|
cc (List[str], optional): CC recipients
|
||||||
bcc (List[str], optional): BCC recipients
|
bcc (List[str], optional): BCC recipients
|
||||||
|
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
|
|
@ -152,6 +169,7 @@ class MethodOutlook(MethodBase):
|
||||||
body = parameters.get("body")
|
body = parameters.get("body")
|
||||||
cc = parameters.get("cc", [])
|
cc = parameters.get("cc", [])
|
||||||
bcc = parameters.get("bcc", [])
|
bcc = parameters.get("bcc", [])
|
||||||
|
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||||
|
|
||||||
if not connectionReference or not to or not subject or not body:
|
if not connectionReference or not to or not subject or not body:
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
|
|
@ -207,11 +225,29 @@ class MethodOutlook(MethodBase):
|
||||||
"timestamp": datetime.now(UTC).isoformat()
|
"timestamp": datetime.now(UTC).isoformat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Determine output format based on expected formats
|
||||||
|
output_extension = ".json" # Default
|
||||||
|
output_mime_type = "application/json" # Default
|
||||||
|
|
||||||
|
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
||||||
|
# Use the first expected format
|
||||||
|
expected_format = expectedDocumentFormats[0]
|
||||||
|
output_extension = expected_format.get("extension", ".json")
|
||||||
|
output_mime_type = expected_format.get("mimeType", "application/json")
|
||||||
|
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
||||||
|
else:
|
||||||
|
logger.info("No expected format specified, using default .json format")
|
||||||
|
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"documentName": f"outlook_email_sent_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
|
"documents": [
|
||||||
"documentData": result_data
|
{
|
||||||
|
"documentName": f"outlook_email_sent_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
|
||||||
|
"documentData": result_data,
|
||||||
|
"mimeType": output_mime_type
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -233,12 +269,14 @@ class MethodOutlook(MethodBase):
|
||||||
query (str): Search query
|
query (str): Search query
|
||||||
folder (str, optional): Folder to search in (default: "All")
|
folder (str, optional): Folder to search in (default: "All")
|
||||||
limit (int, optional): Maximum number of results (default: 20)
|
limit (int, optional): Maximum number of results (default: 20)
|
||||||
|
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
query = parameters.get("query")
|
query = parameters.get("query")
|
||||||
folder = parameters.get("folder", "All")
|
folder = parameters.get("folder", "All")
|
||||||
limit = parameters.get("limit", 20)
|
limit = parameters.get("limit", 20)
|
||||||
|
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||||
|
|
||||||
if not connectionReference or not query:
|
if not connectionReference or not query:
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
|
|
@ -290,11 +328,29 @@ class MethodOutlook(MethodBase):
|
||||||
"timestamp": datetime.now(UTC).isoformat()
|
"timestamp": datetime.now(UTC).isoformat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Determine output format based on expected formats
|
||||||
|
output_extension = ".json" # Default
|
||||||
|
output_mime_type = "application/json" # Default
|
||||||
|
|
||||||
|
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
||||||
|
# Use the first expected format
|
||||||
|
expected_format = expectedDocumentFormats[0]
|
||||||
|
output_extension = expected_format.get("extension", ".json")
|
||||||
|
output_mime_type = expected_format.get("mimeType", "application/json")
|
||||||
|
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
||||||
|
else:
|
||||||
|
logger.info("No expected format specified, using default .json format")
|
||||||
|
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"documentName": f"outlook_email_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
|
"documents": [
|
||||||
"documentData": result_data
|
{
|
||||||
|
"documentName": f"outlook_email_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
|
||||||
|
"documentData": result_data,
|
||||||
|
"mimeType": output_mime_type
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -54,12 +54,14 @@ class MethodSharepoint(MethodBase):
|
||||||
siteUrl (str): SharePoint site URL
|
siteUrl (str): SharePoint site URL
|
||||||
query (str): Query or description to find document
|
query (str): Query or description to find document
|
||||||
searchScope (str, optional): Search scope (default: "all")
|
searchScope (str, optional): Search scope (default: "all")
|
||||||
|
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
siteUrl = parameters.get("siteUrl")
|
siteUrl = parameters.get("siteUrl")
|
||||||
query = parameters.get("query")
|
query = parameters.get("query")
|
||||||
searchScope = parameters.get("searchScope", "all")
|
searchScope = parameters.get("searchScope", "all")
|
||||||
|
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||||
|
|
||||||
if not connectionReference or not siteUrl or not query:
|
if not connectionReference or not siteUrl or not query:
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
|
|
@ -108,13 +110,27 @@ class MethodSharepoint(MethodBase):
|
||||||
"timestamp": datetime.now(UTC).isoformat()
|
"timestamp": datetime.now(UTC).isoformat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Determine output format based on expected formats
|
||||||
|
output_extension = ".json" # Default
|
||||||
|
output_mime_type = "application/json" # Default
|
||||||
|
|
||||||
|
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
||||||
|
# Use the first expected format
|
||||||
|
expected_format = expectedDocumentFormats[0]
|
||||||
|
output_extension = expected_format.get("extension", ".json")
|
||||||
|
output_mime_type = expected_format.get("mimeType", "application/json")
|
||||||
|
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
||||||
|
else:
|
||||||
|
logger.info("No expected format specified, using default .json format")
|
||||||
|
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"documents": [
|
"documents": [
|
||||||
{
|
{
|
||||||
"documentName": f"sharepoint_find_path_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
|
"documentName": f"sharepoint_find_path_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
|
||||||
"documentData": result_data
|
"documentData": result_data,
|
||||||
|
"mimeType": output_mime_type
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
@ -139,6 +155,7 @@ class MethodSharepoint(MethodBase):
|
||||||
siteUrl (str): SharePoint site URL
|
siteUrl (str): SharePoint site URL
|
||||||
documentPaths (List[str]): List of paths to the documents in SharePoint
|
documentPaths (List[str]): List of paths to the documents in SharePoint
|
||||||
includeMetadata (bool, optional): Whether to include metadata (default: True)
|
includeMetadata (bool, optional): Whether to include metadata (default: True)
|
||||||
|
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
documentList = parameters.get("documentList")
|
documentList = parameters.get("documentList")
|
||||||
|
|
@ -146,6 +163,7 @@ class MethodSharepoint(MethodBase):
|
||||||
siteUrl = parameters.get("siteUrl")
|
siteUrl = parameters.get("siteUrl")
|
||||||
documentPaths = parameters.get("documentPaths")
|
documentPaths = parameters.get("documentPaths")
|
||||||
includeMetadata = parameters.get("includeMetadata", True)
|
includeMetadata = parameters.get("includeMetadata", True)
|
||||||
|
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||||
|
|
||||||
if not documentList or not connectionReference or not siteUrl or not documentPaths:
|
if not documentList or not connectionReference or not siteUrl or not documentPaths:
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
|
|
@ -218,13 +236,27 @@ class MethodSharepoint(MethodBase):
|
||||||
"timestamp": datetime.now(UTC).isoformat()
|
"timestamp": datetime.now(UTC).isoformat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Determine output format based on expected formats
|
||||||
|
output_extension = ".json" # Default
|
||||||
|
output_mime_type = "application/json" # Default
|
||||||
|
|
||||||
|
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
||||||
|
# Use the first expected format
|
||||||
|
expected_format = expectedDocumentFormats[0]
|
||||||
|
output_extension = expected_format.get("extension", ".json")
|
||||||
|
output_mime_type = expected_format.get("mimeType", "application/json")
|
||||||
|
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
||||||
|
else:
|
||||||
|
logger.info("No expected format specified, using default .json format")
|
||||||
|
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"documents": [
|
"documents": [
|
||||||
{
|
{
|
||||||
"documentName": f"sharepoint_documents_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
|
"documentName": f"sharepoint_documents_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
|
||||||
"documentData": result_data
|
"documentData": result_data,
|
||||||
|
"mimeType": output_mime_type
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
@ -248,6 +280,7 @@ class MethodSharepoint(MethodBase):
|
||||||
documentPaths (List[str]): List of paths where to upload the documents
|
documentPaths (List[str]): List of paths where to upload the documents
|
||||||
documentList (str): Reference to the document list to upload
|
documentList (str): Reference to the document list to upload
|
||||||
fileNames (List[str]): List of names for the uploaded files
|
fileNames (List[str]): List of names for the uploaded files
|
||||||
|
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
|
|
@ -255,6 +288,7 @@ class MethodSharepoint(MethodBase):
|
||||||
documentPaths = parameters.get("documentPaths")
|
documentPaths = parameters.get("documentPaths")
|
||||||
documentList = parameters.get("documentList")
|
documentList = parameters.get("documentList")
|
||||||
fileNames = parameters.get("fileNames")
|
fileNames = parameters.get("fileNames")
|
||||||
|
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||||
|
|
||||||
if not connectionReference or not siteUrl or not documentPaths or not documentList or not fileNames:
|
if not connectionReference or not siteUrl or not documentPaths or not documentList or not fileNames:
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
|
|
@ -339,13 +373,27 @@ class MethodSharepoint(MethodBase):
|
||||||
"timestamp": datetime.now(UTC).isoformat()
|
"timestamp": datetime.now(UTC).isoformat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Determine output format based on expected formats
|
||||||
|
output_extension = ".json" # Default
|
||||||
|
output_mime_type = "application/json" # Default
|
||||||
|
|
||||||
|
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
||||||
|
# Use the first expected format
|
||||||
|
expected_format = expectedDocumentFormats[0]
|
||||||
|
output_extension = expected_format.get("extension", ".json")
|
||||||
|
output_mime_type = expected_format.get("mimeType", "application/json")
|
||||||
|
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
||||||
|
else:
|
||||||
|
logger.info("No expected format specified, using default .json format")
|
||||||
|
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"documents": [
|
"documents": [
|
||||||
{
|
{
|
||||||
"documentName": f"sharepoint_upload_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
|
"documentName": f"sharepoint_upload_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
|
||||||
"documentData": result_data
|
"documentData": result_data,
|
||||||
|
"mimeType": output_mime_type
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
@ -369,12 +417,14 @@ class MethodSharepoint(MethodBase):
|
||||||
siteUrl (str): SharePoint site URL
|
siteUrl (str): SharePoint site URL
|
||||||
folderPaths (List[str]): List of paths to the folders to list
|
folderPaths (List[str]): List of paths to the folders to list
|
||||||
includeSubfolders (bool, optional): Whether to include subfolders (default: False)
|
includeSubfolders (bool, optional): Whether to include subfolders (default: False)
|
||||||
|
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
siteUrl = parameters.get("siteUrl")
|
siteUrl = parameters.get("siteUrl")
|
||||||
folderPaths = parameters.get("folderPaths")
|
folderPaths = parameters.get("folderPaths")
|
||||||
includeSubfolders = parameters.get("includeSubfolders", False)
|
includeSubfolders = parameters.get("includeSubfolders", False)
|
||||||
|
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||||
|
|
||||||
if not connectionReference or not siteUrl or not folderPaths:
|
if not connectionReference or not siteUrl or not folderPaths:
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
|
|
@ -436,13 +486,27 @@ class MethodSharepoint(MethodBase):
|
||||||
"timestamp": datetime.now(UTC).isoformat()
|
"timestamp": datetime.now(UTC).isoformat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Determine output format based on expected formats
|
||||||
|
output_extension = ".json" # Default
|
||||||
|
output_mime_type = "application/json" # Default
|
||||||
|
|
||||||
|
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
||||||
|
# Use the first expected format
|
||||||
|
expected_format = expectedDocumentFormats[0]
|
||||||
|
output_extension = expected_format.get("extension", ".json")
|
||||||
|
output_mime_type = expected_format.get("mimeType", "application/json")
|
||||||
|
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
||||||
|
else:
|
||||||
|
logger.info("No expected format specified, using default .json format")
|
||||||
|
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"documents": [
|
"documents": [
|
||||||
{
|
{
|
||||||
"documentName": f"sharepoint_document_list_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
|
"documentName": f"sharepoint_document_list_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
|
||||||
"documentData": result_data
|
"documentData": result_data,
|
||||||
|
"mimeType": output_mime_type
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -224,12 +224,14 @@ class MethodWeb(MethodBase):
|
||||||
maxDepth (int, optional): Maximum crawl depth (default: 2)
|
maxDepth (int, optional): Maximum crawl depth (default: 2)
|
||||||
includeImages (bool, optional): Whether to include images (default: False)
|
includeImages (bool, optional): Whether to include images (default: False)
|
||||||
followLinks (bool, optional): Whether to follow links (default: True)
|
followLinks (bool, optional): Whether to follow links (default: True)
|
||||||
|
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
urls = parameters.get("urls")
|
urls = parameters.get("urls")
|
||||||
maxDepth = parameters.get("maxDepth", 2)
|
maxDepth = parameters.get("maxDepth", 2)
|
||||||
includeImages = parameters.get("includeImages", False)
|
includeImages = parameters.get("includeImages", False)
|
||||||
followLinks = parameters.get("followLinks", True)
|
followLinks = parameters.get("followLinks", True)
|
||||||
|
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||||
|
|
||||||
if not urls:
|
if not urls:
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
|
|
@ -307,13 +309,27 @@ class MethodWeb(MethodBase):
|
||||||
"timestamp": datetime.now(UTC).isoformat()
|
"timestamp": datetime.now(UTC).isoformat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Determine output format based on expected formats
|
||||||
|
output_extension = ".json" # Default
|
||||||
|
output_mime_type = "application/json" # Default
|
||||||
|
|
||||||
|
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
||||||
|
# Use the first expected format
|
||||||
|
expected_format = expectedDocumentFormats[0]
|
||||||
|
output_extension = expected_format.get("extension", ".json")
|
||||||
|
output_mime_type = expected_format.get("mimeType", "application/json")
|
||||||
|
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
||||||
|
else:
|
||||||
|
logger.info("No expected format specified, using default .json format")
|
||||||
|
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"documents": [
|
"documents": [
|
||||||
{
|
{
|
||||||
"documentName": f"web_crawl_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
|
"documentName": f"web_crawl_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
|
||||||
"documentData": result_data
|
"documentData": result_data,
|
||||||
|
"mimeType": output_mime_type
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
@ -336,11 +352,13 @@ class MethodWeb(MethodBase):
|
||||||
url (str): URL to scrape
|
url (str): URL to scrape
|
||||||
selectors (Dict[str, str]): CSS selectors for data extraction
|
selectors (Dict[str, str]): CSS selectors for data extraction
|
||||||
format (str, optional): Output format (default: "json")
|
format (str, optional): Output format (default: "json")
|
||||||
|
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
url = parameters.get("url")
|
url = parameters.get("url")
|
||||||
selectors = parameters.get("selectors")
|
selectors = parameters.get("selectors")
|
||||||
format = parameters.get("format", "json")
|
format = parameters.get("format", "json")
|
||||||
|
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||||
|
|
||||||
if not url or not selectors:
|
if not url or not selectors:
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
|
|
@ -400,13 +418,27 @@ class MethodWeb(MethodBase):
|
||||||
"timestamp": datetime.now(UTC).isoformat()
|
"timestamp": datetime.now(UTC).isoformat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Determine output format based on expected formats
|
||||||
|
output_extension = f".{format}" # Default to format parameter
|
||||||
|
output_mime_type = "application/json" # Default
|
||||||
|
|
||||||
|
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
||||||
|
# Use the first expected format
|
||||||
|
expected_format = expectedDocumentFormats[0]
|
||||||
|
output_extension = expected_format.get("extension", f".{format}")
|
||||||
|
output_mime_type = expected_format.get("mimeType", "application/json")
|
||||||
|
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
||||||
|
else:
|
||||||
|
logger.info(f"No expected format specified, using format parameter: {format}")
|
||||||
|
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"documents": [
|
"documents": [
|
||||||
{
|
{
|
||||||
"documentName": f"web_scrape_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.{format}",
|
"documentName": f"web_scrape_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
|
||||||
"documentData": result_data
|
"documentData": result_data,
|
||||||
|
"mimeType": output_mime_type
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
@ -430,12 +462,14 @@ class MethodWeb(MethodBase):
|
||||||
engine (str, optional): Search engine to use (default: "google")
|
engine (str, optional): Search engine to use (default: "google")
|
||||||
maxResults (int, optional): Maximum number of results (default: 10)
|
maxResults (int, optional): Maximum number of results (default: 10)
|
||||||
filter (str, optional): Additional search filters
|
filter (str, optional): Additional search filters
|
||||||
|
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
query = parameters.get("query")
|
query = parameters.get("query")
|
||||||
engine = parameters.get("engine", "google")
|
engine = parameters.get("engine", "google")
|
||||||
maxResults = parameters.get("maxResults", 10)
|
maxResults = parameters.get("maxResults", 10)
|
||||||
filter = parameters.get("filter")
|
filter = parameters.get("filter")
|
||||||
|
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||||
|
|
||||||
if not query:
|
if not query:
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
|
|
@ -533,13 +567,27 @@ class MethodWeb(MethodBase):
|
||||||
"timestamp": datetime.now(UTC).isoformat()
|
"timestamp": datetime.now(UTC).isoformat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Determine output format based on expected formats
|
||||||
|
output_extension = ".json" # Default
|
||||||
|
output_mime_type = "application/json" # Default
|
||||||
|
|
||||||
|
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
||||||
|
# Use the first expected format
|
||||||
|
expected_format = expectedDocumentFormats[0]
|
||||||
|
output_extension = expected_format.get("extension", ".json")
|
||||||
|
output_mime_type = expected_format.get("mimeType", "application/json")
|
||||||
|
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
||||||
|
else:
|
||||||
|
logger.info("No expected format specified, using default .json format")
|
||||||
|
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"documents": [
|
"documents": [
|
||||||
{
|
{
|
||||||
"documentName": f"web_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
|
"documentName": f"web_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
|
||||||
"documentData": result_data
|
"documentData": result_data,
|
||||||
|
"mimeType": output_mime_type
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
@ -561,10 +609,12 @@ class MethodWeb(MethodBase):
|
||||||
Parameters:
|
Parameters:
|
||||||
url (str): URL to validate
|
url (str): URL to validate
|
||||||
checks (List[str], optional): Types of checks to perform (default: ["accessibility", "seo", "performance"])
|
checks (List[str], optional): Types of checks to perform (default: ["accessibility", "seo", "performance"])
|
||||||
|
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
url = parameters.get("url")
|
url = parameters.get("url")
|
||||||
checks = parameters.get("checks", ["accessibility", "seo", "performance"])
|
checks = parameters.get("checks", ["accessibility", "seo", "performance"])
|
||||||
|
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||||
|
|
||||||
if not url:
|
if not url:
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
|
|
@ -609,13 +659,27 @@ class MethodWeb(MethodBase):
|
||||||
"timestamp": datetime.now(UTC).isoformat()
|
"timestamp": datetime.now(UTC).isoformat()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Determine output format based on expected formats
|
||||||
|
output_extension = ".json" # Default
|
||||||
|
output_mime_type = "application/json" # Default
|
||||||
|
|
||||||
|
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
||||||
|
# Use the first expected format
|
||||||
|
expected_format = expectedDocumentFormats[0]
|
||||||
|
output_extension = expected_format.get("extension", ".json")
|
||||||
|
output_mime_type = expected_format.get("mimeType", "application/json")
|
||||||
|
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
||||||
|
else:
|
||||||
|
logger.info("No expected format specified, using default .json format")
|
||||||
|
|
||||||
return self._createResult(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"documents": [
|
"documents": [
|
||||||
{
|
{
|
||||||
"documentName": f"web_validation_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
|
"documentName": f"web_validation_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
|
||||||
"documentData": result_data
|
"documentData": result_data,
|
||||||
|
"mimeType": output_mime_type
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue