version 2 ready basic

This commit is contained in:
ValueOn AG 2025-07-11 23:13:42 +02:00
parent 53a4a39214
commit cfb34c6a38
7 changed files with 537 additions and 77 deletions

View file

@ -111,18 +111,46 @@ class ActionValidator:
expected_result_label = action.execResultLabel expected_result_label = action.execResultLabel
expected_format = action.execParameters.get('outputFormat', 'unknown') expected_format = action.execParameters.get('outputFormat', 'unknown')
# Extract expected document formats from action
expected_document_formats = action.expectedDocumentFormats or []
# Check if the result label is present in the action result data
actual_result_label = result_data.get("resultLabel", "") if isinstance(result_data, dict) else ""
result_label_match = actual_result_label == expected_result_label
# Analyze delivered documents and content # Analyze delivered documents and content
delivered_files = [] delivered_files = []
delivered_formats = []
content_items = [] content_items = []
# Check for ChatDocument objects # Check for ChatDocument objects
for doc in documents: for doc in documents:
if hasattr(doc, 'filename'): if hasattr(doc, 'filename'):
delivered_files.append(doc.filename) delivered_files.append(doc.filename)
# Extract format information
file_extension = self._getFileExtension(doc.filename)
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
delivered_formats.append({
'filename': doc.filename,
'extension': file_extension,
'mimeType': mime_type
})
elif isinstance(doc, dict) and 'filename' in doc: elif isinstance(doc, dict) and 'filename' in doc:
delivered_files.append(doc['filename']) delivered_files.append(doc['filename'])
file_extension = self._getFileExtension(doc['filename'])
mime_type = doc.get('mimeType', 'application/octet-stream')
delivered_formats.append({
'filename': doc['filename'],
'extension': file_extension,
'mimeType': mime_type
})
else: else:
delivered_files.append(f"document_{len(delivered_files)}") delivered_files.append(f"document_{len(delivered_files)}")
delivered_formats.append({
'filename': f"document_{len(delivered_files)}",
'extension': 'unknown',
'mimeType': 'application/octet-stream'
})
# Check for ExtractedContent in result data # Check for ExtractedContent in result data
if isinstance(result_data, dict): if isinstance(result_data, dict):
@ -133,11 +161,20 @@ class ActionValidator:
elif 'contents' in result_data: elif 'contents' in result_data:
content_items = result_data['contents'] content_items = result_data['contents']
# If we have delivered files but no content items, consider it successful
# This handles the case where content is stored in files rather than result data
if delivered_files and not content_items:
content_items = [f"File content available in: {', '.join(delivered_files)}"]
# Analyze content items # Analyze content items
content_summary = [] content_summary = []
for item in content_items: for item in content_items:
if hasattr(item, 'label') and hasattr(item, 'metadata'): if hasattr(item, 'label') and hasattr(item, 'metadata'):
content_summary.append(f"{item.label}: {item.metadata.mimeType if hasattr(item.metadata, 'mimeType') else 'unknown'}") content_summary.append(f"{item.label}: {item.metadata.mimeType if hasattr(item.metadata, 'mimeType') else 'unknown'}")
elif isinstance(item, str):
content_summary.append(item)
else:
content_summary.append(str(item))
return f"""You are an action result validator. Your primary focus is to validate that the action delivered the promised result files in the promised format. return f"""You are an action result validator. Your primary focus is to validate that the action delivered the promised result files in the promised format.
@ -145,7 +182,10 @@ ACTION DETAILS:
- Method: {action.execMethod} - Method: {action.execMethod}
- Action: {action.execAction} - Action: {action.execAction}
- Expected Result Label: {expected_result_label} - Expected Result Label: {expected_result_label}
- Actual Result Label: {actual_result_label}
- Result Label Match: {result_label_match}
- Expected Format: {expected_format} - Expected Format: {expected_format}
- Expected Document Formats: {json.dumps(expected_document_formats, indent=2) if expected_document_formats else 'None specified'}
- Parameters: {json.dumps(action.execParameters, indent=2)} - Parameters: {json.dumps(action.execParameters, indent=2)}
RESULT TO VALIDATE: RESULT TO VALIDATE:
@ -155,12 +195,13 @@ RESULT TO VALIDATE:
- Validation Messages: {', '.join(validation_messages) if validation_messages else 'None'} - Validation Messages: {', '.join(validation_messages) if validation_messages else 'None'}
- Documents Produced: {doc_count} - Documents Produced: {doc_count}
- Delivered Files: {', '.join(delivered_files) if delivered_files else 'None'} - Delivered Files: {', '.join(delivered_files) if delivered_files else 'None'}
- Delivered Formats: {json.dumps(delivered_formats, indent=2) if delivered_formats else 'None'}
- Content Items: {', '.join(content_summary) if content_summary else 'None'} - Content Items: {', '.join(content_summary) if content_summary else 'None'}
CRITICAL VALIDATION CRITERIA: CRITICAL VALIDATION CRITERIA:
1. **File Delivery**: Did the action deliver the promised result file(s)? 1. **Result Label Match**: Does the action result contain the expected result label?
2. **Format Compliance**: Are the delivered files in the promised format? 2. **File Delivery**: Did the action deliver the promised result file(s)?
3. **Result Label Match**: Does the result match the expected result label? 3. **Format Compliance**: If expected document formats were specified, do the delivered files match the expected formats?
4. **Content Quality**: Is the content of the delivered files usable and complete? 4. **Content Quality**: Is the content of the delivered files usable and complete?
5. **Content Processing**: If content extraction was expected, was it performed correctly? 5. **Content Processing**: If content extraction was expected, was it performed correctly?
@ -169,21 +210,28 @@ CONTEXT:
- Previous Results: {', '.join(context.previous_results) if context.previous_results else 'None'} - Previous Results: {', '.join(context.previous_results) if context.previous_results else 'None'}
VALIDATION INSTRUCTIONS: VALIDATION INSTRUCTIONS:
1. Check if the expected result label "{expected_result_label}" is present in the result 1. **Result Label Check**: Verify that the expected result label "{expected_result_label}" is present in the action result data. This is the primary success criterion.
2. Verify that files were delivered when expected 2. **File Delivery**: Check if files were delivered when expected. The individual filenames don't need to match the result label - focus on whether content was actually produced.
3. Validate that the delivered files match the expected format "{expected_format}" 3. **Format Compliance**: If expected document formats were specified, check if delivered files match the expected extensions and MIME types. If no formats were specified, this criterion is satisfied.
4. Assess if the content is complete and usable 4. **Content Quality**: If files were delivered, consider the action successful. The presence of delivered files indicates content was processed and stored.
5. Check if content extraction was performed when expected 5. **Content Processing**: If files were delivered, assume content extraction was performed correctly. The file delivery is evidence of successful processing.
6. Determine if retry would improve file delivery or format compliance 6. **Success Criteria**: The action is successful if the result label matches AND files were delivered. If expected formats were specified, they should also match.
IMPORTANT NOTES:
- The result label must be present in the action result data for success
- Individual filenames can be different from the result label
- If files were delivered, consider the action successful even if content details are not provided
- Focus on whether the action accomplished its intended purpose (file delivery)
- Empty files should be considered failures, but delivered files indicate success
REQUIRED JSON RESPONSE: REQUIRED JSON RESPONSE:
{{ {{
"status": "success|retry|fail", "status": "success|retry|fail",
"reason": "Detailed explanation focusing on file delivery and format compliance", "reason": "Detailed explanation focusing on result label match and content quality",
"confidence": 0.0-1.0, "confidence": 0.0-1.0,
"improvements": ["specific file delivery improvements", "format compliance fixes"], "improvements": ["specific improvements if needed"],
"quality_score": 1-10, "quality_score": 1-10,
"missing_elements": ["missing files", "format issues"], "missing_elements": ["missing result label", "missing files", "content issues"],
"suggested_retry_approach": "Specific approach for retry if status is retry" "suggested_retry_approach": "Specific approach for retry if status is retry"
}} }}
@ -223,6 +271,12 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
'suggested_retry_approach': '' 'suggested_retry_approach': ''
} }
def _getFileExtension(self, filename: str) -> str:
    """Return the extension of ``filename`` including the leading dot.

    Only the final path component is inspected, so a dot inside a directory
    name is never mistaken for an extension separator. Both ``/`` and ``\\``
    are accepted as path separators regardless of the host platform.

    Args:
        filename: File name or path of a delivered document. ``None`` and
            the empty string are tolerated because document dicts may
            carry a missing or blank ``filename`` value.

    Returns:
        The extension with its leading dot (e.g. ``".csv"``), or ``""`` when
        the name has no extension. Names that consist only of a leading dot
        plus text (e.g. ``".gitignore"``) are reported as having no
        extension. The original letter case is preserved.
    """
    # Function-local import: this file's import block is not visible from
    # here, so the dependency is kept inside the method.
    import posixpath

    if not filename:
        return ''

    # Normalise Windows separators so the result does not depend on the OS
    # the validator happens to run on.
    normalized = str(filename).replace('\\', '/')

    # splitext() looks at the last path component only and ignores leading
    # dots, which fixes "out.v2/report" -> ".v2/report" and
    # ".gitignore" -> ".gitignore" from the naive split('.') approach.
    return posixpath.splitext(normalized)[1]
class ChatManager: class ChatManager:
"""Chat manager with improved AI integration and method handling""" """Chat manager with improved AI integration and method handling"""
@ -293,6 +347,27 @@ class ChatManager:
tasks=tasks tasks=tasks
) )
# Log the task plan as JSON for debugging
logger.info(f"Task plan created for workflow {workflow.id}:")
task_plan_json = {
'overview': task_plan.overview,
'tasks_count': len(task_plan.tasks),
'tasks': []
}
for task in task_plan.tasks:
task_json = {
'id': task.id,
'description': task.description,
'dependencies': task.dependencies or [],
'expected_outputs': task.expected_outputs or [],
'success_criteria': task.success_criteria or [],
'required_documents': task.required_documents or [],
'estimated_complexity': task.estimated_complexity or '',
'ai_prompt': task.ai_prompt or ''
}
task_plan_json['tasks'].append(task_json)
logger.info(f"Task Plan: {json.dumps(task_plan_json, indent=2, ensure_ascii=False)}")
logger.info(f"High-level task planning completed: {len(task_plan.tasks)} tasks") logger.info(f"High-level task planning completed: {len(task_plan.tasks)} tasks")
return task_plan return task_plan
@ -330,6 +405,11 @@ class ChatManager:
# Generate actions using AI # Generate actions using AI
actions = await self._generateActionsForTaskStep(context) actions = await self._generateActionsForTaskStep(context)
# Log the generated actions as JSON for debugging
logger.info(f"Generated {len(actions)} actions for task '{task_step.description}':")
for i, action in enumerate(actions):
logger.info(f"Action {i+1}: {json.dumps(action, indent=2, ensure_ascii=False)}")
# Convert to TaskAction objects # Convert to TaskAction objects
task_actions = [] task_actions = []
for action_dict in actions: for action_dict in actions:
@ -338,6 +418,7 @@ class ChatManager:
"execAction": action_dict.get('action', 'unknown'), "execAction": action_dict.get('action', 'unknown'),
"execParameters": action_dict.get('parameters', {}), "execParameters": action_dict.get('parameters', {}),
"execResultLabel": action_dict.get('resultLabel', ''), "execResultLabel": action_dict.get('resultLabel', ''),
"expectedDocumentFormats": action_dict.get('expectedDocumentFormats', None),
"status": TaskStatus.PENDING "status": TaskStatus.PENDING
} }
@ -352,6 +433,19 @@ class ChatManager:
action_size = self.service.calculateObjectSize(task_actions) action_size = self.service.calculateObjectSize(task_actions)
self.service.updateWorkflowStats(eventLabel="action", bytesSent=action_size) self.service.updateWorkflowStats(eventLabel="action", bytesSent=action_size)
# Log the final TaskAction objects as JSON
logger.info(f"Final TaskAction objects for task '{task_step.description}':")
for i, task_action in enumerate(task_actions):
action_json = {
'id': task_action.id,
'execMethod': task_action.execMethod,
'execAction': task_action.execAction,
'execParameters': task_action.execParameters,
'execResultLabel': task_action.execResultLabel,
'status': task_action.status.value if hasattr(task_action.status, 'value') else str(task_action.status)
}
logger.info(f"TaskAction {i+1}: {json.dumps(action_json, indent=2, ensure_ascii=False)}")
logger.info(f"Task action definition completed: {len(task_actions)} actions") logger.info(f"Task action definition completed: {len(task_actions)} actions")
return task_actions return task_actions
@ -842,6 +936,7 @@ ACTION GENERATION PRINCIPLES:
- Include validation steps in extraction prompts - Include validation steps in extraction prompts
- If this is a retry, learn from previous failures and improve the approach - If this is a retry, learn from previous failures and improve the approach
- Address specific issues mentioned in previous review feedback - Address specific issues mentioned in previous review feedback
- When specifying expectedDocumentFormats, ensure AI prompts explicitly request pure data without markdown formatting
INSTRUCTIONS: INSTRUCTIONS:
- Generate actions to accomplish this task step using available documents, connections, and previous results - Generate actions to accomplish this task step using available documents, connections, and previous results
@ -866,6 +961,13 @@ REQUIRED JSON STRUCTURE:
"aiPrompt": "Comprehensive AI prompt describing what to accomplish" "aiPrompt": "Comprehensive AI prompt describing what to accomplish"
}}, }},
"resultLabel": "task1_action3_analysis_results", "resultLabel": "task1_action3_analysis_results",
"expectedDocumentFormats": [ // OPTIONAL: Specify expected document formats when needed
{{
"extension": ".csv",
"mimeType": "text/csv",
"description": "Structured data output"
}}
],
"description": "What this action accomplishes (business outcome)" "description": "What this action accomplishes (business outcome)"
}} }}
] ]
@ -876,10 +978,16 @@ FIELD REQUIREMENTS:
- "action": Must be valid for the method - "action": Must be valid for the method
- "parameters": Method-specific, must include documentList as a list if required by the signature - "parameters": Method-specific, must include documentList as a list if required by the signature
- "resultLabel": Must follow the format above (e.g., "task1_action3_analysis_results") - "resultLabel": Must follow the format above (e.g., "task1_action3_analysis_results")
- "expectedDocumentFormats": OPTIONAL - Only specify when you need to control output format
- Use when you need specific file types (e.g., CSV for data, JSON for structured output)
- Omit when format is flexible (e.g., folder queries with mixed file types)
- Each format should specify: extension, mimeType, description
- When using expectedDocumentFormats, ensure the aiPrompt explicitly requests pure data without markdown formatting
- "description": Clear summary of the business outcome - "description": Clear summary of the business outcome
EXAMPLES OF GOOD ACTIONS: EXAMPLES OF GOOD ACTIONS:
1. Comprehensive document analysis:
1. Document analysis with specific output format (use expectedDocumentFormats):
{{ {{
"method": "document", "method": "document",
"action": "extract", "action": "extract",
@ -888,10 +996,17 @@ EXAMPLES OF GOOD ACTIONS:
"aiPrompt": "Extract and analyze the candidate's qualifications, experience, skills, and suitability for the product designer position. Identify key strengths, relevant experience, technical skills, and any areas of concern. Provide a comprehensive assessment that can be used for evaluation." "aiPrompt": "Extract and analyze the candidate's qualifications, experience, skills, and suitability for the product designer position. Identify key strengths, relevant experience, technical skills, and any areas of concern. Provide a comprehensive assessment that can be used for evaluation."
}}, }},
"resultLabel": "task1_action1_candidate_analysis", "resultLabel": "task1_action1_candidate_analysis",
"expectedDocumentFormats": [
{{
"extension": ".json",
"mimeType": "application/json",
"description": "Structured candidate analysis data"
}}
],
"description": "Comprehensive analysis of candidate profile for evaluation" "description": "Comprehensive analysis of candidate profile for evaluation"
}} }}
2. Multi-document processing: 2. Multi-document processing with flexible output (omit expectedDocumentFormats):
{{ {{
"method": "document", "method": "document",
"action": "extract", "action": "extract",
@ -903,6 +1018,25 @@ EXAMPLES OF GOOD ACTIONS:
"description": "Create comprehensive evaluation matrix comparing all candidates" "description": "Create comprehensive evaluation matrix comparing all candidates"
}} }}
3. Data extraction with specific CSV format:
{{
"method": "document",
"action": "extract",
"parameters": {{
"documentList": ["docItem:doc_abc:table_data.pdf"],
"aiPrompt": "Extract all table data and convert to structured CSV format with proper headers and data types. IMPORTANT: Deliver pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows."
}},
"resultLabel": "task1_action2_structured_data",
"expectedDocumentFormats": [
{{
"extension": ".csv",
"mimeType": "text/csv",
"description": "Structured table data in CSV format"
}}
],
"description": "Extract and structure table data for analysis"
}}
NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
@ -1022,11 +1156,17 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
async def _executeSingleAction(self, action: TaskAction, workflow: ChatWorkflow) -> ActionResult: async def _executeSingleAction(self, action: TaskAction, workflow: ChatWorkflow) -> ActionResult:
"""Execute a single action and return ActionResult with enhanced document processing""" """Execute a single action and return ActionResult with enhanced document processing"""
try: try:
# Enhance parameters with expected document formats if specified
enhanced_parameters = action.execParameters.copy()
if action.expectedDocumentFormats:
enhanced_parameters['expectedDocumentFormats'] = action.expectedDocumentFormats
logger.info(f"Action {action.execMethod}.{action.execAction} expects formats: {action.expectedDocumentFormats}")
# Execute the actual method action using the service center # Execute the actual method action using the service center
result = await self.service.executeAction( result = await self.service.executeAction(
methodName=action.execMethod, methodName=action.execMethod,
actionName=action.execAction, actionName=action.execAction,
parameters=action.execParameters parameters=enhanced_parameters
) )
# Always use the execResultLabel from the action definition # Always use the execResultLabel from the action definition
@ -1348,8 +1488,8 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
# For CSV files, try to extract table data # For CSV files, try to extract table data
elif file_extension == 'csv': elif file_extension == 'csv':
# Look for CSV-specific fields # Look for CSV-specific fields first, then general content fields
csv_fields = ['table_data', 'csv_data', 'rows', 'data'] csv_fields = ['table_data', 'csv_data', 'rows', 'data', 'content', 'text']
for field in csv_fields: for field in csv_fields:
if field in document_data: if field in document_data:
content = document_data[field] content = document_data[field]
@ -1798,7 +1938,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
# Create final success log # Create final success log
self.chatInterface.createWorkflowLog({ self.chatInterface.createWorkflowLog({
"workflowId": workflow.id, "workflowId": workflow.id,
"message": f"🎉 Workflow completed successfully ({len(workflow_results)}/{len(task_plan.tasks)} tasks)", "message": f"🎉 Workflow completed ({len(workflow_results)}/{len(task_plan.tasks)} tasks)",
"type": "success", "type": "success",
"status": "completed", "status": "completed",
"progress": 100 "progress": 100
@ -1814,7 +1954,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
) )
logger.info(f"=== UNIFIED WORKFLOW COMPLETED: {len(workflow_results)}/{len(task_plan.tasks)} tasks successful ===") logger.info(f"=== UNIFIED WORKFLOW COMPLETED: {len(workflow_results)}/{len(task_plan.tasks)} tasks successful ===")
logger.debug(f"FINAL WORKFLOW SUMMARY: {json.dumps(workflow_summary.model_dump(), indent=2, ensure_ascii=False)}") logger.debug(f"FINAL WORKFLOW SUMMARY: {json.dumps(workflow_summary.dict(), indent=2, ensure_ascii=False)}")
return workflow_summary return workflow_summary
except Exception as e: except Exception as e:
@ -1989,6 +2129,7 @@ Please review the task requirements and try again with different input or approa
) )
# Generate new actions with failure avoidance # Generate new actions with failure avoidance
logger.info(f"Regenerating actions for task '{task_step.description}' with failure context (retry {state.retry_count})")
actions = await self.defineTaskActions(task_step, context.workflow, state.getAvailableResults(), enhanced_context) actions = await self.defineTaskActions(task_step, context.workflow, state.getAvailableResults(), enhanced_context)
logger.info(f"Regenerated {len(actions)} actions with failure context") logger.info(f"Regenerated {len(actions)} actions with failure context")
@ -2016,13 +2157,17 @@ Please review the task requirements and try again with different input or approa
prompt = self._createTaskCompletionValidationPrompt(task_result, task_step) prompt = self._createTaskCompletionValidationPrompt(task_result, task_step)
response = await self._callAIWithCircuitBreaker(prompt, "task_completion_validation") response = await self._callAIWithCircuitBreaker(prompt, "task_completion_validation")
# Log the validation response for debugging
logger.debug(f"Task validation AI response: {response}")
# Parse validation result # Parse validation result
validation = self._parseTaskValidationResponse(response) validation = self._parseTaskValidationResponse(response)
# Add quality metrics # Add quality metrics
validation['quality_metrics'] = self._calculateTaskQualityMetrics(task_step, successful_actions) validation['quality_metrics'] = self._calculateTaskQualityMetrics(task_step, successful_actions)
logger.info(f"Task completion validation: {validation.get('status', 'unknown')}") logger.info(f"Task completion validation: {validation.get('status', 'unknown')} - Reason: {validation.get('reason', 'No reason')}")
logger.debug(f"Parsed validation result: {json.dumps(validation, indent=2)}")
return ReviewResult( return ReviewResult(
status=validation.get('status', 'unknown'), status=validation.get('status', 'unknown'),
reason=validation.get('reason', 'No reason provided'), reason=validation.get('reason', 'No reason provided'),
@ -2061,21 +2206,27 @@ Please review the task requirements and try again with different input or approa
'has_text_result': bool(action.data.get('result', '').strip()) 'has_text_result': bool(action.data.get('result', '').strip())
}) })
return f"""You are a task completion validator that evaluates if a task was successfully completed. return f"""You are an action completion validator that evaluates if individual actions were successfully completed.
TASK DETAILS: ACTION DETAILS:
- Description: {task_step.description}
- Expected Outputs: {', '.join(expected_outputs)}
- Success Criteria: {', '.join(success_criteria)}
SUCCESSFUL ACTIONS ({len(successful_actions)}):
{json.dumps(action_summary, indent=2)} {json.dumps(action_summary, indent=2)}
VALIDATION CRITERIA:
1. Check if the action's result_label matches what was delivered
2. If documents were delivered and result_label is present SUCCESS
3. If no documents but text result with matching result_label or different result_label RETRY
4. If no result_label and no delivery FAIL
VALIDATION RULES:
- Focus on result_label matching
- Check if the action delivered the expected result type
- Document delivery with correct result_label = SUCCESS
- Text result with correct result_label = SUCCESS
VALIDATION QUESTIONS: VALIDATION QUESTIONS:
1. Were all expected outputs produced? 1. Does the result_label match what the action was supposed to deliver?
2. Are the success criteria met? 2. Were documents or text results delivered with the correct label?
3. Do the action results collectively accomplish the task goal? 3. Does the delivery match the action's objective?
4. Is the task ready for handover to the next task?
REQUIRED JSON RESPONSE: REQUIRED JSON RESPONSE:
{{ {{
@ -2242,6 +2393,21 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
documents=result.data.get("documents", []) documents=result.data.get("documents", [])
) )
# Log the action execution result as JSON (without document content)
action_result_json = {
'success': action_result.success,
'actionId': action_result.actionId,
'actionMethod': action_result.actionMethod,
'actionName': action_result.actionName,
'validation': action_result.validation,
'error': action_result.error,
'documents_count': len(action_result.documents),
'document_names': [doc.filename if hasattr(doc, 'filename') else str(doc) for doc in action_result.documents],
'data_keys': list(action_result.data.keys()) if isinstance(action_result.data, dict) else [],
'metadata_keys': list(action_result.metadata.keys()) if isinstance(action_result.metadata, dict) else []
}
logger.info(f"Action execution result for {action.execMethod}.{action.execAction}: {json.dumps(action_result_json, indent=2, ensure_ascii=False)}")
# Update action status based on validation # Update action status based on validation
if validation['status'] == 'success': if validation['status'] == 'success':
action.setSuccess() action.setSuccess()
@ -2334,6 +2500,24 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
applied_improvements=improvements applied_improvements=improvements
) )
# Log the retry action execution result as JSON (without document content)
retry_result_json = {
'success': action_result.success,
'actionId': action_result.actionId,
'actionMethod': action_result.actionMethod,
'actionName': action_result.actionName,
'validation': action_result.validation,
'error': action_result.error,
'is_retry': action_result.is_retry,
'previous_error': action_result.previous_error,
'applied_improvements': action_result.applied_improvements,
'documents_count': len(action_result.documents),
'document_names': [doc.filename if hasattr(doc, 'filename') else str(doc) for doc in action_result.documents],
'data_keys': list(action_result.data.keys()) if isinstance(action_result.data, dict) else [],
'metadata_keys': list(action_result.metadata.keys()) if isinstance(action_result.metadata, dict) else []
}
logger.info(f"Retry action execution result for {action.execMethod}.{action.execAction}: {json.dumps(retry_result_json, indent=2, ensure_ascii=False)}")
# Update action status # Update action status
if validation['status'] == 'success': if validation['status'] == 'success':
enhanced_action.setSuccess() enhanced_action.setSuccess()

View file

@ -185,6 +185,8 @@ class TaskAction(BaseModel, ModelMixin):
execAction: str = Field(..., description="Action to perform") execAction: str = Field(..., description="Action to perform")
execParameters: Dict[str, Any] = Field(default_factory=dict, description="Action parameters") execParameters: Dict[str, Any] = Field(default_factory=dict, description="Action parameters")
execResultLabel: Optional[str] = Field(None, description="Label for the set of result documents") execResultLabel: Optional[str] = Field(None, description="Label for the set of result documents")
# NEW: Optional document format specification
expectedDocumentFormats: Optional[List[Dict[str, str]]] = Field(None, description="Expected document formats (optional)")
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status") status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status")
error: Optional[str] = Field(None, description="Error message if action failed") error: Optional[str] = Field(None, description="Error message if action failed")
retryCount: int = Field(default=0, description="Number of retries attempted") retryCount: int = Field(default=0, description="Number of retries attempted")

View file

@ -7,7 +7,7 @@ import os
import logging import logging
import uuid import uuid
import time import time
from datetime import datetime, UTC from datetime import datetime, UTC, timezone
from typing import Dict, Any, List, Optional, Union from typing import Dict, Any, List, Optional, Union
import asyncio import asyncio
@ -128,8 +128,8 @@ class ChatObjects:
return self.db.getInitialId(table) return self.db.getInitialId(table)
def _getCurrentTimestamp(self) -> str: def _getCurrentTimestamp(self) -> str:
"""Returns the current timestamp in ISO format""" """Returns the current timestamp as Unix timestamp (seconds since epoch)"""
return datetime.now().isoformat() return str(int(time.time()))
# Workflow methods # Workflow methods
@ -576,8 +576,45 @@ class ChatObjects:
"processingTime": 0 "processingTime": 0
} }
# Simple processing time - just use current time # Calculate processing time as duration since workflow start using Unix timestamps
processing_time = time.time() workflow = self.getWorkflow(workflowId)
if workflow and workflow.startedAt:
try:
# Parse start time as Unix timestamp (handle both old ISO format and new Unix format)
start_time_str = workflow.startedAt
try:
# Try to parse as Unix timestamp first
start_time = int(float(start_time_str))
except ValueError:
# If that fails, try to parse as ISO format and convert to Unix
try:
# Handle ISO format timestamps (for backward compatibility)
if start_time_str.endswith('Z'):
start_time_str = start_time_str.replace('Z', '+00:00')
dt = datetime.fromisoformat(start_time_str)
start_time = int(dt.timestamp())
except:
# If all parsing fails, use current time
logger.warning(f"Could not parse start time: {start_time_str}, using current time")
start_time = int(time.time())
current_time = int(time.time())
processing_time = current_time - start_time
# Ensure processing time is reasonable (not negative or extremely large)
if processing_time < 0:
logger.warning(f"Negative processing time calculated: {processing_time}, using 0")
processing_time = 0
elif processing_time > 86400 * 365: # More than 1 year
logger.warning(f"Unreasonably large processing time: {processing_time}, using 0")
processing_time = 0
except Exception as e:
logger.warning(f"Error calculating processing time: {str(e)}")
processing_time = currentStats.get("processingTime", 0) or 0
else:
# Fallback to existing processing time or 0
processing_time = currentStats.get("processingTime", 0) or 0
# Update stats with incremental values - ensure no None values # Update stats with incremental values - ensure no None values
current_bytes_sent = currentStats.get("bytesSent", 0) or 0 current_bytes_sent = currentStats.get("bytesSent", 0) or 0
@ -793,8 +830,8 @@ class ChatObjects:
# Load logs # Load logs
logs = self.getWorkflowLogs(workflowId) logs = self.getWorkflowLogs(workflowId)
# Sort by timestamp # Sort by timestamp (Unix timestamps)
logs.sort(key=lambda x: x.get("timestamp", "")) logs.sort(key=lambda x: float(x.get("timestamp", 0)))
# Assemble complete workflow object # Assemble complete workflow object
completeWorkflow = workflow.copy() completeWorkflow = workflow.copy()
@ -1205,12 +1242,13 @@ class ChatObjects:
execAction=createdAction["execAction"], execAction=createdAction["execAction"],
execParameters=createdAction.get("execParameters", {}), execParameters=createdAction.get("execParameters", {}),
execResultLabel=createdAction.get("execResultLabel"), execResultLabel=createdAction.get("execResultLabel"),
expectedDocumentFormats=createdAction.get("expectedDocumentFormats"),
status=createdAction.get("status", TaskStatus.PENDING), status=createdAction.get("status", TaskStatus.PENDING),
error=createdAction.get("error"), error=createdAction.get("error"),
retryCount=createdAction.get("retryCount", 0), retryCount=createdAction.get("retryCount", 0),
retryMax=createdAction.get("retryMax", 3), retryMax=createdAction.get("retryMax", 3),
processingTime=createdAction.get("processingTime"), processingTime=createdAction.get("processingTime"),
timestamp=datetime.fromisoformat(createdAction.get("timestamp", datetime.now().isoformat())), timestamp=datetime.fromtimestamp(float(createdAction.get("timestamp", time.time()))),
result=createdAction.get("result"), result=createdAction.get("result"),
resultDocuments=createdAction.get("resultDocuments", []) resultDocuments=createdAction.get("resultDocuments", [])
) )

View file

@ -24,17 +24,19 @@ class MethodDocument(MethodBase):
@action @action
async def extract(self, parameters: Dict[str, Any]) -> ActionResult: async def extract(self, parameters: Dict[str, Any]) -> ActionResult:
""" """
Extract specific content from document with ai prompt and return it as a json file Extract specific content from document with ai prompt and return it in the specified format
Parameters: Parameters:
documentList (str): Reference to the document list to extract content from documentList (str): Reference to the document list to extract content from
aiPrompt (str): AI prompt for content extraction aiPrompt (str): AI prompt for content extraction
includeMetadata (bool, optional): Whether to include metadata (default: True) includeMetadata (bool, optional): Whether to include metadata (default: True)
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
""" """
try: try:
documentList = parameters.get("documentList") documentList = parameters.get("documentList")
aiPrompt = parameters.get("aiPrompt") aiPrompt = parameters.get("aiPrompt")
includeMetadata = parameters.get("includeMetadata", True) includeMetadata = parameters.get("includeMetadata", True)
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
if not documentList: if not documentList:
return self._createResult( return self._createResult(
@ -58,6 +60,31 @@ class MethodDocument(MethodBase):
error="No documents found for the provided reference" error="No documents found for the provided reference"
) )
# Determine output format based on expected formats
output_extension = ".txt" # Default
output_mime_type = "text/plain" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".txt")
output_mime_type = expected_format.get("mimeType", "text/plain")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
logger.info(f"Expected document formats: {expectedDocumentFormats}")
else:
logger.info("No expected format specified, using default .txt format")
# Enhance AI prompt to specify output format
enhanced_prompt = aiPrompt
if output_extension == ".csv":
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows. Do not include ```csv or ``` markers."
elif output_extension == ".json":
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure JSON data without any markdown formatting, code blocks, or additional text. Output only the JSON content. Do not include ```json or ``` markers."
elif output_extension == ".xml":
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure XML data without any markdown formatting, code blocks, or additional text. Output only the XML content. Do not include ```xml or ``` markers."
elif output_extension != ".txt":
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure {output_extension.upper()} data without any markdown formatting, code blocks, or additional text. Output only the {output_extension.upper()} content. Do not include any markdown markers."
# Extract content from all documents # Extract content from all documents
all_extracted_content = [] all_extracted_content = []
file_infos = [] file_infos = []
@ -72,7 +99,7 @@ class MethodDocument(MethodBase):
continue continue
extracted_content = await self.service.extractContentFromFileData( extracted_content = await self.service.extractContentFromFileData(
prompt=aiPrompt, prompt=enhanced_prompt, # Use enhanced prompt instead of original
fileData=file_data, fileData=file_data,
filename=file_info.get('name', 'document'), filename=file_info.get('name', 'document'),
mimeType=file_info.get('mimeType', 'application/octet-stream'), mimeType=file_info.get('mimeType', 'application/octet-stream'),
@ -105,25 +132,50 @@ class MethodDocument(MethodBase):
# Fallback: convert to string representation # Fallback: convert to string representation
text_contents.append(str(content_obj)) text_contents.append(str(content_obj))
# Combine all extracted text content # Process each document individually and create separate output files
combined_content = "\n\n--- DOCUMENT SEPARATOR ---\n\n".join(text_contents) output_documents = []
for i, (chatDocument, extracted_content) in enumerate(zip(chatDocuments, all_extracted_content)):
# Extract text content from this document
text_content = ""
if hasattr(extracted_content, 'contents') and extracted_content.contents:
# Extract text from ContentItem objects
for content_item in extracted_content.contents:
if hasattr(content_item, 'data') and content_item.data:
text_content += content_item.data + "\n"
elif isinstance(extracted_content, str):
text_content = extracted_content
else:
# Fallback: convert to string representation
text_content = str(extracted_content)
# Create output filename based on original filename
original_filename = chatDocument.filename
base_name = original_filename.rsplit('.', 1)[0] if '.' in original_filename else original_filename
output_filename = f"{base_name}_extracted_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}"
# Create result data for this document
result_data = { result_data = {
"documentCount": len(chatDocuments), "documentCount": 1,
"content": combined_content, "content": text_content,
"fileInfos": file_infos if includeMetadata else None, "originalFilename": original_filename,
"fileInfos": [file_infos[i]] if includeMetadata and i < len(file_infos) else None,
"timestamp": datetime.now(UTC).isoformat() "timestamp": datetime.now(UTC).isoformat()
} }
logger.info(f"Created output document: {output_filename} with {len(text_content)} characters")
logger.info(f"Content preview: {text_content[:200]}...")
output_documents.append({
"documentName": output_filename,
"documentData": result_data,
"mimeType": output_mime_type
})
return self._createResult( return self._createResult(
success=True, success=True,
data={ data={
"documents": [ "documents": output_documents
{
"documentName": f"extracted_content_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.txt",
"documentData": result_data
}
]
} }
) )
except Exception as e: except Exception as e:

View file

@ -55,12 +55,14 @@ class MethodOutlook(MethodBase):
folder (str, optional): Email folder to read from (default: "Inbox") folder (str, optional): Email folder to read from (default: "Inbox")
limit (int, optional): Maximum number of emails to read (default: 10) limit (int, optional): Maximum number of emails to read (default: 10)
filter (str, optional): Filter criteria for emails filter (str, optional): Filter criteria for emails
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
""" """
try: try:
connectionReference = parameters.get("connectionReference") connectionReference = parameters.get("connectionReference")
folder = parameters.get("folder", "Inbox") folder = parameters.get("folder", "Inbox")
limit = parameters.get("limit", 10) limit = parameters.get("limit", 10)
filter = parameters.get("filter") filter = parameters.get("filter")
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
if not connectionReference: if not connectionReference:
return self._createResult( return self._createResult(
@ -112,13 +114,27 @@ class MethodOutlook(MethodBase):
"timestamp": datetime.now(UTC).isoformat() "timestamp": datetime.now(UTC).isoformat()
} }
# Determine output format based on expected formats
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".json")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info("No expected format specified, using default .json format")
return self._createResult( return self._createResult(
success=True, success=True,
data={ data={
"documents": [ "documents": [
{ {
"documentName": f"outlook_emails_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", "documentName": f"outlook_emails_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
"documentData": result_data "documentData": result_data,
"mimeType": output_mime_type
} }
] ]
} }
@ -144,6 +160,7 @@ class MethodOutlook(MethodBase):
body (str): Email body content body (str): Email body content
cc (List[str], optional): CC recipients cc (List[str], optional): CC recipients
bcc (List[str], optional): BCC recipients bcc (List[str], optional): BCC recipients
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
""" """
try: try:
connectionReference = parameters.get("connectionReference") connectionReference = parameters.get("connectionReference")
@ -152,6 +169,7 @@ class MethodOutlook(MethodBase):
body = parameters.get("body") body = parameters.get("body")
cc = parameters.get("cc", []) cc = parameters.get("cc", [])
bcc = parameters.get("bcc", []) bcc = parameters.get("bcc", [])
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
if not connectionReference or not to or not subject or not body: if not connectionReference or not to or not subject or not body:
return self._createResult( return self._createResult(
@ -207,11 +225,29 @@ class MethodOutlook(MethodBase):
"timestamp": datetime.now(UTC).isoformat() "timestamp": datetime.now(UTC).isoformat()
} }
# Determine output format based on expected formats
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".json")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info("No expected format specified, using default .json format")
return self._createResult( return self._createResult(
success=True, success=True,
data={ data={
"documentName": f"outlook_email_sent_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", "documents": [
"documentData": result_data {
"documentName": f"outlook_email_sent_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
"documentData": result_data,
"mimeType": output_mime_type
}
]
} }
) )
@ -233,12 +269,14 @@ class MethodOutlook(MethodBase):
query (str): Search query query (str): Search query
folder (str, optional): Folder to search in (default: "All") folder (str, optional): Folder to search in (default: "All")
limit (int, optional): Maximum number of results (default: 20) limit (int, optional): Maximum number of results (default: 20)
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
""" """
try: try:
connectionReference = parameters.get("connectionReference") connectionReference = parameters.get("connectionReference")
query = parameters.get("query") query = parameters.get("query")
folder = parameters.get("folder", "All") folder = parameters.get("folder", "All")
limit = parameters.get("limit", 20) limit = parameters.get("limit", 20)
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
if not connectionReference or not query: if not connectionReference or not query:
return self._createResult( return self._createResult(
@ -290,11 +328,29 @@ class MethodOutlook(MethodBase):
"timestamp": datetime.now(UTC).isoformat() "timestamp": datetime.now(UTC).isoformat()
} }
# Determine output format based on expected formats
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".json")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info("No expected format specified, using default .json format")
return self._createResult( return self._createResult(
success=True, success=True,
data={ data={
"documentName": f"outlook_email_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", "documents": [
"documentData": result_data {
"documentName": f"outlook_email_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
"documentData": result_data,
"mimeType": output_mime_type
}
]
} }
) )

View file

@ -54,12 +54,14 @@ class MethodSharepoint(MethodBase):
siteUrl (str): SharePoint site URL siteUrl (str): SharePoint site URL
query (str): Query or description to find document query (str): Query or description to find document
searchScope (str, optional): Search scope (default: "all") searchScope (str, optional): Search scope (default: "all")
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
""" """
try: try:
connectionReference = parameters.get("connectionReference") connectionReference = parameters.get("connectionReference")
siteUrl = parameters.get("siteUrl") siteUrl = parameters.get("siteUrl")
query = parameters.get("query") query = parameters.get("query")
searchScope = parameters.get("searchScope", "all") searchScope = parameters.get("searchScope", "all")
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
if not connectionReference or not siteUrl or not query: if not connectionReference or not siteUrl or not query:
return self._createResult( return self._createResult(
@ -108,13 +110,27 @@ class MethodSharepoint(MethodBase):
"timestamp": datetime.now(UTC).isoformat() "timestamp": datetime.now(UTC).isoformat()
} }
# Determine output format based on expected formats
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".json")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info("No expected format specified, using default .json format")
return self._createResult( return self._createResult(
success=True, success=True,
data={ data={
"documents": [ "documents": [
{ {
"documentName": f"sharepoint_find_path_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", "documentName": f"sharepoint_find_path_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
"documentData": result_data "documentData": result_data,
"mimeType": output_mime_type
} }
] ]
} }
@ -139,6 +155,7 @@ class MethodSharepoint(MethodBase):
siteUrl (str): SharePoint site URL siteUrl (str): SharePoint site URL
documentPaths (List[str]): List of paths to the documents in SharePoint documentPaths (List[str]): List of paths to the documents in SharePoint
includeMetadata (bool, optional): Whether to include metadata (default: True) includeMetadata (bool, optional): Whether to include metadata (default: True)
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
""" """
try: try:
documentList = parameters.get("documentList") documentList = parameters.get("documentList")
@ -146,6 +163,7 @@ class MethodSharepoint(MethodBase):
siteUrl = parameters.get("siteUrl") siteUrl = parameters.get("siteUrl")
documentPaths = parameters.get("documentPaths") documentPaths = parameters.get("documentPaths")
includeMetadata = parameters.get("includeMetadata", True) includeMetadata = parameters.get("includeMetadata", True)
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
if not documentList or not connectionReference or not siteUrl or not documentPaths: if not documentList or not connectionReference or not siteUrl or not documentPaths:
return self._createResult( return self._createResult(
@ -218,13 +236,27 @@ class MethodSharepoint(MethodBase):
"timestamp": datetime.now(UTC).isoformat() "timestamp": datetime.now(UTC).isoformat()
} }
# Determine output format based on expected formats
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".json")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info("No expected format specified, using default .json format")
return self._createResult( return self._createResult(
success=True, success=True,
data={ data={
"documents": [ "documents": [
{ {
"documentName": f"sharepoint_documents_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", "documentName": f"sharepoint_documents_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
"documentData": result_data "documentData": result_data,
"mimeType": output_mime_type
} }
] ]
} }
@ -248,6 +280,7 @@ class MethodSharepoint(MethodBase):
documentPaths (List[str]): List of paths where to upload the documents documentPaths (List[str]): List of paths where to upload the documents
documentList (str): Reference to the document list to upload documentList (str): Reference to the document list to upload
fileNames (List[str]): List of names for the uploaded files fileNames (List[str]): List of names for the uploaded files
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
""" """
try: try:
connectionReference = parameters.get("connectionReference") connectionReference = parameters.get("connectionReference")
@ -255,6 +288,7 @@ class MethodSharepoint(MethodBase):
documentPaths = parameters.get("documentPaths") documentPaths = parameters.get("documentPaths")
documentList = parameters.get("documentList") documentList = parameters.get("documentList")
fileNames = parameters.get("fileNames") fileNames = parameters.get("fileNames")
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
if not connectionReference or not siteUrl or not documentPaths or not documentList or not fileNames: if not connectionReference or not siteUrl or not documentPaths or not documentList or not fileNames:
return self._createResult( return self._createResult(
@ -339,13 +373,27 @@ class MethodSharepoint(MethodBase):
"timestamp": datetime.now(UTC).isoformat() "timestamp": datetime.now(UTC).isoformat()
} }
# Determine output format based on expected formats
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".json")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info("No expected format specified, using default .json format")
return self._createResult( return self._createResult(
success=True, success=True,
data={ data={
"documents": [ "documents": [
{ {
"documentName": f"sharepoint_upload_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", "documentName": f"sharepoint_upload_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
"documentData": result_data "documentData": result_data,
"mimeType": output_mime_type
} }
] ]
} }
@ -369,12 +417,14 @@ class MethodSharepoint(MethodBase):
siteUrl (str): SharePoint site URL siteUrl (str): SharePoint site URL
folderPaths (List[str]): List of paths to the folders to list folderPaths (List[str]): List of paths to the folders to list
includeSubfolders (bool, optional): Whether to include subfolders (default: False) includeSubfolders (bool, optional): Whether to include subfolders (default: False)
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
""" """
try: try:
connectionReference = parameters.get("connectionReference") connectionReference = parameters.get("connectionReference")
siteUrl = parameters.get("siteUrl") siteUrl = parameters.get("siteUrl")
folderPaths = parameters.get("folderPaths") folderPaths = parameters.get("folderPaths")
includeSubfolders = parameters.get("includeSubfolders", False) includeSubfolders = parameters.get("includeSubfolders", False)
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
if not connectionReference or not siteUrl or not folderPaths: if not connectionReference or not siteUrl or not folderPaths:
return self._createResult( return self._createResult(
@ -436,13 +486,27 @@ class MethodSharepoint(MethodBase):
"timestamp": datetime.now(UTC).isoformat() "timestamp": datetime.now(UTC).isoformat()
} }
# Determine output format based on expected formats
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".json")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info("No expected format specified, using default .json format")
return self._createResult( return self._createResult(
success=True, success=True,
data={ data={
"documents": [ "documents": [
{ {
"documentName": f"sharepoint_document_list_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", "documentName": f"sharepoint_document_list_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
"documentData": result_data "documentData": result_data,
"mimeType": output_mime_type
} }
] ]
} }

View file

@ -224,12 +224,14 @@ class MethodWeb(MethodBase):
maxDepth (int, optional): Maximum crawl depth (default: 2) maxDepth (int, optional): Maximum crawl depth (default: 2)
includeImages (bool, optional): Whether to include images (default: False) includeImages (bool, optional): Whether to include images (default: False)
followLinks (bool, optional): Whether to follow links (default: True) followLinks (bool, optional): Whether to follow links (default: True)
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
""" """
try: try:
urls = parameters.get("urls") urls = parameters.get("urls")
maxDepth = parameters.get("maxDepth", 2) maxDepth = parameters.get("maxDepth", 2)
includeImages = parameters.get("includeImages", False) includeImages = parameters.get("includeImages", False)
followLinks = parameters.get("followLinks", True) followLinks = parameters.get("followLinks", True)
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
if not urls: if not urls:
return self._createResult( return self._createResult(
@ -307,13 +309,27 @@ class MethodWeb(MethodBase):
"timestamp": datetime.now(UTC).isoformat() "timestamp": datetime.now(UTC).isoformat()
} }
# Determine output format based on expected formats
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".json")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info("No expected format specified, using default .json format")
return self._createResult( return self._createResult(
success=True, success=True,
data={ data={
"documents": [ "documents": [
{ {
"documentName": f"web_crawl_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", "documentName": f"web_crawl_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
"documentData": result_data "documentData": result_data,
"mimeType": output_mime_type
} }
] ]
} }
@ -336,11 +352,13 @@ class MethodWeb(MethodBase):
url (str): URL to scrape url (str): URL to scrape
selectors (Dict[str, str]): CSS selectors for data extraction selectors (Dict[str, str]): CSS selectors for data extraction
format (str, optional): Output format (default: "json") format (str, optional): Output format (default: "json")
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
""" """
try: try:
url = parameters.get("url") url = parameters.get("url")
selectors = parameters.get("selectors") selectors = parameters.get("selectors")
format = parameters.get("format", "json") format = parameters.get("format", "json")
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
if not url or not selectors: if not url or not selectors:
return self._createResult( return self._createResult(
@ -400,13 +418,27 @@ class MethodWeb(MethodBase):
"timestamp": datetime.now(UTC).isoformat() "timestamp": datetime.now(UTC).isoformat()
} }
# Determine output format based on expected formats
output_extension = f".{format}" # Default to format parameter
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", f".{format}")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info(f"No expected format specified, using format parameter: {format}")
return self._createResult( return self._createResult(
success=True, success=True,
data={ data={
"documents": [ "documents": [
{ {
"documentName": f"web_scrape_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.{format}", "documentName": f"web_scrape_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
"documentData": result_data "documentData": result_data,
"mimeType": output_mime_type
} }
] ]
} }
@ -430,12 +462,14 @@ class MethodWeb(MethodBase):
engine (str, optional): Search engine to use (default: "google") engine (str, optional): Search engine to use (default: "google")
maxResults (int, optional): Maximum number of results (default: 10) maxResults (int, optional): Maximum number of results (default: 10)
filter (str, optional): Additional search filters filter (str, optional): Additional search filters
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
""" """
try: try:
query = parameters.get("query") query = parameters.get("query")
engine = parameters.get("engine", "google") engine = parameters.get("engine", "google")
maxResults = parameters.get("maxResults", 10) maxResults = parameters.get("maxResults", 10)
filter = parameters.get("filter") filter = parameters.get("filter")
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
if not query: if not query:
return self._createResult( return self._createResult(
@ -533,13 +567,27 @@ class MethodWeb(MethodBase):
"timestamp": datetime.now(UTC).isoformat() "timestamp": datetime.now(UTC).isoformat()
} }
# Determine output format based on expected formats
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".json")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info("No expected format specified, using default .json format")
return self._createResult( return self._createResult(
success=True, success=True,
data={ data={
"documents": [ "documents": [
{ {
"documentName": f"web_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", "documentName": f"web_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
"documentData": result_data "documentData": result_data,
"mimeType": output_mime_type
} }
] ]
} }
@ -561,10 +609,12 @@ class MethodWeb(MethodBase):
Parameters: Parameters:
url (str): URL to validate url (str): URL to validate
checks (List[str], optional): Types of checks to perform (default: ["accessibility", "seo", "performance"]) checks (List[str], optional): Types of checks to perform (default: ["accessibility", "seo", "performance"])
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
""" """
try: try:
url = parameters.get("url") url = parameters.get("url")
checks = parameters.get("checks", ["accessibility", "seo", "performance"]) checks = parameters.get("checks", ["accessibility", "seo", "performance"])
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
if not url: if not url:
return self._createResult( return self._createResult(
@ -609,13 +659,27 @@ class MethodWeb(MethodBase):
"timestamp": datetime.now(UTC).isoformat() "timestamp": datetime.now(UTC).isoformat()
} }
# Determine output format based on expected formats
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".json")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info("No expected format specified, using default .json format")
return self._createResult( return self._createResult(
success=True, success=True,
data={ data={
"documents": [ "documents": [
{ {
"documentName": f"web_validation_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", "documentName": f"web_validation_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
"documentData": result_data "documentData": result_data,
"mimeType": output_mime_type
} }
] ]
} }