From cfb34c6a383cc260268e966c18edc929e039f4df Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Fri, 11 Jul 2025 23:13:42 +0200
Subject: [PATCH] version 2 ready basic

---
 modules/chat/managerChat.py                | 246 ++++++++++++++++++---
 modules/interfaces/interfaceChatModel.py   |   2 +
 modules/interfaces/interfaceChatObjects.py |  54 ++++-
 modules/methods/methodDocument.py          |  84 +++++--
 modules/methods/methodOutlook.py           |  68 +++++-
 modules/methods/methodSharepoint.py        |  80 ++++++-
 modules/methods/methodWeb.py               |  80 ++++++-
 7 files changed, 537 insertions(+), 77 deletions(-)

diff --git a/modules/chat/managerChat.py b/modules/chat/managerChat.py
index 68bc643d..3ed52fdb 100644
--- a/modules/chat/managerChat.py
+++ b/modules/chat/managerChat.py
@@ -111,18 +111,46 @@ class ActionValidator:
         expected_result_label = action.execResultLabel
         expected_format = action.execParameters.get('outputFormat', 'unknown')
         
+        # Extract expected document formats from action
+        expected_document_formats = action.expectedDocumentFormats or []
+        
+        # Check if the result label is present in the action result data
+        actual_result_label = result_data.get("resultLabel", "") if isinstance(result_data, dict) else ""
+        result_label_match = actual_result_label == expected_result_label
+        
         # Analyze delivered documents and content
         delivered_files = []
+        delivered_formats = []
         content_items = []
         
         # Check for ChatDocument objects
         for doc in documents:
             if hasattr(doc, 'filename'):
                 delivered_files.append(doc.filename)
+                # Extract format information
+                file_extension = self._getFileExtension(doc.filename)
+                mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
+                delivered_formats.append({
+                    'filename': doc.filename,
+                    'extension': file_extension,
+                    'mimeType': mime_type
+                })
             elif isinstance(doc, dict) and 'filename' in doc:
                 delivered_files.append(doc['filename'])
+                file_extension = self._getFileExtension(doc['filename'])
+                mime_type = doc.get('mimeType', 'application/octet-stream')
+                delivered_formats.append({
+                    'filename': doc['filename'],
+                    'extension': file_extension,
+                    'mimeType': mime_type
+                })
             else:
                 delivered_files.append(f"document_{len(delivered_files)}")
+                delivered_formats.append({
+                    'filename': delivered_files[-1],  # reuse the placeholder appended just above; len() is already +1 here
+                    'extension': 'unknown',
+                    'mimeType': 'application/octet-stream'
+                })
         
         # Check for ExtractedContent in result data
         if isinstance(result_data, dict):
@@ -133,11 +161,20 @@ class ActionValidator:
             elif 'contents' in result_data:
                 content_items = result_data['contents']
         
+        # If we have delivered files but no content items, consider it successful
+        # This handles the case where content is stored in files rather than result data
+        if delivered_files and not content_items:
+            content_items = [f"File content available in: {', '.join(delivered_files)}"]
+        
         # Analyze content items
         content_summary = []
         for item in content_items:
             if hasattr(item, 'label') and hasattr(item, 'metadata'):
                 content_summary.append(f"{item.label}: {item.metadata.mimeType if hasattr(item.metadata, 'mimeType') else 'unknown'}")
+            elif isinstance(item, str):
+                content_summary.append(item)
+            else:
+                content_summary.append(str(item))
         
         return f"""You are an action result validator. Your primary focus is to validate that the action delivered the promised result files in the promised format.
 
@@ -145,7 +182,10 @@ ACTION DETAILS:
 - Method: {action.execMethod}
 - Action: {action.execAction}
 - Expected Result Label: {expected_result_label}
+- Actual Result Label: {actual_result_label}
+- Result Label Match: {result_label_match}
 - Expected Format: {expected_format}
+- Expected Document Formats: {json.dumps(expected_document_formats, indent=2) if expected_document_formats else 'None specified'}
 - Parameters: {json.dumps(action.execParameters, indent=2)}
 
 RESULT TO VALIDATE:
@@ -155,12 +195,13 @@ RESULT TO VALIDATE:
 - Validation Messages: {', '.join(validation_messages) if validation_messages else 'None'}
 - Documents Produced: {doc_count}
 - Delivered Files: {', '.join(delivered_files) if delivered_files else 'None'}
+- Delivered Formats: {json.dumps(delivered_formats, indent=2) if delivered_formats else 'None'}
 - Content Items: {', '.join(content_summary) if content_summary else 'None'}
 
 CRITICAL VALIDATION CRITERIA:
-1. **File Delivery**: Did the action deliver the promised result file(s)?
-2. **Format Compliance**: Are the delivered files in the promised format?
-3. **Result Label Match**: Does the result match the expected result label?
+1. **Result Label Match**: Does the action result contain the expected result label?
+2. **File Delivery**: Did the action deliver the promised result file(s)?
+3. **Format Compliance**: If expected document formats were specified, do the delivered files match the expected formats?
 4. **Content Quality**: Is the content of the delivered files usable and complete?
 5. **Content Processing**: If content extraction was expected, was it performed correctly?
 
@@ -169,21 +210,28 @@ CONTEXT:
 - Previous Results: {', '.join(context.previous_results) if context.previous_results else 'None'}
 
 VALIDATION INSTRUCTIONS:
-1. Check if the expected result label "{expected_result_label}" is present in the result
-2. Verify that files were delivered when expected
-3. Validate that the delivered files match the expected format "{expected_format}"
-4. Assess if the content is complete and usable
-5. Check if content extraction was performed when expected
-6. Determine if retry would improve file delivery or format compliance
+1. **Result Label Check**: Verify that the expected result label "{expected_result_label}" is present in the action result data. This is the primary success criterion.
+2. **File Delivery**: Check if files were delivered when expected. The individual filenames don't need to match the result label - focus on whether content was actually produced.
+3. **Format Compliance**: If expected document formats were specified, check if delivered files match the expected extensions and MIME types. If no formats were specified, this criterion is satisfied.
+4. **Content Quality**: If files were delivered, consider the action successful. The presence of delivered files indicates content was processed and stored.
+5. **Content Processing**: If files were delivered, assume content extraction was performed correctly. The file delivery is evidence of successful processing.
+6. **Success Criteria**: The action is successful if the result label matches AND files were delivered. If expected formats were specified, they should also match.
+
+IMPORTANT NOTES:
+- The result label must be present in the action result data for success
+- Individual filenames can be different from the result label
+- If files were delivered, consider the action successful even if content details are not provided
+- Focus on whether the action accomplished its intended purpose (file delivery)
+- Empty files should be considered failures, but delivered files indicate success
 
 REQUIRED JSON RESPONSE:
 {{
     "status": "success|retry|fail",
-    "reason": "Detailed explanation focusing on file delivery and format compliance",
+    "reason": "Detailed explanation focusing on result label match and content quality",
     "confidence": 0.0-1.0,
-    "improvements": ["specific file delivery improvements", "format compliance fixes"],
+    "improvements": ["specific improvements if needed"],
     "quality_score": 1-10,
-    "missing_elements": ["missing files", "format issues"],
+    "missing_elements": ["missing result label", "missing files", "content issues"],
     "suggested_retry_approach": "Specific approach for retry if status is retry"
 }}
 
@@ -222,6 +270,12 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
                 'missing_elements': [],
                 'suggested_retry_approach': ''
             }
+    
+    def _getFileExtension(self, filename: str) -> str:
+        """Return the extension of filename's last path component, dot included (e.g. '.csv'); '' if there is none or filename is falsy."""
+        base_name = str(filename or '').replace('\\', '/').rsplit('/', 1)[-1]  # ignore dots in directory parts
+        _, dot, suffix = base_name.rpartition('.')
+        return dot + suffix if dot and suffix else ''  # a trailing dot ("name.") is not an extension
 
 class ChatManager:
     """Chat manager with improved AI integration and method handling"""
@@ -293,6 +347,27 @@ class ChatManager:
                 tasks=tasks
             )
             
+            # Log the task plan as JSON for debugging
+            logger.info(f"Task plan created for workflow {workflow.id}:")
+            task_plan_json = {
+                'overview': task_plan.overview,
+                'tasks_count': len(task_plan.tasks),
+                'tasks': []
+            }
+            for task in task_plan.tasks:
+                task_json = {
+                    'id': task.id,
+                    'description': task.description,
+                    'dependencies': task.dependencies or [],
+                    'expected_outputs': task.expected_outputs or [],
+                    'success_criteria': task.success_criteria or [],
+                    'required_documents': task.required_documents or [],
+                    'estimated_complexity': task.estimated_complexity or '',
+                    'ai_prompt': task.ai_prompt or ''
+                }
+                task_plan_json['tasks'].append(task_json)
+            logger.info(f"Task Plan: {json.dumps(task_plan_json, indent=2, ensure_ascii=False)}")
+            
             logger.info(f"High-level task planning completed: {len(task_plan.tasks)} tasks")
             return task_plan
             
@@ -330,6 +405,11 @@ class ChatManager:
             # Generate actions using AI
             actions = await self._generateActionsForTaskStep(context)
             
+            # Log the generated actions as JSON for debugging
+            logger.info(f"Generated {len(actions)} actions for task '{task_step.description}':")
+            for i, action in enumerate(actions):
+                logger.info(f"Action {i+1}: {json.dumps(action, indent=2, ensure_ascii=False)}")
+            
             # Convert to TaskAction objects
             task_actions = []
             for action_dict in actions:
@@ -338,6 +418,7 @@ class ChatManager:
                     "execAction": action_dict.get('action', 'unknown'),
                     "execParameters": action_dict.get('parameters', {}),
                     "execResultLabel": action_dict.get('resultLabel', ''),
+                    "expectedDocumentFormats": action_dict.get('expectedDocumentFormats', None),
                     "status": TaskStatus.PENDING
                 }
                 
@@ -351,6 +432,19 @@ class ChatManager:
                 # Calculate actual action size for stats
                 action_size = self.service.calculateObjectSize(task_actions)
                 self.service.updateWorkflowStats(eventLabel="action", bytesSent=action_size)
+                
+                # Log the final TaskAction objects as JSON
+                logger.info(f"Final TaskAction objects for task '{task_step.description}':")
+                for i, task_action in enumerate(task_actions):
+                    action_json = {
+                        'id': task_action.id,
+                        'execMethod': task_action.execMethod,
+                        'execAction': task_action.execAction,
+                        'execParameters': task_action.execParameters,
+                        'execResultLabel': task_action.execResultLabel,
+                        'status': task_action.status.value if hasattr(task_action.status, 'value') else str(task_action.status)
+                    }
+                    logger.info(f"TaskAction {i+1}: {json.dumps(action_json, indent=2, ensure_ascii=False)}")
             
             logger.info(f"Task action definition completed: {len(task_actions)} actions")
             return task_actions
@@ -842,6 +936,7 @@ ACTION GENERATION PRINCIPLES:
 - Include validation steps in extraction prompts
 - If this is a retry, learn from previous failures and improve the approach
 - Address specific issues mentioned in previous review feedback
+- When specifying expectedDocumentFormats, ensure AI prompts explicitly request pure data without markdown formatting
 
 INSTRUCTIONS:
 - Generate actions to accomplish this task step using available documents, connections, and previous results
@@ -866,6 +961,13 @@ REQUIRED JSON STRUCTURE:
                 "aiPrompt": "Comprehensive AI prompt describing what to accomplish"
             }},
             "resultLabel": "task1_action3_analysis_results",
+            "expectedDocumentFormats": [  // OPTIONAL: Specify expected document formats when needed
+                {{
+                    "extension": ".csv",
+                    "mimeType": "text/csv",
+                    "description": "Structured data output"
+                }}
+            ],
             "description": "What this action accomplishes (business outcome)"
         }}
     ]
@@ -876,10 +978,16 @@ FIELD REQUIREMENTS:
 - "action": Must be valid for the method
 - "parameters": Method-specific, must include documentList as a list if required by the signature
 - "resultLabel": Must follow the format above (e.g., "task1_action3_analysis_results")
+- "expectedDocumentFormats": OPTIONAL - Only specify when you need to control output format
+  - Use when you need specific file types (e.g., CSV for data, JSON for structured output)
+  - Omit when format is flexible (e.g., folder queries with mixed file types)
+  - Each format should specify: extension, mimeType, description
+  - When using expectedDocumentFormats, ensure the aiPrompt explicitly requests pure data without markdown formatting
 - "description": Clear summary of the business outcome
 
 EXAMPLES OF GOOD ACTIONS:
-1. Comprehensive document analysis:
+
+1. Document analysis with specific output format (use expectedDocumentFormats):
 {{
     "method": "document",
     "action": "extract",
@@ -888,10 +996,17 @@ EXAMPLES OF GOOD ACTIONS:
         "aiPrompt": "Extract and analyze the candidate's qualifications, experience, skills, and suitability for the product designer position. Identify key strengths, relevant experience, technical skills, and any areas of concern. Provide a comprehensive assessment that can be used for evaluation."
     }},
     "resultLabel": "task1_action1_candidate_analysis",
+    "expectedDocumentFormats": [
+        {{
+            "extension": ".json",
+            "mimeType": "application/json",
+            "description": "Structured candidate analysis data"
+        }}
+    ],
     "description": "Comprehensive analysis of candidate profile for evaluation"
 }}
 
-2. Multi-document processing:
+2. Multi-document processing with flexible output (omit expectedDocumentFormats):
 {{
     "method": "document",
     "action": "extract",
@@ -903,6 +1018,25 @@ EXAMPLES OF GOOD ACTIONS:
     "description": "Create comprehensive evaluation matrix comparing all candidates"
 }}
 
+3. Data extraction with specific CSV format:
+{{
+    "method": "document",
+    "action": "extract",
+    "parameters": {{
+        "documentList": ["docItem:doc_abc:table_data.pdf"],
+        "aiPrompt": "Extract all table data and convert to structured CSV format with proper headers and data types. IMPORTANT: Deliver pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows."
+    }},
+    "resultLabel": "task1_action2_structured_data",
+    "expectedDocumentFormats": [
+        {{
+            "extension": ".csv",
+            "mimeType": "text/csv",
+            "description": "Structured table data in CSV format"
+        }}
+    ],
+    "description": "Extract and structure table data for analysis"
+}}
+
 NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
 
 
@@ -1022,11 +1156,17 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
     async def _executeSingleAction(self, action: TaskAction, workflow: ChatWorkflow) -> ActionResult:
         """Execute a single action and return ActionResult with enhanced document processing"""
         try:
+            # Enhance parameters with expected document formats if specified
+            enhanced_parameters = action.execParameters.copy()
+            if action.expectedDocumentFormats:
+                enhanced_parameters['expectedDocumentFormats'] = action.expectedDocumentFormats
+                logger.info(f"Action {action.execMethod}.{action.execAction} expects formats: {action.expectedDocumentFormats}")
+            
             # Execute the actual method action using the service center
             result = await self.service.executeAction(
                 methodName=action.execMethod,
                 actionName=action.execAction,
-                parameters=action.execParameters
+                parameters=enhanced_parameters
             )
             
             # Always use the execResultLabel from the action definition
@@ -1348,8 +1488,8 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
                 
                 # For CSV files, try to extract table data
                 elif file_extension == 'csv':
-                    # Look for CSV-specific fields
-                    csv_fields = ['table_data', 'csv_data', 'rows', 'data']
+                    # Look for CSV-specific fields first, then general content fields
+                    csv_fields = ['table_data', 'csv_data', 'rows', 'data', 'content', 'text']
                     for field in csv_fields:
                         if field in document_data:
                             content = document_data[field]
@@ -1798,7 +1938,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
             # Create final success log
             self.chatInterface.createWorkflowLog({
                 "workflowId": workflow.id,
-                "message": f"🎉 Workflow completed successfully ({len(workflow_results)}/{len(task_plan.tasks)} tasks)",
+                "message": f"🎉 Workflow completed ({len(workflow_results)}/{len(task_plan.tasks)} tasks)",
                 "type": "success",
                 "status": "completed",
                 "progress": 100
@@ -1814,7 +1954,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
             )
             
             logger.info(f"=== UNIFIED WORKFLOW COMPLETED: {len(workflow_results)}/{len(task_plan.tasks)} tasks successful ===")
-            logger.debug(f"FINAL WORKFLOW SUMMARY: {json.dumps(workflow_summary.model_dump(), indent=2, ensure_ascii=False)}")
+            logger.debug(f"FINAL WORKFLOW SUMMARY: {json.dumps(workflow_summary.dict(), indent=2, ensure_ascii=False)}")
             return workflow_summary
             
         except Exception as e:
@@ -1989,6 +2129,7 @@ Please review the task requirements and try again with different input or approa
             )
             
             # Generate new actions with failure avoidance
+            logger.info(f"Regenerating actions for task '{task_step.description}' with failure context (retry {state.retry_count})")
             actions = await self.defineTaskActions(task_step, context.workflow, state.getAvailableResults(), enhanced_context)
             
             logger.info(f"Regenerated {len(actions)} actions with failure context")
@@ -2016,13 +2157,17 @@ Please review the task requirements and try again with different input or approa
             prompt = self._createTaskCompletionValidationPrompt(task_result, task_step)
             response = await self._callAIWithCircuitBreaker(prompt, "task_completion_validation")
             
+            # Log the validation response for debugging
+            logger.debug(f"Task validation AI response: {response}")
+            
             # Parse validation result
             validation = self._parseTaskValidationResponse(response)
             
             # Add quality metrics
             validation['quality_metrics'] = self._calculateTaskQualityMetrics(task_step, successful_actions)
             
-            logger.info(f"Task completion validation: {validation.get('status', 'unknown')}")
+            logger.info(f"Task completion validation: {validation.get('status', 'unknown')} - Reason: {validation.get('reason', 'No reason')}")
+            logger.debug(f"Parsed validation result: {json.dumps(validation, indent=2)}")
             return ReviewResult(
                 status=validation.get('status', 'unknown'),
                 reason=validation.get('reason', 'No reason provided'),
@@ -2061,21 +2206,27 @@ Please review the task requirements and try again with different input or approa
                 'has_text_result': bool(action.data.get('result', '').strip())
             })
         
-        return f"""You are a task completion validator that evaluates if a task was successfully completed.
+        return f"""You are an action completion validator that evaluates if individual actions were successfully completed.
 
-TASK DETAILS:
-- Description: {task_step.description}
-- Expected Outputs: {', '.join(expected_outputs)}
-- Success Criteria: {', '.join(success_criteria)}
-
-SUCCESSFUL ACTIONS ({len(successful_actions)}):
+ACTION DETAILS:
 {json.dumps(action_summary, indent=2)}
 
+VALIDATION CRITERIA:
+1. Check if the action's result_label matches what was delivered
+2. If documents were delivered and result_label is present → SUCCESS
+3. If no documents but text result with matching result_label or different result_label → RETRY
+4. If no result_label and no delivery → FAIL
+
+VALIDATION RULES:
+- Focus on result_label matching
+- Check if the action delivered the expected result type
+- Document delivery with correct result_label = SUCCESS
+- Text result with correct result_label = SUCCESS
+
 VALIDATION QUESTIONS:
-1. Were all expected outputs produced?
-2. Are the success criteria met?
-3. Do the action results collectively accomplish the task goal?
-4. Is the task ready for handover to the next task?
+1. Does the result_label match what the action was supposed to deliver?
+2. Were documents or text results delivered with the correct label?
+3. Does the delivery match the action's objective?
 
 REQUIRED JSON RESPONSE:
 {{
@@ -2242,6 +2393,21 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
                 documents=result.data.get("documents", [])
             )
             
+            # Log the action execution result as JSON (without document content)
+            action_result_json = {
+                'success': action_result.success,
+                'actionId': action_result.actionId,
+                'actionMethod': action_result.actionMethod,
+                'actionName': action_result.actionName,
+                'validation': action_result.validation,
+                'error': action_result.error,
+                'documents_count': len(action_result.documents),
+                'document_names': [doc.filename if hasattr(doc, 'filename') else str(doc) for doc in action_result.documents],
+                'data_keys': list(action_result.data.keys()) if isinstance(action_result.data, dict) else [],
+                'metadata_keys': list(action_result.metadata.keys()) if isinstance(action_result.metadata, dict) else []
+            }
+            logger.info(f"Action execution result for {action.execMethod}.{action.execAction}: {json.dumps(action_result_json, indent=2, ensure_ascii=False)}")
+            
             # Update action status based on validation
             if validation['status'] == 'success':
                 action.setSuccess()
@@ -2334,6 +2500,24 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
                 applied_improvements=improvements
             )
             
+            # Log the retry action execution result as JSON (without document content)
+            retry_result_json = {
+                'success': action_result.success,
+                'actionId': action_result.actionId,
+                'actionMethod': action_result.actionMethod,
+                'actionName': action_result.actionName,
+                'validation': action_result.validation,
+                'error': action_result.error,
+                'is_retry': action_result.is_retry,
+                'previous_error': action_result.previous_error,
+                'applied_improvements': action_result.applied_improvements,
+                'documents_count': len(action_result.documents),
+                'document_names': [doc.filename if hasattr(doc, 'filename') else str(doc) for doc in action_result.documents],
+                'data_keys': list(action_result.data.keys()) if isinstance(action_result.data, dict) else [],
+                'metadata_keys': list(action_result.metadata.keys()) if isinstance(action_result.metadata, dict) else []
+            }
+            logger.info(f"Retry action execution result for {action.execMethod}.{action.execAction}: {json.dumps(retry_result_json, indent=2, ensure_ascii=False)}")
+            
             # Update action status
             if validation['status'] == 'success':
                 enhanced_action.setSuccess()
diff --git a/modules/interfaces/interfaceChatModel.py b/modules/interfaces/interfaceChatModel.py
index 28559d1f..439fa38a 100644
--- a/modules/interfaces/interfaceChatModel.py
+++ b/modules/interfaces/interfaceChatModel.py
@@ -185,6 +185,8 @@ class TaskAction(BaseModel, ModelMixin):
     execAction: str = Field(..., description="Action to perform")
     execParameters: Dict[str, Any] = Field(default_factory=dict, description="Action parameters")
     execResultLabel: Optional[str] = Field(None, description="Label for the set of result documents")
+    # NEW: Optional document format specification
+    expectedDocumentFormats: Optional[List[Dict[str, str]]] = Field(None, description="Expected document formats (optional)")
     status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status")
     error: Optional[str] = Field(None, description="Error message if action failed")
     retryCount: int = Field(default=0, description="Number of retries attempted")
diff --git a/modules/interfaces/interfaceChatObjects.py b/modules/interfaces/interfaceChatObjects.py
index 4132027d..86cf8cc8 100644
--- a/modules/interfaces/interfaceChatObjects.py
+++ b/modules/interfaces/interfaceChatObjects.py
@@ -7,7 +7,7 @@ import os
 import logging
 import uuid
 import time
-from datetime import datetime, UTC
+from datetime import datetime, UTC, timezone
 from typing import Dict, Any, List, Optional, Union
 
 import asyncio
@@ -128,8 +128,8 @@ class ChatObjects:
         return self.db.getInitialId(table)
 
     def _getCurrentTimestamp(self) -> str:
-        """Returns the current timestamp in ISO format"""
-        return datetime.now().isoformat()
+        """Returns the current Unix time (seconds since epoch, truncated to whole seconds) as a decimal string"""
+        return str(int(time.time()))
 
     # Workflow methods
 
@@ -576,8 +576,45 @@ class ChatObjects:
                     "processingTime": 0
                 }
             
-            # Simple processing time - just use current time
-            processing_time = time.time()
+            # Calculate processing time as duration since workflow start using Unix timestamps
+            workflow = self.getWorkflow(workflowId)
+            if workflow and workflow.startedAt:
+                try:
+                    # Parse start time as Unix timestamp (handle both old ISO format and new Unix format)
+                    start_time_str = workflow.startedAt
+                    try:
+                        # Try to parse as Unix timestamp first
+                        start_time = int(float(start_time_str))
+                    except ValueError:
+                        # If that fails, try to parse as ISO format and convert to Unix
+                        try:
+                            # Handle ISO format timestamps (for backward compatibility)
+                            if start_time_str.endswith('Z'):
+                                start_time_str = start_time_str.replace('Z', '+00:00')
+                            dt = datetime.fromisoformat(start_time_str)
+                            start_time = int(dt.timestamp())
+                        except Exception:
+                            # Parsing failed: fall back to "now" (duration ~0); KeyboardInterrupt/SystemExit are no longer swallowed
+                            logger.warning(f"Could not parse start time: {start_time_str}, using current time")
+                            start_time = int(time.time())
+                    
+                    current_time = int(time.time())
+                    processing_time = current_time - start_time
+                    
+                    # Ensure processing time is reasonable (not negative or extremely large)
+                    if processing_time < 0:
+                        logger.warning(f"Negative processing time calculated: {processing_time}, using 0")
+                        processing_time = 0
+                    elif processing_time > 86400 * 365:  # More than 1 year
+                        logger.warning(f"Unreasonably large processing time: {processing_time}, using 0")
+                        processing_time = 0
+                        
+                except Exception as e:
+                    logger.warning(f"Error calculating processing time: {str(e)}")
+                    processing_time = currentStats.get("processingTime", 0) or 0
+            else:
+                # Fallback to existing processing time or 0
+                processing_time = currentStats.get("processingTime", 0) or 0
             
             # Update stats with incremental values - ensure no None values
             current_bytes_sent = currentStats.get("bytesSent", 0) or 0
@@ -793,8 +830,8 @@ class ChatObjects:
             
             # Load logs
             logs = self.getWorkflowLogs(workflowId)
-            # Sort by timestamp
-            logs.sort(key=lambda x: x.get("timestamp", ""))
+            # Sort by timestamp without raising: legacy ISO strings order lexicographically ahead of numeric Unix timestamps; missing/None counts as 0
+            logs.sort(key=lambda x: (1, float(s), "") if (s := str(x.get("timestamp") or 0)).replace(".", "", 1).isdecimal() else (0, 0.0, s))
             
             # Assemble complete workflow object
             completeWorkflow = workflow.copy()
@@ -1205,12 +1242,13 @@ class ChatObjects:
                 execAction=createdAction["execAction"],
                 execParameters=createdAction.get("execParameters", {}),
                 execResultLabel=createdAction.get("execResultLabel"),
+                expectedDocumentFormats=createdAction.get("expectedDocumentFormats"),
                 status=createdAction.get("status", TaskStatus.PENDING),
                 error=createdAction.get("error"),
                 retryCount=createdAction.get("retryCount", 0),
                 retryMax=createdAction.get("retryMax", 3),
                 processingTime=createdAction.get("processingTime"),
-                timestamp=datetime.fromisoformat(createdAction.get("timestamp", datetime.now().isoformat())),
+                timestamp=datetime.fromtimestamp(float(createdAction.get("timestamp", time.time()))),
                 result=createdAction.get("result"),
                 resultDocuments=createdAction.get("resultDocuments", [])
             )
diff --git a/modules/methods/methodDocument.py b/modules/methods/methodDocument.py
index 456bac95..fd2a67e7 100644
--- a/modules/methods/methodDocument.py
+++ b/modules/methods/methodDocument.py
@@ -24,17 +24,19 @@ class MethodDocument(MethodBase):
     @action
     async def extract(self, parameters: Dict[str, Any]) -> ActionResult:
         """
-        Extract specific content from document with ai prompt and return it as a json file
+        Extract specific content from documents with an AI prompt and return one result document per extracted document in the specified format
         
         Parameters:
             documentList (str): Reference to the document list to extract content from
             aiPrompt (str): AI prompt for content extraction
             includeMetadata (bool, optional): Whether to include metadata (default: True)
+            expectedDocumentFormats (list, optional): List of format dicts (extension, mimeType, description); the first entry determines the output file extension and MIME type
         """
         try:
             documentList = parameters.get("documentList")
             aiPrompt = parameters.get("aiPrompt")
             includeMetadata = parameters.get("includeMetadata", True)
+            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
             
             if not documentList:
                 return self._createResult(
@@ -58,6 +60,31 @@ class MethodDocument(MethodBase):
                     error="No documents found for the provided reference"
                 )
             
+            # Determine output format based on expected formats
+            output_extension = ".txt"  # Default
+            output_mime_type = "text/plain"  # Default
+            
+            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+                # Use the first expected format
+                expected_format = expectedDocumentFormats[0]
+                output_extension = expected_format.get("extension", ".txt")
+                output_mime_type = expected_format.get("mimeType", "text/plain")
+                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
+                logger.info(f"Expected document formats: {expectedDocumentFormats}")
+            else:
+                logger.info("No expected format specified, using default .txt format")
+            
+            # Enhance AI prompt to specify output format
+            enhanced_prompt = aiPrompt
+            if output_extension == ".csv":
+                enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows. Do not include ```csv or ``` markers."
+            elif output_extension == ".json":
+                enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure JSON data without any markdown formatting, code blocks, or additional text. Output only the JSON content. Do not include ```json or ``` markers."
+            elif output_extension == ".xml":
+                enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure XML data without any markdown formatting, code blocks, or additional text. Output only the XML content. Do not include ```xml or ``` markers."
+            elif output_extension != ".txt":
+                enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure {output_extension.upper()} data without any markdown formatting, code blocks, or additional text. Output only the {output_extension.upper()} content. Do not include any markdown markers."
+            
             # Extract content from all documents
             all_extracted_content = []
             file_infos = []
@@ -72,7 +99,7 @@ class MethodDocument(MethodBase):
                     continue
                 
                 extracted_content = await self.service.extractContentFromFileData(
-                    prompt=aiPrompt,
+                    prompt=enhanced_prompt,  # Use enhanced prompt instead of original
                     fileData=file_data,
                     filename=file_info.get('name', 'document'),
                     mimeType=file_info.get('mimeType', 'application/octet-stream'),
@@ -105,25 +132,50 @@ class MethodDocument(MethodBase):
                     # Fallback: convert to string representation
                     text_contents.append(str(content_obj))
             
-            # Combine all extracted text content
-            combined_content = "\n\n--- DOCUMENT SEPARATOR ---\n\n".join(text_contents)
+            # Process each document individually and create separate output files
+            output_documents = []
             
-            result_data = {
-                "documentCount": len(chatDocuments),
-                "content": combined_content,
-                "fileInfos": file_infos if includeMetadata else None,
-                "timestamp": datetime.now(UTC).isoformat()
-            }
+            for i, (chatDocument, extracted_content) in enumerate(zip(chatDocuments, all_extracted_content)):
+                # Extract text content from this document
+                text_content = ""
+                if hasattr(extracted_content, 'contents') and extracted_content.contents:
+                    # Extract text from ContentItem objects
+                    for content_item in extracted_content.contents:
+                        if hasattr(content_item, 'data') and content_item.data:
+                            text_content += content_item.data + "\n"
+                elif isinstance(extracted_content, str):
+                    text_content = extracted_content
+                else:
+                    # Fallback: convert to string representation
+                    text_content = str(extracted_content)
+                
+                # Create output filename based on original filename
+                original_filename = chatDocument.filename
+                base_name = original_filename.rsplit('.', 1)[0] if '.' in original_filename else original_filename
+                output_filename = f"{base_name}_extracted_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}"
+                
+                # Create result data for this document
+                result_data = {
+                    "documentCount": 1,
+                    "content": text_content,
+                    "originalFilename": original_filename,
+                    "fileInfos": [file_infos[i]] if includeMetadata and i < len(file_infos) else None,
+                    "timestamp": datetime.now(UTC).isoformat()
+                }
+                
+                logger.info(f"Created output document: {output_filename} with {len(text_content)} characters")
+                logger.info(f"Content preview: {text_content[:200]}...")
+                
+                output_documents.append({
+                    "documentName": output_filename,
+                    "documentData": result_data,
+                    "mimeType": output_mime_type
+                })
             
             return self._createResult(
                 success=True,
                 data={
-                    "documents": [
-                        {
-                            "documentName": f"extracted_content_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.txt",
-                            "documentData": result_data
-                        }
-                    ]
+                    "documents": output_documents
                 }
             )
         except Exception as e:
diff --git a/modules/methods/methodOutlook.py b/modules/methods/methodOutlook.py
index a2e91896..4fbd3cdf 100644
--- a/modules/methods/methodOutlook.py
+++ b/modules/methods/methodOutlook.py
@@ -55,12 +55,14 @@ class MethodOutlook(MethodBase):
             folder (str, optional): Email folder to read from (default: "Inbox")
             limit (int, optional): Maximum number of emails to read (default: 10)
             filter (str, optional): Filter criteria for emails
+            expectedDocumentFormats (list, optional): List of format dicts (extension, mimeType, description); the first entry determines the output file extension and MIME type
         """
         try:
             connectionReference = parameters.get("connectionReference")
             folder = parameters.get("folder", "Inbox")
             limit = parameters.get("limit", 10)
             filter = parameters.get("filter")
+            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
             
             if not connectionReference:
                 return self._createResult(
@@ -112,13 +114,27 @@ class MethodOutlook(MethodBase):
                 "timestamp": datetime.now(UTC).isoformat()
             }
             
+            # Determine output format based on expected formats
+            output_extension = ".json"  # Default
+            output_mime_type = "application/json"  # Default
+            
+            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+                # Use the first expected format
+                expected_format = expectedDocumentFormats[0]
+                output_extension = expected_format.get("extension", ".json")
+                output_mime_type = expected_format.get("mimeType", "application/json")
+                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
+            else:
+                logger.info("No expected format specified, using default .json format")
+            
             return self._createResult(
                 success=True,
                 data={
                     "documents": [
                         {
-                            "documentName": f"outlook_emails_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
-                            "documentData": result_data
+                            "documentName": f"outlook_emails_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
+                            "documentData": result_data,
+                            "mimeType": output_mime_type
                         }
                     ]
                 }
@@ -144,6 +160,7 @@ class MethodOutlook(MethodBase):
             body (str): Email body content
             cc (List[str], optional): CC recipients
             bcc (List[str], optional): BCC recipients
+            expectedDocumentFormats (list, optional): List of format dicts (extension, mimeType, description); the first entry determines the output file extension and MIME type
         """
         try:
             connectionReference = parameters.get("connectionReference")
@@ -152,6 +169,7 @@ class MethodOutlook(MethodBase):
             body = parameters.get("body")
             cc = parameters.get("cc", [])
             bcc = parameters.get("bcc", [])
+            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
             
             if not connectionReference or not to or not subject or not body:
                 return self._createResult(
@@ -207,11 +225,29 @@ class MethodOutlook(MethodBase):
                 "timestamp": datetime.now(UTC).isoformat()
             }
             
+            # Determine output format based on expected formats
+            output_extension = ".json"  # Default
+            output_mime_type = "application/json"  # Default
+            
+            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+                # Use the first expected format
+                expected_format = expectedDocumentFormats[0]
+                output_extension = expected_format.get("extension", ".json")
+                output_mime_type = expected_format.get("mimeType", "application/json")
+                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
+            else:
+                logger.info("No expected format specified, using default .json format")
+            
             return self._createResult(
                 success=True,
                 data={
-                    "documentName": f"outlook_email_sent_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
-                    "documentData": result_data
+                    "documents": [
+                        {
+                            "documentName": f"outlook_email_sent_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
+                            "documentData": result_data,
+                            "mimeType": output_mime_type
+                        }
+                    ]
                 }
             )
             
@@ -233,12 +269,14 @@ class MethodOutlook(MethodBase):
             query (str): Search query
             folder (str, optional): Folder to search in (default: "All")
             limit (int, optional): Maximum number of results (default: 20)
+            expectedDocumentFormats (list, optional): List of format dicts (extension, mimeType, description); the first entry determines the output file extension and MIME type
         """
         try:
             connectionReference = parameters.get("connectionReference")
             query = parameters.get("query")
             folder = parameters.get("folder", "All")
             limit = parameters.get("limit", 20)
+            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
             
             if not connectionReference or not query:
                 return self._createResult(
@@ -290,11 +328,29 @@ class MethodOutlook(MethodBase):
                 "timestamp": datetime.now(UTC).isoformat()
             }
             
+            # Determine output format based on expected formats
+            output_extension = ".json"  # Default
+            output_mime_type = "application/json"  # Default
+            
+            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+                # Use the first expected format
+                expected_format = expectedDocumentFormats[0]
+                output_extension = expected_format.get("extension", ".json")
+                output_mime_type = expected_format.get("mimeType", "application/json")
+                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
+            else:
+                logger.info("No expected format specified, using default .json format")
+            
             return self._createResult(
                 success=True,
                 data={
-                    "documentName": f"outlook_email_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
-                    "documentData": result_data
+                    "documents": [
+                        {
+                            "documentName": f"outlook_email_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
+                            "documentData": result_data,
+                            "mimeType": output_mime_type
+                        }
+                    ]
                 }
             )
             
diff --git a/modules/methods/methodSharepoint.py b/modules/methods/methodSharepoint.py
index 8288119d..0560a754 100644
--- a/modules/methods/methodSharepoint.py
+++ b/modules/methods/methodSharepoint.py
@@ -54,12 +54,14 @@ class MethodSharepoint(MethodBase):
             siteUrl (str): SharePoint site URL
             query (str): Query or description to find document
             searchScope (str, optional): Search scope (default: "all")
+            expectedDocumentFormats (list, optional): List of format dicts (extension, mimeType, description); the first entry determines the output file extension and MIME type
         """
         try:
             connectionReference = parameters.get("connectionReference")
             siteUrl = parameters.get("siteUrl")
             query = parameters.get("query")
             searchScope = parameters.get("searchScope", "all")
+            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
             
             if not connectionReference or not siteUrl or not query:
                 return self._createResult(
@@ -108,13 +110,27 @@ class MethodSharepoint(MethodBase):
                 "timestamp": datetime.now(UTC).isoformat()
             }
             
+            # Determine output format based on expected formats
+            output_extension = ".json"  # Default
+            output_mime_type = "application/json"  # Default
+            
+            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+                # Use the first expected format
+                expected_format = expectedDocumentFormats[0]
+                output_extension = expected_format.get("extension", ".json")
+                output_mime_type = expected_format.get("mimeType", "application/json")
+                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
+            else:
+                logger.info("No expected format specified, using default .json format")
+            
             return self._createResult(
                 success=True,
                 data={
                     "documents": [
                         {
-                            "documentName": f"sharepoint_find_path_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
-                            "documentData": result_data
+                            "documentName": f"sharepoint_find_path_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
+                            "documentData": result_data,
+                            "mimeType": output_mime_type
                         }
                     ]
                 }
@@ -139,6 +155,7 @@ class MethodSharepoint(MethodBase):
             siteUrl (str): SharePoint site URL
             documentPaths (List[str]): List of paths to the documents in SharePoint
             includeMetadata (bool, optional): Whether to include metadata (default: True)
+            expectedDocumentFormats (list, optional): List of format dicts (extension, mimeType, description); the first entry determines the output file extension and MIME type
         """
         try:
             documentList = parameters.get("documentList")
@@ -146,6 +163,7 @@ class MethodSharepoint(MethodBase):
             siteUrl = parameters.get("siteUrl")
             documentPaths = parameters.get("documentPaths")
             includeMetadata = parameters.get("includeMetadata", True)
+            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
             
             if not documentList or not connectionReference or not siteUrl or not documentPaths:
                 return self._createResult(
@@ -218,13 +236,27 @@ class MethodSharepoint(MethodBase):
                 "timestamp": datetime.now(UTC).isoformat()
             }
             
+            # Determine output format based on expected formats
+            output_extension = ".json"  # Default
+            output_mime_type = "application/json"  # Default
+            
+            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+                # Use the first expected format
+                expected_format = expectedDocumentFormats[0]
+                output_extension = expected_format.get("extension", ".json")
+                output_mime_type = expected_format.get("mimeType", "application/json")
+                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
+            else:
+                logger.info("No expected format specified, using default .json format")
+            
             return self._createResult(
                 success=True,
                 data={
                     "documents": [
                         {
-                            "documentName": f"sharepoint_documents_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
-                            "documentData": result_data
+                            "documentName": f"sharepoint_documents_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
+                            "documentData": result_data,
+                            "mimeType": output_mime_type
                         }
                     ]
                 }
@@ -248,6 +280,7 @@ class MethodSharepoint(MethodBase):
             documentPaths (List[str]): List of paths where to upload the documents
             documentList (str): Reference to the document list to upload
             fileNames (List[str]): List of names for the uploaded files
+            expectedDocumentFormats (list, optional): List of format dicts (extension, mimeType, description); the first entry determines the output file extension and MIME type
         """
         try:
             connectionReference = parameters.get("connectionReference")
@@ -255,6 +288,7 @@ class MethodSharepoint(MethodBase):
             documentPaths = parameters.get("documentPaths")
             documentList = parameters.get("documentList")
             fileNames = parameters.get("fileNames")
+            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
             
             if not connectionReference or not siteUrl or not documentPaths or not documentList or not fileNames:
                 return self._createResult(
@@ -339,13 +373,27 @@ class MethodSharepoint(MethodBase):
                 "timestamp": datetime.now(UTC).isoformat()
             }
             
+            # Determine output format based on expected formats
+            output_extension = ".json"  # Default
+            output_mime_type = "application/json"  # Default
+            
+            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+                # Use the first expected format
+                expected_format = expectedDocumentFormats[0]
+                output_extension = expected_format.get("extension", ".json")
+                output_mime_type = expected_format.get("mimeType", "application/json")
+                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
+            else:
+                logger.info("No expected format specified, using default .json format")
+            
             return self._createResult(
                 success=True,
                 data={
                     "documents": [
                         {
-                            "documentName": f"sharepoint_upload_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
-                            "documentData": result_data
+                            "documentName": f"sharepoint_upload_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
+                            "documentData": result_data,
+                            "mimeType": output_mime_type
                         }
                     ]
                 }
@@ -369,12 +417,14 @@ class MethodSharepoint(MethodBase):
             siteUrl (str): SharePoint site URL
             folderPaths (List[str]): List of paths to the folders to list
             includeSubfolders (bool, optional): Whether to include subfolders (default: False)
+            expectedDocumentFormats (list, optional): List of format dicts (extension, mimeType, description); the first entry determines the output file extension and MIME type
         """
         try:
             connectionReference = parameters.get("connectionReference")
             siteUrl = parameters.get("siteUrl")
             folderPaths = parameters.get("folderPaths")
             includeSubfolders = parameters.get("includeSubfolders", False)
+            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
             
             if not connectionReference or not siteUrl or not folderPaths:
                 return self._createResult(
@@ -436,13 +486,27 @@ class MethodSharepoint(MethodBase):
                 "timestamp": datetime.now(UTC).isoformat()
             }
             
+            # Determine output format based on expected formats
+            output_extension = ".json"  # Default
+            output_mime_type = "application/json"  # Default
+            
+            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+                # Use the first expected format
+                expected_format = expectedDocumentFormats[0]
+                output_extension = expected_format.get("extension", ".json")
+                output_mime_type = expected_format.get("mimeType", "application/json")
+                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
+            else:
+                logger.info("No expected format specified, using default .json format")
+            
             return self._createResult(
                 success=True,
                 data={
                     "documents": [
                         {
-                            "documentName": f"sharepoint_document_list_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
-                            "documentData": result_data
+                            "documentName": f"sharepoint_document_list_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
+                            "documentData": result_data,
+                            "mimeType": output_mime_type
                         }
                     ]
                 }
diff --git a/modules/methods/methodWeb.py b/modules/methods/methodWeb.py
index a03549e5..c257bed9 100644
--- a/modules/methods/methodWeb.py
+++ b/modules/methods/methodWeb.py
@@ -224,12 +224,14 @@ class MethodWeb(MethodBase):
             maxDepth (int, optional): Maximum crawl depth (default: 2)
             includeImages (bool, optional): Whether to include images (default: False)
             followLinks (bool, optional): Whether to follow links (default: True)
+            expectedDocumentFormats (list, optional): List of format dicts (extension, mimeType, description); the first entry determines the output file extension and MIME type
         """
         try:
             urls = parameters.get("urls")
             maxDepth = parameters.get("maxDepth", 2)
             includeImages = parameters.get("includeImages", False)
             followLinks = parameters.get("followLinks", True)
+            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
             
             if not urls:
                 return self._createResult(
@@ -307,13 +309,27 @@ class MethodWeb(MethodBase):
                 "timestamp": datetime.now(UTC).isoformat()
             }
             
+            # Determine output format based on expected formats
+            output_extension = ".json"  # Default
+            output_mime_type = "application/json"  # Default
+            
+            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+                # Use the first expected format
+                expected_format = expectedDocumentFormats[0]
+                output_extension = expected_format.get("extension", ".json")
+                output_mime_type = expected_format.get("mimeType", "application/json")
+                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
+            else:
+                logger.info("No expected format specified, using default .json format")
+            
             return self._createResult(
                 success=True,
                 data={
                     "documents": [
                         {
-                            "documentName": f"web_crawl_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
-                            "documentData": result_data
+                            "documentName": f"web_crawl_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
+                            "documentData": result_data,
+                            "mimeType": output_mime_type
                         }
                     ]
                 }
@@ -336,11 +352,13 @@ class MethodWeb(MethodBase):
             url (str): URL to scrape
             selectors (Dict[str, str]): CSS selectors for data extraction
             format (str, optional): Output format (default: "json")
+            expectedDocumentFormats (list, optional): List of format dicts (extension, mimeType, description); the first entry determines the output file extension and MIME type
         """
         try:
             url = parameters.get("url")
             selectors = parameters.get("selectors")
             format = parameters.get("format", "json")
+            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
             
             if not url or not selectors:
                 return self._createResult(
@@ -400,13 +418,27 @@ class MethodWeb(MethodBase):
                 "timestamp": datetime.now(UTC).isoformat()
             }
             
+            # Determine output format based on expected formats
+            output_extension = f".{format}"  # Default to format parameter
+            output_mime_type = "application/json"  # Default
+            
+            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+                # Use the first expected format
+                expected_format = expectedDocumentFormats[0]
+                output_extension = expected_format.get("extension", f".{format}")
+                output_mime_type = expected_format.get("mimeType", "application/json")
+                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
+            else:
+                logger.info(f"No expected format specified, using format parameter: {format}")
+            
             return self._createResult(
                 success=True,
                 data={
                     "documents": [
                         {
-                            "documentName": f"web_scrape_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.{format}",
-                            "documentData": result_data
+                            "documentName": f"web_scrape_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
+                            "documentData": result_data,
+                            "mimeType": output_mime_type
                         }
                     ]
                 }
@@ -430,12 +462,14 @@ class MethodWeb(MethodBase):
             engine (str, optional): Search engine to use (default: "google")
             maxResults (int, optional): Maximum number of results (default: 10)
             filter (str, optional): Additional search filters
+            expectedDocumentFormats (list, optional): List of format dicts (extension, mimeType, description); the first entry determines the output file extension and MIME type
         """
         try:
             query = parameters.get("query")
             engine = parameters.get("engine", "google")
             maxResults = parameters.get("maxResults", 10)
             filter = parameters.get("filter")
+            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
             
             if not query:
                 return self._createResult(
@@ -533,13 +567,27 @@ class MethodWeb(MethodBase):
                 "timestamp": datetime.now(UTC).isoformat()
             }
             
+            # Determine output format based on expected formats
+            output_extension = ".json"  # Default
+            output_mime_type = "application/json"  # Default
+            
+            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+                # Use the first expected format
+                expected_format = expectedDocumentFormats[0]
+                output_extension = expected_format.get("extension", ".json")
+                output_mime_type = expected_format.get("mimeType", "application/json")
+                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
+            else:
+                logger.info("No expected format specified, using default .json format")
+            
             return self._createResult(
                 success=True,
                 data={
                     "documents": [
                         {
-                            "documentName": f"web_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
-                            "documentData": result_data
+                            "documentName": f"web_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
+                            "documentData": result_data,
+                            "mimeType": output_mime_type
                         }
                     ]
                 }
@@ -561,10 +609,12 @@ class MethodWeb(MethodBase):
         Parameters:
             url (str): URL to validate
             checks (List[str], optional): Types of checks to perform (default: ["accessibility", "seo", "performance"])
+            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
         """
         try:
             url = parameters.get("url")
             checks = parameters.get("checks", ["accessibility", "seo", "performance"])
+            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
             
             if not url:
                 return self._createResult(
@@ -609,13 +659,27 @@ class MethodWeb(MethodBase):
                 "timestamp": datetime.now(UTC).isoformat()
             }
             
+            # Determine output format based on expected formats
+            output_extension = ".json"  # Default
+            output_mime_type = "application/json"  # Default
+            
+            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+                # Use the first expected format
+                expected_format = expectedDocumentFormats[0]
+                output_extension = expected_format.get("extension", ".json")
+                output_mime_type = expected_format.get("mimeType", "application/json")
+                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
+            else:
+                logger.info("No expected format specified, using default .json format")
+            
             return self._createResult(
                 success=True,
                 data={
                     "documents": [
                         {
-                            "documentName": f"web_validation_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json",
-                            "documentData": result_data
+                            "documentName": f"web_validation_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
+                            "documentData": result_data,
+                            "mimeType": output_mime_type
                         }
                     ]
                 }