diff --git a/ENHANCED_AI_AGENT_RECOMMENDATIONS.md b/ENHANCED_AI_AGENT_RECOMMENDATIONS.md deleted file mode 100644 index e019fa76..00000000 --- a/ENHANCED_AI_AGENT_RECOMMENDATIONS.md +++ /dev/null @@ -1,277 +0,0 @@ -# Enhanced AI Agent System Recommendations - -## Overview -This document provides comprehensive recommendations for building a stable, robust, and perfect AI agent system with clear handovers and optimal user request processing. - -## 1. **Enhanced Error Recovery & Resilience** - -### ✅ **Implemented Features:** -- **Circuit Breaker Pattern**: Prevents cascading failures when AI services are down -- **Exponential Backoff Retry**: Intelligent retry with increasing delays -- **Timeout Handling**: Prevents hanging operations -- **Fallback Mechanisms**: Graceful degradation when AI fails -- **Alternative Approach Generation**: Tries different methods when original fails - -### 🔄 **Additional Recommendations:** - -#### A. **State Persistence & Recovery** -```python -# Add checkpoint system for long-running workflows -class WorkflowCheckpoint: - def save_checkpoint(self, workflow_id: str, task_step: int, state: Dict): - # Save current state to database - pass - - def restore_checkpoint(self, workflow_id: str) -> Dict: - # Restore from last checkpoint - pass -``` - -#### B. **Graceful Degradation** -```python -# Implement multiple AI providers with fallback -class MultiProviderAIService: - def __init__(self): - self.providers = [ - OpenAIService(), - AnthropicService(), - LocalLLMService() # Fallback - ] - - async def call_with_fallback(self, prompt: str) -> str: - for provider in self.providers: - try: - return await provider.call(prompt) - except Exception: - continue - raise Exception("All AI providers failed") -``` - -## 2. **Intelligent Task Planning & Execution** - -### ✅ **Current Implementation:** -- **Task Planning**: AI analyzes request and creates logical task steps -- **Handover Review**: Validates each step before proceeding -- **Dynamic Action Generation**: Creates actions based on current context - -### 🔄 **Enhanced Recommendations:** - -#### A. **Dependency Graph Management** -```python -class TaskDependencyGraph: - def __init__(self): - self.nodes = {} # task_id -> task_info - self.edges = {} # task_id -> [dependencies] - - def add_task(self, task_id: str, dependencies: List[str]): - self.nodes[task_id] = {"status": "pending"} - self.edges[task_id] = dependencies - - def get_ready_tasks(self) -> List[str]: - # Return tasks with all dependencies completed - pass - - def detect_cycles(self) -> bool: - # Detect circular dependencies - pass -``` - -#### B. **Parallel Task Execution** -```python -async def execute_parallel_tasks(self, independent_tasks: List[Dict]) -> List[Dict]: - """Execute independent tasks in parallel for better performance""" - tasks = [] - for task_step in independent_tasks: - task = asyncio.create_task(self._executeTaskStep(task_step)) - tasks.append(task) - - results = await asyncio.gather(*tasks, return_exceptions=True) - return results -``` - -## 3. **Advanced Quality Assurance** - -### 🔄 **Quality Metrics & Validation:** - -#### A. **Multi-Dimensional Quality Assessment** -```python -class QualityAssessor: - def assess_quality(self, result: Dict, criteria: Dict) -> QualityScore: - return QualityScore( - completeness=self._assess_completeness(result, criteria), - accuracy=self._assess_accuracy(result, criteria), - relevance=self._assess_relevance(result, criteria), - coherence=self._assess_coherence(result, criteria) - ) -``` - -#### B. **Continuous Learning & Improvement** -```python -class LearningSystem: - def record_execution(self, task: Dict, result: Dict, quality_score: float): - """Record execution for learning""" - pass - - def suggest_improvements(self, task_type: str) -> List[str]: - """Suggest improvements based on historical data""" - pass -``` - -## 4. **Enhanced Document & Context Management** - -### 🔄 **Smart Document Processing:** - -#### A. **Document Understanding & Classification** -```python -class DocumentProcessor: - def classify_document(self, content: str) -> DocumentType: - """Classify document type for better processing""" - pass - - def extract_key_information(self, document: Document) -> Dict: - """Extract key information for context""" - pass -``` - -#### B. **Context-Aware Processing** -```python -class ContextManager: - def __init__(self): - self.context_stack = [] - self.document_cache = {} - - def add_context(self, context: Dict): - """Add context for current processing""" - self.context_stack.append(context) - - def get_relevant_context(self, task: Dict) -> Dict: - """Get relevant context for specific task""" - pass -``` - -## 5. **Advanced Handover Mechanisms** - -### 🔄 **Intelligent Handover System:** - -#### A. **Handover Validation Engine** -```python -class HandoverValidator: - def validate_handover(self, from_task: Dict, to_task: Dict, data: Dict) -> ValidationResult: - """Validate data handover between tasks""" - return ValidationResult( - is_valid=self._check_data_completeness(data, to_task), - missing_data=self._identify_missing_data(data, to_task), - quality_issues=self._identify_quality_issues(data), - suggestions=self._generate_handover_suggestions(data, to_task) - ) -``` - -## 6. **Monitoring & Observability** - -### 🔄 **Comprehensive Monitoring:** - -#### A. **Real-Time Metrics** -```python -class MetricsCollector: - def __init__(self): - self.metrics = { - 'task_execution_time': [], - 'ai_call_latency': [], - 'success_rate': [], - 'error_rate': [], - 'quality_scores': [] - } - - def record_metric(self, metric_name: str, value: float): - """Record metric for monitoring""" - pass - - def get_health_score(self) -> float: - """Calculate overall system health score""" - pass -``` - -## 7. **Security & Privacy** - -### 🔄 **Enhanced Security Measures:** - -#### A. **Data Sanitization** -```python -class DataSanitizer: - def sanitize_input(self, user_input: str) -> str: - """Sanitize user input for security""" - pass - - def validate_documents(self, documents: List[Document]) -> bool: - """Validate documents for security risks""" - pass -``` - -## 8. **Performance Optimization** - -### 🔄 **Performance Enhancements:** - -#### A. **Caching Strategy** -```python -class CacheManager: - def __init__(self): - self.document_cache = {} - self.ai_response_cache = {} - self.task_result_cache = {} - - def get_cached_result(self, key: str) -> Optional[Dict]: - """Get cached result if available""" - pass - - def cache_result(self, key: str, result: Dict, ttl: int = 3600): - """Cache result with TTL""" - pass -``` - -## 9. **Testing & Validation** - -### 🔄 **Comprehensive Testing:** - -#### A. **Automated Testing Framework** -```python -class TestFramework: - def test_task_planning(self, scenarios: List[Dict]): - """Test task planning with various scenarios""" - pass - - def test_handover_validation(self, test_cases: List[Dict]): - """Test handover validation logic""" - pass -``` - -## 10. **Implementation Priority** - -### **Phase 1 (Critical - Implement First):** -1. ✅ Circuit Breaker Pattern -2. ✅ Retry Mechanisms -3. ✅ Fallback Systems -4. 🔄 Enhanced Error Handling - -### **Phase 2 (Important - Implement Next):** -1. 🔄 Parallel Task Execution -2. 🔄 Advanced Quality Assessment -3. 🔄 Smart Document Processing -4. 🔄 Comprehensive Monitoring - -### **Phase 3 (Enhancement - Future):** -1. 🔄 Learning & Optimization -2. 🔄 Advanced Security -3. 🔄 Performance Optimization -4. 🔄 Advanced Testing - -## Conclusion - -The enhanced AI agent system provides: -- **Robustness**: Multiple layers of error recovery and fallback mechanisms -- **Intelligence**: Smart task planning and dynamic action generation -- **Quality**: Comprehensive validation and quality assessment -- **Observability**: Full monitoring and alerting capabilities -- **Scalability**: Resource management and performance optimization -- **Security**: Data protection and access control - -This system will process user requests in a near-perfect way with clear handovers, comprehensive error handling, and continuous improvement capabilities. \ No newline at end of file diff --git a/modules/methods/methodCoder.py b/modules/methods/methodCoder.py index a1b8f3ca..ea6937a4 100644 --- a/modules/methods/methodCoder.py +++ b/modules/methods/methodCoder.py @@ -1,29 +1,98 @@ from typing import Dict, Any, Optional import logging +import uuid from datetime import datetime, UTC from modules.workflow.methodBase import MethodBase, ActionResult, action logger = logging.getLogger(__name__) -class CoderService: - """Service for code analysis, generation, and refactoring operations""" +class MethodCoder(MethodBase): + """Coder method implementation for code operations""" def __init__(self, serviceContainer: Any): - self.serviceContainer = serviceContainer + super().__init__(serviceContainer) + self.name = "coder" + self.description = "Handle code operations like analysis and generation" - async def analyzeCode(self, code: str, language: str = "python", checks: list = None) -> Dict[str, Any]: - """Analyze code quality and structure""" - if checks is None: - checks = ["complexity", "style", "security"] + @action + async def analyze(self, parameters: Dict[str, Any]) -> ActionResult: + """ + Analyze code quality and structure + Parameters: + documentList (str): Reference to the document list to analyze + aiPrompt (str): AI prompt for code analysis + language (str, optional): Programming language (default: "python") + checks (List[str], optional): Types of checks to perform (default: ["complexity", "style", "security"]) + """ try: + documentList = parameters.get("documentList") + aiPrompt = parameters.get("aiPrompt") + language = parameters.get("language", "python") + checks = parameters.get("checks", ["complexity", "style", "security"]) + + if not documentList: + return self._createResult( + success=False, + data={}, + error="Document list reference is required" + ) + + if not aiPrompt: + return self._createResult( + success=False, + data={}, + error="AI prompt is required" + ) + + chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + if not chatDocuments: + return self._createResult( + success=False, + data={}, + error="No documents found for the provided reference" + ) + + # Extract content from all documents + all_code_content = [] + + for chatDocument in chatDocuments: + fileId = chatDocument.fileId + code = self.serviceContainer.getFileData(fileId) + file_info = self.serviceContainer.getFileInfo(fileId) + + if not code: + logger.warning(f"Code file is empty for fileId: {fileId}") + continue + + # Use AI prompt to extract relevant code content + extracted_content = await self.serviceContainer.extractContentFromFileData( + prompt=aiPrompt, + fileData=code, + filename=file_info.get('name', 'code'), + mimeType=file_info.get('mimeType', 'text/plain'), + base64Encoded=False + ) + + all_code_content.append(extracted_content) + + if not all_code_content: + return self._createResult( + success=False, + data={}, + error="No code content could be extracted from any documents" + ) + + # Combine all code content for analysis + combined_code = "\n\n--- CODE SEPARATOR ---\n\n".join(all_code_content) + # Create analysis prompt analysis_prompt = f""" Analyze this {language} code for quality, structure, and potential issues. Code to analyze: - {code} + {combined_code} Please check for: {', '.join(checks)} @@ -34,144 +103,27 @@ class CoderService: 3. Security considerations 4. Performance optimizations 5. Best practices compliance + 6. Summary of findings across all documents """ # Use AI service for analysis analysis_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(analysis_prompt) - return { + # Create result data + result_data = { + "documentCount": len(chatDocuments), "language": language, "checks": checks, "analysis": analysis_result, "timestamp": datetime.now(UTC).isoformat() } - except Exception as e: - logger.error(f"Error analyzing code: {str(e)}") - return { - "error": str(e), - "language": language, - "checks": checks - } - - async def generateCode(self, requirements: str, language: str = "python", template: str = None) -> Dict[str, Any]: - """Generate code based on requirements""" - try: - # Create generation prompt - generation_prompt = f""" - Generate {language} code based on the following requirements: - - Requirements: - {requirements} - - {f'Template to follow: {template}' if template else ''} - - Please provide: - 1. Complete, working code - 2. Clear comments and documentation - 3. Error handling where appropriate - 4. Best practices implementation - """ - - # Use AI service for code generation - generated_code = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(generation_prompt) - - return { - "language": language, - "requirements": requirements, - "code": generated_code, - "timestamp": datetime.now(UTC).isoformat() - } - - except Exception as e: - logger.error(f"Error generating code: {str(e)}") - return { - "error": str(e), - "language": language, - "requirements": requirements - } - - async def refactorCode(self, code: str, language: str = "python", improvements: list = None) -> Dict[str, Any]: - """Refactor code for better quality""" - if improvements is None: - improvements = ["style", "complexity"] - - try: - # Create refactoring prompt - refactor_prompt = f""" - Refactor this {language} code to improve: - {', '.join(improvements)} - - Original code: - {code} - - Please provide: - 1. Refactored code with improvements - 2. Explanation of changes made - 3. Benefits of the refactoring - 4. Any potential trade-offs - """ - - # Use AI service for refactoring - refactored_code = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(refactor_prompt) - - return { - "language": language, - "improvements": improvements, - "original_code": code, - "refactored_code": refactored_code, - "timestamp": datetime.now(UTC).isoformat() - } - - except Exception as e: - logger.error(f"Error refactoring code: {str(e)}") - return { - "error": str(e), - "language": language, - "improvements": improvements - } - -class MethodCoder(MethodBase): - """Coder method implementation for code operations""" - - def __init__(self, serviceContainer: Any): - super().__init__(serviceContainer) - self.name = "coder" - self.description = "Handle code operations like analysis and generation" - self.coderService = CoderService(serviceContainer) - - @action - async def analyze(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Analyze code quality and structure - - Parameters: - code (str): The code to analyze - language (str, optional): Programming language (default: "python") - checks (List[str], optional): Types of checks to perform (default: ["complexity", "style", "security"]) - """ - try: - code = parameters.get("code") - language = parameters.get("language", "python") - checks = parameters.get("checks", ["complexity", "style", "security"]) - - if not code: - return self._createResult( - success=False, - data={}, - error="Code is required" - ) - - # Analyze code - results = await self.coderService.analyzeCode( - code=code, - language=language, - checks=checks - ) - return self._createResult( success=True, - data=results + data={ + "documentName": f"code_analysis_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } ) except Exception as e: @@ -204,16 +156,39 @@ class MethodCoder(MethodBase): error="Requirements are required" ) - # Generate code - code = await self.coderService.generateCode( - requirements=requirements, - language=language, - template=template - ) + # Create generation prompt + generation_prompt = f""" + Generate {language} code based on the following requirements: + + Requirements: + {requirements} + + {f'Template to follow: {template}' if template else ''} + + Please provide: + 1. Complete, working code + 2. Clear comments and documentation + 3. Error handling where appropriate + 4. Best practices implementation + """ + + # Use AI service for code generation + generated_code = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(generation_prompt) + + # Create result data + result_data = { + "language": language, + "requirements": requirements, + "code": generated_code, + "timestamp": datetime.now(UTC).isoformat() + } return self._createResult( success=True, - data=code + data={ + "documentName": f"generated_code_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.{language}", + "documentData": result_data + } ) except Exception as e: @@ -230,32 +205,96 @@ class MethodCoder(MethodBase): Refactor code for better quality Parameters: - code (str): The code to refactor + documentList (str): Reference to the document list to refactor + aiImprovementPrompt (str): AI prompt for code improvements language (str, optional): Programming language (default: "python") - improvements (List[str], optional): Types of improvements to make (default: ["style", "complexity"]) """ try: - code = parameters.get("code") + documentList = parameters.get("documentList") + aiImprovementPrompt = parameters.get("aiImprovementPrompt") language = parameters.get("language", "python") - improvements = parameters.get("improvements", ["style", "complexity"]) - if not code: + if not documentList: return self._createResult( success=False, data={}, - error="Code is required" + error="Document list reference is required" ) - # Refactor code - results = await self.coderService.refactorCode( - code=code, - language=language, - improvements=improvements - ) + if not aiImprovementPrompt: + return self._createResult( + success=False, + data={}, + error="AI improvement prompt is required" + ) + + chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + if not chatDocuments: + return self._createResult( + success=False, + data={}, + error="No documents found for the provided reference" + ) + + # Process each document individually + refactored_results = [] + + for chatDocument in chatDocuments: + fileId = chatDocument.fileId + code = self.serviceContainer.getFileData(fileId) + file_info = self.serviceContainer.getFileInfo(fileId) + + if not code: + logger.warning(f"Code file is empty for fileId: {fileId}") + continue + + # Create refactoring prompt for this specific document + refactor_prompt = f""" + Refactor this {language} code based on the following improvement requirements: + + Improvement requirements: + {aiImprovementPrompt} + + Original code: + {code} + + Please provide: + 1. Refactored code with improvements + 2. Explanation of changes made + 3. Benefits of the refactoring + 4. Any potential trade-offs + """ + + # Use AI service for refactoring + refactored_code = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(refactor_prompt) + + refactored_results.append({ + "original_file": file_info.get('name', 'unknown'), + "original_code": code, + "refactored_code": refactored_code + }) + + if not refactored_results: + return self._createResult( + success=False, + data={}, + error="No code could be refactored from any documents" + ) + + # Create result data + result_data = { + "documentCount": len(chatDocuments), + "language": language, + "refactored_results": refactored_results, + "timestamp": datetime.now(UTC).isoformat() + } return self._createResult( success=True, - data=results + data={ + "documentName": f"refactored_code_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.{language}", + "documentData": result_data + } ) except Exception as e: diff --git a/modules/methods/methodDocument.py b/modules/methods/methodDocument.py index 3dfd95f2..a8659037 100644 --- a/modules/methods/methodDocument.py +++ b/modules/methods/methodDocument.py @@ -5,171 +5,14 @@ Handles document operations using the document service. import logging from typing import Dict, Any, List, Optional +import uuid +from datetime import datetime, UTC from modules.workflow.managerDocument import DocumentManager from modules.workflow.methodBase import MethodBase, ActionResult, action logger = logging.getLogger(__name__) -class DocumentService: - """Service for document content extraction, analysis, and summarization""" - - def __init__(self, serviceContainer: Any): - self.serviceContainer = serviceContainer - - async def extractContent(self, fileId: str, format: str = "text", includeMetadata: bool = True) -> Dict[str, Any]: - """Extract content from document using prompt-based extraction""" - try: - # Get file data - file_data = self.serviceContainer.getFileData(fileId) - file_info = self.serviceContainer.getFileInfo(fileId) - - if not file_data: - return { - "error": "File not found or empty", - "fileId": fileId - } - - # Create extraction prompt based on format - extraction_prompt = f""" - Extract and structure the content from this document. - - File information: - - Name: {file_info.get('name', 'Unknown')} - - Type: {file_info.get('mimeType', 'Unknown')} - - Size: {len(file_data)} bytes - - Please extract: - 1. Main content and key information - 2. Structured data if present (tables, lists, etc.) - 3. Important facts and figures - 4. Key insights and takeaways - - Format the output as: {format} - Include metadata: {includeMetadata} - """ - - # Use the new direct file data extraction method - extracted_content = await self.serviceContainer.extractContentFromFileData( - prompt=extraction_prompt, - fileData=file_data, - filename=file_info.get('name', 'document'), - mimeType=file_info.get('mimeType', 'application/octet-stream'), - base64Encoded=False - ) - - result = { - "fileId": fileId, - "format": format, - "content": extracted_content, - "fileInfo": file_info if includeMetadata else None - } - - return result - - except Exception as e: - logger.error(f"Error extracting content: {str(e)}") - return { - "error": str(e), - "fileId": fileId - } - - async def analyzeContent(self, fileId: str, analysis: list = None) -> Dict[str, Any]: - """Analyze document content for entities, topics, and sentiment""" - if analysis is None: - analysis = ["entities", "topics", "sentiment"] - - try: - # First extract content - content_result = await self.extractContent(fileId, "text", True) - - if "error" in content_result: - return content_result - - content = content_result.get("content", "") - - # Create analysis prompt - analysis_prompt = f""" - Analyze this document content for the following aspects: - {', '.join(analysis)} - - Document content: - {content[:5000]} # Limit content length - - Please provide a detailed analysis including: - 1. Key entities (people, organizations, locations, dates) - 2. Main topics and themes - 3. Sentiment analysis (positive, negative, neutral) - 4. Key insights and patterns - 5. Important relationships between entities - 6. Document structure and organization - """ - - # Use AI service for analysis - analysis_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(analysis_prompt) - - return { - "fileId": fileId, - "analysis": analysis, - "results": analysis_result, - "content": content_result - } - - except Exception as e: - logger.error(f"Error analyzing content: {str(e)}") - return { - "error": str(e), - "fileId": fileId, - "analysis": analysis - } - - async def summarizeContent(self, fileId: str, maxLength: int = 200, format: str = "text") -> Dict[str, Any]: - """Summarize document content""" - try: - # First extract content - content_result = await self.extractContent(fileId, "text", False) - - if "error" in content_result: - return content_result - - content = content_result.get("content", "") - - # Create summarization prompt - summary_prompt = f""" - Create a comprehensive summary of this document content. - - Document content: - {content[:8000]} # Limit content length - - Requirements: - - Maximum length: {maxLength} words - - Format: {format} - - Include key points and main ideas - - Maintain accuracy and completeness - - Use clear, professional language - - Highlight important insights and conclusions - """ - - # Use AI service for summarization - summary = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(summary_prompt) - - return { - "fileId": fileId, - "maxLength": maxLength, - "format": format, - "summary": summary, - "wordCount": len(summary.split()), - "originalContent": content_result - } - - except Exception as e: - logger.error(f"Error summarizing content: {str(e)}") - return { - "error": str(e), - "fileId": fileId, - "maxLength": maxLength - } - class MethodDocument(MethodBase): """Document method implementation for document operations""" @@ -178,7 +21,6 @@ class MethodDocument(MethodBase): super().__init__(serviceContainer) self.name = "document" self.description = "Handle document operations like extraction and analysis" - self.documentService = DocumentService(serviceContainer) self.documentManager = DocumentManager(serviceContainer) @action @@ -187,34 +29,89 @@ class MethodDocument(MethodBase): Extract content from document Parameters: - fileId (str): The ID of the document to extract content from + documentList (str): Reference to the document list to extract content from + aiPrompt (str): AI prompt for content extraction format (str, optional): Output format (default: "text") includeMetadata (bool, optional): Whether to include metadata (default: True) """ try: - fileId = parameters.get("fileId") + documentList = parameters.get("documentList") + aiPrompt = parameters.get("aiPrompt") format = parameters.get("format", "text") includeMetadata = parameters.get("includeMetadata", True) - if not fileId: + if not documentList: return self._createResult( success=False, data={}, - error="File ID is required" + error="Document list reference is required" ) - # Extract content - content = await self.documentService.extractContent( - fileId=fileId, - format=format, - includeMetadata=includeMetadata - ) + if not aiPrompt: + return self._createResult( + success=False, + data={}, + error="AI prompt is required" + ) + + chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + if not chatDocuments: + return self._createResult( + success=False, + data={}, + error="No documents found for the provided reference" + ) + + # Extract content from all documents + all_extracted_content = [] + file_infos = [] + + for chatDocument in chatDocuments: + fileId = chatDocument.fileId + file_data = self.serviceContainer.getFileData(fileId) + file_info = self.serviceContainer.getFileInfo(fileId) + + if not file_data: + logger.warning(f"File not found or empty for fileId: {fileId}") + continue + + extracted_content = await self.serviceContainer.extractContentFromFileData( + prompt=aiPrompt, + fileData=file_data, + filename=file_info.get('name', 'document'), + mimeType=file_info.get('mimeType', 'application/octet-stream'), + base64Encoded=False + ) + + all_extracted_content.append(extracted_content) + if includeMetadata: + file_infos.append(file_info) + + if not all_extracted_content: + return self._createResult( + success=False, + data={}, + error="No content could be extracted from any documents" + ) + + # Combine all extracted content + combined_content = "\n\n--- DOCUMENT SEPARATOR ---\n\n".join(all_extracted_content) + + result_data = { + "documentCount": len(chatDocuments), + "format": format, + "content": combined_content, + "fileInfos": file_infos if includeMetadata else None, + "timestamp": datetime.now(UTC).isoformat() + } return self._createResult( success=True, - data=content + data={ + "documentName": f"extracted_content_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.txt", + "documentData": result_data + } ) - except Exception as e: logger.error(f"Error extracting content: {str(e)}") return self._createResult( @@ -229,31 +126,102 @@ class MethodDocument(MethodBase): Analyze document content Parameters: - fileId (str): The ID of the document to analyze + documentList (str): Reference to the document list to analyze + aiPrompt (str): AI prompt for content analysis analysis (List[str], optional): Types of analysis to perform (default: ["entities", "topics", "sentiment"]) """ try: - fileId = parameters.get("fileId") + documentList = parameters.get("documentList") + aiPrompt = parameters.get("aiPrompt") analysis = parameters.get("analysis", ["entities", "topics", "sentiment"]) - if not fileId: + if not documentList: return self._createResult( success=False, data={}, - error="File ID is required" + error="Document list reference is required" ) - # Analyze content - results = await self.documentService.analyzeContent( - fileId=fileId, - analysis=analysis - ) + if not aiPrompt: + return self._createResult( + success=False, + data={}, + error="AI prompt is required" + ) + + chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + if not chatDocuments: + return self._createResult( + success=False, + data={}, + error="No documents found for the provided reference" + ) + + # Extract content from all documents + all_extracted_content = [] + + for chatDocument in chatDocuments: + fileId = chatDocument.fileId + file_data = self.serviceContainer.getFileData(fileId) + file_info = self.serviceContainer.getFileInfo(fileId) + + if not file_data: + logger.warning(f"File not found or empty for fileId: {fileId}") + continue + + extracted_content = await self.serviceContainer.extractContentFromFileData( + prompt=aiPrompt, + fileData=file_data, + filename=file_info.get('name', 'document'), + mimeType=file_info.get('mimeType', 'application/octet-stream'), + base64Encoded=False + ) + + all_extracted_content.append(extracted_content) + + if not all_extracted_content: + return self._createResult( + success=False, + data={}, + error="No content could be extracted from any documents" + ) + + # Combine all extracted content for analysis + combined_content = "\n\n--- DOCUMENT SEPARATOR ---\n\n".join(all_extracted_content) + + analysis_prompt = f""" + Analyze this document content for the following aspects: + {', '.join(analysis)} + + Document content: + {combined_content[:8000]} # Limit content length + + Please provide a detailed analysis including: + 1. Key entities (people, organizations, locations, dates) + 2. Main topics and themes + 3. Sentiment analysis (positive, negative, neutral) + 4. Key insights and patterns + 5. Important relationships between entities + 6. Document structure and organization + """ + + analysis_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(analysis_prompt) + + result_data = { + "documentCount": len(chatDocuments), + "analysis": analysis, + "results": analysis_result, + "content": combined_content, + "timestamp": datetime.now(UTC).isoformat() + } return self._createResult( success=True, - data=results + data={ + "documentName": f"document_analysis_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } ) - except Exception as e: logger.error(f"Error analyzing content: {str(e)}") return self._createResult( @@ -268,34 +236,105 @@ class MethodDocument(MethodBase): Summarize document content Parameters: - fileId (str): The ID of the document to summarize + documentList (str): Reference to the document list to summarize + aiPrompt (str): AI prompt for content extraction maxLength (int, optional): Maximum length of summary in words (default: 200) format (str, optional): Output format (default: "text") """ try: - fileId = parameters.get("fileId") + documentList = parameters.get("documentList") + aiPrompt = parameters.get("aiPrompt") maxLength = parameters.get("maxLength", 200) format = parameters.get("format", "text") - if not fileId: + if not documentList: return self._createResult( success=False, data={}, - error="File ID is required" + error="Document list reference is required" ) - # Summarize content - summary = await self.documentService.summarizeContent( - fileId=fileId, - maxLength=maxLength, - format=format - ) + if not aiPrompt: + return self._createResult( + success=False, + data={}, + error="AI prompt is required" + ) + + chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + if not chatDocuments: + return self._createResult( + success=False, + data={}, + error="No documents found for the provided reference" + ) + + # Extract content from all documents + all_extracted_content = [] + + for chatDocument in chatDocuments: + fileId = chatDocument.fileId + file_data = self.serviceContainer.getFileData(fileId) + file_info = self.serviceContainer.getFileInfo(fileId) + + if not file_data: + logger.warning(f"File not found or empty for fileId: {fileId}") + continue + + extracted_content = await self.serviceContainer.extractContentFromFileData( + prompt=aiPrompt, + fileData=file_data, + filename=file_info.get('name', 'document'), + mimeType=file_info.get('mimeType', 'application/octet-stream'), + base64Encoded=False + ) + + all_extracted_content.append(extracted_content) + + if not all_extracted_content: + return self._createResult( + success=False, + data={}, + error="No content could be extracted from any documents" + ) + + # Combine all extracted content for summarization + combined_content = "\n\n--- DOCUMENT SEPARATOR ---\n\n".join(all_extracted_content) + + summary_prompt = f""" + Create a comprehensive summary of this document content. + + Document content: + {combined_content[:8000]} # Limit content length + + Requirements: + - Maximum length: {maxLength} words + - Format: {format} + - Include key points and main ideas + - Maintain accuracy and completeness + - Use clear, professional language + - Highlight important insights and conclusions + """ + + summary = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(summary_prompt) + + result_data = { + "documentCount": len(chatDocuments), + "maxLength": maxLength, + "format": format, + "summary": summary, + "wordCount": len(summary.split()), + "originalContent": combined_content, + "timestamp": datetime.now(UTC).isoformat() + } return self._createResult( success=True, - data=summary + data={ + "documentName": f"document_summary_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.txt", + "documentData": result_data + } ) - except Exception as e: logger.error(f"Error summarizing content: {str(e)}") return self._createResult( diff --git a/modules/methods/methodExcel.py b/modules/methods/methodExcel.py deleted file mode 100644 index 1236856b..00000000 --- a/modules/methods/methodExcel.py +++ /dev/null @@ -1,461 +0,0 @@ -""" -Excel method module. -Handles Excel operations using the Excel service. -""" - -import logging -from typing import Dict, Any, List, Optional -from datetime import datetime, UTC -import json -import base64 - -from modules.workflow.methodBase import MethodBase, ActionResult, action - -logger = logging.getLogger(__name__) - -class ExcelService: - """Service for Microsoft Excel operations using Graph API""" - - def __init__(self, serviceContainer: Any): - self.serviceContainer = serviceContainer - - def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]: - """Get Microsoft connection from connection reference""" - try: - userConnection = self.serviceContainer.getUserConnectionFromConnectionReference(connectionReference) - if not userConnection or userConnection.authority != "msft" or userConnection.status != "active": - return None - - # Get the corresponding token for this user and authority - token = self.serviceContainer.interfaceApp.getToken(userConnection.authority) - if not token: - logger.warning(f"No token found for user {userConnection.userId} and authority {userConnection.authority}") - return None - - return { - "id": userConnection.id, - "accessToken": token.tokenAccess, - "refreshToken": token.tokenRefresh, - "scopes": ["Mail.ReadWrite", "User.Read"] # Default Microsoft scopes - } - except Exception as e: - logger.error(f"Error getting Microsoft connection: {str(e)}") - return None - - async def readFile(self, fileId: str, connectionReference: str, sheetName: str = "Sheet1", range: str = None) -> Dict[str, Any]: - """Read data from Excel file using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "fileId": fileId, - "connectionReference": connectionReference - } - - # Get file data from service container - file_data = self.serviceContainer.getFileData(fileId) - file_info = self.serviceContainer.getFileInfo(fileId) - - if not file_data: - return { - "error": "File not found or empty", - "fileId": fileId - } - - # For now, simulate Excel reading with AI analysis - # In a real implementation, you would use Microsoft Graph API - excel_prompt = f""" - Analyze this Excel file data and extract structured information. - - File: {file_info.get('name', 'Unknown')} - Sheet: {sheetName} - Range: {range or 'All data'} - - File content (first 5000 characters): - {file_data.decode('utf-8', errors='ignore')[:5000] if isinstance(file_data, bytes) else str(file_data)[:5000]} - - Please extract: - 1. All data from the specified sheet and range - 2. Column headers and data types - 3. Key metrics and calculations - 4. Any charts or visualizations described - 5. Summary statistics - - Return the data in a structured JSON format. - """ - - # Use AI to analyze Excel content - analysis_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(excel_prompt) - - return { - "fileId": fileId, - "sheetName": sheetName, - "range": range, - "data": analysis_result, - "fileInfo": file_info, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error reading Excel file: {str(e)}") - return { - "error": str(e), - "fileId": fileId - } - - async def writeFile(self, fileId: str, connectionReference: str, sheetName: str, data: Any, range: str = None) -> Dict[str, Any]: - """Write data to Excel file using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "fileId": fileId, - "connectionReference": connectionReference - } - - # For now, simulate Excel writing - # In a real implementation, you would use Microsoft Graph API - write_prompt = f""" - Prepare data for writing to Excel file. - - File: {fileId} - Sheet: {sheetName} - Range: {range or 'Auto-detect'} - - Data to write: - {json.dumps(data, indent=2)} - - Please format this data appropriately for Excel and provide: - 1. Structured data ready for Excel - 2. Column headers and formatting - 3. Any formulas or calculations needed - 4. Data validation rules if applicable - """ - - # Use AI to prepare Excel data - prepared_data = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(write_prompt) - - return { - "fileId": fileId, - "sheetName": sheetName, - "range": range, - "data": prepared_data, - "status": "prepared", - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error writing to Excel file: {str(e)}") - return { - "error": str(e), - "fileId": fileId - } - - async def createFile(self, fileName: str, connectionReference: str, template: str = None) -> Dict[str, Any]: - """Create new Excel file using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "connectionReference": connectionReference - } - - # For now, simulate file creation - # In a real implementation, you would use Microsoft Graph API - create_prompt = f""" - Create a new Excel file structure. - - File name: {fileName} - Template: {template or 'Standard'} - - Please provide: - 1. Initial sheet structure - 2. Default column headers - 3. Sample data if template specified - 4. Formatting guidelines - """ - - # Use AI to create Excel structure - file_structure = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(create_prompt) - - # Create file using service container - file_id = self.serviceContainer.createFile( - fileName=fileName, - mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - content=file_structure, - base64encoded=False - ) - - return { - "fileId": file_id, - "fileName": fileName, - "template": template, - "structure": file_structure, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error creating Excel file: {str(e)}") - return { - "error": str(e) - } - - async def formatCells(self, fileId: str, connectionReference: str, sheetName: str, range: str, format: Dict[str, Any]) -> Dict[str, Any]: - """Format Excel cells using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "fileId": fileId, - "connectionReference": connectionReference - } - - # For now, simulate formatting - # In a real implementation, you would use Microsoft Graph API - format_prompt = f""" - Apply formatting to Excel cells. - - File: {fileId} - Sheet: {sheetName} - Range: {range} - Format: {json.dumps(format, indent=2)} - - Please provide: - 1. Applied formatting details - 2. Visual representation of the formatting - 3. Any conditional formatting rules - """ - - # Use AI to describe formatting - formatting_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(format_prompt) - - return { - "fileId": fileId, - "sheetName": sheetName, - "range": range, - "format": format, - "result": formatting_result, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error formatting Excel cells: {str(e)}") - return { - "error": str(e), - "fileId": fileId - } - -class MethodExcel(MethodBase): - """Excel method implementation for spreadsheet operations""" - - def __init__(self, serviceContainer: Any): - """Initialize the Excel method""" - super().__init__(serviceContainer) - self.name = "excel" - self.description = "Handle Excel spreadsheet operations like reading and writing data" - self.excelService = ExcelService(serviceContainer) - - @action - async def read(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Read data from Excel file - - Parameters: - fileId (str): The ID of the Excel file to read - connectionReference (str): Reference to the Microsoft connection - sheetName (str, optional): Name of the sheet to read (default: "Sheet1") - range (str, optional): Excel range to read (e.g., "A1:D10") - includeHeaders (bool, optional): Whether to include column headers (default: True) - """ - try: - fileId = parameters.get("fileId") - connectionReference = parameters.get("connectionReference") - sheetName = parameters.get("sheetName", "Sheet1") - range = parameters.get("range") - includeHeaders = parameters.get("includeHeaders", True) - - if not fileId or not connectionReference: - return self._createResult( - success=False, - data={}, - error="File ID and connection reference are required" - ) - - # Read data from Excel - data = await self.excelService.readFile( - fileId=fileId, - connectionReference=connectionReference, - sheetName=sheetName, - range=range - ) - - return self._createResult( - success=True, - data=data - ) - - except Exception as e: - logger.error(f"Error reading Excel file: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) - - @action - async def write(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Write data to Excel file - - Parameters: - fileId (str): The ID of the Excel file to write to - connectionReference (str): Reference to the Microsoft connection - sheetName (str, optional): Name of the sheet to write to (default: "Sheet1") - data (Any): Data to write to the Excel file - range (str, optional): Excel range to write to (e.g., "A1:D10") - """ - try: - fileId = parameters.get("fileId") - connectionReference = parameters.get("connectionReference") - sheetName = parameters.get("sheetName", "Sheet1") - data = parameters.get("data") - range = parameters.get("range") - - if not fileId or not connectionReference or not data: - return self._createResult( - success=False, - data={}, - error="File ID, connection reference, and data are required" - ) - - # Write data to Excel - result = await self.excelService.writeFile( - fileId=fileId, - connectionReference=connectionReference, - sheetName=sheetName, - data=data, - range=range - ) - - return self._createResult( - success=True, - data=result - ) - - except Exception as e: - logger.error(f"Error writing to Excel file: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) - - @action - async def create(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Create new Excel file - - Parameters: - fileName (str): Name of the new Excel file - connectionReference (str): Reference to the Microsoft connection - template (str, optional): Template to use for the new file - """ - try: - fileName = parameters.get("fileName") - connectionReference = parameters.get("connectionReference") - template = parameters.get("template") - - if not fileName or not connectionReference: - return self._createResult( - success=False, - data={}, - error="File name and connection reference are required" - ) - - # Create Excel file - fileId = await self.excelService.createFile( - fileName=fileName, - connectionReference=connectionReference, - template=template - ) - - return self._createResult( - success=True, - data={"fileId": fileId} - ) - - except Exception as e: - logger.error(f"Error creating Excel file: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) - - @action - async def format(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Format Excel cells - - Parameters: - fileId (str): The ID of the Excel file to format - connectionReference (str): Reference to the Microsoft connection - sheetName (str, optional): Name of the sheet to format (default: "Sheet1") - range (str): Excel range to format (e.g., "A1:D10") - format (Dict[str, Any]): Formatting options (e.g., {"font": {"bold": True}}) - """ - try: - fileId = parameters.get("fileId") - connectionReference = parameters.get("connectionReference") - sheetName = parameters.get("sheetName", "Sheet1") - range = parameters.get("range") - format = parameters.get("format") - - if not fileId or not connectionReference or not range or not format: - return self._createResult( - success=False, - data={}, - error="File ID, connection reference, range, and format are required" - ) - - # Apply formatting - result = await self.excelService.formatCells( - fileId=fileId, - connectionReference=connectionReference, - sheetName=sheetName, - range=range, - format=format - ) - - return self._createResult( - success=True, - data=result - ) - - except Exception as e: - logger.error(f"Error formatting Excel cells: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) \ No newline at end of file diff --git a/modules/methods/methodOperator.py b/modules/methods/methodOperator.py index ef9ec669..5ebc2cb8 100644 --- a/modules/methods/methodOperator.py +++ b/modules/methods/methodOperator.py @@ -3,184 +3,119 @@ from typing import Dict, List, Any, Optional from datetime import datetime, UTC import logging +import uuid from modules.workflow.methodBase import MethodBase, ActionResult, action logger = logging.getLogger(__name__) -class OperatorService: - """Service for operator operations like forEach and AI calls""" - - def __init__(self, serviceContainer: Any): - self.serviceContainer = serviceContainer - - async def executeForEach(self, items: List[Any], action: Dict[str, Any]) -> List[Any]: - """Execute an action for each item in a list""" - try: - results = [] - - for i, item in enumerate(items): - logger.info(f"Executing forEach action {i+1}/{len(items)}") - - # Create context with current item - context = { - "item": item, - "index": i, - "total": len(items), - "isFirst": i == 0, - "isLast": i == len(items) - 1 - } - - # Execute the action using the service container - if "method" in action and "action" in action: - methodName = action["method"] - actionName = action["action"] - parameters = action.get("parameters", {}) - - # Add context to parameters - parameters["context"] = context - parameters["currentItem"] = item - - # Execute the method action - result = await self.serviceContainer.executeAction( - methodName=methodName, - actionName=actionName, - parameters=parameters - ) - - # Return the exact result data, not wrapped - if result.success: - results.append(result.data) - else: - results.append({"error": result.error}) - else: - # Simple action without method call - results.append({"error": "No method specified"}) - - return results - - except Exception as e: - logger.error(f"Error executing forEach: {str(e)}") - return [{"error": str(e)}] * len(items) if items else [] - - async def executeAiCall(self, prompt: str, documents: List[Dict[str, Any]] = None) -> Dict[str, Any]: - """Call AI service with document content""" - try: - # Prepare context from documents - context = "" - extractedDocuments = [] - - if documents: - for i, doc in enumerate(documents): - documentReference = doc.get('documentReference') - contentExtractionPrompt = doc.get('contentExtractionPrompt', 'Extract the main content from this document') - - if documentReference: - # Get documents from reference - chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentReference) - - if chatDocuments: - # Extract content from each document - for j, chatDoc in enumerate(chatDocuments): - try: - # Extract content using the document manager - extractedContent = await self.serviceContainer.documentManager.extractContentFromChatDocument( - chatDocument=chatDoc, - extractionPrompt=contentExtractionPrompt - ) - - extractedDocuments.append({ - "documentReference": documentReference, - "documentId": chatDoc.id, - "extractionPrompt": contentExtractionPrompt, - "extractedContent": extractedContent - }) - - # Add to context - context += f"\n\nDocument {len(extractedDocuments)} (from {documentReference}):\n{extractedContent}" - - except Exception as e: - logger.warning(f"Error extracting content from document {chatDoc.id}: {str(e)}") - extractedDocuments.append({ - "documentReference": documentReference, - "documentId": chatDoc.id, - "extractionPrompt": contentExtractionPrompt, - "extractedContent": f"Error extracting content: {str(e)}" - }) - else: - logger.warning(f"No documents found for reference: {documentReference}") - extractedDocuments.append({ - "documentReference": documentReference, - "extractionPrompt": contentExtractionPrompt, - "extractedContent": f"No documents found for reference: {documentReference}" - }) - - # Create full prompt with context - fullPrompt = f"{prompt}\n\nContext:\n{context}" if context else prompt - - # Call AI service - aiResponse = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(fullPrompt) - - return { - "prompt": prompt, - "documentsProcessed": len(extractedDocuments), - "extractedDocuments": extractedDocuments, - "response": aiResponse, - "timestamp": datetime.now(UTC).isoformat() - } - - except Exception as e: - logger.error(f"Error executing AI call: {str(e)}") - return { - "error": str(e), - "prompt": prompt, - "documentsProcessed": 0, - "extractedDocuments": [], - "response": None - } - class MethodOperator(MethodBase): - """Operator method implementation for handling collections and AI operations""" + """Operator method implementation for data operations""" def __init__(self, serviceContainer: Any): super().__init__(serviceContainer) self.name = "operator" - self.description = "Handle operations like forEach and AI calls" - self.operatorService = OperatorService(serviceContainer) + self.description = "Handle data operations like filtering, sorting, and transformation" @action - async def forEach(self, parameters: Dict[str, Any]) -> ActionResult: + async def filter(self, parameters: Dict[str, Any]) -> ActionResult: """ - Execute an action for each item in a list + Filter data based on criteria Parameters: - items (List[Any]): List of items to process - action (Dict[str, Any]): Action to execute for each item (contains method, action, parameters) + documentList (str): Reference to the document list to filter + criteria (Dict[str, Any]): Filter criteria + field (str, optional): Field to filter on """ try: - items = parameters.get("items", []) - action = parameters.get("action", {}) + documentList = parameters.get("documentList") + criteria = parameters.get("criteria") + field = parameters.get("field") - if not items or not action: + if not documentList or not criteria: return self._createResult( success=False, data={}, - error="Items and action are required" + error="Document list reference and criteria are required" ) - # Execute forEach operation - results = await self.operatorService.executeForEach( - items=items, - action=action - ) + chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + if not chatDocuments: + return self._createResult( + success=False, + data={}, + error="No documents found for the provided reference" + ) + + # Extract content from all documents + all_document_content = [] + + for chatDocument in chatDocuments: + fileId = chatDocument.fileId + file_data = self.serviceContainer.getFileData(fileId) + file_info = self.serviceContainer.getFileInfo(fileId) + + if not file_data: + logger.warning(f"File data not found for fileId: {fileId}") + continue + + all_document_content.append({ + "fileId": fileId, + "fileName": file_info.get('name', 'unknown'), + "content": file_data + }) + + if not all_document_content: + return self._createResult( + success=False, + data={}, + error="No content could be extracted from any documents" + ) + + # Combine all document content for filtering + combined_content = "\n\n--- DOCUMENT SEPARATOR ---\n\n".join([ + f"File: {doc['fileName']}\nContent: {doc['content']}" + for doc in all_document_content + ]) + + filter_prompt = f""" + Filter the following data based on the specified criteria. + + Data to filter: + {combined_content} + + Filter criteria: + {criteria} + + Field to filter on: {field or 'All fields'} + + Please provide: + 1. Filtered data that matches the criteria + 2. Summary of filtering results + 3. Number of items before and after filtering + 4. Any data quality insights + """ + + filtered_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(filter_prompt) + + result_data = { + "documentCount": len(chatDocuments), + "criteria": criteria, + "field": field, + "filteredData": filtered_result, + "originalCount": len(all_document_content), + "timestamp": datetime.now(UTC).isoformat() + } return self._createResult( success=True, - data=results + data={ + "documentName": f"filtered_data_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } ) - except Exception as e: - logger.error(f"Error in forEach execution: {str(e)}") + logger.error(f"Error filtering data: {str(e)}") return self._createResult( success=False, data={}, @@ -188,39 +123,215 @@ class MethodOperator(MethodBase): ) @action - async def aiCall(self, parameters: Dict[str, Any]) -> ActionResult: + async def sort(self, parameters: Dict[str, Any]) -> ActionResult: """ - Call AI service with document content + Sort data by specified field Parameters: - prompt (str): The prompt to send to the AI service - documents (List[Dict[str, Any]], optional): List of documents to include in context - Each document should have: documentReference (str), contentExtractionPrompt (str, optional) + documentList (str): Reference to the document list to sort + field (str): Field to sort by + order (str, optional): Sort order (asc/desc, default: "asc") """ try: - prompt = parameters.get("prompt") - documents = parameters.get("documents", []) # List of {documentReference, contentExtractionPrompt} + documentList = parameters.get("documentList") + field = parameters.get("field") + order = parameters.get("order", "asc") - if not prompt: + if not documentList or not field: return self._createResult( success=False, data={}, - error="Prompt is required" + error="Document list reference and field are required" ) - # Execute AI call - result = await self.operatorService.executeAiCall( - prompt=prompt, - documents=documents - ) + # Get documents from reference + chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + if not chatDocuments: + return self._createResult( + success=False, + data={}, + error="No documents found for the provided reference" + ) + + # Extract content from all documents + all_document_content = [] + + for chatDocument in chatDocuments: + fileId = chatDocument.fileId + file_data = self.serviceContainer.getFileData(fileId) + file_info = self.serviceContainer.getFileInfo(fileId) + + if not file_data: + logger.warning(f"File data not found for fileId: {fileId}") + continue + + all_document_content.append({ + "fileId": fileId, + "fileName": file_info.get('name', 'unknown'), + "content": file_data + }) + + if not all_document_content: + return self._createResult( + success=False, + data={}, + error="No content could be extracted from any documents" + ) + + # Combine all document content for sorting + combined_content = "\n\n--- DOCUMENT SEPARATOR ---\n\n".join([ + f"File: {doc['fileName']}\nContent: {doc['content']}" + for doc in all_document_content + ]) + + # Create sorting prompt + sort_prompt = f""" + Sort the following data by the specified field. + + Data to sort: + {combined_content} + + Sort field: {field} + Sort order: {order} + + Please provide: + 1. Sorted data in the specified order + 2. Summary of sorting results + 3. Any data insights from the sorting + 4. Validation of sort field existence + """ + + # Use AI to perform sorting + sorted_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(sort_prompt) + + # Create result data + result_data = { + "documentCount": len(chatDocuments), + "field": field, + "order": order, + "sortedData": sorted_result, + "timestamp": datetime.now(UTC).isoformat() + } return self._createResult( success=True, - data=result + data={ + "documentName": f"sorted_data_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } ) except Exception as e: - logger.error(f"Error in AI call execution: {str(e)}") + logger.error(f"Error sorting data: {str(e)}") + return self._createResult( + success=False, + data={}, + error=str(e) + ) + + @action + async def transform(self, parameters: Dict[str, Any]) -> ActionResult: + """ + Transform data structure or format + + Parameters: + documentList (str): Reference to the document list to transform + transformation (Dict[str, Any]): Transformation rules + outputFormat (str, optional): Desired output format + """ + try: + documentList = parameters.get("documentList") + transformation = parameters.get("transformation") + outputFormat = parameters.get("outputFormat", "json") + + if not documentList or not transformation: + return self._createResult( + success=False, + data={}, + error="Document list reference and transformation rules are required" + ) + + # Get documents from reference + chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + if not chatDocuments: + return self._createResult( + success=False, + data={}, + error="No documents found for the provided reference" + ) + + # Extract content from all documents + all_document_content = [] + + for chatDocument in chatDocuments: + fileId = chatDocument.fileId + file_data = self.serviceContainer.getFileData(fileId) + file_info = self.serviceContainer.getFileInfo(fileId) + + if not file_data: + logger.warning(f"File data not found for fileId: {fileId}") + continue + + all_document_content.append({ + "fileId": fileId, + "fileName": file_info.get('name', 'unknown'), + "content": file_data + }) + + if not all_document_content: + return self._createResult( + success=False, + data={}, + error="No content could be extracted from any documents" + ) + + # Combine all document content for transformation + combined_content = "\n\n--- DOCUMENT SEPARATOR ---\n\n".join([ + f"File: {doc['fileName']}\nContent: {doc['content']}" + for doc in all_document_content + ]) + + # Create transformation prompt + transform_prompt = f""" + Transform the following data according to the specified rules. + + Data to transform: + {combined_content} + + Transformation rules: + {transformation} + + Output format: {outputFormat} + + Please provide: + 1. Transformed data in the specified format + 2. Summary of transformation results + 3. Validation of transformation rules + 4. Any data quality improvements + """ + + # Use AI to perform transformation + transformed_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(transform_prompt) + + # Create result data + result_data = { + "documentCount": len(chatDocuments), + "transformation": transformation, + "outputFormat": outputFormat, + "transformedData": transformed_result, + "timestamp": datetime.now(UTC).isoformat() + } + + return self._createResult( + success=True, + data={ + "documentName": f"transformed_data_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.{outputFormat}", + "documentData": result_data + } + ) + + except Exception as e: + logger.error(f"Error transforming data: {str(e)}") return self._createResult( success=False, data={}, diff --git a/modules/methods/methodOutlook.py b/modules/methods/methodOutlook.py index 3e066797..2563ab06 100644 --- a/modules/methods/methodOutlook.py +++ b/modules/methods/methodOutlook.py @@ -7,16 +7,20 @@ import logging from typing import Dict, Any, List, Optional from datetime import datetime, UTC import json +import uuid from modules.workflow.methodBase import MethodBase, ActionResult, action logger = logging.getLogger(__name__) -class OutlookService: - """Service for Microsoft Outlook operations using Graph API""" +class MethodOutlook(MethodBase): + """Outlook method implementation for email operations""" def __init__(self, serviceContainer: Any): - self.serviceContainer = serviceContainer + """Initialize the Outlook method""" + super().__init__(serviceContainer) + self.name = "outlook" + self.description = "Handle Microsoft Outlook email operations" def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]: """Get Microsoft connection from connection reference""" @@ -41,226 +45,22 @@ class OutlookService: logger.error(f"Error getting Microsoft connection: {str(e)}") return None - async def readMails(self, connectionReference: str, folder: str = "inbox", query: str = None, maxResults: int = 10, includeAttachments: bool = False) -> Dict[str, Any]: - """Read emails from Outlook using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "connectionReference": connectionReference - } - - # For now, simulate email reading - # In a real implementation, you would use Microsoft Graph API - mail_prompt = f""" - Read emails from Outlook. - - Folder: {folder} - Query: {query or 'All emails'} - Max Results: {maxResults} - Include Attachments: {includeAttachments} - - Please provide: - 1. Email messages with subject, sender, and content - 2. Timestamps and priority levels - 3. Attachment information if requested - 4. Email threading and conversations - 5. Categorization and flags - """ - - # Use AI to simulate email data - mail_data = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(mail_prompt) - - return { - "folder": folder, - "query": query, - "maxResults": maxResults, - "includeAttachments": includeAttachments, - "messages": mail_data, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error reading emails: {str(e)}") - return { - "error": str(e) - } - - async def sendMail(self, connectionReference: str, to: List[str], subject: str, body: str, attachments: List[str] = None) -> Dict[str, Any]: - """Send email using Outlook using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "connectionReference": connectionReference - } - - # For now, simulate email sending - # In a real implementation, you would use Microsoft Graph API - send_prompt = f""" - Send email using Outlook. - - To: {', '.join(to)} - Subject: {subject} - Body: {body} - Attachments: {attachments or 'None'} - - Please provide: - 1. Email composition details - 2. Recipient validation - 3. Attachment processing - 4. Send confirmation - 5. Message tracking information - """ - - # Use AI to simulate email sending - send_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(send_prompt) - - return { - "to": to, - "subject": subject, - "body": body, - "attachments": attachments, - "result": send_result, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error sending email: {str(e)}") - return { - "error": str(e) - } - - async def createFolder(self, connectionReference: str, name: str, parentFolderId: str = None) -> Dict[str, Any]: - """Create folder in Outlook using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "connectionReference": connectionReference - } - - # For now, simulate folder creation - # In a real implementation, you would use Microsoft Graph API - folder_prompt = f""" - Create folder in Outlook. - - Name: {name} - Parent Folder ID: {parentFolderId or 'Root'} - - Please provide: - 1. Folder creation details - 2. Permission settings - 3. Folder structure and hierarchy - 4. Creation confirmation - 5. Folder properties and metadata - """ - - # Use AI to simulate folder creation - folder_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(folder_prompt) - - return { - "name": name, - "parentFolderId": parentFolderId, - "result": folder_result, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error creating folder: {str(e)}") - return { - "error": str(e) - } - - async def moveMail(self, connectionReference: str, messageId: str, targetFolderId: str) -> Dict[str, Any]: - """Move email to different folder using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "connectionReference": connectionReference - } - - # For now, simulate mail moving - # In a real implementation, you would use Microsoft Graph API - move_prompt = f""" - Move email to different folder. - - Message ID: {messageId} - Target Folder ID: {targetFolderId} - - Please provide: - 1. Move operation details - 2. Source and destination folder information - 3. Message preservation and metadata - 4. Move confirmation - 5. Updated folder structure - """ - - # Use AI to simulate mail moving - move_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(move_prompt) - - return { - "messageId": messageId, - "targetFolderId": targetFolderId, - "result": move_result, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error moving email: {str(e)}") - return { - "error": str(e) - } - -class MethodOutlook(MethodBase): - """Outlook method implementation for email operations""" - - def __init__(self, serviceContainer: Any): - """Initialize the Outlook method""" - super().__init__(serviceContainer) - self.name = "outlook" - self.description = "Handle Outlook email operations like reading and sending emails" - self.outlookService = OutlookService(serviceContainer) - @action - async def readMails(self, parameters: Dict[str, Any]) -> ActionResult: + async def readEmails(self, parameters: Dict[str, Any]) -> ActionResult: """ Read emails from Outlook Parameters: connectionReference (str): Reference to the Microsoft connection - folder (str, optional): Folder to read from (default: "inbox") - query (str, optional): Search query to filter emails - maxResults (int, optional): Maximum number of results (default: 10) - includeAttachments (bool, optional): Whether to include attachments (default: False) + folder (str, optional): Email folder to read from (default: "Inbox") + limit (int, optional): Maximum number of emails to read (default: 10) + filter (str, optional): Filter criteria for emails """ try: connectionReference = parameters.get("connectionReference") - folder = parameters.get("folder", "inbox") - query = parameters.get("query") - maxResults = parameters.get("maxResults", 10) - includeAttachments = parameters.get("includeAttachments", False) + folder = parameters.get("folder", "Inbox") + limit = parameters.get("limit", 10) + filter = parameters.get("filter") if not connectionReference: return self._createResult( @@ -269,18 +69,55 @@ class MethodOutlook(MethodBase): error="Connection reference is required" ) - # Read emails - messages = await self.outlookService.readMails( - connectionReference=connectionReference, - folder=folder, - query=query, - maxResults=maxResults, - includeAttachments=includeAttachments - ) + # Get Microsoft connection + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return self._createResult( + success=False, + data={}, + error="No valid Microsoft connection found for the provided connection reference" + ) + + # Create email reading prompt + email_prompt = f""" + Simulate reading emails from Microsoft Outlook. + + Connection: {connection['id']} + Folder: {folder} + Limit: {limit} + Filter: {filter or 'None'} + + Please provide: + 1. List of emails with subject, sender, date, and content + 2. Summary of email statistics + 3. Important or urgent emails highlighted + 4. Email categorization if possible + """ + + # Use AI to simulate email reading + email_data = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(email_prompt) + + # Create result data + result_data = { + "connectionReference": connectionReference, + "folder": folder, + "limit": limit, + "filter": filter, + "emails": email_data, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + }, + "timestamp": datetime.now(UTC).isoformat() + } return self._createResult( success=True, - data=messages + data={ + "documentName": f"outlook_emails_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } ) except Exception as e: @@ -290,52 +127,88 @@ class MethodOutlook(MethodBase): data={}, error=str(e) ) - + @action - async def sendMail(self, parameters: Dict[str, Any]) -> ActionResult: + async def sendEmail(self, parameters: Dict[str, Any]) -> ActionResult: """ - Send email using Outlook + Send email via Outlook Parameters: connectionReference (str): Reference to the Microsoft connection to (List[str]): List of recipient email addresses subject (str): Email subject - body (str): Email body - attachments (List[str], optional): List of attachment file IDs + body (str): Email body content + cc (List[str], optional): CC recipients + bcc (List[str], optional): BCC recipients """ try: connectionReference = parameters.get("connectionReference") - to = parameters.get("to", []) + to = parameters.get("to") subject = parameters.get("subject") body = parameters.get("body") - attachments = parameters.get("attachments", []) + cc = parameters.get("cc", []) + bcc = parameters.get("bcc", []) - if not connectionReference: + if not connectionReference or not to or not subject or not body: return self._createResult( success=False, data={}, - error="Connection reference is required" + error="Connection reference, to, subject, and body are required" ) - if not to or not subject or not body: + # Get Microsoft connection + connection = self._getMicrosoftConnection(connectionReference) + if not connection: return self._createResult( success=False, data={}, - error="To, subject, and body are required" + error="No valid Microsoft connection found for the provided connection reference" ) - # Send email - result = await self.outlookService.sendMail( - connectionReference=connectionReference, - to=to, - subject=subject, - body=body, - attachments=attachments - ) + # Create email sending prompt + send_prompt = f""" + Simulate sending an email via Microsoft Outlook. + + Connection: {connection['id']} + To: {to} + Subject: {subject} + Body: {body} + CC: {cc} + BCC: {bcc} + + Please provide: + 1. Email composition details + 2. Validation of email addresses + 3. Email formatting and structure + 4. Delivery confirmation simulation + """ + + # Use AI to simulate email sending + send_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(send_prompt) + + # Create result data + result_data = { + "connectionReference": connectionReference, + "to": to, + "subject": subject, + "body": body, + "cc": cc, + "bcc": bcc, + "sendResult": send_result, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + }, + "timestamp": datetime.now(UTC).isoformat() + } return self._createResult( success=True, - data=result + data={ + "documentName": f"outlook_email_sent_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } ) except Exception as e: @@ -345,99 +218,84 @@ class MethodOutlook(MethodBase): data={}, error=str(e) ) - + @action - async def createFolder(self, parameters: Dict[str, Any]) -> ActionResult: + async def searchEmails(self, parameters: Dict[str, Any]) -> ActionResult: """ - Create folder in Outlook + Search emails in Outlook Parameters: connectionReference (str): Reference to the Microsoft connection - name (str): Folder name - parentFolderId (str, optional): Parent folder ID + query (str): Search query + folder (str, optional): Folder to search in (default: "All") + limit (int, optional): Maximum number of results (default: 20) """ try: connectionReference = parameters.get("connectionReference") - name = parameters.get("name") - parentFolderId = parameters.get("parentFolderId") + query = parameters.get("query") + folder = parameters.get("folder", "All") + limit = parameters.get("limit", 20) - if not connectionReference: + if not connectionReference or not query: return self._createResult( success=False, data={}, - error="Connection reference is required" + error="Connection reference and query are required" ) - if not name: + # Get Microsoft connection + connection = self._getMicrosoftConnection(connectionReference) + if not connection: return self._createResult( success=False, data={}, - error="Folder name is required" + error="No valid Microsoft connection found for the provided connection reference" ) - # Create folder - folder = await self.outlookService.createFolder( - connectionReference=connectionReference, - name=name, - parentFolderId=parentFolderId - ) + # Create email search prompt + search_prompt = f""" + Simulate searching emails in Microsoft Outlook. + + Connection: {connection['id']} + Query: {query} + Folder: {folder} + Limit: {limit} + + Please provide: + 1. Search results with relevant emails + 2. Search statistics and relevance scores + 3. Email previews and key information + 4. Search suggestions and refinements + """ + + # Use AI to simulate email search + search_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(search_prompt) + + # Create result data + result_data = { + "connectionReference": connectionReference, + "query": query, + "folder": folder, + "limit": limit, + "searchResults": search_result, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + }, + "timestamp": datetime.now(UTC).isoformat() + } return self._createResult( success=True, - data=folder + data={ + "documentName": f"outlook_email_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } ) except Exception as e: - logger.error(f"Error creating folder: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) - - @action - async def moveMail(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Move email to different folder - - Parameters: - connectionReference (str): Reference to the Microsoft connection - messageId (str): ID of the message to move - targetFolderId (str): ID of the target folder - """ - try: - connectionReference = parameters.get("connectionReference") - messageId = parameters.get("messageId") - targetFolderId = parameters.get("targetFolderId") - - if not connectionReference: - return self._createResult( - success=False, - data={}, - error="Connection reference is required" - ) - - if not messageId or not targetFolderId: - return self._createResult( - success=False, - data={}, - error="Message ID and target folder ID are required" - ) - - # Move email - result = await self.outlookService.moveMail( - connectionReference=connectionReference, - messageId=messageId, - targetFolderId=targetFolderId - ) - - return self._createResult( - success=True, - data=result - ) - - except Exception as e: - logger.error(f"Error moving email: {str(e)}") + logger.error(f"Error searching emails: {str(e)}") return self._createResult( success=False, data={}, diff --git a/modules/methods/methodPowerpoint.py b/modules/methods/methodPowerpoint.py deleted file mode 100644 index 2c1e06e9..00000000 --- a/modules/methods/methodPowerpoint.py +++ /dev/null @@ -1,639 +0,0 @@ -""" -PowerPoint method module. -Handles PowerPoint operations using the PowerPoint service. -""" - -import logging -from typing import Dict, Any, List, Optional -from datetime import datetime, UTC -import json -import base64 - -from modules.workflow.methodBase import MethodBase, ActionResult, action - -logger = logging.getLogger(__name__) - -class PowerpointService: - """Service for Microsoft PowerPoint operations using Graph API""" - - def __init__(self, serviceContainer: Any): - self.serviceContainer = serviceContainer - - def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]: - """Get Microsoft connection from connection reference""" - try: - userConnection = self.serviceContainer.getUserConnectionFromConnectionReference(connectionReference) - if not userConnection or userConnection.authority != "msft" or userConnection.status != "active": - return None - - # Get the corresponding token for this user and authority - token = self.serviceContainer.interfaceApp.getToken(userConnection.authority) - if not token: - logger.warning(f"No token found for user {userConnection.userId} and authority {userConnection.authority}") - return None - - return { - "id": userConnection.id, - "accessToken": token.tokenAccess, - "refreshToken": token.tokenRefresh, - "scopes": ["Mail.ReadWrite", "User.Read"] # Default Microsoft scopes - } - except Exception as e: - logger.error(f"Error getting Microsoft connection: {str(e)}") - return None - - async def readPresentation(self, fileId: str, connectionReference: str, includeSlides: bool = True) -> Dict[str, Any]: - """Read PowerPoint presentation using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "fileId": fileId, - "connectionReference": connectionReference - } - - # Get file data from service container - file_data = self.serviceContainer.getFileData(fileId) - file_info = self.serviceContainer.getFileInfo(fileId) - - if not file_data: - return { - "error": "File not found or empty", - "fileId": fileId - } - - # For now, simulate PowerPoint reading with AI analysis - # In a real implementation, you would use Microsoft Graph API - ppt_prompt = f""" - Analyze this PowerPoint presentation and extract structured information. - - File: {file_info.get('name', 'Unknown')} - Include slides: {includeSlides} - - File content (first 5000 characters): - {file_data.decode('utf-8', errors='ignore')[:5000] if isinstance(file_data, bytes) else str(file_data)[:5000]} - - Please extract: - 1. Presentation title and theme - 2. Slide structure and content - 3. Text content from each slide - 4. Images and media references - 5. Charts and data visualizations - 6. Speaker notes if available - 7. Overall presentation flow and messaging - - Return the data in a structured JSON format. - """ - - # Use AI to analyze PowerPoint content - analysis_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(ppt_prompt) - - return { - "fileId": fileId, - "includeSlides": includeSlides, - "data": analysis_result, - "fileInfo": file_info, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error reading presentation: {str(e)}") - return { - "error": str(e), - "fileId": fileId - } - - async def writePresentation(self, fileId: str, connectionReference: str, slides: List[Dict[str, Any]]) -> Dict[str, Any]: - """Write to PowerPoint presentation using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "fileId": fileId, - "connectionReference": connectionReference - } - - # For now, simulate PowerPoint writing - # In a real implementation, you would use Microsoft Graph API - write_prompt = f""" - Prepare content for writing to PowerPoint presentation. - - File: {fileId} - Number of slides: {len(slides)} - - Slides data: - {json.dumps(slides, indent=2)} - - Please format this content appropriately for PowerPoint and provide: - 1. Slide layouts and structures - 2. Text content and formatting - 3. Image and media placement - 4. Chart and visualization specifications - 5. Animation and transition suggestions - """ - - # Use AI to prepare PowerPoint content - prepared_content = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(write_prompt) - - return { - "fileId": fileId, - "slides": slides, - "content": prepared_content, - "status": "prepared", - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error writing to presentation: {str(e)}") - return { - "error": str(e), - "fileId": fileId - } - - async def convertPresentation(self, fileId: str, connectionReference: str, format: str = "pdf") -> Dict[str, Any]: - """Convert PowerPoint presentation to another format using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "fileId": fileId, - "connectionReference": connectionReference - } - - # For now, simulate conversion - # In a real implementation, you would use Microsoft Graph API - convert_prompt = f""" - Convert PowerPoint presentation to {format.upper()} format. - - File: {fileId} - Target format: {format} - - Please provide: - 1. Conversion specifications - 2. Format-specific optimizations - 3. Quality settings and options - 4. Any special considerations for the target format - """ - - # Use AI to describe conversion process - conversion_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(convert_prompt) - - # Create converted file using service container - converted_file_id = self.serviceContainer.createFile( - fileName=f"converted_presentation.{format}", - mimeType=f"application/{format}", - content=conversion_result, - base64encoded=False - ) - - return { - "fileId": fileId, - "format": format, - "convertedFileId": converted_file_id, - "result": conversion_result, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error converting presentation: {str(e)}") - return { - "error": str(e), - "fileId": fileId - } - - async def createPresentation(self, fileName: str, connectionReference: str, template: str = None) -> Dict[str, Any]: - """Create new PowerPoint presentation using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "connectionReference": connectionReference - } - - # For now, simulate presentation creation - # In a real implementation, you would use Microsoft Graph API - create_prompt = f""" - Create a new PowerPoint presentation structure. - - File name: {fileName} - Template: {template or 'Standard'} - - Please provide: - 1. Initial slide structure - 2. Default slide layouts - 3. Theme and design elements - 4. Sample content if template specified - 5. Presentation guidelines - """ - - # Use AI to create PowerPoint structure - presentation_structure = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(create_prompt) - - # Create file using service container - file_id = self.serviceContainer.createFile( - fileName=fileName, - mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation", - content=presentation_structure, - base64encoded=False - ) - - return { - "fileId": file_id, - "fileName": fileName, - "template": template, - "structure": presentation_structure, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error creating presentation: {str(e)}") - return { - "error": str(e) - } - - async def addSlide(self, fileId: str, connectionReference: str, layout: str = "title", content: Dict[str, Any] = None) -> Dict[str, Any]: - """Add slide to presentation using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "fileId": fileId, - "connectionReference": connectionReference - } - - # For now, simulate slide addition - # In a real implementation, you would use Microsoft Graph API - slide_prompt = f""" - Add a new slide to PowerPoint presentation. - - File: {fileId} - Layout: {layout} - Content: {json.dumps(content, indent=2) if content else 'Default content'} - - Please provide: - 1. Slide structure and layout - 2. Content placement and formatting - 3. Visual elements and design - 4. Slide number and positioning - """ - - # Use AI to create slide content - slide_content = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(slide_prompt) - - return { - "fileId": fileId, - "layout": layout, - "content": content, - "slideContent": slide_content, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error adding slide: {str(e)}") - return { - "error": str(e), - "fileId": fileId - } - - async def addContent(self, fileId: str, connectionReference: str, slideId: str, content: Dict[str, Any]) -> Dict[str, Any]: - """Add content to slide using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "fileId": fileId, - "connectionReference": connectionReference - } - - # For now, simulate content addition - # In a real implementation, you would use Microsoft Graph API - content_prompt = f""" - Add content to PowerPoint slide. - - File: {fileId} - Slide ID: {slideId} - Content: {json.dumps(content, indent=2)} - - Please provide: - 1. Content placement and formatting - 2. Text styling and layout - 3. Image and media integration - 4. Chart and visualization setup - 5. Animation and effects - """ - - # Use AI to format slide content - formatted_content = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(content_prompt) - - return { - "fileId": fileId, - "slideId": slideId, - "content": content, - "formattedContent": formatted_content, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error adding content: {str(e)}") - return { - "error": str(e), - "fileId": fileId - } - -class MethodPowerpoint(MethodBase): - """PowerPoint method implementation for presentation operations""" - - def __init__(self, serviceContainer: Any): - """Initialize the PowerPoint method""" - super().__init__(serviceContainer) - self.name = "powerpoint" - self.description = "Handle PowerPoint presentation operations like reading and creating slides" - self.powerpointService = PowerpointService(serviceContainer) - - @action - async def read(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Read PowerPoint presentation - - Parameters: - fileId (str): The ID of the PowerPoint file to read - connectionReference (str): Reference to the Microsoft connection - includeSlides (bool, optional): Whether to include slide content (default: True) - """ - try: - fileId = parameters.get("fileId") - connectionReference = parameters.get("connectionReference") - includeSlides = parameters.get("includeSlides", True) - - if not fileId or not connectionReference: - return self._createResult( - success=False, - data={}, - error="File ID and connection reference are required" - ) - - # Read presentation - data = await self.powerpointService.readPresentation( - fileId=fileId, - connectionReference=connectionReference, - includeSlides=includeSlides - ) - - return self._createResult( - success=True, - data=data - ) - - except Exception as e: - logger.error(f"Error reading presentation: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) - - @action - async def write(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Write to PowerPoint presentation - - Parameters: - fileId (str): The ID of the PowerPoint file to write to - connectionReference (str): Reference to the Microsoft connection - slides (List[Dict[str, Any]]): List of slides to write - """ - try: - fileId = parameters.get("fileId") - connectionReference = parameters.get("connectionReference") - slides = parameters.get("slides", []) - - if not fileId or not connectionReference: - return self._createResult( - success=False, - data={}, - error="File ID and connection reference are required" - ) - - # Write to presentation - result = await self.powerpointService.writePresentation( - fileId=fileId, - connectionReference=connectionReference, - slides=slides - ) - - return self._createResult( - success=True, - data=result - ) - - except Exception as e: - logger.error(f"Error writing to presentation: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) - - @action - async def convert(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Convert PowerPoint presentation to another format - - Parameters: - fileId (str): The ID of the PowerPoint file to convert - connectionReference (str): Reference to the Microsoft connection - format (str, optional): Target format (default: "pdf") - """ - try: - fileId = parameters.get("fileId") - connectionReference = parameters.get("connectionReference") - format = parameters.get("format", "pdf") - - if not fileId or not connectionReference: - return self._createResult( - success=False, - data={}, - error="File ID and connection reference are required" - ) - - # Convert presentation - result = await self.powerpointService.convertPresentation( - fileId=fileId, - connectionReference=connectionReference, - format=format - ) - - return self._createResult( - success=True, - data=result - ) - - except Exception as e: - logger.error(f"Error converting presentation: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) - - @action - async def createPresentation(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Create new PowerPoint presentation - - Parameters: - fileName (str): Name of the new presentation file - connectionReference (str): Reference to the Microsoft connection - template (str, optional): Template to use for the new presentation - """ - try: - fileName = parameters.get("fileName") - connectionReference = parameters.get("connectionReference") - template = parameters.get("template") - - if not fileName or not connectionReference: - return self._createResult( - success=False, - data={}, - error="File name and connection reference are required" - ) - - # Create presentation - fileId = await self.powerpointService.createPresentation( - fileName=fileName, - connectionReference=connectionReference, - template=template - ) - - return self._createResult( - success=True, - data={"fileId": fileId} - ) - - except Exception as e: - logger.error(f"Error creating presentation: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) - - @action - async def addSlide(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Add slide to presentation - - Parameters: - fileId (str): The ID of the PowerPoint file - connectionReference (str): Reference to the Microsoft connection - layout (str, optional): Slide layout type (default: "title") - content (Dict[str, Any], optional): Content for the slide - """ - try: - fileId = parameters.get("fileId") - connectionReference = parameters.get("connectionReference") - layout = parameters.get("layout", "title") - content = parameters.get("content", {}) - - if not fileId or not connectionReference: - return self._createResult( - success=False, - data={}, - error="File ID and connection reference are required" - ) - - # Add slide - slide = await self.powerpointService.addSlide( - fileId=fileId, - connectionReference=connectionReference, - layout=layout, - content=content - ) - - return self._createResult( - success=True, - data=slide - ) - - except Exception as e: - logger.error(f"Error adding slide: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) - - @action - async def addContent(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Add content to slide - - Parameters: - fileId (str): The ID of the PowerPoint file - connectionReference (str): Reference to the Microsoft connection - slideId (str): ID of the slide to add content to - content (Dict[str, Any]): Content to add to the slide - """ - try: - fileId = parameters.get("fileId") - connectionReference = parameters.get("connectionReference") - slideId = parameters.get("slideId") - content = parameters.get("content", {}) - - if not fileId or not connectionReference or not slideId: - return self._createResult( - success=False, - data={}, - error="File ID, connection reference, and slide ID are required" - ) - - # Add content - result = await self.powerpointService.addContent( - fileId=fileId, - connectionReference=connectionReference, - slideId=slideId, - content=content - ) - - return self._createResult( - success=True, - data=result - ) - - except Exception as e: - logger.error(f"Error adding content: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) \ No newline at end of file diff --git a/modules/methods/methodSharepoint.py b/modules/methods/methodSharepoint.py index 0eef7803..bd2b7d54 100644 --- a/modules/methods/methodSharepoint.py +++ b/modules/methods/methodSharepoint.py @@ -7,16 +7,19 @@ import logging from typing import Dict, Any, List, Optional from datetime import datetime, UTC import json +import uuid from modules.workflow.methodBase import MethodBase, ActionResult, action logger = logging.getLogger(__name__) -class SharepointService: - """Service for Microsoft SharePoint operations using Graph API""" +class MethodSharepoint(MethodBase): + """SharePoint method implementation for document operations""" def __init__(self, serviceContainer: Any): - self.serviceContainer = serviceContainer + super().__init__(serviceContainer) + self.name = "sharepoint" + self.description = "Handle Microsoft SharePoint document operations" def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]: """Get Microsoft connection from connection reference""" @@ -35,641 +38,401 @@ class SharepointService: "id": userConnection.id, "accessToken": token.tokenAccess, "refreshToken": token.tokenRefresh, - "scopes": ["Mail.ReadWrite", "User.Read"] # Default Microsoft scopes + "scopes": ["Sites.ReadWrite.All", "User.Read"] # Default Microsoft scopes } except Exception as e: logger.error(f"Error getting Microsoft connection: {str(e)}") return None - async def searchContent(self, connectionReference: str, query: str, siteId: str = None, contentType: str = None, maxResults: int = 10) -> Dict[str, Any]: - """Search SharePoint content using Microsoft Graph API""" + @action + async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult: + """ + Find document path based on query/description + + Parameters: + connectionReference (str): Reference to the Microsoft connection + siteUrl (str): SharePoint site URL + query (str): Query or description to find document + searchScope (str, optional): Search scope (default: "all") + """ try: + connectionReference = parameters.get("connectionReference") + siteUrl = parameters.get("siteUrl") + query = parameters.get("query") + searchScope = parameters.get("searchScope", "all") + + if not connectionReference or not siteUrl or not query: + return self._createResult( + success=False, + data={}, + error="Connection reference, site URL, and query are required" + ) + connection = self._getMicrosoftConnection(connectionReference) if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "connectionReference": connectionReference - } + return self._createResult( + success=False, + data={}, + error="No valid Microsoft connection found for the provided connection reference" + ) - # For now, simulate SharePoint search - # In a real implementation, you would use Microsoft Graph API - search_prompt = f""" - Search SharePoint content for the following query. + find_prompt = f""" + Simulate finding document paths in Microsoft SharePoint based on a query. + Connection: {connection['id']} + Site URL: {siteUrl} Query: {query} - Site ID: {siteId or 'All sites'} - Content Type: {contentType or 'All types'} - Max Results: {maxResults} + Search Scope: {searchScope} Please provide: - 1. Relevant search results - 2. Content summaries - 3. File and document information - 4. Site and list references - 5. Metadata and properties + 1. Matching document paths and locations + 2. Relevance scores for each match + 3. Document metadata and properties + 4. Alternative search suggestions + 5. Search statistics and coverage """ - # Use AI to simulate search results - search_results = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(search_prompt) + find_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(find_prompt) - return { + result_data = { + "connectionReference": connectionReference, + "siteUrl": siteUrl, "query": query, - "siteId": siteId, - "contentType": contentType, - "maxResults": maxResults, - "results": search_results, + "searchScope": searchScope, + "findResult": find_result, "connection": { "id": connection["id"], "authority": "microsoft", "reference": connectionReference - } + }, + "timestamp": datetime.now(UTC).isoformat() } - except Exception as e: - logger.error(f"Error searching SharePoint: {str(e)}") - return { - "error": str(e) - } - - async def readItem(self, connectionReference: str, itemId: str, siteId: str = None, listId: str = None) -> Dict[str, Any]: - """Read SharePoint item using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "itemId": itemId, - "connectionReference": connectionReference - } - - # For now, simulate item reading - # In a real implementation, you would use Microsoft Graph API - read_prompt = f""" - Read SharePoint item details. - - Item ID: {itemId} - Site ID: {siteId or 'Default site'} - List ID: {listId or 'Default list'} - - Please provide: - 1. Item properties and metadata - 2. Content and attachments - 3. Permissions and access rights - 4. Version history if available - 5. Related items and links - """ - - # Use AI to simulate item data - item_data = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(read_prompt) - - return { - "itemId": itemId, - "siteId": siteId, - "listId": listId, - "data": item_data, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error reading SharePoint item: {str(e)}") - return { - "error": str(e), - "itemId": itemId - } - - async def writeItem(self, connectionReference: str, siteId: str, listId: str, item: Dict[str, Any]) -> Dict[str, Any]: - """Write SharePoint item using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "connectionReference": connectionReference - } - - # For now, simulate item writing - # In a real implementation, you would use Microsoft Graph API - write_prompt = f""" - Write item to SharePoint list. - - Site ID: {siteId} - List ID: {listId} - Item data: {json.dumps(item, indent=2)} - - Please provide: - 1. Item creation/update details - 2. Validation and formatting - 3. Permission settings - 4. Workflow triggers if applicable - 5. Success confirmation - """ - - # Use AI to simulate item creation - write_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(write_prompt) - - return { - "siteId": siteId, - "listId": listId, - "item": item, - "result": write_result, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error writing SharePoint item: {str(e)}") - return { - "error": str(e) - } - - async def readList(self, connectionReference: str, listId: str, siteId: str = None, query: str = None, maxResults: int = 10) -> Dict[str, Any]: - """Read SharePoint list using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "listId": listId, - "connectionReference": connectionReference - } - - # For now, simulate list reading - # In a real implementation, you would use Microsoft Graph API - list_prompt = f""" - Read SharePoint list items. - - List ID: {listId} - Site ID: {siteId or 'Default site'} - Query: {query or 'All items'} - Max Results: {maxResults} - - Please provide: - 1. List structure and columns - 2. Item data and properties - 3. Sorting and filtering options - 4. Pagination information - 5. List metadata and settings - """ - - # Use AI to simulate list data - list_data = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(list_prompt) - - return { - "listId": listId, - "siteId": siteId, - "query": query, - "maxResults": maxResults, - "data": list_data, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error reading SharePoint list: {str(e)}") - return { - "error": str(e), - "listId": listId - } - - async def writeList(self, connectionReference: str, siteId: str, listId: str, items: List[Dict[str, Any]]) -> Dict[str, Any]: - """Write multiple items to SharePoint list using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "connectionReference": connectionReference - } - - # For now, simulate bulk writing - # In a real implementation, you would use Microsoft Graph API - bulk_prompt = f""" - Write multiple items to SharePoint list. - - Site ID: {siteId} - List ID: {listId} - Number of items: {len(items)} - Items data: {json.dumps(items[:3], indent=2)} # Show first 3 items - - Please provide: - 1. Bulk operation details - 2. Validation and error handling - 3. Performance optimization - 4. Success/failure status for each item - 5. Batch processing results - """ - - # Use AI to simulate bulk operation - bulk_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(bulk_prompt) - - return { - "siteId": siteId, - "listId": listId, - "items": items, - "result": bulk_result, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error writing to SharePoint list: {str(e)}") - return { - "error": str(e) - } - - async def createList(self, connectionReference: str, siteId: str, name: str, description: str = None, template: str = "genericList", fields: List[Dict[str, Any]] = None) -> Dict[str, Any]: - """Create SharePoint list using Microsoft Graph API""" - try: - connection = self._getMicrosoftConnection(connectionReference) - if not connection: - return { - "error": "No valid Microsoft connection found for the provided connection reference", - "connectionReference": connectionReference - } - - # For now, simulate list creation - # In a real implementation, you would use Microsoft Graph API - create_prompt = f""" - Create a new SharePoint list. - - Site ID: {siteId} - Name: {name} - Description: {description or 'No description'} - Template: {template} - Fields: {json.dumps(fields, indent=2) if fields else 'Default fields'} - - Please provide: - 1. List structure and configuration - 2. Column definitions and types - 3. Default views and permissions - 4. Workflow and automation settings - 5. Creation confirmation and next steps - """ - - # Use AI to simulate list creation - creation_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(create_prompt) - - return { - "siteId": siteId, - "name": name, - "description": description, - "template": template, - "fields": fields, - "result": creation_result, - "connection": { - "id": connection["id"], - "authority": "microsoft", - "reference": connectionReference - } - } - - except Exception as e: - logger.error(f"Error creating SharePoint list: {str(e)}") - return { - "error": str(e) - } - -class MethodSharepoint(MethodBase): - """SharePoint method implementation for site operations""" - - def __init__(self, serviceContainer: Any): - """Initialize the SharePoint method""" - super().__init__(serviceContainer) - self.name = "sharepoint" - self.description = "Handle SharePoint site operations like reading and writing lists" - self.sharepointService = SharepointService(serviceContainer) - - @action - async def search(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Search SharePoint content - - Parameters: - connectionReference (str): Reference to the Microsoft connection - query (str): Search query - siteId (str, optional): SharePoint site ID - contentType (str, optional): Content type to filter by - maxResults (int, optional): Maximum number of results (default: 10) - """ - try: - connectionReference = parameters.get("connectionReference") - query = parameters.get("query") - siteId = parameters.get("siteId") - contentType = parameters.get("contentType") - maxResults = parameters.get("maxResults", 10) - - if not connectionReference: - return self._createResult( - success=False, - data={}, - error="Connection reference is required" - ) - - if not query: - return self._createResult( - success=False, - data={}, - error="Search query is required" - ) - - # Search content - results = await self.sharepointService.searchContent( - connectionReference=connectionReference, - query=query, - siteId=siteId, - contentType=contentType, - maxResults=maxResults - ) - return self._createResult( success=True, - data=results + data={ + "documentName": f"sharepoint_find_path_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } ) except Exception as e: - logger.error(f"Error searching SharePoint: {str(e)}") + logger.error(f"Error finding document path: {str(e)}") return self._createResult( success=False, data={}, error=str(e) ) - + @action - async def read(self, parameters: Dict[str, Any]) -> ActionResult: + async def readDocument(self, parameters: Dict[str, Any]) -> ActionResult: """ - Read SharePoint item + Read documents from SharePoint Parameters: + documentList (str): Reference to the document list to read connectionReference (str): Reference to the Microsoft connection - itemId (str): ID of the item to read - siteId (str, optional): SharePoint site ID - listId (str, optional): SharePoint list ID + siteUrl (str): SharePoint site URL + documentPaths (List[str]): List of paths to the documents in SharePoint + includeMetadata (bool, optional): Whether to include metadata (default: True) """ try: + documentList = parameters.get("documentList") connectionReference = parameters.get("connectionReference") - itemId = parameters.get("itemId") - siteId = parameters.get("siteId") - listId = parameters.get("listId") + siteUrl = parameters.get("siteUrl") + documentPaths = parameters.get("documentPaths") + includeMetadata = parameters.get("includeMetadata", True) - if not connectionReference: + if not documentList or not connectionReference or not siteUrl or not documentPaths: return self._createResult( success=False, data={}, - error="Connection reference is required" + error="Document list reference, connection reference, site URL, and document paths are required" ) - if not itemId: + chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + if not chatDocuments: return self._createResult( success=False, data={}, - error="Item ID is required" + error="No documents found for the provided reference" ) - # Read item - item = await self.sharepointService.readItem( - connectionReference=connectionReference, - itemId=itemId, - siteId=siteId, - listId=listId - ) + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return self._createResult( + success=False, + data={}, + error="No valid Microsoft connection found for the provided connection reference" + ) + + # Process each document path + read_results = [] + + for i, documentPath in enumerate(documentPaths): + if i < len(chatDocuments): + chatDocument = chatDocuments[i] + fileId = chatDocument.fileId + + sharepoint_prompt = f""" + Simulate reading a document from Microsoft SharePoint. + + Connection: {connection['id']} + Site URL: {siteUrl} + Document Path: {documentPath} + Include Metadata: {includeMetadata} + File ID: {fileId} + + Please provide: + 1. Document content and structure + 2. File metadata and properties + 3. SharePoint site information + 4. Document permissions and sharing + 5. Version history if available + """ + + document_data = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(sharepoint_prompt) + + read_results.append({ + "documentPath": documentPath, + "fileId": fileId, + "documentContent": document_data + }) + + result_data = { + "connectionReference": connectionReference, + "siteUrl": siteUrl, + "documentPaths": documentPaths, + "includeMetadata": includeMetadata, + "readResults": read_results, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + }, + "timestamp": datetime.now(UTC).isoformat() + } return self._createResult( success=True, - data=item + data={ + "documentName": f"sharepoint_documents_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } ) - except Exception as e: - logger.error(f"Error reading SharePoint item: {str(e)}") + logger.error(f"Error reading SharePoint documents: {str(e)}") return self._createResult( success=False, data={}, error=str(e) ) - + @action - async def write(self, parameters: Dict[str, Any]) -> ActionResult: + async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult: """ - Write SharePoint item + Upload documents to SharePoint Parameters: connectionReference (str): Reference to the Microsoft connection - siteId (str): SharePoint site ID - listId (str): SharePoint list ID - item (Dict[str, Any]): Item data to write + siteUrl (str): SharePoint site URL + documentPaths (List[str]): List of paths where to upload the documents + documentList (str): Reference to the document list to upload + fileNames (List[str]): List of names for the uploaded files """ try: connectionReference = parameters.get("connectionReference") - siteId = parameters.get("siteId") - listId = parameters.get("listId") - item = parameters.get("item", {}) + siteUrl = parameters.get("siteUrl") + documentPaths = parameters.get("documentPaths") + documentList = parameters.get("documentList") + fileNames = parameters.get("fileNames") - if not connectionReference: + if not connectionReference or not siteUrl or not documentPaths or not documentList or not fileNames: return self._createResult( success=False, data={}, - error="Connection reference is required" + error="Connection reference, site URL, document paths, document list, and file names are required" ) - if not siteId or not listId: + # Get Microsoft connection + connection = self._getMicrosoftConnection(connectionReference) + if not connection: return self._createResult( success=False, data={}, - error="Site ID and list ID are required" + error="No valid Microsoft connection found for the provided connection reference" ) - # Write item - result = await self.sharepointService.writeItem( - connectionReference=connectionReference, - siteId=siteId, - listId=listId, - item=item - ) + # Get documents from reference + chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + if not chatDocuments: + return self._createResult( + success=False, + data={}, + error="No documents found for the provided reference" + ) + + # Process each document upload + upload_results = [] + + for i, (documentPath, fileName) in enumerate(zip(documentPaths, fileNames)): + if i < len(chatDocuments): + chatDocument = chatDocuments[i] + fileId = chatDocument.fileId + file_data = self.serviceContainer.getFileData(fileId) + + if not file_data: + logger.warning(f"File data not found for fileId: {fileId}") + continue + + # Create SharePoint upload prompt + upload_prompt = f""" + Simulate uploading a document to Microsoft SharePoint. + + Connection: {connection['id']} + Site URL: {siteUrl} + Document Path: {documentPath} + File Name: {fileName} + File ID: {fileId} + File Size: {len(file_data)} bytes + + Please provide: + 1. Upload confirmation and status + 2. File metadata and properties + 3. SharePoint site integration details + 4. Permission and sharing settings + 5. Version control information + """ + + # Use AI to simulate SharePoint upload + upload_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(upload_prompt) + + upload_results.append({ + "documentPath": documentPath, + "fileName": fileName, + "fileId": fileId, + "uploadResult": upload_result + }) + + # Create result data + result_data = { + "connectionReference": connectionReference, + "siteUrl": siteUrl, + "documentPaths": documentPaths, + "documentList": documentList, + "fileNames": fileNames, + "uploadResults": upload_results, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + }, + "timestamp": datetime.now(UTC).isoformat() + } return self._createResult( success=True, - data=result + data={ + "documentName": f"sharepoint_upload_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } ) except Exception as e: - logger.error(f"Error writing SharePoint item: {str(e)}") + logger.error(f"Error uploading to SharePoint: {str(e)}") return self._createResult( success=False, data={}, error=str(e) ) - + @action - async def readList(self, parameters: Dict[str, Any]) -> ActionResult: + async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult: """ - Read SharePoint list + List documents in SharePoint folder Parameters: connectionReference (str): Reference to the Microsoft connection - listId (str): SharePoint list ID - siteId (str, optional): SharePoint site ID - query (str, optional): Query to filter items - maxResults (int, optional): Maximum number of results (default: 10) + siteUrl (str): SharePoint site URL + folderPaths (List[str]): List of paths to the folders to list + includeSubfolders (bool, optional): Whether to include subfolders (default: False) """ try: connectionReference = parameters.get("connectionReference") - listId = parameters.get("listId") - siteId = parameters.get("siteId") - query = parameters.get("query") - maxResults = parameters.get("maxResults", 10) + siteUrl = parameters.get("siteUrl") + folderPaths = parameters.get("folderPaths") + includeSubfolders = parameters.get("includeSubfolders", False) - if not connectionReference: + if not connectionReference or not siteUrl or not folderPaths: return self._createResult( success=False, data={}, - error="Connection reference is required" + error="Connection reference, site URL, and folder paths are required" ) - if not listId: + # Get Microsoft connection + connection = self._getMicrosoftConnection(connectionReference) + if not connection: return self._createResult( success=False, data={}, - error="List ID is required" + error="No valid Microsoft connection found for the provided connection reference" ) - # Read list - items = await self.sharepointService.readList( - connectionReference=connectionReference, - listId=listId, - siteId=siteId, - query=query, - maxResults=maxResults - ) + # Process each folder path + list_results = [] + + for folderPath in folderPaths: + # Create SharePoint listing prompt + list_prompt = f""" + Simulate listing documents in Microsoft SharePoint folder. + + Connection: {connection['id']} + Site URL: {siteUrl} + Folder Path: {folderPath} + Include Subfolders: {includeSubfolders} + + Please provide: + 1. List of documents and folders + 2. File metadata and properties + 3. Folder structure and hierarchy + 4. Permission and sharing information + 5. Document statistics and summary + """ + + # Use AI to simulate SharePoint listing + list_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(list_prompt) + + list_results.append({ + "folderPath": folderPath, + "listResult": list_result + }) + + # Create result data + result_data = { + "connectionReference": connectionReference, + "siteUrl": siteUrl, + "folderPaths": folderPaths, + "includeSubfolders": includeSubfolders, + "listResults": list_results, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + }, + "timestamp": datetime.now(UTC).isoformat() + } return self._createResult( success=True, - data=items + data={ + "documentName": f"sharepoint_document_list_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } ) except Exception as e: - logger.error(f"Error reading SharePoint list: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) - - @action - async def writeList(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Write multiple items to SharePoint list - - Parameters: - connectionReference (str): Reference to the Microsoft connection - siteId (str): SharePoint site ID - listId (str): SharePoint list ID - items (List[Dict[str, Any]]): List of items to write - """ - try: - connectionReference = parameters.get("connectionReference") - siteId = parameters.get("siteId") - listId = parameters.get("listId") - items = parameters.get("items", []) - - if not connectionReference: - return self._createResult( - success=False, - data={}, - error="Connection reference is required" - ) - - if not siteId or not listId: - return self._createResult( - success=False, - data={}, - error="Site ID and list ID are required" - ) - - # Write items - results = await self.sharepointService.writeList( - connectionReference=connectionReference, - siteId=siteId, - listId=listId, - items=items - ) - - return self._createResult( - success=True, - data=results - ) - - except Exception as e: - logger.error(f"Error writing SharePoint list: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) - - @action - async def createList(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Create new SharePoint list - - Parameters: - connectionReference (str): Reference to the Microsoft connection - siteId (str): SharePoint site ID - name (str): Name of the new list - description (str, optional): Description of the list - template (str, optional): List template (default: "genericList") - fields (List[Dict[str, Any]], optional): List of field definitions - """ - try: - connectionReference = parameters.get("connectionReference") - siteId = parameters.get("siteId") - name = parameters.get("name") - description = parameters.get("description") - template = parameters.get("template", "genericList") - fields = parameters.get("fields", []) - - if not connectionReference: - return self._createResult( - success=False, - data={}, - error="Connection reference is required" - ) - - if not siteId or not name: - return self._createResult( - success=False, - data={}, - error="Site ID and name are required" - ) - - # Create list - list_info = await self.sharepointService.createList( - connectionReference=connectionReference, - siteId=siteId, - name=name, - description=description, - template=template, - fields=fields - ) - - return self._createResult( - success=True, - data=list_info - ) - - except Exception as e: - logger.error(f"Error creating SharePoint list: {str(e)}") + logger.error(f"Error listing SharePoint documents: {str(e)}") return self._createResult( success=False, data={}, diff --git a/modules/methods/methodWeb.py b/modules/methods/methodWeb.py index 5feacf23..48b4b9ae 100644 --- a/modules/methods/methodWeb.py +++ b/modules/methods/methodWeb.py @@ -9,19 +9,21 @@ from datetime import datetime, UTC import requests from bs4 import BeautifulSoup import time +import uuid from modules.workflow.methodBase import MethodBase, ActionResult, action from modules.shared.configuration import APP_CONFIG logger = logging.getLogger(__name__) -class WebService: - """Service for web operations like searching and crawling""" +class MethodWeb(MethodBase): + """Web method implementation for web operations""" def __init__(self, serviceContainer: Any): - self.serviceContainer = serviceContainer - self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" - self.timeout = 30 + """Initialize the web method""" + super().__init__(serviceContainer) + self.name = "web" + self.description = "Handle web operations like crawling and scraping" # Web search configuration from agentWebcrawler self.srcApikey = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_APIKEY", "") @@ -31,233 +33,90 @@ class WebService: if not self.srcApikey: logger.warning("SerpAPI key not configured for web search") + + self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + self.timeout = 30 - async def searchWeb(self, query: str, maxResults: int = 10) -> Dict[str, Any]: - """Search web content using Google search via SerpAPI""" - try: - if not self.srcApikey: - return { - "error": "SerpAPI key not configured", - "query": query - } + def _readUrl(self, url: str) -> BeautifulSoup: + """Read a URL and return a BeautifulSoup parser for the content""" + if not url or not url.startswith(('http://', 'https://')): + return None - # Get user language from service container if available - userLanguage = "en" # Default language - if hasattr(self.serviceContainer, 'user') and hasattr(self.serviceContainer.user, 'language'): - userLanguage = self.serviceContainer.user.language - - # Format the search request for SerpAPI - params = { - "engine": self.srcEngine, - "q": query, - "api_key": self.srcApikey, - "num": min(maxResults, self.maxResults), # Number of results to return - "hl": userLanguage # User language - } - - # Make the API request - response = requests.get("https://serpapi.com/search", params=params, timeout=self.timeout) - response.raise_for_status() - - # Parse JSON response - search_results = response.json() - - # Extract organic results - results = [] - - if "organic_results" in search_results: - for result in search_results["organic_results"][:maxResults]: - # Extract title - title = result.get("title", "No title") - - # Extract URL - url = result.get("link", "No URL") - - # Extract snippet - snippet = result.get("snippet", "No description") - - # Get actual page content - try: - targetPageSoup = self._readUrl(url) - content = self._extractMainContent(targetPageSoup) - except Exception as e: - logger.warning(f"Error extracting content from {url}: {str(e)}") - content = f"Error extracting content: {str(e)}" - - results.append({ - 'title': title, - 'url': url, - 'snippet': snippet, - 'content': content - }) - - # Limit number of results - if len(results) >= maxResults: - break - else: - logger.warning(f"No organic results found in SerpAPI response for: {query}") - - return { - "query": query, - "maxResults": maxResults, - "results": results, - "totalFound": len(results), - "timestamp": datetime.now(UTC).isoformat() - } - - except Exception as e: - logger.error(f"Error searching web: {str(e)}") - return { - "error": str(e), - "query": query - } - - async def crawlPage(self, url: str, depth: int = 1, followLinks: bool = True, extractContent: bool = True) -> Dict[str, Any]: - """Crawl web page and extract content""" - try: - # Read the URL - soup = self._readUrl(url) - if not soup: - return { - "error": "Failed to read URL", - "url": url - } - - # Extract basic information - title = self._extractTitle(soup, url) - content = self._extractMainContent(soup) if extractContent else "" - - # Extract links if requested - links = [] - if followLinks: - for link in soup.find_all('a', href=True): - href = link.get('href') - if href and href.startswith(('http://', 'https://')): - links.append({ - 'url': href, - 'text': link.get_text(strip=True)[:100] - }) - - # Extract images - images = [] - for img in soup.find_all('img', src=True): - src = img.get('src') - if src: - images.append({ - 'src': src, - 'alt': img.get('alt', ''), - 'title': img.get('title', '') - }) - - return { - "url": url, - "depth": depth, - "followLinks": followLinks, - "extractContent": extractContent, - "title": title, - "content": content, - "links": links[:10], # Limit to first 10 links - "images": images[:10], # Limit to first 10 images - "timestamp": datetime.now(UTC).isoformat() - } - - except Exception as e: - logger.error(f"Error crawling web page: {str(e)}") - return { - "error": str(e), - "url": url - } - - async def extractContent(self, url: str, selectors: Dict[str, str] = None, format: str = "text") -> Dict[str, Any]: - """Extract content from web page using selectors""" - try: - # Read the URL - soup = self._readUrl(url) - if not soup: - return { - "error": "Failed to read URL", - "url": url - } - - extracted_content = {} - - if selectors: - # Extract content using provided selectors - for selector_name, selector in selectors.items(): - elements = soup.select(selector) - if elements: - if format == "text": - extracted_content[selector_name] = [elem.get_text(strip=True) for elem in elements] - elif format == "html": - extracted_content[selector_name] = [str(elem) for elem in elements] - else: - extracted_content[selector_name] = [elem.get_text(strip=True) for elem in elements] - else: - extracted_content[selector_name] = [] - else: - # Auto-extract common elements - extracted_content = { - "title": self._extractTitle(soup, url), - "main_content": self._extractMainContent(soup), - "headings": [h.get_text(strip=True) for h in soup.find_all(['h1', 'h2', 'h3'])], - "links": [a.get('href') for a in soup.find_all('a', href=True) if a.get('href').startswith(('http://', 'https://'))], - "images": [img.get('src') for img in soup.find_all('img', src=True)] - } - - return { - "url": url, - "selectors": selectors, - "format": format, - "content": extracted_content, - "timestamp": datetime.now(UTC).isoformat() - } - - except Exception as e: - logger.error(f"Error extracting content: {str(e)}") - return { - "error": str(e), - "url": url - } - - async def validatePage(self, url: str, checks: List[str] = None) -> Dict[str, Any]: - """Validate web page for various criteria""" - if checks is None: - checks = ["accessibility", "seo", "performance"] + headers = { + 'User-Agent': self.user_agent, + 'Accept': 'text/html,application/xhtml+xml,application/xml', + 'Accept-Language': 'en-US,en;q=0.9', + } try: - # Read the URL - soup = self._readUrl(url) - if not soup: - return { - "error": "Failed to read URL", - "url": url - } + # Initial request + response = requests.get(url, headers=headers, timeout=self.timeout) - validation_results = {} + # Handling for status 202 + if response.status_code == 202: + # Retry with backoff + backoff_times = [0.5, 1.0, 2.0, 5.0] + + for wait_time in backoff_times: + time.sleep(wait_time) + response = requests.get(url, headers=headers, timeout=self.timeout) + + if response.status_code != 202: + break - for check in checks: - if check == "accessibility": - validation_results["accessibility"] = self._checkAccessibility(soup) - elif check == "seo": - validation_results["seo"] = self._checkSEO(soup) - elif check == "performance": - validation_results["performance"] = self._checkPerformance(soup, url) - else: - validation_results[check] = {"status": "unknown", "message": f"Unknown check type: {check}"} + # Raise for error status codes + response.raise_for_status() - return { - "url": url, - "checks": checks, - "results": validation_results, - "timestamp": datetime.now(UTC).isoformat() - } + # Parse HTML + return BeautifulSoup(response.text, 'html.parser') except Exception as e: - logger.error(f"Error validating web page: {str(e)}") - return { - "error": str(e), - "url": url - } + logger.error(f"Error reading URL {url}: {str(e)}") + return None + + def _extractTitle(self, soup: BeautifulSoup, url: str) -> str: + """Extract the title from a webpage""" + if not soup: + return f"Error with {url}" + + # Extract title from title tag + title_tag = soup.find('title') + title = title_tag.text.strip() if title_tag else "No title" + + # Alternative: Also look for h1 tags if title tag is missing + if title == "No title": + h1_tag = soup.find('h1') + if h1_tag: + title = h1_tag.text.strip() + + return title + + def _extractMainContent(self, soup: BeautifulSoup, max_chars: int = 10000) -> str: + """Extract the main content from an HTML page""" + if not soup: + return "" + + # Try to find main content elements in priority order + main_content = None + for selector in ['main', 'article', '#content', '.content', '#main', '.main']: + content = soup.select_one(selector) + if content: + main_content = content + break + + # If no main content found, use the body + if not main_content: + main_content = soup.find('body') or soup + + # Remove script, style, nav, footer elements that don't contribute to main content + for element in main_content.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'): + element.extract() + + # Extract text content + text_content = main_content.get_text(separator=' ', strip=True) + + # Limit to max_chars + return text_content[:max_chars] def _checkAccessibility(self, soup: BeautifulSoup) -> Dict[str, Any]: """Check basic accessibility features""" @@ -355,97 +214,204 @@ class WebService: } } - def _readUrl(self, url: str) -> BeautifulSoup: - """Read a URL and return a BeautifulSoup parser for the content""" - if not url or not url.startswith(('http://', 'https://')): - return None - - headers = { - 'User-Agent': self.user_agent, - 'Accept': 'text/html,application/xhtml+xml,application/xml', - 'Accept-Language': 'en-US,en;q=0.9', - } + @action + async def crawl(self, parameters: Dict[str, Any]) -> ActionResult: + """ + Crawl web pages and extract content + Parameters: + urls (List[str]): List of URLs to crawl + maxDepth (int, optional): Maximum crawl depth (default: 2) + includeImages (bool, optional): Whether to include images (default: False) + followLinks (bool, optional): Whether to follow links (default: True) + """ try: - # Initial request - response = requests.get(url, headers=headers, timeout=self.timeout) + urls = parameters.get("urls") + maxDepth = parameters.get("maxDepth", 2) + includeImages = parameters.get("includeImages", False) + followLinks = parameters.get("followLinks", True) - # Handling for status 202 - if response.status_code == 202: - # Retry with backoff - backoff_times = [0.5, 1.0, 2.0, 5.0] - - for wait_time in backoff_times: - time.sleep(wait_time) - response = requests.get(url, headers=headers, timeout=self.timeout) + if not urls: + return self._createResult( + success=False, + data={}, + error="URLs are required" + ) + + # Crawl each URL + crawl_results = [] + + for url in urls: + try: + # Read the URL + soup = self._readUrl(url) + if not soup: + crawl_results.append({ + "error": "Failed to read URL", + "url": url + }) + continue - if response.status_code != 202: - break + # Extract basic information + title = self._extractTitle(soup, url) + content = self._extractMainContent(soup) if True else "" + + # Extract links if requested + links = [] + if followLinks: + for link in soup.find_all('a', href=True): + href = link.get('href') + if href and href.startswith(('http://', 'https://')): + links.append({ + 'url': href, + 'text': link.get_text(strip=True)[:100] + }) + + # Extract images + images = [] + for img in soup.find_all('img', src=True): + src = img.get('src') + if src: + images.append({ + 'src': src, + 'alt': img.get('alt', ''), + 'title': img.get('title', '') + }) + + crawl_results.append({ + "url": url, + "depth": maxDepth, + "followLinks": followLinks, + "extractContent": True, + "title": title, + "content": content, + "links": links[:10], # Limit to first 10 links + "images": images[:10], # Limit to first 10 images + "timestamp": datetime.now(UTC).isoformat() + }) + + except Exception as e: + logger.error(f"Error crawling web page {url}: {str(e)}") + crawl_results.append({ + "error": str(e), + "url": url + }) - # Raise for error status codes - response.raise_for_status() + # Create result data + result_data = { + "urls": urls, + "maxDepth": maxDepth, + "includeImages": includeImages, + "followLinks": followLinks, + "crawlResults": crawl_results, + "timestamp": datetime.now(UTC).isoformat() + } - # Parse HTML - return BeautifulSoup(response.text, 'html.parser') + return self._createResult( + success=True, + data={ + "documentName": f"web_crawl_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } + ) except Exception as e: - logger.error(f"Error reading URL {url}: {str(e)}") - return None + logger.error(f"Error crawling web pages: {str(e)}") + return self._createResult( + success=False, + data={}, + error=str(e) + ) - def _extractTitle(self, soup: BeautifulSoup, url: str) -> str: - """Extract the title from a webpage""" - if not soup: - return f"Error with {url}" + @action + async def scrape(self, parameters: Dict[str, Any]) -> ActionResult: + """ + Scrape specific data from web pages - # Extract title from title tag - title_tag = soup.find('title') - title = title_tag.text.strip() if title_tag else "No title" - - # Alternative: Also look for h1 tags if title tag is missing - if title == "No title": - h1_tag = soup.find('h1') - if h1_tag: - title = h1_tag.text.strip() - - return title + Parameters: + url (str): URL to scrape + selectors (Dict[str, str]): CSS selectors for data extraction + format (str, optional): Output format (default: "json") + """ + try: + url = parameters.get("url") + selectors = parameters.get("selectors") + format = parameters.get("format", "json") + + if not url or not selectors: + return self._createResult( + success=False, + data={}, + error="URL and selectors are required" + ) + + # Read the URL + soup = self._readUrl(url) + if not soup: + return self._createResult( + success=False, + data={}, + error="Failed to read URL" + ) + + extracted_content = {} + + if selectors: + # Extract content using provided selectors + for selector_name, selector in selectors.items(): + elements = soup.select(selector) + if elements: + if format == "text": + extracted_content[selector_name] = [elem.get_text(strip=True) for elem in elements] + elif format == "html": + extracted_content[selector_name] = [str(elem) for elem in elements] + else: + extracted_content[selector_name] = [elem.get_text(strip=True) for elem in elements] + else: + extracted_content[selector_name] = [] + else: + # Auto-extract common elements + extracted_content = { + "title": self._extractTitle(soup, url), + "main_content": self._extractMainContent(soup), + "headings": [h.get_text(strip=True) for h in soup.find_all(['h1', 'h2', 'h3'])], + "links": [a.get('href') for a in soup.find_all('a', href=True) if a.get('href').startswith(('http://', 'https://'))], + "images": [img.get('src') for img in soup.find_all('img', src=True)] + } + + scrape_result = { + "url": url, + "selectors": selectors, + "format": format, + "content": extracted_content, + "timestamp": datetime.now(UTC).isoformat() + } + + # Create result data + result_data = { + "url": url, + "selectors": selectors, + "format": format, + "scrapedData": scrape_result, + "timestamp": datetime.now(UTC).isoformat() + } + + return self._createResult( + success=True, + data={ + "documentName": f"web_scrape_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.{format}", + "documentData": result_data + } + ) + + except Exception as e: + logger.error(f"Error scraping web page: {str(e)}") + return self._createResult( + success=False, + data={}, + error=str(e) + ) - def _extractMainContent(self, soup: BeautifulSoup, max_chars: int = 10000) -> str: - """Extract the main content from an HTML page""" - if not soup: - return "" - - # Try to find main content elements in priority order - main_content = None - for selector in ['main', 'article', '#content', '.content', '#main', '.main']: - content = soup.select_one(selector) - if content: - main_content = content - break - - # If no main content found, use the body - if not main_content: - main_content = soup.find('body') or soup - - # Remove script, style, nav, footer elements that don't contribute to main content - for element in main_content.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'): - element.extract() - - # Extract text content - text_content = main_content.get_text(separator=' ', strip=True) - - # Limit to max_chars - return text_content[:max_chars] - -class MethodWeb(MethodBase): - """Web method implementation for web operations""" - - def __init__(self, serviceContainer: Any): - """Initialize the web method""" - super().__init__(serviceContainer) - self.name = "web" - self.description = "Handle web operations like searching and crawling" - self.webService = WebService(serviceContainer) - @action async def search(self, parameters: Dict[str, Any]) -> ActionResult: """ @@ -453,11 +419,15 @@ class MethodWeb(MethodBase): Parameters: query (str): Search query + engine (str, optional): Search engine to use (default: "google") maxResults (int, optional): Maximum number of results (default: 10) + filter (str, optional): Additional search filters """ try: query = parameters.get("query") + engine = parameters.get("engine", "google") maxResults = parameters.get("maxResults", 10) + filter = parameters.get("filter") if not query: return self._createResult( @@ -466,15 +436,101 @@ class MethodWeb(MethodBase): error="Search query is required" ) - # Search web - results = await self.webService.searchWeb( - query=query, - maxResults=maxResults - ) + # Search web content using Google search via SerpAPI + try: + if not self.srcApikey: + search_result = { + "error": "SerpAPI key not configured", + "query": query + } + else: + # Get user language from service container if available + userLanguage = "en" # Default language + if hasattr(self.serviceContainer, 'user') and hasattr(self.serviceContainer.user, 'language'): + userLanguage = self.serviceContainer.user.language + + # Format the search request for SerpAPI + params = { + "engine": self.srcEngine, + "q": query, + "api_key": self.srcApikey, + "num": min(maxResults, self.maxResults), # Number of results to return + "hl": userLanguage # User language + } + + # Make the API request + response = requests.get("https://serpapi.com/search", params=params, timeout=self.timeout) + response.raise_for_status() + + # Parse JSON response + search_results = response.json() + + # Extract organic results + results = [] + + if "organic_results" in search_results: + for result in search_results["organic_results"][:maxResults]: + # Extract title + title = result.get("title", "No title") + + # Extract URL + url = result.get("link", "No URL") + + # Extract snippet + snippet = result.get("snippet", "No description") + + # Get actual page content + try: + targetPageSoup = self._readUrl(url) + content = self._extractMainContent(targetPageSoup) + except Exception as e: + logger.warning(f"Error extracting content from {url}: {str(e)}") + content = f"Error extracting content: {str(e)}" + + results.append({ + 'title': title, + 'url': url, + 'snippet': snippet, + 'content': content + }) + + # Limit number of results + if len(results) >= maxResults: + break + else: + logger.warning(f"No organic results found in SerpAPI response for: {query}") + + search_result = { + "query": query, + "maxResults": maxResults, + "results": results, + "totalFound": len(results), + "timestamp": datetime.now(UTC).isoformat() + } + + except Exception as e: + logger.error(f"Error searching web: {str(e)}") + search_result = { + "error": str(e), + "query": query + } + + # Create result data + result_data = { + "query": query, + "engine": engine, + "maxResults": maxResults, + "filter": filter, + "searchResults": search_result, + "timestamp": datetime.now(UTC).isoformat() + } return self._createResult( success=True, - data=results + data={ + "documentName": f"web_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } ) except Exception as e: @@ -484,98 +540,11 @@ class MethodWeb(MethodBase): data={}, error=str(e) ) - - @action - async def crawl(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Crawl web page - - Parameters: - url (str): URL to crawl - depth (int, optional): Crawl depth (default: 1) - followLinks (bool, optional): Whether to follow links (default: True) - extractContent (bool, optional): Whether to extract content (default: True) - """ - try: - url = parameters.get("url") - depth = parameters.get("depth", 1) - followLinks = parameters.get("followLinks", True) - extractContent = parameters.get("extractContent", True) - - if not url: - return self._createResult( - success=False, - data={}, - error="URL is required" - ) - - # Crawl page - results = await self.webService.crawlPage( - url=url, - depth=depth, - followLinks=followLinks, - extractContent=extractContent - ) - - return self._createResult( - success=True, - data=results - ) - - except Exception as e: - logger.error(f"Error crawling web page: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) - - @action - async def extract(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Extract content from web page - - Parameters: - url (str): URL to extract content from - selectors (Dict[str, str], optional): CSS selectors for specific content - format (str, optional): Output format (default: "text") - """ - try: - url = parameters.get("url") - selectors = parameters.get("selectors", {}) - format = parameters.get("format", "text") - - if not url: - return self._createResult( - success=False, - data={}, - error="URL is required" - ) - - # Extract content - content = await self.webService.extractContent( - url=url, - selectors=selectors, - format=format - ) - - return self._createResult( - success=True, - data=content - ) - - except Exception as e: - logger.error(f"Error extracting content: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) - + @action async def validate(self, parameters: Dict[str, Any]) -> ActionResult: """ - Validate web page + Validate web pages for various criteria Parameters: url (str): URL to validate @@ -592,15 +561,48 @@ class MethodWeb(MethodBase): error="URL is required" ) - # Validate page - results = await self.webService.validatePage( - url=url, - checks=checks - ) + # Read the URL + soup = self._readUrl(url) + if not soup: + return self._createResult( + success=False, + data={}, + error="Failed to read URL" + ) + + validation_results = {} + + for check in checks: + if check == "accessibility": + validation_results["accessibility"] = self._checkAccessibility(soup) + elif check == "seo": + validation_results["seo"] = self._checkSEO(soup) + elif check == "performance": + validation_results["performance"] = self._checkPerformance(soup, url) + else: + validation_results[check] = {"status": "unknown", "message": f"Unknown check type: {check}"} + + validation_result = { + "url": url, + "checks": checks, + "results": validation_results, + "timestamp": datetime.now(UTC).isoformat() + } + + # Create result data + result_data = { + "url": url, + "checks": checks, + "validationResult": validation_result, + "timestamp": datetime.now(UTC).isoformat() + } return self._createResult( success=True, - data=results + data={ + "documentName": f"web_validation_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } ) except Exception as e: diff --git a/modules/workflow/managerChat.py b/modules/workflow/managerChat.py index 076b33b1..275a5302 100644 --- a/modules/workflow/managerChat.py +++ b/modules/workflow/managerChat.py @@ -39,608 +39,196 @@ class ChatManager: self.workflow = workflow self.service = ServiceContainer(self.currentUser, self.workflow) - def _extractJsonFromResponse(self, response: str) -> Optional[Dict[str, Any]]: - """Extract JSON from verbose AI response that may contain explanatory text""" + # ===== WORKFLOW PHASES ===== + + # Phase 1: High-Level Task Planning + async def planHighLevelTasks(self, userInput: str, workflow: ChatWorkflow) -> Dict[str, Any]: + """Phase 1: Plan high-level tasks from user input""" try: - # First try direct JSON parsing - return json.loads(response) - except json.JSONDecodeError: - # Try to find JSON in the response - import re + logger.info(f"Planning high-level tasks for workflow {workflow.id}") - # Look for JSON object patterns with more flexible matching - json_patterns = [ - r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', # Nested JSON objects - r'\{.*?\}', # Simple JSON object (non-greedy) - r'\[\{.*?\}\]', # JSON array of objects - ] - - for pattern in json_patterns: - matches = re.findall(pattern, response, re.DOTALL) - for match in matches: - try: - # Clean up the match - cleaned_match = match.strip() - # Remove any markdown code blocks - if cleaned_match.startswith('```json'): - cleaned_match = cleaned_match[7:] - if cleaned_match.endswith('```'): - cleaned_match = cleaned_match[:-3] - cleaned_match = cleaned_match.strip() - - parsed_json = json.loads(cleaned_match) - logger.info(f"Successfully extracted JSON from response using pattern: {pattern[:20]}...") - return parsed_json - except json.JSONDecodeError: - continue - - # If no JSON found, log the full response for debugging - logger.error(f"Could not extract JSON from response: {response[:500]}...") - return None - - # ===== Task Creation and Management ===== - async def createInitialTask(self, workflow: ChatWorkflow, initialMessage: ChatMessage) -> Optional[TaskItem]: - """Create the initial task from the first message""" - try: - logger.info(f"Creating initial task for workflow {workflow.id}") - - # Create task definition prompt - prompt = await self._createTaskDefinitionPrompt(initialMessage.message, workflow) + # Create planning prompt + prompt = self._createTaskPlanningPrompt({ + 'user_request': userInput, + 'available_documents': self._getAvailableDocuments(workflow), + 'workflow_id': workflow.id + }) # Get AI response response = await self.service.callAiTextAdvanced(prompt) - # Parse response - taskDef = self._extractJsonFromResponse(response) + # Parse and validate task plan + task_plan = self._parseTaskPlanResponse(response) - # Validate task definition - if not taskDef: - logger.error("Could not extract valid JSON from AI response") - return None + if not self._validateTaskPlan(task_plan): + logger.warning("Generated task plan failed validation, using fallback") + task_plan = self._createFallbackTaskPlan({ + 'user_request': userInput, + 'available_documents': self._getAvailableDocuments(workflow) + }) - if not isinstance(taskDef, dict): - logger.error("Task definition must be a JSON object") - return None - - requiredFields = ["status", "feedback", "actions"] - for field in requiredFields: - if field not in taskDef: - logger.error(f"Missing required field: {field}") - return None - - if not isinstance(taskDef["actions"], list): - logger.error("Actions must be a list") - return None - - logger.info(f"Task definition validated: {len(taskDef['actions'])} actions") - - # Create task using interface - taskData = { - "workflowId": workflow.id, - "userInput": initialMessage.message, - "status": taskDef["status"], - "feedback": taskDef["feedback"], - "actionList": [] - } - - # Add actions - for actionDef in taskDef["actions"]: - if not isinstance(actionDef, dict): - continue - - requiredFields = ["method", "action", "parameters"] - if not all(field in actionDef for field in requiredFields): - continue - - # Create action using interface - actionData = { - "execMethod": actionDef["method"], - "execAction": actionDef["action"], - "execParameters": actionDef["parameters"], - "execResultLabel": actionDef.get("resultLabel") - } - action = self.chatInterface.createTaskAction(actionData) - if action: - # Convert TaskAction object to dictionary for database storage - actionDict = { - "id": action.id, - "execMethod": action.execMethod, - "execAction": action.execAction, - "execParameters": action.execParameters, - "execResultLabel": action.execResultLabel, - "status": action.status, - "error": action.error, - "retryCount": action.retryCount, - "retryMax": action.retryMax, - "processingTime": action.processingTime, - "timestamp": action.timestamp.isoformat() if action.timestamp else None, - "result": action.result, - "resultDocuments": action.resultDocuments - } - taskData["actionList"].append(actionDict) - - # Create task using interface - task = self.chatInterface.createTask(taskData) - if task: - logger.info(f"Task created successfully: {task.id}") - else: - logger.error("Failed to create task") - - return task + logger.info(f"High-level task planning completed: {len(task_plan.get('tasks', []))} tasks") + return task_plan except Exception as e: - logger.error(f"Error creating initial task: {str(e)}") - return None - - async def createNextTask(self, workflow: ChatWorkflow, previousResult: TaskResult) -> Optional[TaskItem]: - """Create next task based on previous result""" + logger.error(f"Error in high-level task planning: {str(e)}") + return self._createFallbackTaskPlan({ + 'user_request': userInput, + 'available_documents': self._getAvailableDocuments(workflow) + }) + + # Phase 2: Task Definition and Action Generation + async def defineTaskActions(self, task_step: Dict[str, Any], workflow: ChatWorkflow, previous_results: List[str] = None) -> List[TaskAction]: + """Phase 2: Define specific actions for a task step""" try: - logger.info(f"Creating next task for workflow {workflow.id}") + logger.info(f"Defining actions for task: {task_step.get('description', 'Unknown')}") - # Check if previous result was successful - if not previousResult.success: - logger.error(f"Previous task failed: {previousResult.error}") - return None - - # Create task definition prompt - prompt = await self._createTaskDefinitionPrompt(previousResult.feedback, workflow) - - # Get AI response - response = await self.service.callAiTextAdvanced(prompt) - - # Parse response - taskDef = self._extractJsonFromResponse(response) - - # Validate task definition - if not taskDef: - logger.error("Could not extract valid JSON from AI response") - return None - - if not isinstance(taskDef, dict): - logger.error("Task definition must be a JSON object") - return None - - requiredFields = ["status", "feedback", "actions"] - for field in requiredFields: - if field not in taskDef: - logger.error(f"Missing required field: {field}") - return None - - if not isinstance(taskDef["actions"], list): - logger.error("Actions must be a list") - return None - - logger.info(f"Next task definition validated: {len(taskDef['actions'])} actions") - - # Create task using interface - taskData = { - "workflowId": workflow.id, - "userInput": previousResult.feedback, - "status": taskDef["status"], - "feedback": taskDef["feedback"], - "actionList": [] + # Prepare context for action generation + context = { + 'task_step': task_step, + 'workflow': workflow, + 'workflow_id': workflow.id, + 'available_documents': self._getAvailableDocuments(workflow), + 'previous_results': previous_results or [], + 'improvements': None } - # Add actions - for actionDef in taskDef["actions"]: - if not isinstance(actionDef, dict): - continue - - requiredFields = ["method", "action", "parameters"] - if not all(field in actionDef for field in requiredFields): - continue - - # Create action using interface - actionData = { - "execMethod": actionDef["method"], - "execAction": actionDef["action"], - "execParameters": actionDef["parameters"], - "execResultLabel": actionDef.get("resultLabel") + # Generate actions using AI + actions = await self._generateActionsForTaskStep(context) + + # Convert to TaskAction objects + task_actions = [] + for action_dict in actions: + action_data = { + "execMethod": action_dict.get('method', 'unknown'), + "execAction": action_dict.get('action', 'unknown'), + "execParameters": action_dict.get('parameters', {}), + "execResultLabel": action_dict.get('resultLabel', ''), + "status": TaskStatus.PENDING } - action = self.chatInterface.createTaskAction(actionData) - if action: - # Convert TaskAction object to dictionary for database storage - actionDict = { - "id": action.id, - "execMethod": action.execMethod, - "execAction": action.execAction, - "execParameters": action.execParameters, - "execResultLabel": action.execResultLabel, - "status": action.status, - "error": action.error, - "retryCount": action.retryCount, - "retryMax": action.retryMax, - "processingTime": action.processingTime, - "timestamp": action.timestamp.isoformat() if action.timestamp else None, - "result": action.result, - "resultDocuments": action.resultDocuments - } - taskData["actionList"].append(actionDict) + + task_action = self.chatInterface.createTaskAction(action_data) + if task_action: + task_actions.append(task_action) + logger.info(f"Created task action: {task_action.execMethod}.{task_action.execAction}") - # Create task using interface - task = self.chatInterface.createTask(taskData) - if task: - logger.info(f"Next task created successfully: {task.id}") - else: - logger.error("Failed to create next task") - - return task + logger.info(f"Task action definition completed: {len(task_actions)} actions") + return task_actions except Exception as e: - logger.error(f"Error creating next task: {str(e)}") - return None - - async def executeTask(self, task: TaskItem) -> TaskItem: - """Execute a task's actions""" + logger.error(f"Error defining task actions: {str(e)}") + return [] + + # Phase 3: Action Execution + async def executeTaskActions(self, task_actions: List[TaskAction], workflow: ChatWorkflow) -> List[Dict[str, Any]]: + """Phase 3: Execute all actions for a task""" try: - # Execute each action - for action in task.actionList: - # Create action prompt - prompt = f"""Execute the following action: - -Action: {action.execMethod}.{action.execAction} -Parameters: {json.dumps(action.execParameters)} - -Please provide a JSON response with: -1. result: The result of the action -2. resultLabel: A label for the result (format: documentList__