From d1aac4099d1dbe684a17a8db2065bf4d7dfe15fc Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Thu, 10 Jul 2025 20:03:50 +0200 Subject: [PATCH] stats table included --- README_document_test.md | 114 -- modules/historic_data_agents/agentCoder.py | 1039 ----------------- .../agentDocumentation.py | 537 --------- modules/historic_data_agents/agentEmail.py | 380 ------ .../historic_data_agents/agentSharepoint.py | 348 ------ .../historic_data_agents/agentWebcrawler.py | 814 ------------- modules/interfaces/interfaceChatObjects.py | 74 +- ...methodCoder.py => EXCLUDED_methodCoder.py} | 17 +- modules/methods/methodDocument.py | 19 +- modules/methods/methodOutlook.py | 4 +- modules/methods/methodSharepoint.py | 4 +- modules/methods/methodWeb.py | 6 +- modules/routes/routeWorkflows.py | 16 +- modules/workflow/managerChat.py | 189 ++- modules/workflow/managerDocument.py | 73 -- modules/workflow/methodBase.py | 6 +- modules/workflow/processorDocument.py | 10 +- .../{serviceContainer.py => serviceCenter.py} | 247 +++- notes/WORKFLOW_ARCHITECTURE.md | 226 ---- notes/changelog.txt | 89 -- notes/data_specification.md | 187 --- notes/methodbased_specification.md | 17 +- run_document_test.ps1 | 31 - test_config.ini | 15 - test_document_extraction.py | 288 ----- test_param_extraction.py | 27 - test_retry_enhancement.py | 289 ----- test_sample_document.txt | 47 - test_signature.py | 23 - test_workflow.py | 488 -------- 30 files changed, 455 insertions(+), 5169 deletions(-) delete mode 100644 README_document_test.md delete mode 100644 modules/historic_data_agents/agentCoder.py delete mode 100644 modules/historic_data_agents/agentDocumentation.py delete mode 100644 modules/historic_data_agents/agentEmail.py delete mode 100644 modules/historic_data_agents/agentSharepoint.py delete mode 100644 modules/historic_data_agents/agentWebcrawler.py rename modules/methods/{methodCoder.py => EXCLUDED_methodCoder.py} (93%) delete mode 100644 modules/workflow/managerDocument.py rename modules/workflow/{serviceContainer.py => serviceCenter.py} (76%) delete mode 100644 notes/WORKFLOW_ARCHITECTURE.md delete mode 100644 notes/data_specification.md delete mode 100644 run_document_test.ps1 delete mode 100644 test_config.ini delete mode 100644 test_document_extraction.py delete mode 100644 test_param_extraction.py delete mode 100644 test_retry_enhancement.py delete mode 100644 test_sample_document.txt delete mode 100644 test_signature.py delete mode 100644 test_workflow.py diff --git a/README_document_test.md b/README_document_test.md deleted file mode 100644 index 7bf052f2..00000000 --- a/README_document_test.md +++ /dev/null @@ -1,114 +0,0 @@ -# Document Extraction Test - -This test procedure validates the DocumentManager's ability to extract content from files using AI-powered analysis. - -## Files Created - -- `test_document_extraction.py` - Main test script -- `test_sample_document.txt` - Sample document for testing -- `run_document_test.ps1` - PowerShell wrapper script -- `test_document_extraction.log` - Generated log file (cleared on each run) - -## Usage - -### Method 1: Using PowerShell Script (Recommended) - -```powershell -# Test with default sample file -.\run_document_test.ps1 - -# Test with custom file -.\run_document_test.ps1 "path\to\your\document.pdf" -``` - -### Method 2: Direct Python Execution - -```bash -# Test with default sample file -python test_document_extraction.py test_sample_document.txt - -# Test with custom file -python test_document_extraction.py "path/to/your/document.docx" -``` - -## Test Features - -1. **File Validation**: Checks if the specified file exists -2. **MIME Type Detection**: Automatically detects file type based on extension -3. **Content Extraction**: Uses the DocumentManager to extract content -4. **AI Processing**: Applies the prompt "summarize the content and give list of the major topics" -5. **Comprehensive Logging**: Logs all steps and results to `test_document_extraction.log` -6. **Log Cleanup**: Clears the log file on each test run - -## Supported File Types - -- Text files (.txt, .md) -- CSV files (.csv) -- JSON files (.json) -- XML files (.xml) -- HTML files (.html, .htm) -- Images (.jpg, .jpeg, .png, .gif, .svg) -- PDF files (.pdf) -- Office documents (.docx, .xlsx, .pptx) -- And more (fallback to binary processing) - -## Test Output - -The test generates detailed logs including: - -- File information (path, size, MIME type) -- Extraction process details -- Extracted content summary -- AI-processed results -- Error details if any issues occur - -## Example Output - -``` -=== STARTING DOCUMENT EXTRACTION TEST === -File information: { - "file_path": "test_sample_document.txt", - "filename": "test_sample_document.txt", - "mime_type": "text/plain", - "file_size_bytes": 2048, - "file_size_mb": 0.0 -} -Document extraction completed successfully: { - "extracted_content_id": "test-doc-1234567890", - "content_items_count": 1, - "object_type": "ExtractedContent" -} -COMPLETE EXTRACTED CONTENT: { - "total_length": 1500, - "content": "PowerOn System Architecture Overview... [AI processed summary]" -} -``` - -## Error Handling - -The test includes comprehensive error handling for: - -- File not found errors -- File reading errors -- Document processing errors -- AI processing errors -- Import errors - -All errors are logged with detailed information for debugging. - -## Configuration - -The test uses the same configuration as other tests: - -- Environment variable: `POWERON_CONFIG_FILE = 'test_config.ini'` -- Log file: `test_document_extraction.log` -- Log level: DEBUG - -## Dependencies - -The test requires the same dependencies as the main PowerOn system: - -- Python 3.8+ -- Required Python packages (see requirements.txt) -- Access to AI services (if AI processing is enabled) -- Proper configuration in test_config.ini \ No newline at end of file diff --git a/modules/historic_data_agents/agentCoder.py b/modules/historic_data_agents/agentCoder.py deleted file mode 100644 index 8cb4d869..00000000 --- a/modules/historic_data_agents/agentCoder.py +++ /dev/null @@ -1,1039 +0,0 @@ -""" -Coder agent for generating and executing code. -Provides code generation, execution, and improvement capabilities. -""" - -import logging -from typing import Dict, Any, List, Tuple, Optional -import json -import os -import sys -import subprocess -import tempfile -import shutil -import venv -import importlib.util -from datetime import datetime -import uuid - -from modules.workflow.agentBase import AgentBase -from modules.shared.configuration import APP_CONFIG -from modules.interfaces.serviceChatModel import Task, ChatDocument, ChatContent -from modules.shared.attributeUtils import ModelMixin - -logger = logging.getLogger(__name__) - -class AgentCoder(AgentBase): - """Simplified Agent for developing and executing Python code with integrated executor""" - - def __init__(self): - """Initialize the coder agent""" - super().__init__() - self.name = "coder" - self.label = "Developer and Code Executor" - self.description = "Develops and executes Python code for data processing and automation" - self.capabilities = [ - "code_development", - "data_processing", - "file_processing", - "automation", - "code_execution" - ] - - # Executor settings - self.executorTimeout = int(APP_CONFIG.get("Agent_Coder_EXECUTION_TIMEOUT")) # seconds - self.executionRetryLimit = int(APP_CONFIG.get("Agent_Coder_EXECUTION_RETRY")) # max retries - self.tempDir = None - - def setDependencies(self, serviceBase=None): - """Set external dependencies for the agent.""" - self.setService(serviceBase) - - async def processTask(self, task: Task) -> Dict[str, Any]: - """ - Process a task and perform code development/execution. - First checks if the task can be completed without code execution, - then falls back to code generation if needed. - Enhanced to ensure all generated documents are included in output. - - Args: - task: Task object with prompt, inputDocuments, outputSpecifications - - Returns: - Dictionary with feedback and documents - """ - # 1. Extract task information - prompt = task.prompt - inputDocuments = task.filesInput - outputSpecs = task.filesOutput - - # Check if AI service is available - if not self.service or not self.service.base: - logger.error("No AI service configured for the Coder agent") - return { - "feedback": "The Coder agent is not properly configured.", - "documents": [] - } - - # 2. Extract data from documents in separate categories - documentData = [] # For raw file data (for code execution) - contentData = [] # For content data (later use) - contentExtraction = [] # For AI-extracted data (for quick completion) - - for doc in inputDocuments: - # Create proper filename from name and ext - filename = f"{doc.name}.{doc.ext}" if doc.ext else doc.name - - # Add main document data to documentData if it exists - docData = doc.data - if docData: - isBase64 = True # Assume base64 encoded for document data - documentData.append([filename, docData, isBase64]) - - # Process contents for different uses - if doc.contents: - for content in doc.contents: - contentName = content.name - - # For AI-extracted data (quick completion) - if content.data: - contentExtraction.append({ - "filename": filename, - "contentName": contentName, - "contentData": content.data, - "contentType": content.contentType, - "summary": content.summary - }) - - # For raw content data - if content.data: - rawData = content.data - isBase64 = content.metadata.get('base64Encoded', False) if content.metadata else False - contentData.append({ - "filename": filename, - "contentName": contentName, - "data": rawData, - "isBase64": isBase64, - "contentType": content.contentType - }) - - # Also add to documentData for code execution if not already added - if not docData or docData != rawData: - documentData.append([filename, rawData, isBase64]) - - # 3. Check if task can be completed without code execution - quickCompletion = await self._checkQuickCompletion(prompt, contentExtraction, outputSpecs) - - if quickCompletion and quickCompletion.get("complete") == 1: - logger.info("Task completed without code execution") - return { - "feedback": quickCompletion.get("prompt", "Task completed successfully."), - "documents": quickCompletion.get("documents", []) - } - else: - logger.debug(f"Code to generate, no quick check") - - # If quick completion not possible, continue with code generation and execution - logger.info("Generating code to solve the task") - - # 4. Generate code using AI - code, requirements = await self._generateCode(prompt, outputSpecs) - if not code: - return { - "feedback": "Failed to generate code for the task.", - "documents": [] - } - # Store the original code without document data - original_clean_code = code # Save clean code for later use in improvement - - # 5. Replace the placeholder with actual inputFiles data - documentDataJson = repr(documentData) - codeWithData = code.replace("inputFiles = \"=== JSONLOAD ===\"", f"inputFiles = {documentDataJson}") - - # 6. Execute code with retry logic - retryCount = 0 - maxRetries = self.executionRetryLimit - executionHistory = [] - - while retryCount <= maxRetries: - executionResult = self._executeCode(codeWithData, requirements) - executionHistory.append({ - "attempt": retryCount + 1, - "code": codeWithData, - "result": executionResult - }) - - # Check if execution was successful - if executionResult.get("success", False): - logger.info(f"Code execution succeeded on attempt {retryCount + 1}") - break - - # If we've reached max retries, exit the loop - if retryCount >= maxRetries: - logger.info(f"Reached maximum retry limit ({maxRetries}). Giving up.") - break - - # Log the error and attempt to improve the code - error = executionResult.get("error", "Unknown error") - logger.info(f"Execution attempt {retryCount + 1} failed: {error}. Attempting to improve code.") - - # Generate improved code based on error - improvedCode, improvedRequirements = await self._improveCode( - originalCode=original_clean_code, # Use clean code without document data - error=error, - executionResult=executionResult, - attempt=retryCount + 1, - outputSpecs=outputSpecs - ) - - if improvedCode: - # Inject document data into improved code - original_clean_code = improvedCode # Update clean code for next potential improvement - codeWithData = improvedCode.replace("inputFiles = \"=== JSONLOAD ===\"", f"inputFiles = {documentDataJson}") - requirements = improvedRequirements - logger.info(f"Code improved for retry {retryCount + 2}") - else: - logger.warning("Failed to improve code, using original code for retry") - - retryCount += 1 - - # 7. Process results and create output documents - documents = [] - - # Always add the final code document - documents.append(self.formatAgentDocumentOutput("generated_code.py", codeWithData, "text/plain")) - - # Add execution history document - executionHistoryStr = json.dumps(executionHistory, indent=2) - documents.append(self.formatAgentDocumentOutput("execution_history.json", executionHistoryStr, "application/json")) - - # Enhanced result handling: Create documents based on execution results - fixed for proper content extraction - if executionResult.get("success", False): - resultData = executionResult.get("result") - - # Process results from the result dictionary if available - if isinstance(resultData, dict): - # First, create a mapping of expected output labels to their specs - expectedOutputs = {spec.get("label"): spec for spec in outputSpecs} - createdOutputs = set() - - for label, result_item in resultData.items(): - # Check if result follows the expected structure with nested content - if isinstance(result_item, dict) and "content" in result_item: - # Extract values from the properly structured result - content = result_item.get("content", "") # Extract the inner content - base64Encoded = result_item.get("base64Encoded", False) - contentType = result_item.get("contentType", "text/plain") - - # Check if this label matches one of our expected output documents - # If not, but we haven't created all expected outputs yet, try to map it - finalLabel = label - if label not in expectedOutputs and len(expectedOutputs) > 0: - # Find an unused expected output label - for expectedLabel in expectedOutputs: - if expectedLabel not in createdOutputs: - logger.warning(f"Remapping output '{label}' to expected '{expectedLabel}'") - finalLabel = expectedLabel - break - - # Create document by passing only the content to formatAgentDocumentOutput - doc = self.formatAgentDocumentOutput(finalLabel, content, contentType) - - # Override the base64Encoded flag with the value from the result - # This is needed since formatAgentDocumentOutput might determine a different value - if isinstance(base64Encoded, bool): - doc.base64Encoded = base64Encoded - - documents.append(doc) - createdOutputs.add(finalLabel) - logger.info(f"Created document from result: {finalLabel} ({contentType}, base64={base64Encoded})") - else: - # Not properly structured - log warning - logger.warning(f"Skipping improperly formatted result for '{label}'. Results must include 'content' field.") - else: - # Handle non-dictionary results - logger.warning("Execution result is not a dictionary. Creating a single output document.") - doc = self.formatAgentDocumentOutput("result.txt", str(resultData), "text/plain") - documents.append(doc) - - # 8. Return results - return { - "feedback": "Code execution completed successfully." if executionResult.get("success", False) else f"Code execution failed: {executionResult.get('error', 'Unknown error')}", - "documents": documents - } - - async def _improveCode(self, originalCode: str, error: str, executionResult: Dict[str, Any], attempt: int, outputSpecs: List[Dict[str, Any]] = None) -> Tuple[str, List[str]]: - """ - Improve code based on execution error. - Enhanced to maintain proper output handling with correct document structure. - - Args: - originalCode: The code that failed to execute - error: The error message - executionResult: Complete execution result dictionary - attempt: Current attempt number - outputSpecs: List of expected output specifications - - Returns: - Tuple of (improvedCode, requirements) - """ - # Create a string with output specifications to be included in the prompt - outputSpecsStr = "" - if outputSpecs: - outputSpecsStr = "\nEXPECTED OUTPUT DOCUMENTS:\n" - for i, spec in enumerate(outputSpecs, 1): - label = spec.get("label", f"output{i}.txt") - description = spec.get("description", "") - outputSpecsStr += f"{i}. {label} - {description}\n" - - # Create prompt for code improvement - improvementPrompt = f""" -Fix the following Python code that failed during execution. This is attempt {attempt} to fix the code. - -ORIGINAL CODE: -{originalCode} - -ERROR MESSAGE: -{error} - -STDOUT: -{executionResult.get('output', '')} -{outputSpecsStr} -INSTRUCTIONS: -1. Fix all errors identified in the error message -2. If there is a requirements error for missing or failes modules, then create alternate code with other modules -3. Diagnose and fix any logical issues -4. Pay special attention to: -- Type conversions and data handling -- Error handling and edge cases -- Resource management (file handles, etc.) -- Syntax errors and typos -5. Keep the inputFiles handling logic intact -6. Maintain the same overall structure and purpose - -OUTPUT REQUIREMENTS (VERY IMPORTANT): -- Your code MUST define a 'result' variable as a dictionary to store ALL outputs -- The key for each entry MUST be the full filename with extension (e.g., "output.txt") -- The value for each entry MUST be a dictionary with the following structure: -{{ - "content": string, # The actual content (text or base64-encoded string) - "base64Encoded": boolean, # Set to true for binary data, false for text data - "contentType": string # MIME type of the content (e.g., "text/plain", "application/json") -}} -- Example result dictionary: -result = {{ - "output.txt": {{ - "content": "This is text content", - "base64Encoded": False, - "contentType": "text/plain" - }}, - "chart.png": {{ - "content": "base64encodedstring...", - "base64Encoded": True, - "contentType": "image/png" - }} -}} -- NEVER write files to disk using open() or similar methods - use the result dictionary instead - -JSON OUTPUT (CRITICAL): -- After creating the result dictionary, you MUST print it as JSON to stdout -- Make sure your code includes: print(json.dumps(result)) as the final line -- This printed JSON is how the system captures your result - -REQUIREMENTS: -Required packages should be specified as: -# REQUIREMENTS: library==version,library2>=version -- You may add/remove requirements as needed to fix the code - -Return ONLY Python code without explanations or markdown. -""" - - # Call AI service - messages = [ - {"role": "system", "content": "You are an expert Python code debugger. Provide only fixed Python code without explanations or formatting. Ensure all generated files are included in the 'result' dictionary and that result is printed as JSON with print(json.dumps(result))."}, - {"role": "user", "content": improvementPrompt} - ] - - try: - improvedContent = await self.service.base.callAi(messages, temperature=0.2) - - # Extract code and requirements - improvedCode = self._cleanCode(improvedContent) - - # Extract requirements - requirements = [] - for line in improvedCode.split('\n'): - if line.strip().startswith("# REQUIREMENTS:"): - reqStr = line.replace("# REQUIREMENTS:", "").strip() - requirements = [r.strip() for r in reqStr.split(',') if r.strip()] - break - - return improvedCode, requirements - except Exception as e: - logger.error(f"Error improving code: {str(e)}") - return None, [] - - - async def _checkQuickCompletion(self, prompt: str, contentExtraction: List[ChatDocument], outputSpecs: List[Dict[str, Any]]) -> Dict[str, Any]: - """ - Check if the task can be completed without writing and executing code. - - Args: - prompt: The task prompt - contentExtraction: List of extracted content data with contentName and dataExtracted - outputSpecs: List of output specifications - - Returns: - Dictionary with completion status and results, or None if no quick completion - """ - # If no data or no output specs, can't do a quick completion - if not contentExtraction or not outputSpecs: - return None - - # Create a prompt for the AI to check if this can be completed directly - specsJson = json.dumps(outputSpecs) - dataJson = json.dumps([doc.dict() for doc in contentExtraction]) - - checkPrompt = f""" -Analyze this task and determine if it can be completed directly without writing code. - -TASK: -{prompt} - -EXTRACTED DATA AVAILABLE: -{dataJson} - -Each entry in the extracted data contains: -- filename: The source file name -- contentName: The specific content section name -- contentData: The AI-extracted text from the content -- contentType: The type of content (text, csv, etc.) -- summary: A brief summary of the content - -REQUIRED OUTPUT: -{specsJson} - -If the task can be completed directly with the available extracted data, respond with: -{{"complete": 1, "prompt": "Brief explanation of the solution", "documents": [ - {{"label": "filename.ext", "content": "content here"}} -]}} - -If code would be needed to properly complete this task, respond with: -{{"complete": 0, "prompt": "Explanation why code is needed"}} - -Only return valid JSON. Your entire response must be parseable as JSON. -""" - - # Call AI service - logger.debug(f"Checking if task can be completed without code execution: {checkPrompt}") - messages = [ - {"role": "system", "content": "You are an AI assistant that determines if tasks require code execution. Reply with JSON only."}, - {"role": "user", "content": checkPrompt} - ] - - try: - # Use a lower temperature for more deterministic response - response = await self.service.base.callAi(messages, produceUserAnswer = True, temperature=0.1) - - # Parse response as JSON - if response: - try: - # Find JSON in response if there's any text around it - jsonStart = response.find('{') - jsonEnd = response.rfind('}') + 1 - - if jsonStart >= 0 and jsonEnd > jsonStart: - jsonStr = response[jsonStart:jsonEnd] - result = json.loads(jsonStr) - - # Check if this is a proper response - if "complete" in result: - return result - - except json.JSONDecodeError: - logger.debug("Failed to parse quick completion response as JSON") - pass - except Exception as e: - logger.debug(f"Error during quick completion check: {str(e)}") - - # Default to requiring code execution - return None - - async def _generateCode(self, prompt: str, outputSpecs: List[ChatDocument] = None) -> Tuple[str, List[str]]: - """ - Generate Python code from a prompt with the inputFiles placeholder. - Enhanced to emphasize proper result output handling with correct document structure. - - Args: - prompt: The task prompt - outputSpecs: List of expected output specifications - - Returns: - Tuple of (code, requirements) - """ - # Create a string with output specifications to be included in the prompt - outputSpecsStr = "" - if outputSpecs: - outputSpecsStr = "\nEXPECTED OUTPUT DOCUMENTS:\n" - for i, spec in enumerate(outputSpecs, 1): - label = spec.get("label", f"output{i}.txt") - description = spec.get("description", "") - outputSpecsStr += f"{i}. {label} - {description}\n" - - # Create improved prompt for code generation - aiPrompt = f""" -Generate Python code to solve the following task: - -TASK: -{prompt} -{outputSpecsStr} -INPUT FILES: -- 'inputFiles' variable is provided as [[filename, data, isBase64], ...] -- For text files (isBase64=False): use data directly as string -- For binary files (isBase64=True): use base64.b64decode(data) - -OUTPUT REQUIREMENTS (VERY IMPORTANT): -- Your code MUST define a 'result' variable as a dictionary to store ALL outputs -- The key for each entry MUST be the full filename with extension (e.g., "output.txt") -- The value for each entry MUST be a dictionary with the following structure: -{{ - "content": string, # The actual content (text or base64-encoded string) - "base64Encoded": boolean, # Set to true for binary data, false for text data - "contentType": string # MIME type of the content (e.g., "text/plain", "application/json") -}} -- Example result dictionary: -result = {{ - "output.txt": {{ - "content": "This is text content", - "base64Encoded": False, - "contentType": "text/plain" - }}, - "chart.png": {{ - "content": "base64encodedstring...", - "base64Encoded": True, - "contentType": "image/png" - }} -}} -- NEVER write files to disk using open() or similar methods - use the result dictionary instead -- If you generate any charts, reports, or visualizations, ensure they are properly encoded and included - -IMPORTANT - USE EXACT OUTPUT FILENAMES: -- You MUST use the EXACT filenames specified in EXPECTED OUTPUT DOCUMENTS section -- The key in the result dictionary must match these filenames precisely -- If no output documents are specified, use appropriate descriptive filenames - -JSON OUTPUT (CRITICAL): -- After creating the result dictionary, you MUST print it as JSON to stdout using json.dumps() -- Add these lines at the end of your code: - import json # if not already imported - print(json.dumps(result)) -- This printed JSON is how the system captures your result -- Make sure this is the last thing your code prints - -BINARY DATA HANDLING: -- For binary content (images, PDFs, etc.), convert to base64 string and set base64Encoded=True -- For text content (text, JSON, HTML, etc.), use plain string and set base64Encoded=False -- Use appropriate MIME types for different content types - -CODE QUALITY: -- Use explicit type conversions where needed (int/float/str) -- Implement feature detection, not version checks -- Handle errors gracefully with appropriate fallbacks -- Follow latest API conventions for libraries -- Validate inputs before processing - -Your code must start with: -inputFiles = "=== JSONLOAD ===" # DO NOT CHANGE THIS LINE - -REQUIREMENTS: -Required packages should be specified as: -# REQUIREMENTS: library==version,library2>=version -- Specify exact versions for critical libraries -- Use constraint operators (==,>=,<=) as needed - -Return ONLY Python code without explanations or markdown. -""" - - # Call AI service - messages = [ - {"role": "system", "content": "You are a Python code generator. Provide only valid Python code without explanations or formatting. Always output the result dictionary as JSON using print(json.dumps(result)) at the end of your code."}, - {"role": "user", "content": aiPrompt} - ] - - generatedContent = await self.service.base.callAi(messages, temperature=0.1) - - # Extract code and requirements - code = self._cleanCode(generatedContent) - - # Extract requirements - requirements = [] - for line in code.split('\n'): - if line.strip().startswith("# REQUIREMENTS:"): - reqStr = line.replace("# REQUIREMENTS:", "").strip() - requirements = [r.strip() for r in reqStr.split(',') if r.strip()] - break - - return code, requirements - - def _executeCodeProd(self, code: str, requirements: List[str] = None) -> Dict[str, Any]: - """ - Execute Python code in Azure environment using the antenv interpreter. - Optimized for production use in Azure Web App environment where venv creation fails. - - Args: - code: Python code to execute - requirements: List of required packages - - Returns: - Execution result dictionary - """ - try: - # 1. Create temp directory for code files - self.tempDir = tempfile.mkdtemp(prefix="code_exec_") - - # Try different possible paths to find the antenv Python interpreter - possible_python_paths = [ - "/home/site/wwwroot/antenv/bin/python", - "/antenv/bin/python", - "/tmp/8dd8c226509f116/antenv/bin/python", # Path from your error logs - sys.executable # Fallback to system Python - ] - - pythonExe = None - for path in possible_python_paths: - if os.path.exists(path): - pythonExe = path - logger.info(f"Found Python interpreter at: {pythonExe}") - break - - if not pythonExe: - logger.error("Could not find a valid Python interpreter in Azure environment") - return { - "success": False, - "output": "", - "error": "Could not find a valid Python interpreter in Azure environment", - "result": None, - "exitCode": -1 - } - - # 2. Install requirements to a temporary user directory if provided - if requirements: - logger.info(f"Installing requirements in Azure environment: {requirements}") - - # Create requirements.txt - reqFile = os.path.join(self.tempDir, "requirements.txt") - with open(reqFile, "w") as f: - f.write("\n".join(requirements)) - - # Set up a custom PYTHONUSERBASE to isolate package installations - custom_user_base = os.path.join(self.tempDir, "pip_packages") - os.makedirs(custom_user_base, exist_ok=True) - - env = os.environ.copy() - env["PYTHONUSERBASE"] = custom_user_base - - # Install requirements to the custom user directory - try: - pipResult = subprocess.run( - [pythonExe, "-m", "pip", "install", "--user", "-r", reqFile], - capture_output=True, - text=True, - env=env, - timeout=int(APP_CONFIG.get("Agent_Coder_INSTALL_TIMEOUT")) - ) - - if pipResult.returncode != 0: - logger.warning(f"Error installing requirements in Azure: {pipResult.stderr}") - else: - logger.info(f"Requirements installed successfully to {custom_user_base}") - - # Try to find the site-packages directory - import glob - site_packages = os.path.join(custom_user_base, "lib", "python*", "site-packages") - site_packages_paths = glob.glob(site_packages) - - if site_packages_paths: - env["PYTHONPATH"] = os.pathsep.join([site_packages_paths[0], env.get("PYTHONPATH", "")]) - logger.info(f"Added {site_packages_paths[0]} to PYTHONPATH") - else: - # Alternative paths for different Python versions - alt_site_packages = os.path.join(custom_user_base, "site-packages") - if os.path.exists(alt_site_packages): - env["PYTHONPATH"] = os.pathsep.join([alt_site_packages, env.get("PYTHONPATH", "")]) - logger.info(f"Added {alt_site_packages} to PYTHONPATH") - except Exception as e: - logger.warning(f"Exception during requirements installation in Azure: {str(e)}") - else: - env = os.environ.copy() - - # 3. Write code to file - codeFile = os.path.join(self.tempDir, "code.py") - with open(codeFile, "w", encoding="utf-8") as f: - f.write(code) - - # 4. Execute code with the modified environment - logger.debug(f"Executing code in Azure environment with timeout of {self.executorTimeout} seconds") - process = subprocess.run( - [pythonExe, codeFile], - timeout=self.executorTimeout, - capture_output=True, - text=True, - env=env - ) - - # 5. Process results - stdout = process.stdout - stderr = process.stderr - - # Try to extract result from stdout - resultData = None - if process.returncode == 0: - try: - # Find the last line that might be JSON - jsonLines = [] - for line in stdout.strip().split('\n'): - line = line.strip() - if line and line[0] in '{[' and line[-1] in '}]': - try: - parsed = json.loads(line) - jsonLines.append((line, parsed)) - except json.JSONDecodeError: - continue - - # Use the last valid JSON that appears to be a dictionary - if jsonLines: - for line, parsed in reversed(jsonLines): - if isinstance(parsed, dict): - resultData = parsed - logger.debug(f"Extracted result data from stdout: {type(resultData)}") - break - except Exception as e: - logger.debug(f"Error extracting result from stdout: {str(e)}") - - # Enhanced logging of what was found - if resultData: - logger.info(f"Found result dictionary with {len(resultData)} entries: {list(resultData.keys())}") - else: - logger.warning("No result dictionary found in output") - - # Create result dictionary - return { - "success": process.returncode == 0, - "output": stdout, - "error": stderr if process.returncode != 0 else "", - "result": resultData, - "exitCode": process.returncode - } - - except subprocess.TimeoutExpired: - logger.error(f"Execution in Azure timed out after {self.executorTimeout} seconds") - return { - "success": False, - "output": "", - "error": f"Execution timed out after {self.executorTimeout} seconds", - "result": None, - "exitCode": -1 - } - except Exception as e: - logger.error(f"Execution error in Azure environment: {str(e)}") - return { - "success": False, - "output": "", - "error": f"Execution error in Azure environment: {str(e)}", - "result": None, - "exitCode": -1 - } - finally: - # Clean up resources - self._cleanupExecution() - - def _executeCodeVenv(self, code: str, requirements: List[str] = None) -> Dict[str, Any]: - """ - Execute Python code in a virtual environment. - Original implementation with venv creation for non-Azure environments. - - Args: - code: Python code to execute - requirements: List of required packages - - Returns: - Execution result dictionary - """ - try: - # 1. Create temp directory and virtual environment - self.tempDir = tempfile.mkdtemp(prefix="code_exec_") - venvPath = os.path.join(self.tempDir, "venv") - - # Create venv - logger.debug(f"Creating virtual environment at {venvPath}") - - try: - # First try with sys.executable - the standard approach - subprocess.run([sys.executable, "-m", "venv", venvPath], - check=True, capture_output=True, timeout=60) - logger.debug("Virtual environment created successfully with sys.executable") - except (subprocess.SubprocessError, subprocess.CalledProcessError) as e: - logger.warning(f"Failed to create venv with sys.executable: {str(e)}") - - # Fallback method 1: Try with explicit 'python3' command - try: - logger.debug("Trying to create virtual environment with python3 command") - subprocess.run(["python3", "-m", "venv", venvPath], - check=True, capture_output=True, timeout=60) - logger.debug("Virtual environment created successfully with python3") - except (subprocess.SubprocessError, subprocess.CalledProcessError) as e: - logger.warning(f"Failed to create venv with python3: {str(e)}") - - # Fallback method 2: Try with virtualenv instead of venv - try: - logger.debug("Trying to create virtual environment with virtualenv module") - subprocess.run([sys.executable, "-m", "pip", "install", "virtualenv"], - check=False, capture_output=True, timeout=60) - subprocess.run([sys.executable, "-m", "virtualenv", venvPath], - check=True, capture_output=True, timeout=60) - logger.debug("Virtual environment created successfully with virtualenv") - except (subprocess.SubprocessError, subprocess.CalledProcessError) as e: - # If all methods fail, raise an exception - error_msg = f"Failed to create virtual environment with all methods: {str(e)}" - logger.error(error_msg) - raise RuntimeError(error_msg) - - # Get Python executable path - adjusted for OS - if os.name == 'nt': # Windows - pythonExe = os.path.join(venvPath, "Scripts", "python.exe") - else: # Linux/Mac - pythonExe = os.path.join(venvPath, "bin", "python") - - # Verify python executable exists - if not os.path.exists(pythonExe): - # Try to find it - if os.name == 'nt': - possible_paths = [ - os.path.join(venvPath, "Scripts", "python.exe"), - os.path.join(venvPath, "Scripts", "python") - ] - else: - possible_paths = [ - os.path.join(venvPath, "bin", "python"), - os.path.join(venvPath, "bin", "python3") - ] - - for path in possible_paths: - if os.path.exists(path): - pythonExe = path - logger.debug(f"Found Python executable at: {pythonExe}") - break - - if not os.path.exists(pythonExe): - logger.error(f"Python executable not found at expected path: {pythonExe}") - raise FileNotFoundError(f"Python executable not found in virtual environment") - - # 2. Install requirements if provided - if requirements: - logger.info(f"Installing requirements: {requirements}") - - # Create requirements.txt - reqFile = os.path.join(self.tempDir, "requirements.txt") - with open(reqFile, "w") as f: - f.write("\n".join(requirements)) - - x="\n".join(requirements) - logger.info(f"Requirements file: {x}.") - - # Install requirements - try: - pipResult = subprocess.run( - [pythonExe, "-m", "pip", "install", "-r", reqFile], - capture_output=True, - text=True, - timeout=int(APP_CONFIG.get("Agent_Coder_INSTALL_TIMEOUT")) - ) - if pipResult.returncode != 0: - logger.debug(f"Error installing requirements: {pipResult.stderr}") - else: - logger.debug(f"Requirements installed successfully") - # Log installed packages if in debug mode - if logger.isEnabledFor(logging.DEBUG): - pipList = subprocess.run( - [pythonExe, "-m", "pip", "list"], - capture_output=True, - text=True - ) - logger.debug(f"Installed packages:\n{pipList.stdout}") - - except Exception as e: - logger.debug(f"Exception during requirements installation: {str(e)}") - - # 3. Write code to file - codeFile = os.path.join(self.tempDir, "code.py") - with open(codeFile, "w", encoding="utf-8") as f: - f.write(code) - - # 4. Execute code - logger.debug(f"Executing code with timeout of {self.executorTimeout} seconds. Code: {code}") - process = subprocess.run( - [pythonExe, codeFile], - timeout=self.executorTimeout, - capture_output=True, - text=True - ) - - # 5. Process results - stdout = process.stdout - stderr = process.stderr - - # Try to extract result from stdout - resultData = None - if process.returncode == 0: - try: - # Find the last line that might be JSON - jsonLines = [] - for line in stdout.strip().split('\n'): - line = line.strip() - if line and line[0] in '{[' and line[-1] in '}]': - try: - parsed = json.loads(line) - jsonLines.append((line, parsed)) - except json.JSONDecodeError: - continue - - # Use the last valid JSON that appears to be a dictionary - if jsonLines: - for line, parsed in reversed(jsonLines): - if isinstance(parsed, dict): - resultData = parsed - logger.debug(f"Extracted result data from stdout: {type(resultData)}") - break - except Exception as e: - logger.debug(f"Error extracting result from stdout: {str(e)}") - - # Enhanced logging of what was found - if resultData: - logger.info(f"Found result dictionary with {len(resultData)} entries: {list(resultData.keys())}") - else: - logger.warning("No result dictionary found in output") - - # Create result dictionary - return { - "success": process.returncode == 0, - "output": stdout, - "error": stderr if process.returncode != 0 else "", - "result": resultData, - "exitCode": process.returncode - } - - except subprocess.TimeoutExpired: - logger.error(f"Execution timed out after {self.executorTimeout} seconds") - return { - "success": False, - "output": "", - "error": f"Execution timed out after {self.executorTimeout} seconds", - "result": None, - "exitCode": -1 - } - except Exception as e: - logger.error(f"Execution error: {str(e)}") - return { - "success": False, - "output": "", - "error": f"Execution error: {str(e)}", - "result": None, - "exitCode": -1 - } - finally: - # Clean up resources - self._cleanupExecution() - - def _executeCode(self, code: str, requirements: List[str] = None) -> Dict[str, Any]: - """ - Execute Python code in the appropriate environment based on configuration. - - Args: - code: Python code to execute - requirements: List of required packages - - Returns: - Execution result dictionary - """ - # Check if we're in a production Azure environment - env_type = APP_CONFIG.get("APP_ENV_TYPE", "dev").lower() - - logger.info(f"Executing code in environment type: {env_type}") - - if env_type == "prod": - # Use the Azure-optimized execution method - logger.info("Using Azure-optimized code execution method") - return self._executeCodeProd(code, requirements) - else: - # Use the standard virtual environment execution method - logger.info("Using standard virtual environment execution method") - return self._executeCodeVenv(code, requirements) - - - def _cleanupExecution(self): - """Clean up temporary resources from code execution.""" - if self.tempDir and os.path.exists(self.tempDir): - try: - logger.debug(f"Cleaning up temporary directory: {self.tempDir}") - shutil.rmtree(self.tempDir) - self.tempDir = None - except Exception as e: - logger.warning(f"Error cleaning up temp directory: {str(e)}") - - def _cleanCode(self, code: str) -> str: - """Remove any markdown formatting or explanations.""" - # Remove code block markers - code = code.replace("```python", "").replace("```", "") - - # Remove explanations before or after code - lines = code.strip().split('\n') - startIndex = 0 - endIndex = len(lines) - - # Find start of actual code - for i, line in enumerate(lines): - if line.strip().startswith("inputFiles =") or line.strip().startswith("# REQUIREMENTS:"): - startIndex = i - break - - # Clean code - cleanedCode = '\n'.join(lines[startIndex:endIndex]) - return cleanedCode.strip() - - def formatAgentDocumentOutput(self, filename: str, content: str, contentType: str) -> ChatDocument: - """ - Format a document for agent output. - - Args: - filename: Output filename - content: Document content - contentType: MIME type of the content - - Returns: - ChatDocument object - """ - # Split filename into name and extension - name, ext = os.path.splitext(filename) - if ext.startswith('.'): - ext = ext[1:] - - # Create document object - return ChatDocument( - id=str(uuid.uuid4()), - name=name, - ext=ext, - data=content, - contents=[ - ChatContent( - name="main", - data=content, - summary=f"Generated {filename}", - metadata={"contentType": contentType} - ) - ] - ) - -# Factory function for the Coder agent -def getAgentCoder(): - """Returns an instance of the Coder agent.""" - return AgentCoder() \ No newline at end of file diff --git a/modules/historic_data_agents/agentDocumentation.py b/modules/historic_data_agents/agentDocumentation.py deleted file mode 100644 index 1cf3e3b2..00000000 --- a/modules/historic_data_agents/agentDocumentation.py +++ /dev/null @@ -1,537 +0,0 @@ -""" -Documentation agent for generating structured documentation. -Provides comprehensive documentation generation capabilities. -""" - -import logging -from typing import Dict, Any, List, Optional -import json -import re -from datetime import datetime -import os -import hashlib -import base64 -import uuid -import shutil -from pathlib import Path -import traceback -import sys -import importlib.util -import inspect -from pydantic import BaseModel - -from modules.workflow.agentBase import AgentBase -from modules.interfaces.serviceChatModel import ChatContent - -logger = logging.getLogger(__name__) - -class AgentDocumentation(AgentBase): - """AI-driven agent for creating documentation and structured content using multi-step generation""" - - def __init__(self): - """Initialize the documentation agent""" - super().__init__() - self.name = "documentation" - self.label = "Documentation" - self.description = "Creates structured documentation, reports, and content using AI with multi-step generation" - self.capabilities = [ - "report_generation", - "documentation", - "content_structuring", - "technical_writing", - "knowledge_organization" - ] - - def setDependencies(self, serviceBase=None): - """Set external dependencies for the agent.""" - self.setService(serviceBase) - - async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]: - """ - Process a task by focusing on required outputs and using AI to generate them. - - Args: - task: Task dictionary with prompt, inputDocuments, outputSpecifications - - Returns: - Dictionary with feedback and documents - """ - try: - # Extract task information - prompt = task.get("prompt", "") - inputDocuments = task.get("inputDocuments", []) - outputSpecs = task.get("outputSpecifications", []) - - # Check AI service - if not self.service or not self.service.base: - return { - "feedback": "The Documentation agent requires an AI service to function.", - "documents": [] - } - - # Extract context from input documents - focusing only on dataExtracted - documentContext = self._extractDocumentContext(inputDocuments) - - # Create task analysis to understand the requirements - documentationPlan = await self._analyzeTask(prompt, documentContext, outputSpecs) - logger.debug(f"Documentation plan: {documentationPlan}") - - # Generate all required output documents - documents = [] - - # If no output specs provided, create default document - if not outputSpecs: - defaultFormat = documentationPlan.get("recommendedFormat", "markdown") - defaultTitle = documentationPlan.get("title", "Documentation") - safeTitle = self._sanitizeFilename(defaultTitle) - - outputSpecs = [ - {"label": f"{safeTitle}.{defaultFormat}", "description": "Comprehensive documentation"} - ] - - # Process each output specification - for spec in outputSpecs: - outputLabel = spec.get("label", "") - outputDescription = spec.get("description", "") - - # Generate the document using multi-step approach - document = await self._createDocumentMultiStep( - prompt, - documentContext, - outputLabel, - outputDescription, - documentationPlan - ) - - documents.append(document) - - # Generate feedback - feedback = documentationPlan.get("feedback", f"Created {len(documents)} documents based on your requirements.") - - return { - "feedback": feedback, - "documents": documents - } - - except Exception as e: - logger.error(f"Error in documentation generation: {str(e)}", exc_info=True) - return { - "feedback": f"Error during documentation generation: {str(e)}", - "documents": [] - } - - def _extractDocumentContext(self, documents: List[Dict[str, Any]]) -> str: - """ - Extract context from input documents, focusing on dataExtracted. - - Args: - documents: List of document objects - - Returns: - Extracted context as text - """ - contextParts = [] - - for doc in documents: - docName = doc.get("name", "unnamed") - if doc.get("ext"): - docName = f"{docName}.{doc.get('ext')}" - - contextParts.append(f"\n\n--- {docName} ---\n") - - # Process contents for dataExtracted - for content in doc.get("contents", []): - if content.get("dataExtracted"): - contextParts.append(content.get("dataExtracted", "")) - - return "\n".join(contextParts) - - def _sanitizeFilename(self, filename: str) -> str: - """ - Sanitize a filename by removing invalid characters. - - Args: - filename: Filename to sanitize - - Returns: - Sanitized filename - """ - # Replace invalid characters with underscores - invalidChars = r'<>:"/\|?*' - for char in invalidChars: - filename = filename.replace(char, '_') - - # Trim filename if too long - if len(filename) > 100: - filename = filename[:97] + "..." - - return filename - - async def _analyzeTask(self, prompt: str, context: str, outputSpecs: List) -> Dict: - """ - Use AI to analyze the task and create a documentation plan. - - Args: - prompt: The task prompt - context: Document context - outputSpecs: Output specifications - - Returns: - Documentation plan dictionary - """ - analysisPrompt = f""" - Analyze this documentation task and create a detailed plan. - - TASK: {prompt} - - DOCUMENT CONTEXT SAMPLE: - {context[:1000]}... (truncated) - - OUTPUT REQUIREMENTS: - {json.dumps(outputSpecs, indent=2)} - - Create a detailed documentation plan in JSON format with the following structure: - {{ - "title": "Document Title", - "documentType": "report|manual|guide|whitepaper|etc", - "audience": "technical|general|executive|etc", - "detailedStructure": [ - {{ - "title": "Chapter/Section Title", - "keyPoints": ["point1", "point2", ...], - "subsections": ["subsection1", "subsection2", ...], - "importance": "high|medium|low", - "estimatedLength": "short|medium|long" - }}, - ... more sections ... - ], - "keyTopics": ["topic1", "topic2", ...], - "tone": "formal|conversational|instructional|etc", - "recommendedFormat": "markdown|html|text|etc", - "formattingRequirements": ["requirement1", "requirement2", ...], - "executiveSummary": "Brief description of what the document will cover", - "feedback": "Brief message explaining the documentation approach" - }} - - Only return valid JSON. No preamble or explanations. - """ - - try: - response = await self.service.base.callAi([ - {"role": "system", "content": "You are a documentation expert. Respond with valid JSON only."}, - {"role": "user", "content": analysisPrompt} - ]) - - # Extract JSON from response - jsonStart = response.find('{') - jsonEnd = response.rfind('}') + 1 - - if jsonStart >= 0 and jsonEnd > jsonStart: - plan = json.loads(response[jsonStart:jsonEnd]) - return plan - else: - # Fallback if JSON not found - return { - "title": "Documentation (DEFAULT)", - "documentType": "report", - "audience": "general", - "detailedStructure": [ - { - "title": "Introduction", - "keyPoints": ["Purpose", "Scope"], - "subsections": [], - "importance": "high", - "estimatedLength": "short" - }, - { - "title": "Main Content", - "keyPoints": ["Core Information"], - "subsections": ["Key Findings", "Analysis"], - "importance": "high", - "estimatedLength": "long" - }, - { - "title": "Conclusion", - "keyPoints": ["Summary", "Next Steps"], - "subsections": [], - "importance": "medium", - "estimatedLength": "short" - } - ], - "keyTopics": ["General Information"], - "tone": "formal", - "recommendedFormat": "markdown", - "formattingRequirements": ["Clear headings", "Professional formatting"], - "executiveSummary": "A comprehensive documentation covering the requested topics.", - "feedback": "Created documentation based on your requirements." - } - - except Exception as e: - logger.warning(f"Error creating documentation plan: {str(e)}") - return { - "title": "Documentation", - "documentType": "report", - "audience": "general", - "detailedStructure": [ - { - "title": "Introduction", - "keyPoints": ["Purpose", "Scope"], - "subsections": [], - "importance": "high", - "estimatedLength": "short" - }, - { - "title": "Main Content", - "keyPoints": ["Core Information"], - "subsections": ["Key Findings", "Analysis"], - "importance": "high", - "estimatedLength": "long" - }, - { - "title": "Conclusion", - "keyPoints": ["Summary", "Next Steps"], - "subsections": [], - "importance": "medium", - "estimatedLength": "short" - } - ], - "keyTopics": ["General Information"], - "tone": "formal", - "recommendedFormat": "markdown", - "formattingRequirements": ["Clear headings", "Professional formatting"], - "executiveSummary": "A comprehensive documentation covering the requested topics.", - "feedback": "Created documentation based on your requirements." - } - - async def _createDocumentMultiStep(self, prompt: str, context: str, outputLabel: str, - outputDescription: str, documentationPlan: Dict) -> ChatContent: - """ - Create a document using a multi-step approach with separate AI calls for each section. - - Args: - prompt: Original task prompt - context: Document context - outputLabel: Output filename - outputDescription: Description of desired output - documentationPlan: Documentation plan from AI - - Returns: - ChatContent object - """ - try: - # Determine format from filename - formatType = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "md" - - # Map format to contentType - contentTypeMap = { - "md": "text/markdown", - "markdown": "text/markdown", - "html": "text/html", - "txt": "text/plain", - "text": "text/plain", - "json": "application/json", - "csv": "text/csv" - } - - contentType = contentTypeMap.get(formatType, "text/plain") - - # Get document information - title = documentationPlan.get("title", "Documentation") - documentType = documentationPlan.get("documentType", "document") - audience = documentationPlan.get("audience", "general") - tone = documentationPlan.get("tone", "formal") - keyTopics = documentationPlan.get("keyTopics", []) - formattingRequirements = documentationPlan.get("formattingRequirements", []) - - # Get the detailed structure - detailedStructure = documentationPlan.get("detailedStructure", []) - - # Step 1: Generate executive summary - summaryPrompt = f""" - Create an executive summary for a {documentType} titled "{title}". - - DOCUMENT OVERVIEW: - - Type: {documentType} - - Audience: {audience} - - Key Topics: {', '.join(keyTopics)} - - TASK CONTEXT: {prompt} - - The executive summary should: - 1. Provide a concise overview of the document's purpose - 2. Highlight key points and findings - 3. Be clear and engaging for the target audience - 4. Set expectations for the document's content - - Keep the summary brief but comprehensive. - """ - - executiveSummary = await self.service.base.callAi([ - {"role": "system", "content": f"You are a documentation expert creating an executive summary in {formatType} format."}, - {"role": "user", "content": summaryPrompt} - ], produceUserAnswer = True) - - # Step 2: Generate introduction - introPrompt = f""" - Create an introduction for a {documentType} titled "{title}". - - DOCUMENT OVERVIEW: - - Type: {documentType} - - Audience: {audience} - - Key Topics: {', '.join(keyTopics)} - - TASK CONTEXT: {prompt} - - The introduction should: - 1. Set the context and purpose of the document - 2. Outline the scope and objectives - 3. Preview the main topics to be covered - 4. Engage the reader's interest - - Format the introduction according to {formatType} standards. - """ - - introduction = await self.service.base.callAi([ - {"role": "system", "content": f"You are a documentation expert creating an introduction in {formatType} format."}, - {"role": "user", "content": introPrompt} - ], produceUserAnswer = True) - - # Step 3: Generate main sections - sections = [] - for section in detailedStructure: - sectionTitle = section.get("title", "Section") - keyPoints = section.get("keyPoints", []) - subsections = section.get("subsections", []) - importance = section.get("importance", "medium") - estimatedLength = section.get("estimatedLength", "medium") - - sectionPrompt = f""" - Create the {sectionTitle} section for a {documentType} titled "{title}". - - SECTION DETAILS: - - Title: {sectionTitle} - - Key Points: {', '.join(keyPoints)} - - Subsections: {', '.join(subsections)} - - Importance: {importance} - - Estimated Length: {estimatedLength} - - DOCUMENT CONTEXT: - - Type: {documentType} - - Audience: {audience} - - Key Topics: {', '.join(keyTopics)} - - TASK CONTEXT: {prompt} - - The section should: - 1. Cover all key points thoroughly - 2. Include relevant subsections - 3. Maintain appropriate depth based on importance - 4. Follow the document's tone and style - - Format the section according to {formatType} standards. - """ - - sectionContent = await self.service.base.callAi([ - {"role": "system", "content": f"You are a documentation expert creating a section in {formatType} format."}, - {"role": "user", "content": sectionPrompt} - ], produceUserAnswer = True) - - sections.append(sectionContent) - - # Step 4: Generate conclusion - conclusionPrompt = f""" - Create the conclusion for a {documentType} titled "{title}". - - DOCUMENT OVERVIEW: - - Type: {documentType} - - Audience: {audience} - - Key Topics: {', '.join(keyTopics)} - - TASK CONTEXT: {prompt} - - This conclusion should: - 1. Summarize the key points covered in the document - 2. Provide closure to the topics discussed - 3. Include any relevant recommendations or next steps - 4. Leave the reader with a clear understanding of the document's significance - - The conclusion should be professional and impactful, formatted according to {formatType} standards. - """ - - conclusion = await self.service.base.callAi([ - {"role": "system", "content": f"You are a documentation expert creating a conclusion in {formatType} format."}, - {"role": "user", "content": conclusionPrompt} - ], produceUserAnswer = True) - - # Step 5: Assemble the complete document - if formatType in ["md", "markdown"]: - # Markdown format - documentContent = f"# {title}\n\n" - - if executiveSummary: - documentContent += f"## Executive Summary\n\n{executiveSummary}\n\n" - - documentContent += f"{introduction}\n\n" - - for i, sectionContent in enumerate(sections): - # Ensure section starts with heading if not already - sectionTitle = detailedStructure[i].get("title", f"Section {i+1}") - if not sectionContent.strip().startswith("#"): - documentContent += f"## {sectionTitle}\n\n" - documentContent += f"{sectionContent}\n\n" - - documentContent += f"## Conclusion\n\n{conclusion}\n" - - elif formatType == "html": - # HTML format - documentContent = f"\n\n{title}\n\n\n" - documentContent += f"

{title}

\n\n" - - if executiveSummary: - documentContent += f"

Executive Summary

\n
{executiveSummary}
\n\n" - - documentContent += f"
{introduction}
\n\n" - - for i, sectionContent in enumerate(sections): - sectionTitle = detailedStructure[i].get("title", f"Section {i+1}") - documentContent += f"

{sectionTitle}

\n
{sectionContent}
\n\n" - - documentContent += f"

Conclusion

\n
{conclusion}
\n" - documentContent += "\n" - - else: - # Plain text format - documentContent = f"{title}\n{'=' * len(title)}\n\n" - - if executiveSummary: - documentContent += f"EXECUTIVE SUMMARY\n{'-' * 17}\n\n{executiveSummary}\n\n" - - documentContent += f"{introduction}\n\n" - - for i, sectionContent in enumerate(sections): - sectionTitle = detailedStructure[i].get("title", f"Section {i+1}") - documentContent += f"{sectionTitle}\n{'-' * len(sectionTitle)}\n\n{sectionContent}\n\n" - - documentContent += f"CONCLUSION\n{'-' * 10}\n\n{conclusion}\n" - - # Create document object - return self.formatAgentDocumentOutput(outputLabel, documentContent, contentType) - - except Exception as e: - logger.error(f"Error creating document: {str(e)}", exc_info=True) - - # Create a simple error document - if formatType in ["md", "markdown"]: - content = f"# Error in Documentation\n\nThere was an error generating the documentation: {str(e)}" - elif formatType == "html": - content = f"

Error in Documentation

There was an error generating the documentation: {str(e)}

" - else: - content = f"Error in Documentation\n\nThere was an error generating the documentation: {str(e)}" - - return self.formatAgentDocumentOutput(outputLabel, content, contentType) - - -# Factory function for the Documentation agent -def getAgentDocumentation(): - """Returns an instance of the Documentation agent.""" - return AgentDocumentation() \ No newline at end of file diff --git a/modules/historic_data_agents/agentEmail.py b/modules/historic_data_agents/agentEmail.py deleted file mode 100644 index 6c6e2f5f..00000000 --- a/modules/historic_data_agents/agentEmail.py +++ /dev/null @@ -1,380 +0,0 @@ -""" -Email Agent Module. -Handles email-related tasks using Microsoft Graph API. -""" - -import logging -import json -from typing import Dict, Any, List, Optional, Tuple -import uuid -import os - -from modules.workflow.agentBase import AgentBase -from modules.interfaces.serviceChatModel import Task, ChatDocument, ChatContent - -logger = logging.getLogger(__name__) - -class AgentEmail(AgentBase): - """Agent for handling email-related tasks.""" - - def __init__(self): - """Initialize the email agent.""" - super().__init__() - self.name = "email" - self.label = "Email Agent" - self.description = "Handles email composition and sending using Microsoft Graph API" - self.capabilities = [ - "email_composition", - "email_draft_creation", - "email_template_generation" - ] - self.serviceBase = None - - def setDependencies(self, serviceBase=None): - """Set external dependencies for the agent.""" - self.serviceBase = serviceBase - - async def processTask(self, task: Task) -> Dict[str, Any]: - """ - Process an email-related task. - - Args: - task: Task object containing: - - prompt: Instructions for the agent - - inputDocuments: List of documents to process - - outputSpecifications: List of required output documents - - context: Additional context including workflow info - - Returns: - Dictionary containing: - - feedback: Text response explaining what was done - - documents: List of created documents - """ - try: - # Extract task information - prompt = task.prompt - inputDocuments = task.filesInput - outputSpecs = task.filesOutput - - # Check AI service - if not self.service.base: - return { - "feedback": "The Email agent requires an AI service to function.", - "documents": [] - } - - # Check if Microsoft connector is available - if not hasattr(self.service, 'msft'): - return { - "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.", - "documents": [] - } - - # Get Microsoft token - token_data = self.service.msft.getMsftToken() - if not token_data: - # Create authentication trigger document - auth_doc = self._createFrontendAuthTriggerDocument() - return { - "feedback": "Microsoft authentication required. Please authenticate to continue.", - "documents": [auth_doc] - } - - # Extract document data from input - documentContents, attachments = self._processInputDocuments(inputDocuments) - - # Generate email subject and body using AI - emailTemplate = await self._generateEmailTemplate(prompt, documentContents) - - # Create HTML preview of the email - htmlPreview = self._createHtmlPreview(emailTemplate) - - # Attempt to create a draft email using Microsoft Graph API - draft_result = self.service.msft.createDraftEmail( - emailTemplate["recipient"], - emailTemplate["subject"], - emailTemplate["htmlBody"], - attachments - ) - - # Prepare output documents - documents = [] - - # Process output specifications - for spec in outputSpecs: - label = spec.get("label", "") - description = spec.get("description", "") - - if label.endswith(".html"): - # Create the HTML template file - templateDoc = self.formatAgentDocumentOutput( - label, - emailTemplate["htmlBody"], # Use the actual HTML body, not the preview - "text/html" - ) - documents.append(templateDoc) - elif label.endswith(".json"): - # Create JSON template if requested - templateJson = json.dumps(emailTemplate, indent=2) - templateDoc = self.formatAgentDocumentOutput( - label, - templateJson, - "application/json" - ) - documents.append(templateDoc) - else: - # Default to preview for other cases - previewDoc = self.formatAgentDocumentOutput( - label, - htmlPreview, - "text/html" - ) - documents.append(previewDoc) - - # Prepare feedback message - if draft_result: - feedback = f"Email draft created successfully for {emailTemplate.get('recipient')}. The subject is: '{emailTemplate['subject']}'" - if attachments: - feedback += f" with {len(attachments)} attachment(s)" - feedback += ". You can open and edit it in your Outlook draft folder." - else: - feedback = "Email template created but could not save as draft. HTML preview and template are available as documents." - - return { - "feedback": feedback, - "documents": documents - } - - except Exception as e: - logger.error(f"Error in email agent: {str(e)}") - return { - "feedback": f"Error processing email task: {str(e)}", - "documents": [] - } - - def _createFrontendAuthTriggerDocument(self) -> ChatDocument: - """Create a document that triggers Microsoft authentication in the frontend.""" - return ChatDocument( - id=str(uuid.uuid4()), - name="microsoft_auth", - ext="html", - data=""" -
-

Microsoft Authentication Required

-

Please click the button below to authenticate with Microsoft:

- -
- """, - contents=[ - ChatContent( - name="main", - data=""" -
-

Microsoft Authentication Required

-

Please click the button below to authenticate with Microsoft:

- -
- """, - summary="Microsoft authentication trigger page", - metadata={ - "contentType": "text/html", - "isText": True - } - ) - ] - ) - - def _processInputDocuments(self, input_docs: List[ChatDocument]) -> Tuple[str, List[Dict[str, Any]]]: - """ - Process input documents to extract content and prepare attachments. - - Args: - input_docs: List of input documents - - Returns: - Tuple of (document content text, list of attachments) - """ - documentContents = [] - attachments = [] - - for doc in input_docs: - docName = doc.name - if doc.ext: - docName = f"{docName}.{doc.ext}" - - # Add document name to contents - documentContents.append(f"\n\n--- {docName} ---\n") - - # Process document data directly - if doc.data: - # Add to attachments with proper metadata - attachments.append({ - "name": docName, - "document": { - "data": doc.data, - "mimeType": doc.contents[0].metadata.get("contentType", "application/octet-stream") if doc.contents else "application/octet-stream", - "base64Encoded": doc.contents[0].metadata.get("base64Encoded", False) if doc.contents else False - } - }) - documentContents.append(f"Document attached: {docName}") - else: - documentContents.append(f"Document referenced: {docName}") - - return "\n".join(documentContents), attachments - - def formatAgentDocumentOutput(self, filename: str, content: str, contentType: str) -> ChatDocument: - """ - Format a document for agent output. - - Args: - filename: Output filename - content: Document content - contentType: MIME type of the content - - Returns: - ChatDocument object - """ - # Split filename into name and extension - name, ext = os.path.splitext(filename) - if ext.startswith('.'): - ext = ext[1:] - - # Create document object - return ChatDocument( - id=str(uuid.uuid4()), - name=name, - ext=ext, - data=content, - contents=[ - ChatContent( - name="main", - data=content, - summary=f"Generated {filename}", - metadata={"contentType": contentType} - ) - ] - ) - - async def _generateEmailTemplate(self, prompt: str, documentContents: str) -> Dict[str, Any]: - """ - Generate email template using AI. - - Args: - prompt: The task prompt - documentContents: Extracted document content - - Returns: - Email template dictionary with recipient, subject, body - """ - emailPrompt = f""" - Create an email based on the following request: - - REQUEST: {prompt} - - DOCUMENT CONTENTS: - {documentContents[:2000]}... (truncated if longer) - - Generate an email template with: - 1. A relevant recipient (use placeholder or derive from content if possible) - 2. A concise but descriptive subject line - 3. A professional HTML-formatted email body - 4. Appropriate greeting and closing - - Format your response as JSON with these fields: - - recipient: email address - - subject: subject line - - plainBody: plain text version - - htmlBody: HTML formatted version - - Only return valid JSON. No preamble or explanations. - """ - - try: - response = await self.service.base.callAi([ - {"role": "system", "content": "You are an email template specialist. Create professional emails. Respond with valid JSON only."}, - {"role": "user", "content": emailPrompt} - ]) - - # Extract JSON from response - jsonStart = response.find('{') - jsonEnd = response.rfind('}') + 1 - - if jsonStart >= 0 and jsonEnd > jsonStart: - template = json.loads(response[jsonStart:jsonEnd]) - return template - else: - # Fallback plan - logger.warning(f"Not able creating email template, generating fallback plan") - return { - "recipient": "recipient@example.com", - "subject": "Information Regarding Your Request", - "plainBody": f"This email is regarding your request: {prompt}", - "htmlBody": f"

This email is regarding your request: {prompt}

" - } - - except Exception as e: - logger.warning(f"Error generating email template: {str(e)}") - return { - "recipient": "recipient@example.com", - "subject": "Information Regarding Your Request", - "plainBody": f"This email is regarding your request: {prompt}", - "htmlBody": f"

This email is regarding your request: {prompt}

" - } - - def _createHtmlPreview(self, emailTemplate: Dict[str, Any]) -> str: - """ - Create an HTML preview of the email template. - - Args: - emailTemplate: Email template dictionary - - Returns: - HTML string for preview - """ - html = f""" - - - - - Email Preview: {emailTemplate.get('subject', 'Email Template')} - - - -
- - - -
- - - """ - return html - -def getAgentEmail() -> AgentEmail: - """Factory function to create and return an EmailAgent instance.""" - return AgentEmail() \ No newline at end of file diff --git a/modules/historic_data_agents/agentSharepoint.py b/modules/historic_data_agents/agentSharepoint.py deleted file mode 100644 index a0fa0b0d..00000000 --- a/modules/historic_data_agents/agentSharepoint.py +++ /dev/null @@ -1,348 +0,0 @@ -""" -SharePoint Agent Module. -Handles SharePoint document search and data extraction using Microsoft Graph API. -""" - -import logging -import json -from typing import Dict, Any, List, Optional -from modules.workflow.agentBase import AgentBase - -logger = logging.getLogger(__name__) - -class AgentSharepoint(AgentBase): - """Agent for handling SharePoint document operations.""" - - def __init__(self): - """Initialize the SharePoint agent.""" - super().__init__() - self.name = "sharepoint" - self.label = "SharePoint Agent" - self.description = "Searches and extracts data from SharePoint documents using Microsoft Graph API" - self.capabilities = [ - "document_search", - "content_extraction", - "metadata_analysis", - "document_processing" - ] - - async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]: - """ - Process a SharePoint-related task. - - Args: - task: Task object containing: - - prompt: Instructions for the agent - - inputDocuments: List of documents to process - - outputSpecifications: List of required output documents - - context: Additional context including workflow info - - Returns: - Dictionary containing: - - feedback: Text response explaining what was done - - documents: List of created documents - """ - try: - # Extract task information - prompt = task.get("prompt", "") - inputDocuments = task.get("inputDocuments", []) - outputSpecs = task.get("outputSpecifications", []) - - # Check AI service - if not self.service.base: - return { - "feedback": "The SharePoint agent requires an AI service to function.", - "documents": [] - } - - # Check if Microsoft connector is available - if not hasattr(self.service, 'msft'): - return { - "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.", - "documents": [] - } - - # Get Microsoft token - token_data = self.service.msft.getMsftToken() - if not token_data: - # Create authentication trigger document - auth_doc = self._createFrontendAuthTriggerDocument() - return { - "feedback": "Microsoft authentication required. Please authenticate to continue.", - "documents": [auth_doc] - } - - # Parse the search query from the prompt - searchQuery = await self._parseSearchQuery(prompt) - - # Search SharePoint documents - searchResults = await self._searchSharePointDocuments(searchQuery) - - # Process search results - documents = [] - for spec in outputSpecs: - label = spec.get("label", "") - description = spec.get("description", "") - - if label.endswith(".json"): - # Create JSON summary of search results - summaryDoc = self._createSearchSummaryJson(searchResults, description) - documents.append(summaryDoc) - elif label.endswith(".csv"): - # Create CSV summary of search results - summaryDoc = self._createSearchSummaryCsv(searchResults, description) - documents.append(summaryDoc) - else: - # Create text summary of search results - summaryDoc = self._createSearchSummaryText(searchResults, description) - documents.append(summaryDoc) - - # Prepare feedback message - feedback = f"Found {len(searchResults)} documents matching your search criteria. " - if searchResults: - feedback += "The results have been saved as documents." - else: - feedback += "No matching documents were found." - - return { - "feedback": feedback, - "documents": documents - } - - except Exception as e: - logger.error(f"Error in SharePoint agent: {str(e)}") - return { - "feedback": f"Error processing SharePoint task: {str(e)}", - "documents": [] - } - - def _createFrontendAuthTriggerDocument(self) -> Dict[str, Any]: - """Create a document that triggers Microsoft authentication in the frontend.""" - return self.formatAgentDocumentOutput( - "microsoft_auth.html", - """ -
-

Microsoft Authentication Required

-

Please click the button below to authenticate with Microsoft:

- -
- """, - "text/html" - ) - - async def _parseSearchQuery(self, prompt: str) -> Dict[str, Any]: - """ - Parse the search query from the prompt using AI. - - Args: - prompt: The task prompt - - Returns: - Dictionary containing search parameters - """ - try: - # Use AI to parse the search query - response = await self.service.base.callAi([ - {"role": "system", "content": "You are a SharePoint search query parser. Extract search parameters from the user's request."}, - {"role": "user", "content": f""" - Parse the following SharePoint search request into structured parameters: - - {prompt} - - Return a JSON object with these fields: - - query: The main search query - - site: Optional SharePoint site name - - folder: Optional folder path - - fileTypes: List of file types to search for - - dateRange: Optional date range for filtering - - maxResults: Maximum number of results to return - - Only return valid JSON. No preamble or explanations. - """} - ]) - - # Extract JSON from response - jsonStart = response.find('{') - jsonEnd = response.rfind('}') + 1 - - if jsonStart >= 0 and jsonEnd > jsonStart: - return json.loads(response[jsonStart:jsonEnd]) - else: - # Fallback to simple query - return { - "query": prompt, - "maxResults": 10 - } - - except Exception as e: - logger.warning(f"Error parsing search query: {str(e)}") - return { - "query": prompt, - "maxResults": 10 - } - - async def _searchSharePointDocuments(self, searchParams: Dict[str, Any]) -> List[Dict[str, Any]]: - """ - Search SharePoint documents using Microsoft Graph API. - - Args: - searchParams: Search parameters - - Returns: - List of search results - """ - try: - # Get Microsoft token - token = self.service.msft.getMsftToken() - if not token: - return [] - - # Prepare search query - query = searchParams.get("query", "") - site = searchParams.get("site", "") - folder = searchParams.get("folder", "") - fileTypes = searchParams.get("fileTypes", []) - maxResults = searchParams.get("maxResults", 10) - - # Build search URL - searchUrl = "https://graph.microsoft.com/v1.0/sites/root/drives" - if site: - searchUrl = f"https://graph.microsoft.com/v1.0/sites/{site}/drives" - - # Get drives (document libraries) - response = self.service.msft.makeGraphRequest("GET", searchUrl) - if not response or "value" not in response: - return [] - - results = [] - for drive in response["value"]: - # Search in each drive - driveId = drive["id"] - searchEndpoint = f"https://graph.microsoft.com/v1.0/drives/{driveId}/root/search(q='{query}')" - - # Add file type filters if specified - if fileTypes: - typeFilter = " or ".join([f"fileType eq '{ft}'" for ft in fileTypes]) - searchEndpoint += f"&filter={typeFilter}" - - # Add folder filter if specified - if folder: - searchEndpoint += f"&filter=parentReference/path eq '/{folder}'" - - # Add result limit - searchEndpoint += f"&top={maxResults}" - - # Make the search request - searchResponse = self.service.msft.makeGraphRequest("GET", searchEndpoint) - if searchResponse and "value" in searchResponse: - for item in searchResponse["value"]: - # Get file content - fileContent = await self._getFileContent(driveId, item["id"]) - - results.append({ - "name": item["name"], - "id": item["id"], - "driveId": driveId, - "webUrl": item["webUrl"], - "lastModified": item["lastModifiedDateTime"], - "size": item["size"], - "content": fileContent - }) - - return results - - except Exception as e: - logger.error(f"Error searching SharePoint: {str(e)}") - return [] - - async def _getFileContent(self, driveId: str, fileId: str) -> str: - """ - Get file content from SharePoint. - - Args: - driveId: Drive ID - fileId: File ID - - Returns: - File content as string - """ - try: - # Get file content URL - contentUrl = f"https://graph.microsoft.com/v1.0/drives/{driveId}/items/{fileId}/content" - - # Download file content - response = self.service.msft.makeGraphRequest("GET", contentUrl, raw=True) - if response: - return response.decode('utf-8') - return "" - - except Exception as e: - logger.error(f"Error getting file content: {str(e)}") - return "" - - def _createSearchSummaryJson(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]: - """Create a JSON summary of search results.""" - summary = { - "description": description, - "totalResults": len(results), - "results": [] - } - - for result in results: - summary["results"].append({ - "name": result["name"], - "url": result["webUrl"], - "lastModified": result["lastModified"], - "size": result["size"] - }) - - return self.formatAgentDocumentOutput( - "sharepoint_search_results.json", - json.dumps(summary, indent=2), - "application/json" - ) - - def _createSearchSummaryCsv(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]: - """Create a CSV summary of search results.""" - csvLines = ["Name,URL,Last Modified,Size (bytes)"] - - for result in results: - name = result["name"].replace('"', '""') - url = result["webUrl"].replace('"', '""') - lastModified = result["lastModified"].replace('"', '""') - size = str(result["size"]) - - csvLines.append(f'"{name}","{url}","{lastModified}",{size}') - - return self.formatAgentDocumentOutput( - "sharepoint_search_results.csv", - "\n".join(csvLines), - "text/csv" - ) - - def _createSearchSummaryText(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]: - """Create a text summary of search results.""" - textLines = [ - f"SharePoint Search Results", - f"Description: {description}", - f"Total Results: {len(results)}", - "\nResults:" - ] - - for result in results: - textLines.extend([ - f"\nName: {result['name']}", - f"URL: {result['webUrl']}", - f"Last Modified: {result['lastModified']}", - f"Size: {result['size']} bytes" - ]) - - return self.formatAgentDocumentOutput( - "sharepoint_search_results.txt", - "\n".join(textLines), - "text/plain" - ) - -def getAgentSharepoint() -> AgentSharepoint: - """Factory function to create and return a SharePointAgent instance.""" - return AgentSharepoint() \ No newline at end of file diff --git a/modules/historic_data_agents/agentWebcrawler.py b/modules/historic_data_agents/agentWebcrawler.py deleted file mode 100644 index 0f9768f4..00000000 --- a/modules/historic_data_agents/agentWebcrawler.py +++ /dev/null @@ -1,814 +0,0 @@ -""" -Web crawler agent for gathering and analyzing web content. -Provides web research and content extraction capabilities. -""" - -import logging -import json -import re -import time -import os -from typing import Dict, Any, List -from urllib.parse import quote_plus, unquote - -from bs4 import BeautifulSoup -import requests -import markdown - -from modules.workflow.agentBase import AgentBase -from modules.shared.configuration import APP_CONFIG - -logger = logging.getLogger(__name__) - -class AgentWebcrawler(AgentBase): - """AI-driven agent for web research and information retrieval""" - - def __init__(self): - """Initialize the web crawler agent""" - super().__init__() - self.name = "webcrawler" - self.label = "Web Crawler" - self.description = "Gathers and analyzes web content using AI with multi-step research" - self.capabilities = [ - "web_research", - "content_gathering", - "data_extraction", - "information_synthesis", - "source_verification" - ] - - # Web crawling configuration - self.srcApikey = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_APIKEY","") - self.srcEngine = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_ENGINE","google") - self.srcCountry = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_COUNTRY","auto") - self.maxUrl = int(APP_CONFIG.get("Agent_Webcrawler_SERPAPI_MAX_URLS", "5")) - self.maxSearchTerms = int(APP_CONFIG.get("Agent_Webcrawler_SERPAPI_MAX_SEARCH_KEYWORDS", "3")) - self.maxResults = int(APP_CONFIG.get("Agent_Webcrawler_SERPAPI_MAX_SEARCH_RESULTS", "5")) - self.timeout = int(APP_CONFIG.get("Agent_Webcrawler_SERPAPI_TIMEOUT", "30")) - self.userAgent = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36") - - if not self.srcApikey: - logger.error("SerpAPI key not configured") - - def setDependencies(self, serviceBase=None): - """Set external dependencies for the agent.""" - self.setService(serviceBase) - - async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]: - """ - Process a task by focusing on required outputs and using AI to guide the research process. - - Args: - task: Task dictionary with prompt, inputDocuments, outputSpecifications - - Returns: - Dictionary with feedback and documents - """ - try: - # Extract task information - prompt = task.get("prompt", "") - inputDocuments = task.get("inputDocuments", []) - outputSpecs = task.get("outputSpecifications", []) - workflow = task.get("context", {}).get("workflow", {}) - - # Check AI service - if not self.service or not self.service.base: - return { - "feedback": "The Web Crawler agent requires an AI service to function.", - "documents": [] - } - - # Create research plan - if workflow: - self.service.logAdd(workflow, "Creating research plan...", level="info", progress=35) - researchPlan = await self._createResearchPlan(prompt) - - # Check if this is truly a web research task - if not researchPlan.get("requiresWebResearch", True): - return { - "feedback": "This task doesn't appear to require web research. Please try a different agent.", - "documents": [] - } - - # Gather raw material through web research - if workflow: - self.service.logAdd(workflow, "Gathering research material...", level="info", progress=45) - rawResults = await self._gatherResearchMaterial(researchPlan, workflow) - - # Format results into requested output documents - if workflow: - self.service.logAdd(workflow, "Creating output documents...", level="info", progress=55) - documents = await self._createOutputDocuments( - prompt, - rawResults, - outputSpecs, - researchPlan - ) - - # Generate feedback - feedback = researchPlan.get("feedback", f"I conducted web research on '{prompt[:50]}...' and gathered information from {len(rawResults)} relevant sources.") - - return { - "feedback": feedback, - "documents": documents - } - - except Exception as e: - logger.error(f"Error during web research: {str(e)}", exc_info=True) - return { - "feedback": f"Error during web research: {str(e)}", - "documents": [] - } - - async def _createResearchPlan(self, prompt: str) -> Dict[str, Any]: - """ - Use AI to create a detailed research plan. - - Args: - prompt: The research query - - Returns: - Research plan dictionary - """ - researchPrompt = f""" - Create a detailed web research plan for this task: "{prompt}" - - Analyze the request carefully and create a structured plan in JSON format with the following elements: - {{ - "requiresWebResearch": true/false, # Whether this genuinely requires web research - "researchQuestions": ["question1", "question2", ...], # 2-4 specific questions to answer - "searchTerms": ["term1", "term2", ...], # Up to {self.maxSearchTerms} effective search terms - "directUrls": ["url1", "url2", ...], # Any URLs directly mentioned in the request (up to {self.maxUrl}) - "expectedSources": ["type1", "type2", ...], # Types of sources that would be most valuable - "contentFocus": "what specific content to extract or focus on", - "feedback": "explanation of how the research will be conducted" - }} - - Respond with ONLY the JSON object, no additional text or explanations. - """ - - try: - # Get research plan from AI - response = await self.service.base.callAi([ - {"role": "system", "content": "You are a research expert. Respond with valid JSON only."}, - {"role": "user", "content": researchPrompt} - ]) - - # Extract JSON - jsonStart = response.find('{') - jsonEnd = response.rfind('}') + 1 - - if jsonStart >= 0 and jsonEnd > jsonStart: - plan = json.loads(response[jsonStart:jsonEnd]) - - # Ensure we have the expected fields with defaults if missing - if "searchTerms" not in plan: - plan["searchTerms"] = [prompt] - if "directUrls" not in plan: - plan["directUrls"] = [] - if "researchQuestions" not in plan: - plan["researchQuestions"] = ["What information can be found about this topic?"] - - return plan - else: - # Fallback plan - logger.warning(f"Not able creating research plan, generating fallback plan") - return { - "requiresWebResearch": True, - "researchQuestions": ["What information can be found about this topic?"], - "searchTerms": [prompt], - "directUrls": [], - "expectedSources": ["Web pages", "Articles"], - "contentFocus": "Relevant information about the topic", - "feedback": f"I'll conduct web research on '{prompt}' and gather relevant information." - } - - except Exception as e: - logger.warning(f"Error creating research plan: {str(e)}") - # Simple fallback plan - return { - "requiresWebResearch": True, - "researchQuestions": ["What information can be found about this topic?"], - "searchTerms": [prompt], - "directUrls": [], - "expectedSources": ["Web pages", "Articles"], - "contentFocus": "Relevant information about the topic", - "feedback": f"I'll conduct web research on '{prompt}' and gather relevant information." - } - - async def _gatherResearchMaterial(self, researchPlan: Dict[str, Any], workflow: Dict[str, Any]) -> List[Dict[str, Any]]: - """ - Gather research material based on the research plan. - - Args: - researchPlan: Research plan dictionary - workflow: Current workflow object - - Returns: - List of research results - """ - allResults = [] - - # Process direct URLs - directUrls = researchPlan.get("directUrls", [])[:self.maxUrl] - for i, url in enumerate(directUrls): - progress = 45 + int((i / len(directUrls)) * 5) # Progress from 45% to 50% - self.service.logAdd(workflow, f"Processing direct URL {i+1}/{len(directUrls)}...", level="info", progress=progress) - logger.info(f"Processing direct URL: {url}") - try: - # Fetch and extract content - soup = self._readUrl(url) - - if soup: - # Extract title and content - title = self._extractTitle(soup, url) - content = self._extractMainContent(soup) - - # Add to results - allResults.append({ - "title": title, - "url": url, - "sourceType": "directUrl", - "content": content, - "summary": "" # Will be filled later - }) - except Exception as e: - logger.warning(f"Error processing URL {url}: {str(e)}") - - # Process search terms - searchTerms = researchPlan.get("searchTerms", [])[:self.maxSearchTerms] - for i, term in enumerate(searchTerms): - progress = 50 + int((i / len(searchTerms)) * 5) # Progress from 50% to 55% - self.service.logAdd(workflow, f"Searching term {i+1}/{len(searchTerms)}...", level="info", progress=progress) - logger.info(f"Searching for: {term}") - try: - # Perform search - searchResults = self._searchWeb(term) - - # Process each search result - for result in searchResults: - # Check if URL is already in results - if not any(r["url"] == result["url"] for r in allResults): - allResults.append({ - "title": result["title"], - "url": result["url"], - "sourceType": "searchResult", - "content": result["data"], - "snippet": result["snippet"], - "summary": "" # Will be filled later - }) - - # Stop if we've reached the maximum results - if len(allResults) >= self.maxResults: - break - except Exception as e: - logger.warning(f"Error searching for {term}: {str(e)}") - - # Stop if we've reached the maximum results - if len(allResults) >= self.maxResults: - break - - # Create summaries for all results - allResults = await self._summarizeAllResults(allResults, researchPlan) - - return allResults - - async def _summarizeAllResults(self, results: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]: - """ - Create summaries for all research results. - - Args: - results: List of research results - researchPlan: Research plan with questions and focus - - Returns: - Results with added summaries - """ - for i, result in enumerate(results): - logger.info(f"Summarizing result {i+1}/{len(results)}: {result['title'][:30]}...") - - try: - # Limit content length to avoid token issues - content = self._limitText(result.get("content", ""), maxChars=8000) - researchQuestions = researchPlan.get("researchQuestions", ["What relevant information does this page contain?"]) - contentFocus = researchPlan.get("contentFocus", "Relevant information") - - # Create summary using AI - summaryPrompt = f""" - Summarize this web page content based on these research questions: - {', '.join(researchQuestions)} - - Focus on: {contentFocus} - - Web page: {result['url']} - Title: {result['title']} - - Content: - {content} - - Create a concise summary that: - 1. Directly answers the research questions if possible - 2. Extracts the most relevant information from the page - 3. Includes specific facts, figures, or quotes if available - 4. Is around 2000 characters long - - Only include information actually found in the content. No fabrications or assumptions. - """ - - # Get summary from AI - summary = await self.service.base.callAi([ - {"role": "system", "content": "You are a research expert. Respond with valid JSON only."}, - {"role": "user", "content": summaryPrompt} - ]) - - # Add summary to result - result["summary"] = summary.strip() - - except Exception as e: - logger.warning(f"Error summarizing result {i+1}: {str(e)}") - result["summary"] = f"Error creating summary: {str(e)}" - - return results - - async def _createOutputDocuments(self, prompt: str, results: List[Dict[str, Any]], - outputSpecs: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]: - """ - Create output documents based on research results and specifications. - - Args: - prompt: Original research prompt - results: List of research results - outputSpecs: Output specifications - researchPlan: Research plan - - Returns: - List of output documents - """ - # If no output specs provided, create default output - if not outputSpecs: - outputSpecs = [{ - "label": "webResearchResults.md", - "description": "Comprehensive web research results" - }] - - # Generate documents - documents = [] - - # Process each output specification - for spec in outputSpecs: - outputLabel = spec.get("label", "") - outputDescription = spec.get("description", "") - - # Determine format based on file extension - formatType = self._determineFormatType(outputLabel) - - # Create appropriate document based on format - if formatType == "json": - # JSON output - structured data - document = await self._createJsonDocument(prompt, results, researchPlan, outputLabel) - elif formatType == "csv": - # CSV output - tabular data - document = await self._createCsvDocument(results, outputLabel) - else: - # Text-based output (markdown, html, text) - narrative report - document = await self._createNarrativeDocument( - prompt, results, researchPlan, formatType, outputLabel, outputDescription - ) - - documents.append(document) - - return documents - - async def _createNarrativeDocument(self, prompt: str, results: List[Dict[str, Any]], - researchPlan: Dict[str, Any], formatType: str, - outputLabel: str, outputDescription: str) -> Dict[str, Any]: - """ - Create a narrative document (markdown, html, text) from research results. - - Args: - prompt: Original research prompt - results: Research results - researchPlan: Research plan - formatType: Output format (markdown, html, text) - outputLabel: Output filename - outputDescription: Output description - - Returns: - Document object - """ - # Create content based on format - if formatType == "markdown": - contentType = "text/markdown" - templateFormat = "markdown" - elif formatType == "html": - contentType = "text/html" - templateFormat = "html" - else: - contentType = "text/plain" - templateFormat = "text" - - # Prepare research context - researchQuestions = researchPlan.get("researchQuestions", []) - searchTerms = researchPlan.get("searchTerms", []) - - # Create document structure based on results - sourcesSummary = [] - for result in results: - sourcesSummary.append({ - "title": result.get("title", "Untitled"), - "url": result.get("url", ""), - "summary": result.get("summary", ""), - "snippet": result.get("snippet", "") - }) - - # Truncate content for prompt - sourcesJson = json.dumps(sourcesSummary, indent=2) - if len(sourcesJson) > 10000: - # Logic to truncate each summary while preserving structure - for i in range(len(sourcesSummary)): - if len(sourcesJson) <= 10000: - break - # Gradually truncate summaries - sourcesSummary[i]["summary"] = sourcesSummary[i]["summary"][:500] + "..." - sourcesJson = json.dumps(sourcesSummary, indent=2) - - # Create report prompt - reportPrompt = f""" - Create a comprehensive {formatType} research report based on the following web research: - - TASK: {prompt} - - RESEARCH QUESTIONS: - {', '.join(researchQuestions)} - - SEARCH TERMS USED: - {', '.join(searchTerms)} - - SOURCES AND FINDINGS: - {sourcesJson} - - REPORT DETAILS: - - Format: {templateFormat} - - Filename: {outputLabel} - - Description: {outputDescription} - - Create a well-structured report that: - 1. Includes an executive summary of key findings - 2. Addresses each research question directly - 3. Integrates information from all relevant sources - 4. Cites sources appropriately for each piece of information - 5. Provides a comprehensive synthesis of the research - 6. Is formatted professionally and appropriately for {templateFormat} - - The report should be scholarly, accurate, and focused on the original research task. - """ - - try: - # Generate report with AI - reportContent = await self.service.base.callAi([ - {"role": "system", "content": "You are a research expert. Respond with valid JSON only."}, - {"role": "user", "content": reportPrompt} - ]) - - # Convert to HTML if needed - if formatType == "html" and not reportContent.lower().startswith("Web Research Results{reportContent}" - - return self.formatAgentDocumentOutput(outputLabel, reportContent, contentType) - - except Exception as e: - logger.error(f"Error creating narrative document: {str(e)}") - # Create error document - if formatType == "markdown": - content = f"# Web Research Error\n\nAn error occurred: {str(e)}" - elif formatType == "html": - content = f"

Web Research Error

An error occurred: {str(e)}

" - else: - content = f"WEB RESEARCH ERROR\n\nAn error occurred: {str(e)}" - - return self.formatAgentDocumentOutput(outputLabel, content, contentType) - - async def _createJsonDocument(self, prompt: str, results: List[Dict[str, Any]], - researchPlan: Dict[str, Any], outputLabel: str) -> Dict[str, Any]: - """ - Create a JSON document from research results. - - Args: - prompt: Original research prompt - results: Research results - researchPlan: Research plan - outputLabel: Output filename - - Returns: - Document object - """ - try: - # Create structured data - sourcesData = [] - for result in results: - sourcesData.append({ - "title": result.get("title", "Untitled"), - "url": result.get("url", ""), - "summary": result.get("summary", ""), - "snippet": result.get("snippet", ""), - "sourceType": result.get("sourceType", "") - }) - - # Create metadata - metadata = { - "query": prompt, - "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), - "researchQuestions": researchPlan.get("researchQuestions", []), - "searchTerms": researchPlan.get("searchTerms", []) - } - - # Compile complete report object - jsonContent = { - "metadata": metadata, - "summary": researchPlan.get("feedback", "Web research results"), - "sources": sourcesData - } - - # Convert to JSON string - content = json.dumps(jsonContent, indent=2) - - return self.formatAgentDocumentOutput(outputLabel, content, "application/json") - - except Exception as e: - logger.error(f"Error creating JSON document: {str(e)}") - return self.formatAgentDocumentOutput(outputLabel, json.dumps({"error": str(e)}), "application/json") - - async def _createCsvDocument(self, results: List[Dict[str, Any]], outputLabel: str) -> Dict[str, Any]: - """ - Create a CSV document from research results. - - Args: - results: Research results - outputLabel: Output filename - - Returns: - Document object - """ - try: - # Create CSV header - csvLines = ["Title,URL,Source Type,Snippet"] - - # Add results - for result in results: - # Escape CSV fields - title = result.get("title", "").replace('"', '""') - url = result.get("url", "").replace('"', '""') - sourceType = result.get("sourceType", "").replace('"', '""') - snippet = result.get("snippet", "").replace('"', '""') - - csvLines.append(f'"{title}","{url}","{sourceType}","{snippet}"') - - # Combine into CSV content - content = "\n".join(csvLines) - - return self.formatAgentDocumentOutput(outputLabel, content, "text/csv") - - except Exception as e: - logger.error(f"Error creating CSV document: {str(e)}") - return self.formatAgentDocumentOutput(outputLabel, "Error,Error\nFailed to create CSV,{0}".format(str(e)), "text/csv") - - def _determineFormatType(self, outputLabel: str) -> str: - """ - Determine the format type based on the filename. - - Args: - outputLabel: Output filename - - Returns: - Format type (markdown, html, text, json, csv) - """ - outputLabelLower = outputLabel.lower() - - if outputLabelLower.endswith(".md"): - return "markdown" - elif outputLabelLower.endswith(".html"): - return "html" - elif outputLabelLower.endswith(".txt"): - return "text" - elif outputLabelLower.endswith(".json"): - return "json" - elif outputLabelLower.endswith(".csv"): - return "csv" - else: - # Default to markdown - return "markdown" - - def _searchWeb(self, query: str) -> List[Dict[str, str]]: - """ - Conduct a web search using SerpAPI and return the results. - - Args: - query: The search query - - Returns: - List of search results - """ - if not self.srcApikey: - return [] - - # Get user language from serviceBase if available - userLanguage = "en" # Default language - if self.service.base.userLanguage: - userLanguage = self.service.base.userLanguage - - try: - # Format the search request for SerpAPI - params = { - "engine": self.srcEngine, - "q": query, - "api_key": self.srcApikey, - "num": self.maxResults, # Number of results to return - "hl": userLanguage # Identified user language - } - - # Make the API request - response = requests.get("https://serpapi.com/search", params=params, timeout=self.timeout) - response.raise_for_status() - - # Parse JSON response - search_results = response.json() - - # Extract organic results - results = [] - - if "organic_results" in search_results: - for result in search_results["organic_results"][:self.maxResults]: - # Extract title - title = result.get("title", "No title") - - # Extract URL - url = result.get("link", "No URL") - - # Extract snippet - snippet = result.get("snippet", "No description") - - # Get actual page content - try: - targetPageSoup = self._readUrl(url) - content = self._extractMainContent(targetPageSoup) - except Exception as e: - logger.warning(f"Error extracting content from {url}: {str(e)}") - content = f"Error extracting content: {str(e)}" - - results.append({ - 'title': title, - 'url': url, - 'snippet': snippet, - 'data': content - }) - - # Limit number of results - if len(results) >= self.maxResults: - break - else: - logger.warning(f"No organic results found in SerpAPI response for: {query}") - - return results - - except Exception as e: - logger.error(f"Error searching with SerpAPI for {query}: {str(e)}") - return [] - - def _readUrl(self, url: str) -> BeautifulSoup: - """ - Read a URL and return a BeautifulSoup parser for the content. - - Args: - url: The URL to read - - Returns: - BeautifulSoup object with the content or None on errors - """ - if not url or not url.startswith(('http://', 'https://')): - return None - - headers = { - 'User-Agent': self.userAgent, - 'Accept': 'text/html,application/xhtml+xml,application/xml', - 'Accept-Language': 'en-US,en;q=0.9', - } - - try: - # Initial request - response = requests.get(url, headers=headers, timeout=self.timeout) - - # Handling for status 202 - if response.status_code == 202: - # Retry with backoff - backoffTimes = [0.5, 1.0, 2.0, 5.0] - - for waitTime in backoffTimes: - time.sleep(waitTime) - response = requests.get(url, headers=headers, timeout=self.timeout) - - if response.status_code != 202: - break - - # Raise for error status codes - response.raise_for_status() - - # Parse HTML - return BeautifulSoup(response.text, 'html.parser') - - except Exception as e: - logger.error(f"Error reading URL {url}: {str(e)}") - return None - - def _extractTitle(self, soup: BeautifulSoup, url: str) -> str: - """ - Extract the title from a webpage. - - Args: - soup: BeautifulSoup object of the webpage - url: URL of the webpage - - Returns: - Extracted title - """ - if not soup: - return f"Error with {url}" - - # Extract title from title tag - titleTag = soup.find('title') - title = titleTag.text.strip() if titleTag else "No title" - - # Alternative: Also look for h1 tags if title tag is missing - if title == "No title": - h1Tag = soup.find('h1') - if h1Tag: - title = h1Tag.text.strip() - - return title - - def _extractMainContent(self, soup: BeautifulSoup, maxChars: int = 10000) -> str: - """ - Extract the main content from an HTML page. - - Args: - soup: BeautifulSoup object of the webpage - maxChars: Maximum number of characters - - Returns: - Extracted main content as a string - """ - if not soup: - return "" - - # Try to find main content elements in priority order - mainContent = None - for selector in ['main', 'article', '#content', '.content', '#main', '.main']: - content = soup.select_one(selector) - if content: - mainContent = content - break - - # If no main content found, use the body - if not mainContent: - mainContent = soup.find('body') or soup - - # Remove script, style, nav, footer elements that don't contribute to main content - for element in mainContent.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'): - element.extract() - - # Extract text content - textContent = mainContent.get_text(separator=' ', strip=True) - - # Limit to maxChars - return textContent[:maxChars] - - def _limitText(self, text: str, maxChars: int = 10000) -> str: - """ - Limit text to a maximum number of characters. - - Args: - text: Input text - maxChars: Maximum number of characters - - Returns: - Limited text - """ - if not text: - return "" - - # If text is already under the limit, return unchanged - if len(text) <= maxChars: - return text - - # Otherwise limit text to maxChars - return text[:maxChars] + "... [Content truncated due to length]" - - -# Factory function for the Webcrawler agent -def getAgentWebcrawler(): - """Returns an instance of the Webcrawler agent.""" - return AgentWebcrawler() \ No newline at end of file diff --git a/modules/interfaces/interfaceChatObjects.py b/modules/interfaces/interfaceChatObjects.py index 5bb48da3..c29fd70e 100644 --- a/modules/interfaces/interfaceChatObjects.py +++ b/modules/interfaces/interfaceChatObjects.py @@ -6,7 +6,7 @@ Uses the JSON connector for data access with added language support. import os import logging import uuid -from datetime import datetime +from datetime import datetime, UTC from typing import Dict, Any, List, Optional, Union import asyncio @@ -327,6 +327,11 @@ class ChatObjects: publishedAt=createdMessage.get("publishedAt", self._getCurrentTimestamp()), stats=ChatStat(**createdMessage.get("stats", {})) if createdMessage.get("stats") else None ) + + # Update workflow stats for message creation (estimate bytes for message) + message_size = len(createdMessage.get("message", "")) + sum(len(doc.get("filename", "")) for doc in createdMessage.get("documents", [])) + self.updateWorkflowStats(workflowId, bytesSent=0, bytesReceived=message_size) + except Exception as e: logger.error(f"Error creating workflow message: {str(e)}") return None @@ -535,6 +540,64 @@ class ChatObjects: # Get logs for this workflow return [ChatLog(**log) for log in self.db.getRecordset("workflowLogs", recordFilter={"workflowId": workflowId})] + def updateWorkflowStats(self, workflowId: str, bytesSent: int = 0, bytesReceived: int = 0) -> bool: + """Updates workflow statistics during execution with incremental values.""" + try: + # Get current workflow + workflow = self.getWorkflow(workflowId) + if not workflow: + logger.error(f"Workflow {workflowId} not found for stats update") + return False + + if not self._canModify("workflows", workflowId): + logger.error(f"No permission to update workflow {workflowId} stats") + return False + + # Get current stats + currentStats = workflow.stats.dict() if workflow.stats else { + "bytesSent": 0, + "bytesReceived": 0, + "tokenCount": 0, + "processingTime": 0 + } + + # Calculate processing time from workflow start + workflow_start = datetime.fromisoformat(workflow.startedAt.replace('Z', '+00:00')) + current_time = datetime.now(UTC) + processing_time = (current_time - workflow_start).total_seconds() + + # Update stats with incremental values + currentStats["bytesSent"] = currentStats.get("bytesSent", 0) + bytesSent + currentStats["bytesReceived"] = currentStats.get("bytesReceived", 0) + bytesReceived + currentStats["tokenCount"] = currentStats["bytesSent"] + currentStats["bytesReceived"] + currentStats["processingTime"] = processing_time + + # Update workflow in database + self.db.recordModify("workflows", workflowId, { + "dataStats": currentStats + }) + + # Log to stats table + stats_record = { + "timestamp": self._getCurrentTimestamp(), + "workflowId": workflowId, + "bytesSent": bytesSent, + "bytesReceived": bytesReceived, + "tokenCount": bytesSent + bytesReceived, + "processingTime": processing_time + } + + # Create stats record in database + self.db.recordCreate("stats", stats_record) + + logger.debug(f"Updated workflow {workflowId} stats: {currentStats}") + logger.debug(f"Logged stats record: {stats_record}") + return True + + except Exception as e: + logger.error(f"Error updating workflow stats: {str(e)}") + return False + def createWorkflowLog(self, logData: Dict[str, Any]) -> ChatLog: """Creates a log entry for a workflow if user has access.""" # Check workflow access @@ -777,14 +840,7 @@ class ChatObjects: # Create workflow workflow = self.createWorkflow(workflowData) - # Add log entry - self.createWorkflowLog({ - "workflowId": workflow.id, - "message": "Workflow started", - "type": "info", - "status": "running", - "progress": 0 - }) + # Remove the 'Workflow started' log entry # Start workflow processing from modules.workflow.managerWorkflow import WorkflowManager diff --git a/modules/methods/methodCoder.py b/modules/methods/EXCLUDED_methodCoder.py similarity index 93% rename from modules/methods/methodCoder.py rename to modules/methods/EXCLUDED_methodCoder.py index d9cc5289..33d285a0 100644 --- a/modules/methods/methodCoder.py +++ b/modules/methods/EXCLUDED_methodCoder.py @@ -10,9 +10,9 @@ logger = logging.getLogger(__name__) class MethodCoder(MethodBase): """Coder method implementation for code operations""" - def __init__(self, serviceContainer: Any): + def __init__(self, serviceCenter: Any): """Initialize the coder method""" - super().__init__(serviceContainer) + super().__init__(serviceCenter) self.name = "coder" self.description = "Handle code operations like analysis, generation, and refactoring" @@ -87,7 +87,18 @@ class MethodCoder(MethodBase): ) # Extract text content from ExtractedContent objects - text_contents = self.service.extractTextFromContentObjects(all_code_content) + text_contents = [] + for content_obj in all_code_content: + if hasattr(content_obj, 'contents') and content_obj.contents: + # Extract text from ContentItem objects + for content_item in content_obj.contents: + if hasattr(content_item, 'data') and content_item.data: + text_contents.append(content_item.data) + elif isinstance(content_obj, str): + text_contents.append(content_obj) + else: + # Fallback: convert to string representation + text_contents.append(str(content_obj)) # Combine all extracted text content for analysis combined_content = "\n\n--- CODE SEPARATOR ---\n\n".join(text_contents) diff --git a/modules/methods/methodDocument.py b/modules/methods/methodDocument.py index a1b437de..208f736d 100644 --- a/modules/methods/methodDocument.py +++ b/modules/methods/methodDocument.py @@ -8,7 +8,6 @@ from typing import Dict, Any, List, Optional import uuid from datetime import datetime, UTC -from modules.workflow.managerDocument import DocumentManager from modules.workflow.methodBase import MethodBase, ActionResult, action logger = logging.getLogger(__name__) @@ -16,12 +15,11 @@ logger = logging.getLogger(__name__) class MethodDocument(MethodBase): """Document method implementation for document operations""" - def __init__(self, serviceContainer: Any): + def __init__(self, serviceCenter: Any): """Initialize the document method""" - super().__init__(serviceContainer) + super().__init__(serviceCenter) self.name = "document" self.description = "Handle document operations like extraction and analysis" - self.documentManager = DocumentManager(serviceContainer) @action async def extract(self, parameters: Dict[str, Any]) -> ActionResult: @@ -94,7 +92,18 @@ class MethodDocument(MethodBase): ) # Extract text content from ExtractedContent objects - text_contents = self.service.extractTextFromContentObjects(all_extracted_content) + text_contents = [] + for content_obj in all_extracted_content: + if hasattr(content_obj, 'contents') and content_obj.contents: + # Extract text from ContentItem objects + for content_item in content_obj.contents: + if hasattr(content_item, 'data') and content_item.data: + text_contents.append(content_item.data) + elif isinstance(content_obj, str): + text_contents.append(content_obj) + else: + # Fallback: convert to string representation + text_contents.append(str(content_obj)) # Combine all extracted text content combined_content = "\n\n--- DOCUMENT SEPARATOR ---\n\n".join(text_contents) diff --git a/modules/methods/methodOutlook.py b/modules/methods/methodOutlook.py index f681931e..fb226731 100644 --- a/modules/methods/methodOutlook.py +++ b/modules/methods/methodOutlook.py @@ -16,9 +16,9 @@ logger = logging.getLogger(__name__) class MethodOutlook(MethodBase): """Outlook method implementation for email operations""" - def __init__(self, serviceContainer: Any): + def __init__(self, serviceCenter: Any): """Initialize the Outlook method""" - super().__init__(serviceContainer) + super().__init__(serviceCenter) self.name = "outlook" self.description = "Handle Microsoft Outlook email operations" diff --git a/modules/methods/methodSharepoint.py b/modules/methods/methodSharepoint.py index cb36b57b..dbfc4c1f 100644 --- a/modules/methods/methodSharepoint.py +++ b/modules/methods/methodSharepoint.py @@ -16,8 +16,8 @@ logger = logging.getLogger(__name__) class MethodSharepoint(MethodBase): """SharePoint method implementation for document operations""" - def __init__(self, serviceContainer: Any): - super().__init__(serviceContainer) + def __init__(self, serviceCenter: Any): + super().__init__(serviceCenter) self.name = "sharepoint" self.description = "Handle Microsoft SharePoint document operations" diff --git a/modules/methods/methodWeb.py b/modules/methods/methodWeb.py index e993ab55..4602a5a0 100644 --- a/modules/methods/methodWeb.py +++ b/modules/methods/methodWeb.py @@ -19,9 +19,9 @@ logger = logging.getLogger(__name__) class MethodWeb(MethodBase): """Web method implementation for web operations""" - def __init__(self, serviceContainer: Any): + def __init__(self, serviceCenter: Any): """Initialize the web method""" - super().__init__(serviceContainer) + super().__init__(serviceCenter) self.name = "web" self.description = "Handle web operations like crawling and scraping" @@ -452,7 +452,7 @@ class MethodWeb(MethodBase): "query": query } else: - # Get user language from service container if available + # Get user language from service center if available userLanguage = "en" # Default language if hasattr(self.service, 'user') and hasattr(self.service.user, 'language'): userLanguage = self.service.user.language diff --git a/modules/routes/routeWorkflows.py b/modules/routes/routeWorkflows.py index de39a1c4..e01cfeb7 100644 --- a/modules/routes/routeWorkflows.py +++ b/modules/routes/routeWorkflows.py @@ -176,7 +176,7 @@ async def get_workflow_status( ) -> ChatWorkflow: """Get the current status of a workflow.""" try: - # Get service container + # Get service center interfaceChat = getServiceChat(currentUser) # Retrieve workflow @@ -208,7 +208,7 @@ async def get_workflow_logs( ) -> List[ChatLog]: """Get logs for a workflow with support for selective data transfer.""" try: - # Get service container + # Get service center interfaceChat = getServiceChat(currentUser) # Verify workflow exists @@ -251,7 +251,7 @@ async def get_workflow_messages( ) -> List[ChatMessage]: """Get messages for a workflow with support for selective data transfer.""" try: - # Get service container + # Get service center interfaceChat = getServiceChat(currentUser) # Verify workflow exists @@ -297,7 +297,7 @@ async def start_workflow( Corresponds to State 1 in the state machine documentation. """ try: - # Get service container + # Get service center interfaceChat = getServiceChat(currentUser) # Start or continue workflow using ChatObjects @@ -322,7 +322,7 @@ async def stop_workflow( ) -> ChatWorkflow: """Stops a running workflow.""" try: - # Get service container + # Get service center interfaceChat = getServiceChat(currentUser) # Stop workflow using ChatObjects @@ -347,7 +347,7 @@ async def delete_workflow( ) -> Dict[str, Any]: """Deletes a workflow and its associated data.""" try: - # Get service container + # Get service center interfaceChat = getServiceChat(currentUser) # Get raw workflow data from database to check permissions @@ -402,7 +402,7 @@ async def delete_workflow_message( ) -> Dict[str, Any]: """Delete a message from a workflow.""" try: - # Get service container + # Get service center interfaceChat = getServiceChat(currentUser) # Verify workflow exists @@ -453,7 +453,7 @@ async def delete_file_from_message( ) -> Dict[str, Any]: """Delete a file reference from a message in a workflow.""" try: - # Get service container + # Get service center interfaceChat = getServiceChat(currentUser) # Verify workflow exists diff --git a/modules/workflow/managerChat.py b/modules/workflow/managerChat.py index 864a7dc9..470653d8 100644 --- a/modules/workflow/managerChat.py +++ b/modules/workflow/managerChat.py @@ -2,6 +2,7 @@ import asyncio import logging import uuid import json +import time from typing import Dict, Any, Optional, List, Union from datetime import datetime, UTC @@ -9,7 +10,7 @@ from modules.interfaces.interfaceAppModel import User from modules.interfaces.interfaceChatModel import ( TaskStatus, ChatDocument, TaskItem, TaskAction, TaskResult, ChatStat, ChatLog, ChatMessage, ChatWorkflow ) -from modules.workflow.serviceContainer import ServiceContainer +from modules.workflow.serviceCenter import ServiceCenter from modules.interfaces.interfaceChatObjects import ChatObjects logger = logging.getLogger(__name__) @@ -20,7 +21,7 @@ class ChatManager: def __init__(self, currentUser: User, chatInterface: ChatObjects): self.currentUser = currentUser self.chatInterface = chatInterface - self.service: ServiceContainer = None + self.service: ServiceCenter = None self.workflow: ChatWorkflow = None # Circuit breaker for AI calls @@ -37,7 +38,7 @@ class ChatManager: async def initialize(self, workflow: ChatWorkflow) -> None: """Initialize chat manager with workflow""" self.workflow = workflow - self.service = ServiceContainer(self.currentUser, self.workflow) + self.service = ServiceCenter(self.currentUser, self.workflow) # ===== WORKFLOW PHASES ===== @@ -119,6 +120,12 @@ class ChatManager: task_actions.append(task_action) logger.info(f"Created task action: {task_action.execMethod}.{task_action.execAction}") + # Update stats for task validation (estimate bytes for action validation) + if task_actions: + # Calculate actual action size for stats + action_size = self.service.calculateObjectSize(task_actions) + self.service.updateWorkflowStats(eventLabel="action", bytesSent=action_size) + logger.info(f"Task action definition completed: {len(task_actions)} actions") return task_actions @@ -265,6 +272,7 @@ class ChatManager: async def processFileIds(self, fileIds: List[str]) -> List[ChatDocument]: """Process file IDs and return ChatDocument objects""" documents = [] + for fileId in fileIds: try: # Ensure service is initialized @@ -290,6 +298,8 @@ class ChatManager: logger.warning(f"No file info found for file ID {fileId}") except Exception as e: logger.error(f"Error processing file ID {fileId}: {str(e)}") + + return documents def setUserLanguage(self, language: str) -> None: @@ -768,7 +778,8 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" 'documents_metadata': documents_metadata, 'actionId': action_result.get('actionId', ''), 'actionMethod': action_result.get('actionMethod', ''), - 'actionName': action_result.get('actionName', '') + 'actionName': action_result.get('actionName', ''), + 'success_indicator': 'documents' if len(documents_metadata) > 0 else 'text_result' if action_result.get('result', '').strip() else 'none' } step_result_serializable['action_results'].append(serializable_action_result) @@ -787,6 +798,13 @@ INSTRUCTIONS: 4. Decide on next action: continue, retry, or fail 5. If retry, provide specific improvements needed +IMPORTANT NOTES: +- Actions can produce either text results OR documents (or both) +- Empty result_summary is acceptable if documents were produced (documents_count > 0) +- Focus on whether the action achieved its intended purpose, not just text output +- Document-based actions (like file extractions) often have empty text results but successful document outputs +- Check the 'success_indicator' field: 'documents' means success via document output, 'text_result' means success via text, 'none' means no output + REQUIRED JSON STRUCTURE: {{ "status": "success|retry|failed", @@ -829,7 +847,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" async def _executeSingleAction(self, action: TaskAction, workflow: ChatWorkflow) -> Dict[str, Any]: """Execute a single action and return result with enhanced document processing""" try: - # Execute the actual method action using the service container + # Execute the actual method action using the service center result = await self.service.executeAction( methodName=action.execMethod, actionName=action.execAction, @@ -943,7 +961,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" message_data = { "workflowId": workflow.id, "role": "assistant", - "message": f"Executed {action.execMethod}.{action.execAction} successfully", + "message": f"Executed action {action.execMethod}.{action.execAction}", "status": "step", "sequenceNr": len(workflow.messages) + 1, "publishedAt": datetime.now(UTC).isoformat(), @@ -979,7 +997,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" file_size = len(str(doc_data)) mime_type = "application/octet-stream" - # Enhanced MIME type detection using service container + # Enhanced MIME type detection using service center if mime_type == "application/octet-stream": mime_type = self._detectMimeTypeFromContent(document_data, document_name) @@ -1045,7 +1063,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" def _detectMimeTypeFromContent(self, content: Any, filename: str) -> str: """ - Detect MIME type from content and filename using service container. + Detect MIME type from content and filename using service center. Only returns a detected MIME type if it's better than application/octet-stream. Args: @@ -1065,7 +1083,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" else: file_bytes = str(content).encode('utf-8') - # Use service container's MIME type detection + # Use service center's MIME type detection detected_mime_type = self.service.detectContentTypeFromData(file_bytes, filename) if detected_mime_type != "application/octet-stream": return detected_mime_type @@ -1076,7 +1094,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" def _detectMimeTypeFromDocument(self, document: Any, filename: str) -> str: """ - Detect MIME type from document object using service container. + Detect MIME type from document object using service center. Only returns a detected MIME type if it's better than application/octet-stream. Args: @@ -1094,7 +1112,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" else: file_bytes = str(content).encode('utf-8') - # Use service container's MIME type detection + # Use service center's MIME type detection detected_mime_type = self.service.detectContentTypeFromData(file_bytes, filename) if detected_mime_type != "application/octet-stream": return detected_mime_type @@ -1222,8 +1240,11 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" action_results = review_context.get('action_results', []) if action_results: # Check for common issues that warrant retry + # Only consider empty results a problem if there are no documents produced has_empty_results = any( - not result.get('result', '').strip() + not result.get('result', '').strip() and + not result.get('documents', []) and + not result.get('documents_metadata', []) for result in action_results if result.get('status') == 'completed' ) @@ -1417,7 +1438,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" assistant_messages = [msg for msg in workflow.messages if msg.role == 'assistant'] # Generate summary feedback - feedback = f"Workflow completed successfully.\n\n" + feedback = f"Workflow completed.\n\n" feedback += f"Processed {len(user_messages)} user inputs and generated {len(assistant_messages)} responses.\n" # Add final status @@ -1437,36 +1458,38 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" # ===== UNIFIED WORKFLOW EXECUTION ===== async def executeUnifiedWorkflow(self, userInput: str, workflow: ChatWorkflow) -> Dict[str, Any]: - """Execute workflow using the new unified phases with retry logic""" + """Execute a unified workflow with all phases""" try: logger.info(f"Starting unified workflow execution for workflow {workflow.id}") + start_time = time.time() - # Create user-friendly progress log - self.chatInterface.createWorkflowLog({ - "workflowId": workflow.id, - "message": "Starting workflow analysis and planning", - "type": "info", - "status": "running", - "progress": 5, - "agentName": "System" - }) + # Initialize chat manager with workflow + await self.initialize(workflow) + + # Process file IDs if provided + documents = [] + if hasattr(userInput, 'listFileId') and userInput.listFileId: + documents = await self.processFileIds(userInput.listFileId) + logger.info(f"Processed {len(documents)} documents") + + # Calculate and update user input stats + user_input_size = self.service.calculateUserInputSize(userInput) + self.service.updateWorkflowStats(eventLabel="userinput", bytesReceived=user_input_size) # Phase 1: High-Level Task Planning - logger.info("=== PHASE 1: HIGH-LEVEL TASK PLANNING ===") - task_plan = await self.planHighLevelTasks(userInput, workflow) - if not task_plan or not task_plan.get('tasks'): - logger.error("Failed to create task plan") - return { - 'status': 'failed', - 'error': 'Failed to create task plan', - 'phase': 'planning' - } + logger.info("--- PHASE 1: HIGH-LEVEL TASK PLANNING ---") + task_plan = await self.planHighLevelTasks(userInput.prompt, workflow) + + # Update stats for task planning + task_plan_size = self.service.calculateObjectSize(task_plan) + self.service.updateWorkflowStats(eventLabel="taskplan", bytesSent=task_plan_size) # Create user-friendly task plan log tasks_count = len(task_plan.get('tasks', [])) + task_descriptions = "\n".join([f"- {task.get('description', 'No description')}" for task in task_plan.get('tasks', [])]) self.chatInterface.createWorkflowLog({ "workflowId": workflow.id, - "message": f"Planning completed: {tasks_count} tasks identified", + "message": f"Planning completed: {tasks_count} tasks identified\n{task_descriptions}", "type": "info", "status": "running", "progress": 15, @@ -1598,22 +1621,29 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" logger.debug(f"TASK {i+1} ACTIONS CREATED: {json.dumps(task_actions_serializable, indent=2, ensure_ascii=False)}") # Phase 3: Execute Task Actions - logger.info(f"--- PHASE 3: EXECUTING ACTIONS FOR TASK {i+1} ---") + logger.info(f"--- PHASE 3: EXECUTING TASK {i+1} ACTIONS ---") action_results = await self.executeTaskActions(task_actions, workflow) + # Update stats for action execution + # Action stats are already handled by the service center during AI calls + # Create user-friendly action completion log with quality metrics successful_actions = sum(1 for result in action_results if result.get('status') == 'completed') total_actions = len(action_results) if total_actions > 0: - quality_percentage = (successful_actions / total_actions) * 100 + if successful_actions == total_actions: + log_type = "success" + elif successful_actions == 0: + log_type = "error" + else: + log_type = "warning" self.chatInterface.createWorkflowLog({ "workflowId": workflow.id, - "message": f"Task {i+1} actions completed: {successful_actions}/{total_actions} successful ({quality_percentage:.0f}% quality)", - "type": "success" if quality_percentage >= 80 else "warning" if quality_percentage >= 60 else "error", + "message": f"Successful actions: {successful_actions}/{total_actions}", + "type": log_type, "status": "running", - "progress": progress + 10, - "agentName": "System" + "progress": progress + 10 }) # Log action results (with metadata only) @@ -1653,6 +1683,9 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" logger.info(f"--- PHASE 4: REVIEWING TASK {i+1} COMPLETION ---") review_result = await self.reviewTaskCompletion(task_step, task_actions, action_results, workflow) + # Update stats for task review + # Task review stats are already handled by the service center during AI calls + # Create user-friendly review log with quality metrics quality_metrics = review_result.get('quality_metrics', {}) quality_score = quality_metrics.get('score', 0) @@ -1662,29 +1695,62 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" if review_status == 'success': self.chatInterface.createWorkflowLog({ "workflowId": workflow.id, - "message": f"Task {i+1} completed successfully (Quality: {quality_score:.0f}%, Confidence: {confidence:.0f}%)", + "message": f"🎯 Task completed successfully with quality score {quality_score} and confidence {confidence}", "type": "success", "status": "running", - "progress": progress + 20, - "agentName": "System" + "progress": progress + 20 }) elif review_status == 'retry': + # Extract improvement details + improvements = review_result.get('improvements', '') + reason = review_result.get('reason', '') + unmet_criteria = review_result.get('unmet_criteria', []) + + # Build detailed message + retry_details = [] + if reason: + retry_details.append(f"Reason: {reason}") + if improvements: + retry_details.append(f"Improvements: {improvements}") + if unmet_criteria: + retry_details.append(f"Missing criteria: {', '.join(unmet_criteria[:3])}{'...' if len(unmet_criteria) > 3 else ''}") + + retry_message = f"🔄 Task needs improvement" + if retry_details: + retry_message += f"\n{chr(10).join(retry_details)}" + self.chatInterface.createWorkflowLog({ "workflowId": workflow.id, - "message": f"Task {i+1} needs improvement (Quality: {quality_score:.0f}%, Confidence: {confidence:.0f}%)", + "message": retry_message, "type": "warning", "status": "running", - "progress": progress + 15, - "agentName": "System" + "progress": progress + 15 }) else: + # Extract failure details + reason = review_result.get('reason', '') + unmet_criteria = review_result.get('unmet_criteria', []) + missing_outputs = review_result.get('missing_outputs', []) + + # Build detailed failure message + failure_details = [] + if reason: + failure_details.append(f"Reason: {reason}") + if unmet_criteria: + failure_details.append(f"Unmet criteria: {', '.join(unmet_criteria[:3])}{'...' if len(unmet_criteria) > 3 else ''}") + if missing_outputs: + failure_details.append(f"Missing outputs: {', '.join(missing_outputs[:3])}{'...' if len(missing_outputs) > 3 else ''}") + + failure_message = f"❌ Task failed" + if failure_details: + failure_message += f"\n{chr(10).join(failure_details)}" + self.chatInterface.createWorkflowLog({ "workflowId": workflow.id, - "message": f"Task {i+1} failed (Quality: {quality_score:.0f}%, Confidence: {confidence:.0f}%)", + "message": failure_message, "type": "error", "status": "running", - "progress": progress + 15, - "agentName": "System" + "progress": progress + 15 }) # Log review result (with metadata only) @@ -1724,7 +1790,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" previous_review_feedback = review_result.get('improvements', '') retry_count += 1 - if retry_count >= max_retries: + if retry_count > max_retries: logger.error(f"Task {i+1} failed after {max_retries} retries") task_success = False else: @@ -1775,35 +1841,37 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" # Final workflow summary successful_tasks = sum(1 for result in workflow_results if result.get('task_success', False)) - total_tasks = len(workflow_results) + total_tasks = len(task_plan['tasks']) + + # Final workflow stats are already handled by the service center during AI calls + + # Calculate total processing time + total_processing_time = time.time() - start_time # Create final user-friendly completion log if successful_tasks == total_tasks: self.chatInterface.createWorkflowLog({ "workflowId": workflow.id, - "message": f"Workflow completed successfully: {successful_tasks}/{total_tasks} tasks completed", + "message": f"🎉 Workflow completed ({successful_tasks}/{total_tasks} tasks)", "type": "success", "status": "completed", - "progress": 100, - "agentName": "System" + "progress": 100 }) elif successful_tasks > 0: self.chatInterface.createWorkflowLog({ "workflowId": workflow.id, - "message": f"Workflow completed partially: {successful_tasks}/{total_tasks} tasks completed", + "message": f"⚠️ Workflow partially completed ({successful_tasks}/{total_tasks} tasks)", "type": "warning", "status": "completed", - "progress": 100, - "agentName": "System" + "progress": 100 }) else: self.chatInterface.createWorkflowLog({ "workflowId": workflow.id, - "message": f"Workflow failed: {successful_tasks}/{total_tasks} tasks completed", + "message": f"❌ Workflow failed ({successful_tasks}/{total_tasks} tasks)", "type": "error", "status": "failed", - "progress": 100, - "agentName": "System" + "progress": 100 }) # Create serializable workflow results (with metadata only) @@ -1836,7 +1904,8 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" 'documents_metadata': documents_metadata, 'actionId': action_result.get('actionId', ''), 'actionMethod': action_result.get('actionMethod', ''), - 'actionName': action_result.get('actionName', '') + 'actionName': action_result.get('actionName', ''), + 'success_indicator': 'documents' if len(documents_metadata) > 0 else 'text_result' if action_result.get('result', '').strip() else 'none' } action_results_metadata.append(action_result_metadata) diff --git a/modules/workflow/managerDocument.py b/modules/workflow/managerDocument.py deleted file mode 100644 index b1b8e709..00000000 --- a/modules/workflow/managerDocument.py +++ /dev/null @@ -1,73 +0,0 @@ -""" -Document Manager Module for handling document operations and content extraction. -""" - -import logging - -from modules.interfaces.interfaceChatModel import ( - ChatDocument, - ExtractedContent -) -from modules.workflow.processorDocument import DocumentProcessor - -logger = logging.getLogger(__name__) - -class DocumentManager: - """Manager for document operations and content extraction""" - - def __init__(self, serviceContainer): - self.service = serviceContainer - # Create processor with service container for AI calls - self._processor = DocumentProcessor(serviceContainer) - - async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent: - """Extract content from ChatDocument using prompt""" - try: - # Extract file data from ChatDocument - if document.data: - fileData = document.data.encode('utf-8') if isinstance(document.data, str) else document.data - else: - # Try to get file data from service container if document has fileId - if hasattr(document, 'fileId') and document.fileId: - fileData = self.service.getFileData(document.fileId) - else: - logger.error(f"No file data available in document: {document}") - raise ValueError("No file data available in document") - - # Get filename and mime type from document - filename = document.filename if hasattr(document, 'filename') else "document" - mimeType = document.mimeType if hasattr(document, 'mimeType') else "application/octet-stream" - - # Process with processor - extractedContent = await self._processor.processFileData( - fileData=fileData, - filename=filename, - mimeType=mimeType, - base64Encoded=False, - prompt=prompt - ) - - # Update objectId to match document ID - extractedContent.objectId = document.id - extractedContent.objectType = "ChatDocument" - - return extractedContent - - except Exception as e: - logger.error(f"Error extracting from document: {str(e)}") - raise - - async def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, documentId: str = None) -> ExtractedContent: - """Extract content from file data directly using prompt""" - try: - return await self._processor.processFileData( - fileData=fileData, - filename=filename, - mimeType=mimeType, - base64Encoded=base64Encoded, - prompt=prompt, - documentId=documentId - ) - except Exception as e: - logger.error(f"Error extracting from file data: {str(e)}") - raise diff --git a/modules/workflow/methodBase.py b/modules/workflow/methodBase.py index fe109512..8f09cb52 100644 --- a/modules/workflow/methodBase.py +++ b/modules/workflow/methodBase.py @@ -20,9 +20,9 @@ def action(func): class MethodBase: """Base class for all methods""" - def __init__(self, serviceContainer: Any): - """Initialize method with service container""" - self.service = serviceContainer + def __init__(self, serviceCenter: Any): + """Initialize method with service center""" + self.service = serviceCenter self.name: str self.description: str self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}") diff --git a/modules/workflow/processorDocument.py b/modules/workflow/processorDocument.py index 75929f86..323c6f7f 100644 --- a/modules/workflow/processorDocument.py +++ b/modules/workflow/processorDocument.py @@ -32,10 +32,10 @@ class FileProcessingError(Exception): class DocumentProcessor: """Processor for handling document operations and content extraction.""" - def __init__(self, serviceContainer=None): + def __init__(self, serviceCenter=None): """Initialize the document processor.""" self._neutralizer = DataAnonymizer() if APP_CONFIG.get("ENABLE_CONTENT_NEUTRALIZATION", False) else None - self._serviceContainer = serviceContainer + self._serviceCenter = serviceCenter self.supportedTypes: Dict[str, Callable[[bytes, str, str], Awaitable[List[ContentItem]]]] = { 'text/plain': self._processText, @@ -136,7 +136,7 @@ class DocumentProcessor: # Detect content type if needed if mimeType == "application/octet-stream": - mimeType = self._serviceContainer.detectContentTypeFromData(fileData, filename) + mimeType = self._serviceCenter.detectContentTypeFromData(fileData, filename) # Process document based on type if mimeType not in self.supportedTypes: @@ -527,7 +527,7 @@ class DocumentProcessor: # chunk is already base64 encoded string from _processImage # Use the original prompt directly for images (no content embedding) logger.debug(f"Calling image AI service for MIME type: {mimeType}") - processedContent = await self._serviceContainer.callAiImageBasic(prompt, chunk, mimeType) + processedContent = await self._serviceCenter.callAiImageBasic(prompt, chunk, mimeType) else: # For text content, use text AI service # Neutralize content if neutralizer is enabled (only for text) @@ -548,7 +548,7 @@ class DocumentProcessor: """ logger.debug(f"Calling text AI service for MIME type: {mimeType}") - processedContent = await self._serviceContainer.callAiTextBasic(aiPrompt, contentToProcess) + processedContent = await self._serviceCenter.callAiTextBasic(aiPrompt, contentToProcess) chunkResults.append(processedContent) except Exception as aiError: diff --git a/modules/workflow/serviceContainer.py b/modules/workflow/serviceCenter.py similarity index 76% rename from modules/workflow/serviceContainer.py rename to modules/workflow/serviceCenter.py index ea2f87b1..d403aca9 100644 --- a/modules/workflow/serviceContainer.py +++ b/modules/workflow/serviceCenter.py @@ -8,14 +8,14 @@ from modules.interfaces.interfaceAppModel import User, UserConnection from modules.interfaces.interfaceChatModel import ( TaskStatus, ChatDocument, TaskItem, TaskAction, TaskResult, - ChatStat, ChatLog, ChatMessage, ChatWorkflow, DocumentExchange + ChatStat, ChatLog, ChatMessage, ChatWorkflow, DocumentExchange, ExtractedContent ) from modules.interfaces.interfaceAiCalls import AiCalls from modules.interfaces.interfaceChatObjects import getInterface as getChatObjects from modules.interfaces.interfaceChatModel import ActionResult from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects -from modules.workflow.managerDocument import DocumentManager +from modules.workflow.processorDocument import DocumentProcessor from modules.workflow.methodBase import MethodBase import uuid import base64 @@ -23,8 +23,8 @@ import hashlib logger = logging.getLogger(__name__) -class ServiceContainer: - """Service container that provides access to all services and their functions""" +class ServiceCenter: + """Service center that provides access to all services and their functions""" def __init__(self, currentUser: User, workflow: ChatWorkflow): # Core services @@ -39,7 +39,7 @@ class ServiceContainer: self.interfaceComponent = getComponentObjects(currentUser) self.interfaceApp = getAppObjects(currentUser) self.interfaceAiCalls = AiCalls() - self.documentManager = DocumentManager(self) + self.documentProcessor = DocumentProcessor(self) # Initialize methods catalog self.methods = {} @@ -115,7 +115,7 @@ class ServiceContainer: def detectContentTypeFromData(self, fileData: bytes, filename: str) -> str: """ Detect content type from file data and filename. - This method makes the MIME type detection function accessible through the service container. + This method makes the MIME type detection function accessible through the service center. Args: fileData: Raw file data as bytes @@ -263,17 +263,11 @@ class ServiceContainer: # ===== Functions ===== - def extractContent(self, prompt: str, document: ChatDocument) -> str: + def extractContent(self, prompt: str, document: ChatDocument) -> ExtractedContent: """Extract content from document using prompt""" - return self.documentManager.extractContentFromDocument(prompt, document) + return self.extractContentFromDocument(prompt, document) - async def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, documentId: str = None) -> str: - """Extract content from file data directly using prompt""" - extracted_content = await self.documentManager.extractContentFromFileData(prompt, fileData, filename, mimeType, base64Encoded, documentId) - # Convert ExtractedContent to string for backward compatibility - if hasattr(extracted_content, 'contents'): - return "\n".join([item.data for item in extracted_content.contents]) - return str(extracted_content) + def getMethodsCatalog(self) -> Dict[str, Any]: """Get catalog of available methods and their actions""" @@ -502,7 +496,7 @@ Instructions: Please provide a comprehensive summary of this conversation.""" # Get summary using AI - return await self.interfaceAiCalls.callAiTextBasic(prompt) + return await self.callAiTextBasic(prompt) except Exception as e: logger.error(f"Error summarizing chat: {str(e)}") @@ -535,27 +529,81 @@ Instructions: Please provide a clear summary of this message.""" # Get summary using AI - return await self.interfaceAiCalls.callAiTextBasic(prompt) + return await self.callAiTextBasic(prompt) except Exception as e: logger.error(f"Error summarizing message: {str(e)}") return f"Error summarizing message: {str(e)}" - def callAiTextBasic(self, prompt: str, context: str = None) -> str: + async def callAiTextBasic(self, prompt: str, context: str = None) -> str: """Basic text processing using OpenAI""" - return self.interfaceAiCalls.callAiTextBasic(prompt, context) + # Calculate prompt size for stats + prompt_size = self.calculateObjectSize(prompt) + if context: + prompt_size += self.calculateObjectSize(context) + + # Call AI + response = await self.interfaceAiCalls.callAiTextBasic(prompt, context) + + # Calculate response size for stats + response_size = self.calculateObjectSize(response) + + # Update stats + self.updateWorkflowStats(eventLabel="aicall.openai.text", bytesSent=prompt_size, bytesReceived=response_size) + + return response - def callAiTextAdvanced(self, prompt: str, context: str = None) -> str: + async def callAiTextAdvanced(self, prompt: str, context: str = None) -> str: """Advanced text processing using Anthropic""" - return self.interfaceAiCalls.callAiTextAdvanced(prompt, context) + # Calculate prompt size for stats + prompt_size = self.calculateObjectSize(prompt) + if context: + prompt_size += self.calculateObjectSize(context) + + # Call AI + response = await self.interfaceAiCalls.callAiTextAdvanced(prompt, context) + + # Calculate response size for stats + response_size = self.calculateObjectSize(response) + + # Update stats + self.updateWorkflowStats(eventLabel="aicall.anthropic.text", bytesSent=prompt_size, bytesReceived=response_size) + + return response - def callAiImageBasic(self, prompt: str, imageData: str, mimeType: str) -> str: + async def callAiImageBasic(self, prompt: str, imageData: str, mimeType: str) -> str: """Basic image processing using OpenAI""" - return self.interfaceAiCalls.callAiImageBasic(prompt, imageData, mimeType) + # Calculate prompt size for stats + prompt_size = self.calculateObjectSize(prompt) + prompt_size += self.calculateObjectSize(imageData) + + # Call AI + response = await self.interfaceAiCalls.callAiImageBasic(prompt, imageData, mimeType) + + # Calculate response size for stats + response_size = self.calculateObjectSize(response) + + # Update stats + self.updateWorkflowStats(eventLabel="aicall.openai.image", bytesSent=prompt_size, bytesReceived=response_size) + + return response - def callAiImageAdvanced(self, prompt: str, imageData: str, mimeType: str) -> str: + async def callAiImageAdvanced(self, prompt: str, imageData: str, mimeType: str) -> str: """Advanced image processing using Anthropic""" - return self.interfaceAiCalls.callAiImageAdvanced(prompt, imageData, mimeType) + # Calculate prompt size for stats + prompt_size = self.calculateObjectSize(prompt) + prompt_size += self.calculateObjectSize(imageData) + + # Call AI + response = await self.interfaceAiCalls.callAiImageAdvanced(prompt, imageData, mimeType) + + # Calculate response size for stats + response_size = self.calculateObjectSize(response) + + # Update stats + self.updateWorkflowStats(eventLabel="aicall.anthropic.image", bytesSent=prompt_size, bytesReceived=response_size) + + return response def getFileInfo(self, fileId: str) -> Dict[str, Any]: """Get file information""" @@ -575,6 +623,59 @@ Please provide a clear summary of this message.""" """Get file data by ID""" return self.interfaceComponent.getFileData(fileId) + async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent: + """Extract content from ChatDocument using prompt""" + try: + # Extract file data from ChatDocument + if document.data: + fileData = document.data.encode('utf-8') if isinstance(document.data, str) else document.data + else: + # Try to get file data from service center if document has fileId + if hasattr(document, 'fileId') and document.fileId: + fileData = self.getFileData(document.fileId) + else: + logger.error(f"No file data available in document: {document}") + raise ValueError("No file data available in document") + + # Get filename and mime type from document + filename = document.filename if hasattr(document, 'filename') else "document" + mimeType = document.mimeType if hasattr(document, 'mimeType') else "application/octet-stream" + + # Process with document processor directly + extractedContent = await self.documentProcessor.processFileData( + fileData=fileData, + filename=filename, + mimeType=mimeType, + base64Encoded=False, + prompt=prompt, + documentId=document.id + ) + + # Update objectId to match document ID + extractedContent.objectId = document.id + extractedContent.objectType = "ChatDocument" + + return extractedContent + + except Exception as e: + logger.error(f"Error extracting from document: {str(e)}") + raise + + async def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, documentId: str = None) -> ExtractedContent: + """Extract content from file data directly using prompt""" + try: + return await self.documentProcessor.processFileData( + prompt=prompt, + fileData=fileData, + filename=filename, + mimeType=mimeType, + base64Encoded=base64Encoded, + documentId=documentId + ) + except Exception as e: + logger.error(f"Error extracting from file data: {str(e)}") + raise + def createFile(self, fileName: str, mimeType: str, content: str, base64encoded: bool = False) -> str: """Create new file and return its ID""" # Convert content to bytes based on base64 flag @@ -613,29 +714,85 @@ Please provide a clear summary of this message.""" mimeType=mimeType ) - def extractTextFromContentObjects(self, content_objects: List[Any]) -> List[str]: + def updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None: """ - Extract text content from ExtractedContent objects or other content objects. + Centralized function to update workflow statistics in database and running workflow. Args: - content_objects: List of ExtractedContent objects or other content objects + eventLabel: Label for the event (e.g., "userinput", "taskplan", "action", "aicall") + bytesSent: Bytes sent (incremental) + bytesReceived: Bytes received (incremental) + tokenCount: Token count (incremental, default 0) + """ + try: + if hasattr(self, 'workflow') and self.workflow: + # Update the running workflow stats + self.interfaceChat.updateWorkflowStats( + self.workflow.id, + bytesSent=bytesSent, + bytesReceived=bytesReceived + ) + + # Log the stats event + logger.debug(f"Workflow stats updated - Event: {eventLabel}, Sent: {bytesSent}, Received: {bytesReceived}, Tokens: {tokenCount}") + + except Exception as e: + logger.error(f"Error updating workflow stats: {str(e)}") + + def calculateObjectSize(self, obj: Any) -> int: + """ + Calculate the size of an object in bytes. + + Args: + obj: Object to calculate size for Returns: - List of extracted text strings + int: Size in bytes """ - text_contents = [] - for content_obj in content_objects: - if hasattr(content_obj, 'contents') and content_obj.contents: - # Extract text from ContentItem objects - for content_item in content_obj.contents: - if hasattr(content_item, 'data') and content_item.data: - text_contents.append(content_item.data) - elif isinstance(content_obj, str): - text_contents.append(content_obj) - else: - # Fallback: convert to string representation - text_contents.append(str(content_obj)) - return text_contents + try: + import json + import sys + + if obj is None: + return 0 + + # Convert object to JSON string and calculate size + json_str = json.dumps(obj, ensure_ascii=False, default=str) + return len(json_str.encode('utf-8')) + + except Exception as e: + logger.error(f"Error calculating object size: {str(e)}") + return 0 + + def calculateUserInputSize(self, userInput: Any) -> int: + """ + Calculate size of user input including file sizes. + + Args: + userInput: User input object + + Returns: + int: Total size in bytes + """ + try: + total_size = 0 + + # Calculate base user input size + if hasattr(userInput, 'prompt'): + total_size += self.calculateObjectSize(userInput.prompt) + + # Add file sizes if present + if hasattr(userInput, 'listFileId') and userInput.listFileId: + for fileId in userInput.listFileId: + file_info = self.getFileInfo(fileId) + if file_info: + total_size += file_info.get('size', 0) + + return total_size + + except Exception as e: + logger.error(f"Error calculating user input size: {str(e)}") + return 0 async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult: """Execute a method action""" @@ -659,9 +816,9 @@ Please provide a clear summary of this message.""" # Create singleton instance serviceObject = None -def initializeServiceContainer(currentUser: User, workflow: ChatWorkflow) -> ServiceContainer: - """Initialize the service container singleton""" +def initializeServiceCenter(currentUser: User, workflow: ChatWorkflow) -> ServiceCenter: + """Initialize the service center singleton""" global serviceObject if serviceObject is None: - serviceObject = ServiceContainer(currentUser, workflow) + serviceObject = ServiceCenter(currentUser, workflow) return serviceObject diff --git a/notes/WORKFLOW_ARCHITECTURE.md b/notes/WORKFLOW_ARCHITECTURE.md deleted file mode 100644 index 5849a6cc..00000000 --- a/notes/WORKFLOW_ARCHITECTURE.md +++ /dev/null @@ -1,226 +0,0 @@ -# Workflow Architecture Documentation - -## Overview - -The workflow system has been refactored into a clear, structured approach with 5 distinct phases. This eliminates redundancies and provides better error handling, quality assessment, and maintainability. - -## Architecture Principles - -### 1. **Clear Phase Separation** -Each workflow phase has a specific responsibility and clear inputs/outputs. - -### 2. **Unified Data Model** -Standardized on `TaskAction` objects throughout the system. - -### 3. **Consistent Prompt Generation** -All AI interactions use dedicated prompt generation functions. - -### 4. **Quality Assessment** -Each task is reviewed before proceeding to the next. - -## Workflow Phases - -### **Phase 1: High-Level Task Planning** -**Function:** `planHighLevelTasks()` -**Purpose:** Analyze user request and create a structured task plan -**Input:** User input, available documents -**Output:** Task plan with multiple task steps -**Prompt Function:** `_createTaskPlanningPrompt()` - -```python -task_plan = await chatManager.planHighLevelTasks(userInput, workflow) -``` - -### **Phase 2: Task Definition and Action Generation** -**Function:** `defineTaskActions()` -**Purpose:** Define specific actions for each task step -**Input:** Task step, workflow context, previous results -**Output:** List of TaskAction objects -**Prompt Function:** `_createActionDefinitionPrompt()` - -```python -task_actions = await chatManager.defineTaskActions(task_step, workflow, previous_results) -``` - -### **Phase 3: Action Execution** -**Function:** `executeTaskActions()` -**Purpose:** Execute all actions for a task step -**Input:** List of TaskAction objects -**Output:** List of action results -**Prompt Function:** `_createActionExecutionPrompt()` - -```python -action_results = await chatManager.executeTaskActions(task_actions, workflow) -``` - -### **Phase 4: Task Review and Quality Assessment** -**Function:** `reviewTaskCompletion()` -**Purpose:** Review task completion and decide next steps -**Input:** Task step, actions, results -**Output:** Review result with quality metrics -**Prompt Function:** `_createResultReviewPrompt()` - -```python -review_result = await chatManager.reviewTaskCompletion(task_step, task_actions, action_results, workflow) -``` - -### **Phase 5: Task Handover and State Management** -**Function:** `prepareTaskHandover()` -**Purpose:** Prepare results for next task or workflow completion -**Input:** Task step, actions, review result -**Output:** Handover data for next iteration -**Prompt Function:** None (data processing only) - -```python -handover_data = await chatManager.prepareTaskHandover(task_step, task_actions, review_result, workflow) -``` - -## Unified Workflow Execution - -### **Main Entry Point** -**Function:** `executeUnifiedWorkflow()` -**Purpose:** Orchestrate all phases in sequence -**Input:** User input, workflow -**Output:** Complete workflow results - -```python -workflow_result = await chatManager.executeUnifiedWorkflow(userInput.prompt, workflow) -``` - -### **Workflow Flow** -``` -1. planHighLevelTasks() → Task Plan -2. For each task step: - ├── defineTaskActions() → Task Actions - ├── executeTaskActions() → Action Results - ├── reviewTaskCompletion() → Review Result - └── prepareTaskHandover() → Handover Data -3. Return workflow summary -``` - -## Prompt Generation Functions - -| **Function** | **Used In** | **Purpose** | -|-------------|-------------|-------------| -| `_createTaskPlanningPrompt()` | `planHighLevelTasks()` | Generate high-level task plan | -| `_createActionDefinitionPrompt()` | `defineTaskActions()` | Generate specific actions for task | -| `_createActionExecutionPrompt()` | `executeTaskActions()` | Execute individual actions | -| `_createResultReviewPrompt()` | `reviewTaskCompletion()` | Review task completion | - -## Data Models - -### **TaskAction Object** -```python -class TaskAction: - id: str - execMethod: str - execAction: str - execParameters: Dict[str, Any] - execResultLabel: Optional[str] - status: TaskStatus - error: Optional[str] - result: Optional[str] - # ... other fields -``` - -### **Workflow Result Structure** -```python -{ - 'status': 'completed' | 'partial' | 'failed', - 'successful_tasks': int, - 'total_tasks': int, - 'workflow_results': List[Dict], - 'final_results': List[str] -} -``` - -## Error Handling - -### **Phase-Level Error Handling** -Each phase has its own error handling: -- **Planning:** Fallback to basic task plan -- **Definition:** Skip task if no actions defined -- **Execution:** Stop on first action failure -- **Review:** Default to success to avoid blocking -- **Handover:** Provide empty results on error - -### **Circuit Breaker Pattern** -AI calls use circuit breaker pattern to prevent cascading failures. - -## Quality Metrics - -### **Task Quality Assessment** -- Success rate of actions -- Completion of expected outputs -- Meeting of success criteria -- Confidence scores - -### **Workflow Quality Metrics** -- Overall success rate -- Task completion percentage -- Error patterns and suggestions - -## Benefits of Refactored Architecture - -### **1. Clear Separation of Concerns** -Each phase has a single responsibility and clear interfaces. - -### **2. Better Error Handling** -Granular error handling at each phase with appropriate fallbacks. - -### **3. Quality Assessment** -Built-in review and quality metrics for each task. - -### **4. Maintainability** -Consistent patterns and unified data models. - -### **5. Extensibility** -Easy to add new phases or modify existing ones. - -### **6. Debugging** -Clear logging and error reporting at each phase. - -## Migration Path - -### **Legacy Methods** -All legacy methods are preserved for backward compatibility: -- `createInitialTask()` -- `createNextTask()` -- `executeTask()` -- `executeAction()` - -### **New Unified Approach** -Use `executeUnifiedWorkflow()` for new implementations. - -## Usage Example - -```python -# Initialize chat manager -await chatManager.initialize(workflow) - -# Execute unified workflow -workflow_result = await chatManager.executeUnifiedWorkflow(userInput.prompt, workflow) - -# Process results -if workflow_result['status'] == 'completed': - print(f"Workflow completed: {workflow_result['successful_tasks']}/{workflow_result['total_tasks']} tasks") -else: - print(f"Workflow failed: {workflow_result['error']}") -``` - -## Future Enhancements - -### **1. Retry Logic** -Add exponential backoff retry for failed tasks. - -### **2. Alternative Approaches** -When primary method fails, try alternative approaches. - -### **3. Parallel Execution** -Execute independent tasks in parallel. - -### **4. Progress Tracking** -Real-time progress updates during workflow execution. - -### **5. Rollback Mechanisms** -Undo failed operations and restore previous state. \ No newline at end of file diff --git a/notes/changelog.txt b/notes/changelog.txt index 948fa1e7..658e8b68 100644 --- a/notes/changelog.txt +++ b/notes/changelog.txt @@ -993,96 +993,7 @@ Ich möchte den agentenchat workflow ändern. kannst du mir bitte dazu in einem 3. Neue Objektstruktur für den workflow ablauf: -workflow = -{ - // Core workflow properties - "id": "workflow_uuid", - "name": "Analysis Workflow", - "mandate_id": 123, - "user_id": 456, - "status": "running", // "running", "failed", "stopped" - "started_at": "2025-03-29T14:15:00.000Z", - "last_activity": "2025-03-29T14:45:00.000Z", - "current_round": 1, - "waiting_for_user": false, - // Performance statistics (sum) - "data_stats": { - "total_processing_time": 3.9, - "total_token_count": 857, - "total_bytes_semt": 1026323, - "total_bytes_received": 4200, - } - - // Messages array - main conversation history with structured message objects - "messages": [], - - // Logs - "logs": [ - { - "id": "log_uuid1", - "message": "Workflow started", - "type": "info", - "timestamp": "2025-03-29T14:15:00.000Z" - } - ] - -} - -"messages": [ - { - // Core message properties - "id": "msg_uuid", // Unique identifier for each message - "workflow_id": "workflow_uuid", // Reference to the parent workflow - "parent_message_id": "msg_previous_uuid", // Reference to message being responded to - "started_at": "2025-03-29T14:30:00.000Z", // Single timestamp for message creation - "finished_at": "2025-03-29T14:30:00.000Z", // Single timestamp for message closing, when next message is created - "sequence_no": 1, // Optional, but useful for ordering within workflow - - // Status information - "status": "completed", // message status: "pending", "processing", "completed", "failed" - - // Role instead of agent information - "role": "system", // "system", "user", "assistant" - who created this message - - // Metadata for statistics and accounting - "data_stats": { - "processing_time": 2.5, // Time taken to generate in seconds - "token_count": 1205, // Token count (for AI models) - "bytes_sent": 4096, // Data sent to generate this message - "bytes_received": 8192, // Data received - } - - // Documents section - includes prompt and all referenced files - "documents": [ - { - // Document metadata - "id": "doc_uuid", - "source": { - "type": "prompt", // "prompt", "file", "clipboard" - "path": "/full/path/to/file.txt", // Storage path (for files) - "name": "display_filename.txt", - "size": 1024, // Size in bytes - "lines": 42, // Line count (for text files) - "content_type": "text/plain", // MIME type - "upload_date": "2025-03-29T14:30:00.000Z" - }, - - // Document contents (can have multiple parts) - "contents": [ - { - "label": "Main Content", // Optional label - "type": "text", // "text", "image", "chart", etc. - "text": "The actual text content", - "is_extracted": true // Flag if this is extracted from original file - } - ] - } - ], - - } -] - 4. Die Schritte in einem Workflow (neu) - bitte den code revidieren und alle unnötigen teile entfernen. diff --git a/notes/data_specification.md b/notes/data_specification.md deleted file mode 100644 index 7f57ea05..00000000 --- a/notes/data_specification.md +++ /dev/null @@ -1,187 +0,0 @@ -# Document Management Refactoring Specification - -## Overview -This specification outlines the refactoring of document management in the system, focusing on proper model separation, centralized content extraction, and future-proof neutralization integration. - -## Model Structure - -### Base Document Models -```python -class ContentMetadata(BaseModel, ModelMixin): - """Metadata for content items""" - size: int = Field(description="Content size in bytes") - pages: Optional[int] = Field(None, description="Number of pages for multi-page content") - error: Optional[str] = Field(None, description="Processing error if any") - # Media-specific attributes - width: Optional[int] = Field(None, description="Width in pixels for images/videos") - height: Optional[int] = Field(None, description="Height in pixels for images/videos") - colorMode: Optional[str] = Field(None, description="Color mode (e.g., RGB, CMYK, grayscale)") - fps: Optional[float] = Field(None, description="Frames per second for videos") - durationSec: Optional[float] = Field(None, description="Duration in seconds for videos/audio") - -class ContentItem(BaseModel, ModelMixin): - """Individual content item from a document""" - label: str = Field(description="Content label (e.g., tab name, tag name)") - data: str = Field(description="Text content") - metadata: ContentMetadata = Field(description="Content metadata") - -class ChatDocument(BaseModel, ModelMixin): - id: str = Field(default_factory=lambda: str(uuid.uuid4())) - fileId: str - filename: str - fileSize: int - mimeType: str - -class TaskDocument(BaseModel, ModelMixin): - id: str = Field(default_factory=lambda: str(uuid.uuid4())) - filename: str - fileSize: int - mimeType: str - data: str # Base64 encoded file data - -class ExtractedContent(BaseModel, ModelMixin): - objectId: str # Reference to source document - objectType: str = Field(description="Type of source object ('ChatDocument' or 'TaskDocument')") - contents: List[ContentItem] -``` - -## Service Layer Structure - -### Document Service -```python -class DocumentService: - def __init__(self, service_container): - self.service = service_container - self.neutralizer_enabled = False # Flag for neutralization feature - - async def extractFromChatDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent: - """ - Extract content from a ChatDocument by converting it to TaskDocument first. - """ - # Convert ChatDocument to TaskDocument - task_doc = await self._convertToTaskDocument(document) - return await self.getDocumentContent(task_doc, prompt) - - async def extractFromTaskDocument(self, prompt: str, document: TaskDocument) -> ExtractedContent: - """ - Extract content directly from a TaskDocument. - """ - return await self.getDocumentContent(document, prompt) - - async def getDocumentContent(self, document: TaskDocument, prompt: str) -> ExtractedContent: - """ - Helper function for centralized content extraction. - Handles the actual content extraction and optional neutralization. - """ - # Extract content based on mimeType - content = await self._extractRawContent(document) - - # Apply neutralization if enabled - if self.neutralizer_enabled: - from modules.neutralizer import neutralizer - content = await neutralizer.process_content(content) - - # Process content with AI using prompt - processed_content = await self._processWithAI(content, prompt) - - return ExtractedContent( - objectId=document.id, - objectType="TaskDocument", - contents=processed_content - ) -``` - -## Implementation Steps - -1. **Model Cleanup** - - Create new model classes in `interfaceChatModel.py` - - Remove deprecated models: - - DocumentExtraction - - DocumentContext - - ProcessedDocument - - ChatContent (replaced by ContentItem) - - Update ChatDocument to remove contents attribute - - Convert all snake_case to camelCase in manager*.py and method*.py - -2. **Service Implementation** - - Create new `DocumentService` class in `serviceDocument.py` - - Implement the three main methods: - - extractFromChatDocument - - extractFromTaskDocument - - getDocumentContent (helper function) - - Add neutralization integration with feature flag - -3. **UserInput Processing** - - Update `UserInputRequest` processing to use `ChatMessage` - - Implement `processFileIds` in `interfaceChatObjects` - - Update all references to use new model structure - -4. **Method Module Updates** - - Update all method*.py modules to use new service layer - - Remove direct file access - - Implement proper error handling and logging - -5. **Testing and Validation** - - Create unit tests for new models and services - - Test document processing with various file types - - Validate content extraction and neutralization - - Test error handling and edge cases - -## Files to be Removed/Modified - -### To be Removed -1. `DocumentExtraction` class from interfaceChatModel.py -2. `DocumentContext` class from interfaceChatModel.py -3. `ProcessedDocument` class from interfaceChatModel.py -4. `ChatContent` class from interfaceChatModel.py -5. Direct file access methods from method*.py modules - -### To be Modified -1. `interfaceChatModel.py` - - Add new model classes - - Remove deprecated classes - - Update existing classes - -2. `managerDocument.py` - - Move core functionality to DocumentService - - Update to use new model structure - - Remove redundant methods - -3. `method*.py` modules - - Update to use DocumentService - - Remove direct file access - - Update error handling - -4. `interfaceChatObjects.py` - - Implement processFileIds - - Update document handling - -## Neutralization Integration - -The neutralization feature is integrated into the `getDocumentContent` method with a feature flag. When enabled, it will process content through the neutralizer before sending it to AI processing. - -```python -# In getDocumentContent method -if self.neutralizer_enabled: - from modules.neutralizer import neutralizer - content = await neutralizer.process_content(content) -``` - -This allows for easy enabling/disabling of the feature and future expansion of neutralization capabilities. - -## Migration Strategy - -1. Create new models and services -2. Implement new functionality alongside existing code -3. Gradually migrate method modules to use new services -4. Remove deprecated code once migration is complete -5. Enable neutralization feature when ready - -## Testing Requirements - -1. Unit tests for all new model classes -2. Integration tests for DocumentService -3. Tests for content extraction with various file types -4. Tests for neutralization integration -5. Performance tests for large file handling -6. Error handling and edge case tests \ No newline at end of file diff --git a/notes/methodbased_specification.md b/notes/methodbased_specification.md index 452e155f..dd2e590f 100644 --- a/notes/methodbased_specification.md +++ b/notes/methodbased_specification.md @@ -5,10 +5,10 @@ ### 1.1 Core Components - **WorkflowManager**: Orchestrates the overall workflow process - **ChatManager**: Manages chat interactions and task execution -- **ServiceContainer**: Central state and context management +- **ServiceCenter**: Central state and context management - **AgentTask**: Core data object for task execution -### 1.2 Service Container Structure +### 1.2 Service center Structure ```python from enum import Enum from typing import Dict, List, Optional, Any, Literal @@ -161,8 +161,8 @@ class AgentTask(BaseModel): """Check if any action has failed""" return any(a.status == ActionStatus.FAILED for a in self.actionList) -class ServiceContainer: - """Service container with improved state management""" +class ServiceCenter: + """Service center with improved state management""" def __init__(self): self.state = { @@ -481,7 +481,7 @@ class AgentTask: graph TD A[User Input] --> B[WorkflowManager.workflowProcess] B --> C[ChatManager.initialize] - C --> D[Create ServiceContainer] + C --> D[Create ServiceCenter] D --> E[Create Initial Task] ``` @@ -491,7 +491,7 @@ graph TD - Starts task processing loop 2. **ChatManager.initialize** - - Creates ServiceContainer with all required components + - Creates ServiceCenter with all required components - Initializes service interfaces - Sets up task and state management @@ -675,7 +675,7 @@ graph TD ### 3.1 Method Registration ```python def _registerMethods(self): - """Register available methods in service container""" + """Register available methods in service center""" self.service.methods = { "sharepoint": MethodSharepoint(self.service), "outlook": MethodOutlook(self.service), @@ -862,7 +862,6 @@ gateway/ │ │ ├── managerChat.py # Chat management and AI response validation │ │ ├── managerPrompt.py # AI prompt generation and management │ │ ├── methodBase.py # Base method class with result validation -│ │ ├── managerDocument.py # Document operations management │ │ └── processorDocument.py # Document content extraction │ │ │ ├── agents/ # To be refactored into methods @@ -917,7 +916,7 @@ gateway/ #### Phase 3: Manager Updates 1. **Chat Manager Enhancement** - Integrate AI response validation - - Update service container structure + - Update service center structure - Improve error handling 2. **Document Manager Integration** diff --git a/run_document_test.ps1 b/run_document_test.ps1 deleted file mode 100644 index 1e738834..00000000 --- a/run_document_test.ps1 +++ /dev/null @@ -1,31 +0,0 @@ -# PowerShell script to run document extraction test -# Usage: .\run_document_test.ps1 [file_path] - -param( - [string]$FilePath = "test_sample_document.txt" -) - -Write-Host "=== PowerOn Document Extraction Test ===" -ForegroundColor Green -Write-Host "" - -# Check if file exists -if (-not (Test-Path $FilePath)) { - Write-Host "Error: File not found: $FilePath" -ForegroundColor Red - Write-Host "Please provide a valid file path as parameter or ensure test_sample_document.txt exists." -ForegroundColor Yellow - exit 1 -} - -Write-Host "Testing document extraction for file: $FilePath" -ForegroundColor Cyan -Write-Host "Log file will be: test_document_extraction.log" -ForegroundColor Cyan -Write-Host "" - -# Run the Python test -try { - python test_document_extraction.py $FilePath - Write-Host "" - Write-Host "Test completed successfully!" -ForegroundColor Green - Write-Host "Check test_document_extraction.log for detailed results." -ForegroundColor Cyan -} catch { - Write-Host "Test failed with error: $($_.Exception.Message)" -ForegroundColor Red - exit 1 -} \ No newline at end of file diff --git a/test_config.ini b/test_config.ini deleted file mode 100644 index a4b0e667..00000000 --- a/test_config.ini +++ /dev/null @@ -1,15 +0,0 @@ -# Test configuration for workflow testing -DB_APP_HOST=_test_data_app -DB_APP_DATABASE=app -DB_APP_USER=test -DB_APP_PASSWORD_SECRET=test123 - -DB_CHAT_HOST=_test_data_chat -DB_CHAT_DATABASE=chat -DB_CHAT_USER=test -DB_CHAT_PASSWORD_SECRET=test123 - -# AI Configuration -AI_PROVIDER=openai -AI_MODEL=gpt-3.5-turbo -AI_API_KEY_SECRET=test_key \ No newline at end of file diff --git a/test_document_extraction.py b/test_document_extraction.py deleted file mode 100644 index 740448c1..00000000 --- a/test_document_extraction.py +++ /dev/null @@ -1,288 +0,0 @@ -#!/usr/bin/env python3 -""" -Test procedure for DocumentManager document extraction functionality. -""" - -import asyncio -import sys -import os -import json -import argparse -from datetime import datetime, UTC -from pathlib import Path -import logging - -print("Starting test_document_extraction.py...") - -# Configure logging FIRST, before any other imports -import logging - -# Clear any existing handlers to avoid duplicate logs -for handler in logging.root.handlers[:]: - logging.root.removeHandler(handler) - -logging.basicConfig( - level=logging.DEBUG, - format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', - handlers=[ - logging.StreamHandler(sys.stdout), - logging.FileHandler('test_document_extraction.log', mode='w', encoding='utf-8') # 'w' mode clears the file - ], - force=True # Force reconfiguration even if already configured -) - -# Filter out httpcore messages -logging.getLogger('httpcore').setLevel(logging.WARNING) -logging.getLogger('httpx').setLevel(logging.WARNING) - -logger = logging.getLogger(__name__) - -# Set up test configuration -os.environ['POWERON_CONFIG_FILE'] = 'test_config.ini' -print("Set POWERON_CONFIG_FILE environment variable") - -try: - # Import required modules - from modules.interfaces.interfaceAppObjects import User, UserConnection - from modules.interfaces.interfaceChatModel import ChatWorkflow - from modules.workflow.managerDocument import DocumentManager - from modules.workflow.serviceContainer import ServiceContainer - print("All imports successful") -except Exception as e: - print(f"Import error: {e}") - import traceback - traceback.print_exc() - sys.exit(1) - -def log_extraction_debug(message: str, data: dict = None): - """Log extraction debug data with JSON dumps""" - timestamp = datetime.now(UTC).isoformat() - if data: - logger.debug(f"[{timestamp}] {message}\n{json.dumps(data, indent=2, ensure_ascii=False)}") - else: - logger.debug(f"[{timestamp}] {message}") - -def create_test_user() -> User: - """Create a test user for the document extraction""" - return User( - id="test-user-doc-001", - mandateId="test-mandate-doc-001", - username="testuser_doc", - email="test_doc@example.com", - fullName="Test Document User", - enabled=True, - language="en", - privilege="user", - authenticationAuthority="local" - ) - -def create_test_workflow() -> ChatWorkflow: - """Create a test workflow for document extraction""" - return ChatWorkflow( - id="test-workflow-doc-001", - mandateId="test-mandate-doc-001", - status="running", - name="Document Extraction Test Workflow", - currentRound=1, - lastActivity=datetime.now(UTC).isoformat(), - startedAt=datetime.now(UTC).isoformat(), - logs=[], - messages=[], - stats=None, - tasks=[] - ) - -def detect_mime_type(file_path: str) -> str: - """Detect MIME type based on file extension""" - ext = Path(file_path).suffix.lower() - mime_types = { - '.txt': 'text/plain', - '.md': 'text/markdown', - '.csv': 'text/csv', - '.json': 'application/json', - '.xml': 'application/xml', - '.js': 'application/javascript', - '.py': 'application/x-python', - '.svg': 'image/svg+xml', - '.jpg': 'image/jpeg', - '.jpeg': 'image/jpeg', - '.png': 'image/png', - '.gif': 'image/gif', - '.pdf': 'application/pdf', - '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', - '.doc': 'application/msword', - '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', - '.xls': 'application/vnd.ms-excel', - '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', - '.ppt': 'application/vnd.ms-powerpoint', - '.html': 'text/html', - '.htm': 'text/html' - } - return mime_types.get(ext, 'application/octet-stream') - -async def test_document_extraction(file_path: str): - """Test document extraction from a file path""" - try: - # Clear the log file before each run - log_file_path = "test_document_extraction.log" - if os.path.exists(log_file_path): - with open(log_file_path, 'w') as f: - f.write("") # Clear the file - logger.info(f"Cleared log file: {log_file_path}") - - logger.info("=== STARTING DOCUMENT EXTRACTION TEST ===") - - # Validate file path - if not os.path.exists(file_path): - raise FileNotFoundError(f"File not found: {file_path}") - - # Get file info - file_path_obj = Path(file_path) - filename = file_path_obj.name - mime_type = detect_mime_type(file_path) - file_size = file_path_obj.stat().st_size - - log_extraction_debug("File information", { - "file_path": file_path, - "filename": filename, - "mime_type": mime_type, - "file_size_bytes": file_size, - "file_size_mb": round(file_size / (1024 * 1024), 2) - }) - - # Read file data - try: - with open(file_path, 'rb') as f: - file_data = f.read() - log_extraction_debug("File read successfully", { - "bytes_read": len(file_data), - "file_encoding": "binary" - }) - except Exception as e: - logger.error(f"Error reading file: {str(e)}") - raise - - # Create test user and workflow - test_user = create_test_user() - test_workflow = create_test_workflow() - - # Create service container - service_container = ServiceContainer(test_user, test_workflow) - log_extraction_debug("Service container created", { - "user_id": test_user.id, - "workflow_id": test_workflow.id - }) - - # Create document manager - document_manager = DocumentManager(service_container) - log_extraction_debug("Document manager created") - - # Define extraction prompt - extraction_prompt = "extract the table and convert it to a csv table" - - log_extraction_debug("Starting document extraction", { - "prompt": extraction_prompt, - "filename": filename, - "mime_type": mime_type - }) - - # Extract content from file data - try: - extracted_content = await document_manager.extractContentFromFileData( - prompt=extraction_prompt, - fileData=file_data, - filename=filename, - mimeType=mime_type, - base64Encoded=False, - documentId=f"test-doc-{datetime.now(UTC).timestamp()}" - ) - - # Log extraction results - extraction_result = { - "extracted_content_id": extracted_content.id, - "content_items_count": len(extracted_content.contents) - } - - # Add objectId and objectType if they exist (set by DocumentManager) - if hasattr(extracted_content, 'objectId'): - extraction_result["object_id"] = extracted_content.objectId - if hasattr(extracted_content, 'objectType'): - extraction_result["object_type"] = extracted_content.objectType - - log_extraction_debug("Document extraction completed successfully", extraction_result) - - # Log detailed content information - for i, content_item in enumerate(extracted_content.contents): - content_info = { - "label": content_item.label, - "data_length": len(content_item.data) if content_item.data else 0, - "data_preview": content_item.data[:500] + "..." if content_item.data and len(content_item.data) > 500 else content_item.data - } - - # Add metadata if available - if content_item.metadata: - content_info["metadata"] = { - "size": content_item.metadata.size, - "mime_type": content_item.metadata.mimeType, - "base64_encoded": content_item.metadata.base64Encoded, - "pages": content_item.metadata.pages - } - - log_extraction_debug(f"CONTENT ITEM {i+1}:", content_info) - - # Log summary of all extracted content - all_content = "\n\n".join([item.data for item in extracted_content.contents if item.data]) - log_extraction_debug("COMPLETE EXTRACTED CONTENT:", { - "total_length": len(all_content), - "content": all_content - }) - - return extracted_content - - except Exception as e: - log_extraction_debug("DOCUMENT EXTRACTION EXCEPTION:", { - "error_type": type(e).__name__, - "error_message": str(e), - "error_args": e.args if hasattr(e, 'args') else None - }) - raise - - logger.info("=== DOCUMENT EXTRACTION TEST COMPLETED ===") - return extracted_content - - except Exception as e: - logger.error(f"❌ Document extraction test failed with error: {str(e)}") - log_extraction_debug("Full error details", { - "error_type": type(e).__name__, - "error_message": str(e) - }) - raise - -async def main(): - """Main function to run the document extraction test""" - print("Inside main()") - logger.info("=" * 50) - logger.info("DOCUMENT EXTRACTION TEST") - logger.info("=" * 50) - - # Parse command line arguments - parser = argparse.ArgumentParser(description='Test document extraction functionality') - parser.add_argument('file_path', help='Path to the file to extract content from') - args = parser.parse_args() - - try: - extracted_content = await test_document_extraction(args.file_path) - logger.info("=" * 50) - logger.info("TEST COMPLETED SUCCESSFULLY") - logger.info("=" * 50) - return extracted_content - except Exception as e: - logger.error("=" * 50) - logger.error("TEST FAILED") - logger.error("=" * 50) - raise - -if __name__ == "__main__": - print("About to run main()") - asyncio.run(main()) - print("main() finished") \ No newline at end of file diff --git a/test_param_extraction.py b/test_param_extraction.py deleted file mode 100644 index 801811cb..00000000 --- a/test_param_extraction.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python3 - -from modules.workflow.methodBase import MethodBase - -class TestMethod(MethodBase): - pass - -def test_parameter_extraction(): - test = TestMethod(None) - test.name = 'test' - - docstring = """Call AI service with document content - -Parameters: - prompt (str): The prompt to send to the AI service - documents (List[Dict[str, Any]], optional): List of documents to include in context - Each document should have: documentReference (str), contentExtractionPrompt (str, optional)""" - - print("Docstring:") - print(docstring) - print("\nExtracted descriptions:") - descriptions = test._extractParameterDescriptions(docstring) - for param, desc in descriptions.items(): - print(f" {param}: {desc}") - -if __name__ == "__main__": - test_parameter_extraction() \ No newline at end of file diff --git a/test_retry_enhancement.py b/test_retry_enhancement.py deleted file mode 100644 index 6beecb3d..00000000 --- a/test_retry_enhancement.py +++ /dev/null @@ -1,289 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script for retry enhancement in managerChat.py -Tests that previous action results and review feedback are properly passed to retry prompts. -""" - -import asyncio -import logging -import sys -import os - -# Add the gateway directory to the Python path -sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'gateway')) - -from modules.workflow.managerChat import ChatManager -from modules.interfaces.interfaceAppModel import User -from modules.interfaces.interfaceChatModel import ChatWorkflow, ChatMessage -from modules.interfaces.interfaceChatObjects import ChatObjects - -# Configure logging -logging.basicConfig(level=logging.DEBUG) -logger = logging.getLogger(__name__) - -class MockChatObjects(ChatObjects): - """Mock implementation of ChatObjects for testing""" - - def createTaskAction(self, action_data): - """Mock task action creation""" - class MockTaskAction: - def __init__(self, data): - self.id = "test_action_id" - self.execMethod = data.get("execMethod", "unknown") - self.execAction = data.get("execAction", "unknown") - self.execParameters = data.get("execParameters", {}) - self.execResultLabel = data.get("execResultLabel", "") - self.status = data.get("status", "PENDING") - self.result = "" - self.error = "" - - def setSuccess(self): - self.status = "COMPLETED" - - def setError(self, error): - self.status = "FAILED" - self.error = error - - def isSuccessful(self): - return self.status == "COMPLETED" - - return MockTaskAction(action_data) - - def createChatDocument(self, document_data): - """Mock document creation""" - class MockChatDocument: - def __init__(self, data): - self.fileId = data.get("fileId", "") - self.filename = data.get("filename", "unknown") - self.fileSize = data.get("fileSize", 0) - self.mimeType = data.get("mimeType", "application/octet-stream") - self.content = "" - - return MockChatDocument(document_data) - - def createWorkflowMessage(self, message_data): - """Mock message creation""" - class MockWorkflowMessage: - def __init__(self, data): - self.workflowId = data.get("workflowId", "") - self.role = data.get("role", "assistant") - self.message = data.get("message", "") - self.status = data.get("status", "step") - self.sequenceNr = data.get("sequenceNr", 1) - self.publishedAt = data.get("publishedAt", "") - self.actionId = data.get("actionId", "") - self.actionMethod = data.get("actionMethod", "") - self.actionName = data.get("actionName", "") - self.documentsLabel = data.get("documentsLabel", "") - self.documents = data.get("documents", []) - - return MockWorkflowMessage(message_data) - -class MockServiceContainer: - """Mock service container for testing""" - - def __init__(self, user, workflow): - self.user = user - self.workflow = workflow - - def getMethodsList(self): - """Mock methods list""" - return ["document.extract(documentList, aiPrompt)", "document.analyze(documentList, aiPrompt)"] - - async def summarizeChat(self, messages): - """Mock chat summarization""" - return "Mock chat history summary" - - def getDocumentReferenceList(self): - """Mock document references""" - return { - 'chat': [], - 'history': [] - } - - def getConnectionReferenceList(self): - """Mock connection references""" - return ["connection1", "connection2"] - - def getFileInfo(self, fileId): - """Mock file info""" - return { - "filename": f"test_file_{fileId}.txt", - "size": 1024, - "mimeType": "text/plain" - } - - def createFile(self, fileName, mimeType, content, base64encoded=False): - """Mock file creation""" - return f"file_id_{fileName}" - - def createDocument(self, fileName, mimeType, content, base64encoded=False): - """Mock document creation""" - class MockDocument: - def __init__(self, name, mime, cont): - self.filename = name - self.mimeType = mime - self.content = cont - self.fileSize = len(cont) - - return MockDocument(fileName, mimeType, content) - - def getFileExtension(self, filename): - """Mock file extension extraction""" - return filename.split('.')[-1] if '.' in filename else 'txt' - - def getMimeTypeFromExtension(self, extension): - """Mock MIME type detection""" - mime_types = { - 'txt': 'text/plain', - 'pdf': 'application/pdf', - 'doc': 'application/msword', - 'json': 'application/json' - } - return mime_types.get(extension, 'application/octet-stream') - - def detectContentTypeFromData(self, file_bytes, filename): - """Mock content type detection""" - if filename.endswith('.txt'): - return 'text/plain' - elif filename.endswith('.pdf'): - return 'application/pdf' - elif filename.endswith('.json'): - return 'application/json' - return 'application/octet-stream' - - async def callAiTextBasic(self, prompt): - """Mock AI call""" - return '{"actions": [{"method": "document", "action": "extract", "parameters": {"documentList": ["test"], "aiPrompt": "Test prompt"}, "resultLabel": "task1_action1_test", "description": "Test action"}]}' - - async def callAiTextAdvanced(self, prompt): - """Mock advanced AI call""" - return '{"overview": "Test plan", "tasks": [{"id": "task_1", "description": "Test task", "dependencies": [], "expected_outputs": ["output1"], "success_criteria": ["criteria1"], "required_documents": [], "estimated_complexity": "low", "ai_prompt": "Test prompt"}]}' - - async def executeAction(self, methodName, actionName, parameters): - """Mock action execution""" - class MockResult: - def __init__(self): - self.success = True - self.data = { - "result": "Mock execution result", - "documents": [] - } - self.error = None - - return MockResult() - -async def test_retry_enhancement(): - """Test the retry enhancement functionality""" - logger.info("Testing retry enhancement in managerChat.py") - - # Create mock objects - mock_user = User(id="test_user", username="testuser", email="test@example.com", mandateId="test_mandate") - mock_chat_objects = MockChatObjects() - mock_workflow = ChatWorkflow( - id="test_workflow", - userId="test_user", - status="active", - messages=[], - createdAt="2024-01-01T00:00:00Z", - updatedAt="2024-01-01T00:00:00Z", - mandateId="test_mandate", - currentRound=1, - lastActivity="2024-01-01T00:00:00Z", - startedAt="2024-01-01T00:00:00Z" - ) - - # Create chat manager - chat_manager = ChatManager(mock_user, mock_chat_objects) - - # Mock the service container directly instead of initializing - chat_manager.service = MockServiceContainer(mock_user, mock_workflow) - chat_manager.workflow = mock_workflow - - # Test 1: Basic action definition without retry - logger.info("Test 1: Basic action definition") - task_step = { - "id": "task_1", - "description": "Test task", - "expected_outputs": ["output1"], - "success_criteria": ["criteria1"], - "ai_prompt": "Test AI prompt" - } - - actions = await chat_manager.defineTaskActions(task_step, mock_workflow, []) - logger.info(f"Generated {len(actions)} actions without retry context") - - # Test 2: Action definition with retry context - logger.info("Test 2: Action definition with retry context") - enhanced_context = { - 'task_step': task_step, - 'workflow': mock_workflow, - 'workflow_id': mock_workflow.id, - 'available_documents': ["test_doc.txt"], - 'previous_results': ["task0_action1_results"], - 'improvements': "Previous attempt failed - ensure comprehensive extraction", - 'retry_count': 1, - 'previous_action_results': [ - { - 'actionMethod': 'document', - 'actionName': 'extract', - 'status': 'failed', - 'error': 'Empty result returned', - 'result': 'No content extracted', - 'resultLabel': 'task1_action1_failed' - } - ], - 'previous_review_result': { - 'status': 'retry', - 'reason': 'Incomplete extraction', - 'quality_score': 3, - 'missing_outputs': ['detailed_analysis'], - 'unmet_criteria': ['comprehensive_coverage'] - } - } - - retry_actions = await chat_manager.defineTaskActions(task_step, mock_workflow, [], enhanced_context) - logger.info(f"Generated {len(retry_actions)} actions with retry context") - - # Test 3: Verify retry context is properly handled - logger.info("Test 3: Verifying retry context handling") - - # Create a test prompt to see if retry context is included - test_prompt = await chat_manager._createActionDefinitionPrompt(enhanced_context) - - # Check if retry context is in the prompt - if "RETRY CONTEXT" in test_prompt: - logger.info("✓ Retry context properly included in prompt") - else: - logger.error("✗ Retry context not found in prompt") - - if "Previous action results that failed" in test_prompt: - logger.info("✓ Previous action results included in prompt") - else: - logger.error("✗ Previous action results not found in prompt") - - if "Previous review feedback" in test_prompt: - logger.info("✓ Previous review feedback included in prompt") - else: - logger.error("✗ Previous review feedback not found in prompt") - - if "Previous attempt failed" in test_prompt: - logger.info("✓ Improvements needed included in prompt") - else: - logger.error("✗ Improvements needed not found in prompt") - - # Test 4: Verify fallback actions with retry context - logger.info("Test 4: Testing fallback actions with retry context") - fallback_actions = chat_manager._createFallbackActions(task_step, enhanced_context) - logger.info(f"Generated {len(fallback_actions)} fallback actions with retry context") - - # Check if fallback actions include retry information - if any("retry" in action.get("resultLabel", "") for action in fallback_actions): - logger.info("✓ Fallback actions include retry information") - else: - logger.error("✗ Fallback actions missing retry information") - - logger.info("Retry enhancement test completed successfully!") - -if __name__ == "__main__": - asyncio.run(test_retry_enhancement()) \ No newline at end of file diff --git a/test_sample_document.txt b/test_sample_document.txt deleted file mode 100644 index 56415ca3..00000000 --- a/test_sample_document.txt +++ /dev/null @@ -1,47 +0,0 @@ -PowerOn System Architecture Overview - -This document provides a comprehensive overview of the PowerOn system architecture, including its key components, data flow, and technical specifications. - -MAJOR TOPICS: - -1. System Architecture - - Frontend Agents: Web-based user interface components - - Gateway: Central API and workflow management system - - Database: JSON-based data storage with component interfaces - - AI Integration: Anthropic and OpenAI connectors for intelligent processing - -2. Core Components - - Document Manager: Handles file processing and content extraction - - Workflow Manager: Orchestrates complex business processes - - Service Container: Provides unified access to all system services - - Neutralizer: Data anonymization and privacy protection - -3. Data Flow Architecture - - User authentication and authorization - - Document upload and processing pipeline - - AI-powered content analysis and extraction - - Workflow execution and task management - - Result generation and storage - -4. Technical Specifications - - Python-based backend with async/await support - - RESTful API design with JSON data exchange - - Modular component architecture - - Extensible method system for business logic - - Comprehensive logging and monitoring - -5. Security Features - - Multi-authentication authority support (Local, Microsoft, Google) - - Token-based session management - - Data encryption and anonymization - - Role-based access control - - Audit trail and compliance features - -6. Integration Capabilities - - SharePoint document management - - Email system integration (Outlook) - - Web crawling and data collection - - AI service integration (Anthropic, OpenAI) - - Custom method development framework - -The PowerOn system is designed to provide a comprehensive platform for intelligent document processing, workflow automation, and AI-powered business process management. It combines modern web technologies with advanced AI capabilities to deliver a robust and scalable solution for enterprise document management and workflow automation. \ No newline at end of file diff --git a/test_signature.py b/test_signature.py deleted file mode 100644 index b4ede9a0..00000000 --- a/test_signature.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python3 - -from modules.workflow.serviceContainer import ServiceContainer -from modules.interfaces.interfaceAppObjects import User -from modules.interfaces.interfaceChatModel import ChatWorkflow - -def test_signatures(): - user = User(id='test', mandateId='test', username='test', email='test@test.com', - fullName='Test User', enabled=True, language='en', privilege='user', - authenticationAuthority='local') - workflow = ChatWorkflow(id='test', mandateId='test', status='running', name='Test', - currentRound=1, lastActivity='2025-01-01T00:00:00Z', - startedAt='2025-01-01T00:00:00Z', logs=[], messages=[], - stats=None, tasks=[]) - service = ServiceContainer(user, workflow) - - print("Method signatures:") - methodList = service.getMethodsList() - for sig in methodList[:5]: # Show first 5 - print(f" {sig}") - -if __name__ == "__main__": - test_signatures() \ No newline at end of file diff --git a/test_workflow.py b/test_workflow.py deleted file mode 100644 index 503392ec..00000000 --- a/test_workflow.py +++ /dev/null @@ -1,488 +0,0 @@ -#!/usr/bin/env python3 -""" -Test routine for WorkflowManager.workflowProcess() with new unified workflow architecture -""" - -import asyncio -import sys -import os -import json -from datetime import datetime, UTC, timedelta -import uuid -from typing import List - -print("Starting test_workflow.py...") - -# Configure logging FIRST, before any other imports -import logging - -# Clear any existing handlers to avoid duplicate logs -for handler in logging.root.handlers[:]: - logging.root.removeHandler(handler) - -logging.basicConfig( - level=logging.DEBUG, - format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', - handlers=[ - logging.StreamHandler(sys.stdout), - logging.FileHandler('test_workflow.log', mode='w', encoding='utf-8') # 'w' mode clears the file - ], - force=True # Force reconfiguration even if already configured -) - -# Filter out httpcore messages -logging.getLogger('httpcore').setLevel(logging.WARNING) -logging.getLogger('httpx').setLevel(logging.WARNING) - -logger = logging.getLogger(__name__) - -# Set up test configuration -os.environ['POWERON_CONFIG_FILE'] = 'test_config.ini' -print("Set POWERON_CONFIG_FILE environment variable") - -try: - # Simple imports from modules (same as app.py) - from modules.interfaces.interfaceAppObjects import User, UserConnection - from modules.interfaces.interfaceChatObjects import ChatObjects - from modules.interfaces.interfaceChatModel import UserInputRequest, ChatWorkflow - from modules.workflow.managerWorkflow import WorkflowManager - print("All imports successful") -except Exception as e: - print(f"Import error: {e}") - import traceback - traceback.print_exc() - sys.exit(1) - -def log_workflow_debug(message: str, data: dict = None): - """Log workflow debug data with JSON dumps""" - timestamp = datetime.now(UTC).isoformat() - if data: - logger.debug(f"[{timestamp}] {message}\n{json.dumps(data, indent=2, ensure_ascii=False)}") - else: - logger.debug(f"[{timestamp}] {message}") - -def create_test_user() -> User: - """Create a test user for the workflow""" - return User( - id="test-user-001", - mandateId="test-mandate-001", - username="testuser", - email="test@example.com", - fullName="Test User", - enabled=True, - language="en", - privilege="user", - authenticationAuthority="local" - ) - -def create_test_workflow() -> ChatWorkflow: - """Create a test workflow""" - return ChatWorkflow( - id="test-workflow-001", - mandateId="test-mandate-001", - status="running", - name="Candidate Evaluation and Selection Workflow", - currentRound=1, - lastActivity=datetime.now(UTC).isoformat(), - startedAt=datetime.now(UTC).isoformat(), - logs=[], - messages=[], - stats=None, - tasks=[] - ) - -def create_test_user_input() -> UserInputRequest: - """Create test user input with a candidate evaluation task""" - return UserInputRequest( - prompt="""I have following list of job profiles from candidates (3 job profiles as text files) and want to know, who is best suited for the position of product designer (file with criteria). Create an evaluation matrix and rate all candidates according to the matrix, then produce a presentation for the management to decide and store it on the SharePoint for an available account. - - Please ensure the evaluation includes: - - Technical skills assessment - - Experience level evaluation - - Cultural fit analysis - - Portfolio quality review - - Communication skills assessment - - Overall suitability score - - The output should be suitable for executive review and include both detailed analysis and clear recommendations.""", - listFileId=["candidate_1_profile.txt", "candidate_2_profile.txt", "candidate_3_profile.txt", "product_designer_criteria.txt"], - userLanguage="en" - ) - -def create_test_files(chat_interface) -> List[str]: - """Create test files in the database for candidate evaluation""" - test_files = [] - - # Import the component interface - from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects - - # Get component interface with the same user context - component_interface = getComponentObjects(chat_interface.currentUser) - - # Candidate 1 Profile - candidate_1_content = """CANDIDATE 1: Sarah Johnson -Position: Senior Product Designer -Experience: 8 years - -TECHNICAL SKILLS: -- Figma, Sketch, Adobe Creative Suite (Expert) -- Prototyping tools: Framer, Principle (Advanced) -- Design systems and component libraries (Expert) -- User research and usability testing (Advanced) -- HTML/CSS/JavaScript basics (Intermediate) - -EXPERIENCE: -- Senior Product Designer at TechCorp (3 years) -- Product Designer at StartupXYZ (3 years) -- UI/UX Designer at DesignAgency (2 years) - -PORTFOLIO HIGHLIGHTS: -- Redesigned e-commerce platform increasing conversion by 25% -- Created comprehensive design system for 50+ product team -- Led user research for mobile banking app with 1M+ users - -COMMUNICATION SKILLS: -- Excellent presentation skills -- Experience presenting to C-level executives -- Strong stakeholder management -- Mentored 5 junior designers - -CULTURAL FIT: -- Collaborative team player -- Proactive problem solver -- Adapts quickly to new environments -- Values user-centered design approach""" - - # Candidate 2 Profile - candidate_2_content = """CANDIDATE 2: Michael Chen -Position: Product Designer -Experience: 5 years - -TECHNICAL SKILLS: -- Figma, Sketch, Adobe Creative Suite (Advanced) -- Prototyping tools: InVision, Marvel (Intermediate) -- Design systems (Intermediate) -- User research (Intermediate) -- No coding experience - -EXPERIENCE: -- Product Designer at MidSizeTech (3 years) -- Junior Designer at CreativeStudio (2 years) - -PORTFOLIO HIGHLIGHTS: -- Designed mobile app for local restaurant chain -- Created brand identity for startup -- Improved user flow for SaaS dashboard - -COMMUNICATION SKILLS: -- Good presentation skills -- Works well in small teams -- Some experience with stakeholders -- Learning to mentor others - -CULTURAL FIT: -- Quiet but dedicated worker -- Detail-oriented -- Prefers structured environments -- Focuses on visual design quality""" - - # Candidate 3 Profile - candidate_3_content = """CANDIDATE 3: Emma Rodriguez -Position: UX/UI Designer -Experience: 6 years - -TECHNICAL SKILLS: -- Figma, Sketch, Adobe Creative Suite (Advanced) -- Prototyping tools: Framer, Axure (Advanced) -- Design systems (Advanced) -- User research and analytics (Expert) -- Basic React/JavaScript (Intermediate) - -EXPERIENCE: -- UX/UI Designer at EnterpriseCorp (4 years) -- UX Designer at ConsultingFirm (2 years) - -PORTFOLIO HIGHLIGHTS: -- Led UX research for enterprise software used by 10K+ users -- Implemented data-driven design improvements increasing user satisfaction by 30% -- Created accessibility-compliant design system -- Conducted international user research studies - -COMMUNICATION SKILLS: -- Outstanding presentation and storytelling skills -- Experience with international stakeholders -- Strong analytical communication -- Excellent at translating user insights to business value - -CULTURAL FIT: -- Natural leader and team motivator -- Strategic thinker -- Adapts well to change -- Passionate about user advocacy""" - - # Product Designer Criteria - criteria_content = """PRODUCT DESIGNER POSITION CRITERIA -Company: ValueOn -Department: Product Development -Level: Senior - -REQUIRED SKILLS: -- Expert proficiency in Figma and modern design tools -- Strong understanding of user-centered design principles -- Experience with design systems and component libraries -- Ability to conduct user research and usability testing -- Basic understanding of front-end development (HTML/CSS/JavaScript) - -REQUIRED EXPERIENCE: -- Minimum 5 years in product design -- Experience working with cross-functional teams -- Portfolio demonstrating complex product design solutions -- Experience with SaaS or enterprise software preferred - -COMMUNICATION REQUIREMENTS: -- Excellent presentation skills -- Ability to communicate design decisions to stakeholders -- Experience presenting to management/executives -- Strong collaboration and feedback skills - -CULTURAL FIT: -- Team-oriented and collaborative -- Proactive and self-motivated -- Adaptable to fast-paced environment -- Passionate about user experience - -RESPONSIBILITIES: -- Lead design for core product features -- Collaborate with product managers and engineers -- Conduct user research and usability testing -- Create and maintain design system -- Present design solutions to stakeholders -- Mentor junior designers - -EVALUATION WEIGHTS: -- Technical Skills: 30% -- Experience: 25% -- Communication: 20% -- Cultural Fit: 15% -- Portfolio Quality: 10%""" - - # Create files in database - file_contents = [ - ("candidate_1_profile.txt", candidate_1_content), - ("candidate_2_profile.txt", candidate_2_content), - ("candidate_3_profile.txt", candidate_3_content), - ("product_designer_criteria.txt", criteria_content) - ] - - for filename, content in file_contents: - try: - # Create file in database using the component interface - file_item = component_interface.saveUploadedFile( - fileContent=content.encode('utf-8'), - fileName=filename - ) - test_files.append(file_item.id) - log_workflow_debug(f"Created test file: {filename}", { - "file_id": file_item.id, - "filename": filename, - "content_length": len(content) - }) - except Exception as e: - log_workflow_debug(f"Error creating test file {filename}: {str(e)}") - # Create a dummy file ID if creation fails - test_files.append(f"file_{filename.replace('.', '_')}") - - return test_files - -async def test_workflow_process(): - print("Inside test_workflow_process()") - """Test the workflowProcess function with new unified workflow architecture""" - try: - logger.info("=== STARTING UNIFIED WORKFLOW PROCESS TEST ===") - - # Create test data - test_user = create_test_user() - test_workflow = create_test_workflow() - test_user_input = create_test_user_input() - - log_workflow_debug("Test data created", { - "user_id": test_user.id, - "workflow_id": test_workflow.id, - "user_input_prompt": test_user_input.prompt, - "file_ids": test_user_input.listFileId - }) - - # Create test user in database through AppObjects interface - from modules.interfaces.interfaceAppObjects import getRootInterface - from modules.interfaces.interfaceAppModel import AuthAuthority, ConnectionStatus, Token, UserPrivilege - - root_interface = getRootInterface() - created_user = root_interface.createUser( - username=test_user.username, - password="testpassword123", # Required for local authentication - email=test_user.email, - fullName=test_user.fullName, - language=test_user.language, - enabled=test_user.enabled, - privilege=UserPrivilege.USER, - authenticationAuthority=AuthAuthority.LOCAL - ) - log_workflow_debug("Created test user in database", { - "user_id": created_user.id, - "username": created_user.username, - "email": created_user.email - }) - - # Create test connection through AppObjects interface - from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects - app_interface = getAppObjects(created_user) - test_connection = app_interface.addUserConnection( - userId=created_user.id, - authority=AuthAuthority.MSFT, - externalId="msft-user-123", - externalUsername="testuser@example.com", - externalEmail="testuser@example.com", - status=ConnectionStatus.ACTIVE - ) - log_workflow_debug("Created test connection", { - "connection_id": test_connection.id, - "authority": test_connection.authority, - "external_username": test_connection.externalUsername - }) - - # Create test token for the connection - test_token = Token( - userId=created_user.id, - authority=AuthAuthority.MSFT, - tokenAccess="test-access-token-123", - tokenRefresh="test-refresh-token-456", - tokenType="bearer", - expiresAt=datetime.now(UTC).timestamp() + 3600, # 1 hour from now - createdAt=datetime.now(UTC) - ) - app_interface.saveToken(test_token) - log_workflow_debug("Created test token", { - "token_id": test_token.id, - "authority": test_token.authority, - "expires_at": test_token.expiresAt - }) - - # Create test workflow in database through ChatObjects interface - from modules.interfaces.interfaceChatObjects import getInterface as getChatObjects - - chat_interface = getChatObjects(created_user) - workflow_data = { - "name": test_workflow.name, - "status": test_workflow.status, - "mandateId": created_user.mandateId, - "currentRound": test_workflow.currentRound, - "startedAt": test_workflow.startedAt, - "lastActivity": test_workflow.lastActivity - } - created_workflow = chat_interface.createWorkflow(workflow_data) - log_workflow_debug("Created test workflow in database", { - "workflow_id": created_workflow.id, - "name": created_workflow.name, - "status": created_workflow.status - }) - - # Update the test_workflow object with the created workflow's ID - test_workflow.id = created_workflow.id - - # Create test files in database - logger.info("Creating test files for candidate evaluation...") - test_file_ids = create_test_files(chat_interface) - log_workflow_debug("Test files created", { - "file_count": len(test_file_ids), - "file_ids": test_file_ids - }) - - # Update user input with real file IDs - test_user_input.listFileId = test_file_ids - log_workflow_debug("Updated user input with file IDs", { - "file_ids": test_user_input.listFileId - }) - - # Initialize WorkflowManager - workflow_manager = WorkflowManager(chat_interface, created_user) - logger.info("WorkflowManager initialized") - - # Test the workflowProcess function - logger.info("Calling workflowProcess with unified workflow architecture...") - - try: - # Execute the unified workflow process - await workflow_manager.workflowProcess(test_user_input, test_workflow) - - # Log workflow results - log_workflow_debug("Workflow process completed successfully", { - "workflow_id": test_workflow.id, - "workflow_status": test_workflow.status, - "message_count": len(test_workflow.messages), - "final_messages": [ - { - "role": msg.role, - "message": msg.message[:200] + "..." if len(msg.message) > 200 else msg.message, - "status": msg.status, - "sequence_nr": msg.sequenceNr - } for msg in test_workflow.messages[-3:] # Last 3 messages - ] - }) - - # Log detailed workflow messages - for i, message in enumerate(test_workflow.messages): - log_workflow_debug(f"WORKFLOW MESSAGE {i+1}:", { - "role": message.role, - "message": message.message, - "status": message.status, - "sequence_nr": message.sequenceNr, - "published_at": message.publishedAt, - "document_count": len(message.documents) if hasattr(message, 'documents') else 0 - }) - - return test_workflow - - except Exception as e: - import traceback - error_details = { - "error_type": type(e).__name__, - "error_message": str(e), - "error_args": e.args if hasattr(e, 'args') else None, - "traceback": traceback.format_exc() - } - log_workflow_debug("WORKFLOW PROCESS EXCEPTION:", error_details) - raise - - logger.info("=== UNIFIED WORKFLOW PROCESS TEST COMPLETED ===") - return test_workflow - - except Exception as e: - logger.error(f"❌ Test failed with error: {str(e)}") - log_workflow_debug("Full error details", { - "error_type": type(e).__name__, - "error_message": str(e) - }) - raise - -async def main(): - print("Inside main()") - logger.info("=" * 50) - logger.info("CANDIDATE EVALUATION UNIFIED WORKFLOW TEST") - logger.info("=" * 50) - - try: - workflow = await test_workflow_process() - logger.info("=" * 50) - logger.info("TEST COMPLETED SUCCESSFULLY") - logger.info("=" * 50) - return workflow - except Exception as e: - logger.error("=" * 50) - logger.error("TEST FAILED") - logger.error("=" * 50) - raise - -if __name__ == "__main__": - print("About to run main()") - asyncio.run(main()) - print("main() finished") \ No newline at end of file