From b907d068b30d9c82beabe3652defc732f88e1592 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Sat, 21 Jun 2025 01:30:27 +0200 Subject: [PATCH] methods implemented --- modules/historic_data_agents/agentCoder.py | 1039 +++++++++++++++++ .../agentDocumentation.py | 537 +++++++++ modules/historic_data_agents/agentEmail.py | 380 ++++++ .../historic_data_agents/agentSharepoint.py | 348 ++++++ .../historic_data_agents/agentWebcrawler.py | 814 +++++++++++++ modules/interfaces/interfaceChatModel.py | 40 +- .../interfaces/interfaceComponentObjects.py | 8 +- modules/methods/methodBase.py | 18 +- modules/methods/methodCoder.py | 328 +++--- modules/methods/methodDocument.py | 375 +++--- modules/methods/methodExcel.py | 423 +++++-- modules/methods/methodOperator.py | 291 +++-- modules/methods/methodOutlook.py | 362 +++++- modules/methods/methodPowerpoint.py | 600 +++++++--- modules/methods/methodSharepoint.py | 597 ++++++++-- modules/methods/methodWeb.py | 568 +++++++-- modules/neutralizer/neutralizer.py | 2 +- modules/workflow/managerChat.py | 152 ++- modules/workflow/managerDocument.py | 57 +- modules/workflow/managerWorkflow.py | 91 +- modules/workflow/processorDocument.py | 142 ++- modules/workflow/serviceContainer.py | 13 +- notes/changelog.txt | 13 + notes/methodbased_specification.md | 4 +- test_config.ini | 15 + test_workflow.py | 175 +++ 26 files changed, 6291 insertions(+), 1101 deletions(-) create mode 100644 modules/historic_data_agents/agentCoder.py create mode 100644 modules/historic_data_agents/agentDocumentation.py create mode 100644 modules/historic_data_agents/agentEmail.py create mode 100644 modules/historic_data_agents/agentSharepoint.py create mode 100644 modules/historic_data_agents/agentWebcrawler.py create mode 100644 test_config.ini create mode 100644 test_workflow.py diff --git a/modules/historic_data_agents/agentCoder.py b/modules/historic_data_agents/agentCoder.py new file mode 100644 index 00000000..8cb4d869 --- /dev/null +++ 
async def processTask(self, task: Task) -> Dict[str, Any]:
    """
    Solve a task, preferring a direct AI answer and falling back to generated code.

    Flow: collect input data from the task documents, ask the AI whether the
    task can be answered from the extracted content alone, and otherwise
    generate Python code, execute it (asking the AI to repair the code after
    each failed run, up to ``self.executionRetryLimit`` retries) and convert
    the printed ``result`` dictionary into output documents.

    Args:
        task: Task object; ``prompt``, ``filesInput`` and ``filesOutput`` are read.

    Returns:
        Dictionary with ``feedback`` (status text) and ``documents``. On the
        code path the documents always include the final code and the
        execution history, followed by one document per well-formed result
        entry when execution succeeded.
    """
    # 1. Extract task information (missing lists are treated as empty)
    prompt = task.prompt
    inputDocuments = task.filesInput or []
    outputSpecs = task.filesOutput or []

    # Check if AI service is available
    if not self.service or not self.service.base:
        logger.error("No AI service configured for the Coder agent")
        return {
            "feedback": "The Coder agent is not properly configured.",
            "documents": []
        }

    # 2. Extract data from documents in separate categories
    documentData = []       # [filename, data, isBase64] triples injected into the generated code
    contentExtraction = []  # AI-extracted content offered to the quick-completion check

    for doc in inputDocuments:
        # Create proper filename from name and ext
        filename = f"{doc.name}.{doc.ext}" if doc.ext else doc.name

        # Main document data is assumed to be base64 encoded
        docData = doc.data
        if docData:
            documentData.append([filename, docData, True])

        for content in doc.contents or []:
            if not content.data:
                continue

            contentExtraction.append({
                "filename": filename,
                "contentName": content.name,
                "contentData": content.data,
                "contentType": content.contentType,
                "summary": content.summary
            })

            rawData = content.data
            isBase64 = content.metadata.get('base64Encoded', False) if content.metadata else False

            # Make the content available to the generated code unless it merely
            # duplicates the main document data that was already added
            if not docData or docData != rawData:
                documentData.append([filename, rawData, isBase64])

    # 3. Check if task can be completed without code execution
    quickCompletion = await self._checkQuickCompletion(prompt, contentExtraction, outputSpecs)

    if quickCompletion and quickCompletion.get("complete") == 1:
        logger.info("Task completed without code execution")
        # NOTE(review): these documents are the raw dicts returned by the AI,
        # not objects built by formatAgentDocumentOutput - confirm callers accept both
        return {
            "feedback": quickCompletion.get("prompt", "Task completed successfully."),
            "documents": quickCompletion.get("documents", [])
        }
    logger.debug("Code to generate, no quick check")

    # If quick completion not possible, continue with code generation and execution
    logger.info("Generating code to solve the task")

    # 4. Generate code using AI
    code, requirements = await self._generateCode(prompt, outputSpecs)
    if not code:
        return {
            "feedback": "Failed to generate code for the task.",
            "documents": []
        }
    # Keep the code without injected data: it is what gets sent back to the AI for repair
    original_clean_code = code

    # 5. Replace the placeholder with actual inputFiles data
    placeholder = "inputFiles = \"=== JSONLOAD ===\""
    documentDataJson = repr(documentData)
    codeWithData = code.replace(placeholder, f"inputFiles = {documentDataJson}")

    # 6. Execute code with retry logic. A negative configured limit is clamped
    # so that the code is always executed at least once.
    maxRetries = max(self.executionRetryLimit, 0)
    executionHistory = []
    executionResult = {}

    for attempt in range(1, maxRetries + 2):
        executionResult = self._executeCode(codeWithData, requirements)
        executionHistory.append({
            "attempt": attempt,
            "code": codeWithData,
            "result": executionResult
        })

        # Check if execution was successful
        if executionResult.get("success", False):
            logger.info(f"Code execution succeeded on attempt {attempt}")
            break

        # If we've reached max retries, exit the loop
        if attempt > maxRetries:
            logger.info(f"Reached maximum retry limit ({maxRetries}). Giving up.")
            break

        # Log the error and attempt to improve the code
        error = executionResult.get("error", "Unknown error")
        logger.info(f"Execution attempt {attempt} failed: {error}. Attempting to improve code.")

        improvedCode, improvedRequirements = await self._improveCode(
            originalCode=original_clean_code,  # Use clean code without document data
            error=error,
            executionResult=executionResult,
            attempt=attempt,
            outputSpecs=outputSpecs
        )

        if improvedCode:
            # Inject document data into improved code
            original_clean_code = improvedCode  # Basis for the next potential improvement
            codeWithData = improvedCode.replace(placeholder, f"inputFiles = {documentDataJson}")
            requirements = improvedRequirements
            logger.info(f"Code improved for retry {attempt + 1}")
        else:
            logger.warning("Failed to improve code, using original code for retry")

    # 7. Process results and create output documents
    documents = []

    # Always add the final code document
    documents.append(self.formatAgentDocumentOutput("generated_code.py", codeWithData, "text/plain"))

    # Add execution history document
    executionHistoryStr = json.dumps(executionHistory, indent=2)
    documents.append(self.formatAgentDocumentOutput("execution_history.json", executionHistoryStr, "application/json"))

    if executionResult.get("success", False):
        resultData = executionResult.get("result")

        if isinstance(resultData, dict):
            # Mapping of expected output labels to their specs
            expectedOutputs = {spec.get("label"): spec for spec in outputSpecs}
            createdOutputs = set()

            for label, result_item in resultData.items():
                # Each entry must be a dict carrying at least a 'content' field
                if not (isinstance(result_item, dict) and "content" in result_item):
                    logger.warning(f"Skipping improperly formatted result for '{label}'. Results must include 'content' field.")
                    continue

                content = result_item.get("content", "")
                base64Encoded = result_item.get("base64Encoded", False)
                contentType = result_item.get("contentType", "text/plain")

                # An unexpected label is remapped to an expected label that is
                # still open. Labels the result delivers itself are not open,
                # otherwise the remapped entry and the real one would collide.
                finalLabel = label
                if label not in expectedOutputs and len(expectedOutputs) > 0:
                    for expectedLabel in expectedOutputs:
                        if expectedLabel not in createdOutputs and expectedLabel not in resultData:
                            logger.warning(f"Remapping output '{label}' to expected '{expectedLabel}'")
                            finalLabel = expectedLabel
                            break

                # Create document by passing only the content to formatAgentDocumentOutput
                doc = self.formatAgentDocumentOutput(finalLabel, content, contentType)

                # The flag reported by the generated code wins over whatever
                # formatAgentDocumentOutput determined on its own
                if isinstance(base64Encoded, bool):
                    doc.base64Encoded = base64Encoded

                documents.append(doc)
                createdOutputs.add(finalLabel)
                logger.info(f"Created document from result: {finalLabel} ({contentType}, base64={base64Encoded})")
        else:
            # Handle non-dictionary results
            logger.warning("Execution result is not a dictionary. Creating a single output document.")
            doc = self.formatAgentDocumentOutput("result.txt", str(resultData), "text/plain")
            documents.append(doc)

    # 8. Return results
    return {
        "feedback": "Code execution completed successfully." if executionResult.get("success", False) else f"Code execution failed: {executionResult.get('error', 'Unknown error')}",
        "documents": documents
    }
+ Enhanced to maintain proper output handling with correct document structure. + + Args: + originalCode: The code that failed to execute + error: The error message + executionResult: Complete execution result dictionary + attempt: Current attempt number + outputSpecs: List of expected output specifications + + Returns: + Tuple of (improvedCode, requirements) + """ + # Create a string with output specifications to be included in the prompt + outputSpecsStr = "" + if outputSpecs: + outputSpecsStr = "\nEXPECTED OUTPUT DOCUMENTS:\n" + for i, spec in enumerate(outputSpecs, 1): + label = spec.get("label", f"output{i}.txt") + description = spec.get("description", "") + outputSpecsStr += f"{i}. {label} - {description}\n" + + # Create prompt for code improvement + improvementPrompt = f""" +Fix the following Python code that failed during execution. This is attempt {attempt} to fix the code. + +ORIGINAL CODE: +{originalCode} + +ERROR MESSAGE: +{error} + +STDOUT: +{executionResult.get('output', '')} +{outputSpecsStr} +INSTRUCTIONS: +1. Fix all errors identified in the error message +2. If there is a requirements error for missing or failes modules, then create alternate code with other modules +3. Diagnose and fix any logical issues +4. Pay special attention to: +- Type conversions and data handling +- Error handling and edge cases +- Resource management (file handles, etc.) +- Syntax errors and typos +5. Keep the inputFiles handling logic intact +6. 
Maintain the same overall structure and purpose + +OUTPUT REQUIREMENTS (VERY IMPORTANT): +- Your code MUST define a 'result' variable as a dictionary to store ALL outputs +- The key for each entry MUST be the full filename with extension (e.g., "output.txt") +- The value for each entry MUST be a dictionary with the following structure: +{{ + "content": string, # The actual content (text or base64-encoded string) + "base64Encoded": boolean, # Set to true for binary data, false for text data + "contentType": string # MIME type of the content (e.g., "text/plain", "application/json") +}} +- Example result dictionary: +result = {{ + "output.txt": {{ + "content": "This is text content", + "base64Encoded": False, + "contentType": "text/plain" + }}, + "chart.png": {{ + "content": "base64encodedstring...", + "base64Encoded": True, + "contentType": "image/png" + }} +}} +- NEVER write files to disk using open() or similar methods - use the result dictionary instead + +JSON OUTPUT (CRITICAL): +- After creating the result dictionary, you MUST print it as JSON to stdout +- Make sure your code includes: print(json.dumps(result)) as the final line +- This printed JSON is how the system captures your result + +REQUIREMENTS: +Required packages should be specified as: +# REQUIREMENTS: library==version,library2>=version +- You may add/remove requirements as needed to fix the code + +Return ONLY Python code without explanations or markdown. +""" + + # Call AI service + messages = [ + {"role": "system", "content": "You are an expert Python code debugger. Provide only fixed Python code without explanations or formatting. 
async def _checkQuickCompletion(self, prompt: str, contentExtraction: List[Dict[str, Any]], outputSpecs: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """
    Check if the task can be completed without writing and executing code.

    This is a best-effort shortcut: every failure (unserializable input, AI
    error, unparseable answer) results in ``None`` so that the caller falls
    back to code generation.

    Args:
        prompt: The task prompt
        contentExtraction: Extracted content entries. processTask passes plain
            dicts (filename, contentName, contentData, contentType, summary);
            objects exposing ``.dict()`` are accepted as well.
        outputSpecs: List of output specifications

    Returns:
        The parsed AI answer (a dict containing a "complete" key), or None if
        no quick completion is possible
    """
    # If no data or no output specs, can't do a quick completion
    if not contentExtraction or not outputSpecs:
        return None

    # Serialize the inputs for the prompt. Entries are normally plain dicts;
    # calling .dict() on them unconditionally raised AttributeError.
    try:
        specsJson = json.dumps(outputSpecs)
        dataJson = json.dumps([
            entry if isinstance(entry, dict) else entry.dict()
            for entry in contentExtraction
        ])
    except (TypeError, ValueError, AttributeError) as e:
        logger.debug(f"Error during quick completion check: {str(e)}")
        return None

    checkPrompt = f"""
Analyze this task and determine if it can be completed directly without writing code.

TASK:
{prompt}

EXTRACTED DATA AVAILABLE:
{dataJson}

Each entry in the extracted data contains:
- filename: The source file name
- contentName: The specific content section name
- contentData: The AI-extracted text from the content
- contentType: The type of content (text, csv, etc.)
- summary: A brief summary of the content

REQUIRED OUTPUT:
{specsJson}

If the task can be completed directly with the available extracted data, respond with:
{{"complete": 1, "prompt": "Brief explanation of the solution", "documents": [
    {{"label": "filename.ext", "content": "content here"}}
]}}

If code would be needed to properly complete this task, respond with:
{{"complete": 0, "prompt": "Explanation why code is needed"}}

Only return valid JSON. Your entire response must be parseable as JSON.
"""

    # Call AI service
    logger.debug(f"Checking if task can be completed without code execution: {checkPrompt}")
    messages = [
        {"role": "system", "content": "You are an AI assistant that determines if tasks require code execution. Reply with JSON only."},
        {"role": "user", "content": checkPrompt}
    ]

    try:
        # Use a lower temperature for more deterministic response
        response = await self.service.base.callAi(messages, produceUserAnswer = True, temperature=0.1)

        if response:
            try:
                # Cut out the outermost {...} in case the model wrapped the JSON in text
                jsonStart = response.find('{')
                jsonEnd = response.rfind('}') + 1

                if jsonStart >= 0 and jsonEnd > jsonStart:
                    result = json.loads(response[jsonStart:jsonEnd])

                    # Only answers carrying the "complete" flag are usable
                    if isinstance(result, dict) and "complete" in result:
                        return result

            except json.JSONDecodeError:
                logger.debug("Failed to parse quick completion response as JSON")
    except Exception as e:
        logger.debug(f"Error during quick completion check: {str(e)}")

    # Default to requiring code execution
    return None
+ + Args: + prompt: The task prompt + outputSpecs: List of expected output specifications + + Returns: + Tuple of (code, requirements) + """ + # Create a string with output specifications to be included in the prompt + outputSpecsStr = "" + if outputSpecs: + outputSpecsStr = "\nEXPECTED OUTPUT DOCUMENTS:\n" + for i, spec in enumerate(outputSpecs, 1): + label = spec.get("label", f"output{i}.txt") + description = spec.get("description", "") + outputSpecsStr += f"{i}. {label} - {description}\n" + + # Create improved prompt for code generation + aiPrompt = f""" +Generate Python code to solve the following task: + +TASK: +{prompt} +{outputSpecsStr} +INPUT FILES: +- 'inputFiles' variable is provided as [[filename, data, isBase64], ...] +- For text files (isBase64=False): use data directly as string +- For binary files (isBase64=True): use base64.b64decode(data) + +OUTPUT REQUIREMENTS (VERY IMPORTANT): +- Your code MUST define a 'result' variable as a dictionary to store ALL outputs +- The key for each entry MUST be the full filename with extension (e.g., "output.txt") +- The value for each entry MUST be a dictionary with the following structure: +{{ + "content": string, # The actual content (text or base64-encoded string) + "base64Encoded": boolean, # Set to true for binary data, false for text data + "contentType": string # MIME type of the content (e.g., "text/plain", "application/json") +}} +- Example result dictionary: +result = {{ + "output.txt": {{ + "content": "This is text content", + "base64Encoded": False, + "contentType": "text/plain" + }}, + "chart.png": {{ + "content": "base64encodedstring...", + "base64Encoded": True, + "contentType": "image/png" + }} +}} +- NEVER write files to disk using open() or similar methods - use the result dictionary instead +- If you generate any charts, reports, or visualizations, ensure they are properly encoded and included + +IMPORTANT - USE EXACT OUTPUT FILENAMES: +- You MUST use the EXACT filenames specified in EXPECTED 
OUTPUT DOCUMENTS section +- The key in the result dictionary must match these filenames precisely +- If no output documents are specified, use appropriate descriptive filenames + +JSON OUTPUT (CRITICAL): +- After creating the result dictionary, you MUST print it as JSON to stdout using json.dumps() +- Add these lines at the end of your code: + import json # if not already imported + print(json.dumps(result)) +- This printed JSON is how the system captures your result +- Make sure this is the last thing your code prints + +BINARY DATA HANDLING: +- For binary content (images, PDFs, etc.), convert to base64 string and set base64Encoded=True +- For text content (text, JSON, HTML, etc.), use plain string and set base64Encoded=False +- Use appropriate MIME types for different content types + +CODE QUALITY: +- Use explicit type conversions where needed (int/float/str) +- Implement feature detection, not version checks +- Handle errors gracefully with appropriate fallbacks +- Follow latest API conventions for libraries +- Validate inputs before processing + +Your code must start with: +inputFiles = "=== JSONLOAD ===" # DO NOT CHANGE THIS LINE + +REQUIREMENTS: +Required packages should be specified as: +# REQUIREMENTS: library==version,library2>=version +- Specify exact versions for critical libraries +- Use constraint operators (==,>=,<=) as needed + +Return ONLY Python code without explanations or markdown. +""" + + # Call AI service + messages = [ + {"role": "system", "content": "You are a Python code generator. Provide only valid Python code without explanations or formatting. 
def _executeCodeProd(self, code: str, requirements: List[str] = None) -> Dict[str, Any]:
    """
    Execute Python code in Azure environment using the antenv interpreter.

    No virtual environment is created. The code runs with the first existing
    interpreter from a fixed candidate list, and requirements are installed
    with ``pip install --user`` into a PYTHONUSERBASE inside the temporary
    directory, which is then put on PYTHONPATH for the child process.

    Args:
        code: Python code to execute
        requirements: List of required packages

    Returns:
        Execution result dictionary with the keys ``success``, ``output``
        (stdout), ``error`` (stderr on failure), ``result`` (last JSON object
        printed to stdout, or None) and ``exitCode``
    """
    try:
        # 1. Create temp directory for code files
        self.tempDir = tempfile.mkdtemp(prefix="code_exec_")

        # Candidate interpreters, most specific first
        possible_python_paths = [
            "/home/site/wwwroot/antenv/bin/python",
            "/antenv/bin/python",
            "/tmp/8dd8c226509f116/antenv/bin/python",  # NOTE(review): deployment-specific path taken from error logs
            sys.executable  # Fallback to system Python
        ]

        pythonExe = next((path for path in possible_python_paths if os.path.exists(path)), None)

        if not pythonExe:
            logger.error("Could not find a valid Python interpreter in Azure environment")
            return {
                "success": False,
                "output": "",
                "error": "Could not find a valid Python interpreter in Azure environment",
                "result": None,
                "exitCode": -1
            }
        logger.info(f"Found Python interpreter at: {pythonExe}")

        env = os.environ.copy()

        # 2. Install requirements to a temporary user directory if provided
        if requirements:
            logger.info(f"Installing requirements in Azure environment: {requirements}")

            # Create requirements.txt
            reqFile = os.path.join(self.tempDir, "requirements.txt")
            with open(reqFile, "w") as f:
                f.write("\n".join(requirements))

            # Set up a custom PYTHONUSERBASE to isolate package installations
            custom_user_base = os.path.join(self.tempDir, "pip_packages")
            os.makedirs(custom_user_base, exist_ok=True)
            env["PYTHONUSERBASE"] = custom_user_base

            # Installation is best effort: failures are logged and the code is
            # executed anyway
            try:
                pipResult = subprocess.run(
                    [pythonExe, "-m", "pip", "install", "--user", "-r", reqFile],
                    capture_output=True,
                    text=True,
                    env=env,
                    timeout=int(APP_CONFIG.get("Agent_Coder_INSTALL_TIMEOUT"))
                )

                if pipResult.returncode != 0:
                    logger.warning(f"Error installing requirements in Azure: {pipResult.stderr}")
                else:
                    logger.info(f"Requirements installed successfully to {custom_user_base}")

                    # Locate the directory the packages were installed into
                    import glob
                    site_packages_paths = glob.glob(
                        os.path.join(custom_user_base, "lib", "python*", "site-packages")
                    )

                    packageDir = None
                    if site_packages_paths:
                        packageDir = site_packages_paths[0]
                    else:
                        # Alternative layout without the lib/pythonX.Y level
                        alt_site_packages = os.path.join(custom_user_base, "site-packages")
                        if os.path.exists(alt_site_packages):
                            packageDir = alt_site_packages

                    if packageDir:
                        # Only join with an existing PYTHONPATH when it is
                        # non-empty: joining with "" leaves a trailing empty
                        # search path element in the variable.
                        existingPath = env.get("PYTHONPATH")
                        env["PYTHONPATH"] = os.pathsep.join([packageDir, existingPath]) if existingPath else packageDir
                        logger.info(f"Added {packageDir} to PYTHONPATH")
            except Exception as e:
                logger.warning(f"Exception during requirements installation in Azure: {str(e)}")

        # 3. Write code to file
        codeFile = os.path.join(self.tempDir, "code.py")
        with open(codeFile, "w", encoding="utf-8") as f:
            f.write(code)

        # 4. Execute code with the modified environment
        logger.debug(f"Executing code in Azure environment with timeout of {self.executorTimeout} seconds")
        process = subprocess.run(
            [pythonExe, codeFile],
            timeout=self.executorTimeout,
            capture_output=True,
            text=True,
            env=env
        )

        # 5. Process results
        stdout = process.stdout
        stderr = process.stderr

        # The generated code prints its result as one JSON line; take the last
        # stdout line that parses to a dictionary
        resultData = None
        if process.returncode == 0:
            try:
                for line in reversed(stdout.strip().split('\n')):
                    line = line.strip()
                    if not (line and line[0] in '{[' and line[-1] in '}]'):
                        continue
                    try:
                        parsed = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if isinstance(parsed, dict):
                        resultData = parsed
                        logger.debug(f"Extracted result data from stdout: {type(resultData)}")
                        break
            except Exception as e:
                logger.debug(f"Error extracting result from stdout: {str(e)}")

        if resultData:
            logger.info(f"Found result dictionary with {len(resultData)} entries: {list(resultData.keys())}")
        else:
            logger.warning("No result dictionary found in output")

        # Create result dictionary
        return {
            "success": process.returncode == 0,
            "output": stdout,
            "error": stderr if process.returncode != 0 else "",
            "result": resultData,
            "exitCode": process.returncode
        }

    except subprocess.TimeoutExpired:
        logger.error(f"Execution in Azure timed out after {self.executorTimeout} seconds")
        return {
            "success": False,
            "output": "",
            "error": f"Execution timed out after {self.executorTimeout} seconds",
            "result": None,
            "exitCode": -1
        }
    except Exception as e:
        logger.error(f"Execution error in Azure environment: {str(e)}")
        return {
            "success": False,
            "output": "",
            "error": f"Execution error in Azure environment: {str(e)}",
            "result": None,
            "exitCode": -1
        }
    finally:
        # Remove the temporary directory regardless of the outcome
        self._cleanupExecution()
subprocess.run([sys.executable, "-m", "virtualenv", venvPath], + check=True, capture_output=True, timeout=60) + logger.debug("Virtual environment created successfully with virtualenv") + except (subprocess.SubprocessError, subprocess.CalledProcessError) as e: + # If all methods fail, raise an exception + error_msg = f"Failed to create virtual environment with all methods: {str(e)}" + logger.error(error_msg) + raise RuntimeError(error_msg) + + # Get Python executable path - adjusted for OS + if os.name == 'nt': # Windows + pythonExe = os.path.join(venvPath, "Scripts", "python.exe") + else: # Linux/Mac + pythonExe = os.path.join(venvPath, "bin", "python") + + # Verify python executable exists + if not os.path.exists(pythonExe): + # Try to find it + if os.name == 'nt': + possible_paths = [ + os.path.join(venvPath, "Scripts", "python.exe"), + os.path.join(venvPath, "Scripts", "python") + ] + else: + possible_paths = [ + os.path.join(venvPath, "bin", "python"), + os.path.join(venvPath, "bin", "python3") + ] + + for path in possible_paths: + if os.path.exists(path): + pythonExe = path + logger.debug(f"Found Python executable at: {pythonExe}") + break + + if not os.path.exists(pythonExe): + logger.error(f"Python executable not found at expected path: {pythonExe}") + raise FileNotFoundError(f"Python executable not found in virtual environment") + + # 2. 
Install requirements if provided + if requirements: + logger.info(f"Installing requirements: {requirements}") + + # Create requirements.txt + reqFile = os.path.join(self.tempDir, "requirements.txt") + with open(reqFile, "w") as f: + f.write("\n".join(requirements)) + + x="\n".join(requirements) + logger.info(f"Requirements file: {x}.") + + # Install requirements + try: + pipResult = subprocess.run( + [pythonExe, "-m", "pip", "install", "-r", reqFile], + capture_output=True, + text=True, + timeout=int(APP_CONFIG.get("Agent_Coder_INSTALL_TIMEOUT")) + ) + if pipResult.returncode != 0: + logger.debug(f"Error installing requirements: {pipResult.stderr}") + else: + logger.debug(f"Requirements installed successfully") + # Log installed packages if in debug mode + if logger.isEnabledFor(logging.DEBUG): + pipList = subprocess.run( + [pythonExe, "-m", "pip", "list"], + capture_output=True, + text=True + ) + logger.debug(f"Installed packages:\n{pipList.stdout}") + + except Exception as e: + logger.debug(f"Exception during requirements installation: {str(e)}") + + # 3. Write code to file + codeFile = os.path.join(self.tempDir, "code.py") + with open(codeFile, "w", encoding="utf-8") as f: + f.write(code) + + # 4. Execute code + logger.debug(f"Executing code with timeout of {self.executorTimeout} seconds. Code: {code}") + process = subprocess.run( + [pythonExe, codeFile], + timeout=self.executorTimeout, + capture_output=True, + text=True + ) + + # 5. 
Process results + stdout = process.stdout + stderr = process.stderr + + # Try to extract result from stdout + resultData = None + if process.returncode == 0: + try: + # Find the last line that might be JSON + jsonLines = [] + for line in stdout.strip().split('\n'): + line = line.strip() + if line and line[0] in '{[' and line[-1] in '}]': + try: + parsed = json.loads(line) + jsonLines.append((line, parsed)) + except json.JSONDecodeError: + continue + + # Use the last valid JSON that appears to be a dictionary + if jsonLines: + for line, parsed in reversed(jsonLines): + if isinstance(parsed, dict): + resultData = parsed + logger.debug(f"Extracted result data from stdout: {type(resultData)}") + break + except Exception as e: + logger.debug(f"Error extracting result from stdout: {str(e)}") + + # Enhanced logging of what was found + if resultData: + logger.info(f"Found result dictionary with {len(resultData)} entries: {list(resultData.keys())}") + else: + logger.warning("No result dictionary found in output") + + # Create result dictionary + return { + "success": process.returncode == 0, + "output": stdout, + "error": stderr if process.returncode != 0 else "", + "result": resultData, + "exitCode": process.returncode + } + + except subprocess.TimeoutExpired: + logger.error(f"Execution timed out after {self.executorTimeout} seconds") + return { + "success": False, + "output": "", + "error": f"Execution timed out after {self.executorTimeout} seconds", + "result": None, + "exitCode": -1 + } + except Exception as e: + logger.error(f"Execution error: {str(e)}") + return { + "success": False, + "output": "", + "error": f"Execution error: {str(e)}", + "result": None, + "exitCode": -1 + } + finally: + # Clean up resources + self._cleanupExecution() + + def _executeCode(self, code: str, requirements: List[str] = None) -> Dict[str, Any]: + """ + Execute Python code in the appropriate environment based on configuration. 
+ + Args: + code: Python code to execute + requirements: List of required packages + + Returns: + Execution result dictionary + """ + # Check if we're in a production Azure environment + env_type = APP_CONFIG.get("APP_ENV_TYPE", "dev").lower() + + logger.info(f"Executing code in environment type: {env_type}") + + if env_type == "prod": + # Use the Azure-optimized execution method + logger.info("Using Azure-optimized code execution method") + return self._executeCodeProd(code, requirements) + else: + # Use the standard virtual environment execution method + logger.info("Using standard virtual environment execution method") + return self._executeCodeVenv(code, requirements) + + + def _cleanupExecution(self): + """Clean up temporary resources from code execution.""" + if self.tempDir and os.path.exists(self.tempDir): + try: + logger.debug(f"Cleaning up temporary directory: {self.tempDir}") + shutil.rmtree(self.tempDir) + self.tempDir = None + except Exception as e: + logger.warning(f"Error cleaning up temp directory: {str(e)}") + + def _cleanCode(self, code: str) -> str: + """Remove any markdown formatting or explanations.""" + # Remove code block markers + code = code.replace("```python", "").replace("```", "") + + # Remove explanations before or after code + lines = code.strip().split('\n') + startIndex = 0 + endIndex = len(lines) + + # Find start of actual code + for i, line in enumerate(lines): + if line.strip().startswith("inputFiles =") or line.strip().startswith("# REQUIREMENTS:"): + startIndex = i + break + + # Clean code + cleanedCode = '\n'.join(lines[startIndex:endIndex]) + return cleanedCode.strip() + + def formatAgentDocumentOutput(self, filename: str, content: str, contentType: str) -> ChatDocument: + """ + Format a document for agent output. 
+ + Args: + filename: Output filename + content: Document content + contentType: MIME type of the content + + Returns: + ChatDocument object + """ + # Split filename into name and extension + name, ext = os.path.splitext(filename) + if ext.startswith('.'): + ext = ext[1:] + + # Create document object + return ChatDocument( + id=str(uuid.uuid4()), + name=name, + ext=ext, + data=content, + contents=[ + ChatContent( + name="main", + data=content, + summary=f"Generated {filename}", + metadata={"contentType": contentType} + ) + ] + ) + +# Factory function for the Coder agent +def getAgentCoder(): + """Returns an instance of the Coder agent.""" + return AgentCoder() \ No newline at end of file diff --git a/modules/historic_data_agents/agentDocumentation.py b/modules/historic_data_agents/agentDocumentation.py new file mode 100644 index 00000000..1cf3e3b2 --- /dev/null +++ b/modules/historic_data_agents/agentDocumentation.py @@ -0,0 +1,537 @@ +""" +Documentation agent for generating structured documentation. +Provides comprehensive documentation generation capabilities. 
+""" + +import logging +from typing import Dict, Any, List, Optional +import json +import re +from datetime import datetime +import os +import hashlib +import base64 +import uuid +import shutil +from pathlib import Path +import traceback +import sys +import importlib.util +import inspect +from pydantic import BaseModel + +from modules.workflow.agentBase import AgentBase +from modules.interfaces.serviceChatModel import ChatContent + +logger = logging.getLogger(__name__) + +class AgentDocumentation(AgentBase): + """AI-driven agent for creating documentation and structured content using multi-step generation""" + + def __init__(self): + """Initialize the documentation agent""" + super().__init__() + self.name = "documentation" + self.label = "Documentation" + self.description = "Creates structured documentation, reports, and content using AI with multi-step generation" + self.capabilities = [ + "report_generation", + "documentation", + "content_structuring", + "technical_writing", + "knowledge_organization" + ] + + def setDependencies(self, serviceBase=None): + """Set external dependencies for the agent.""" + self.setService(serviceBase) + + async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]: + """ + Process a task by focusing on required outputs and using AI to generate them. 
+ + Args: + task: Task dictionary with prompt, inputDocuments, outputSpecifications + + Returns: + Dictionary with feedback and documents + """ + try: + # Extract task information + prompt = task.get("prompt", "") + inputDocuments = task.get("inputDocuments", []) + outputSpecs = task.get("outputSpecifications", []) + + # Check AI service + if not self.service or not self.service.base: + return { + "feedback": "The Documentation agent requires an AI service to function.", + "documents": [] + } + + # Extract context from input documents - focusing only on dataExtracted + documentContext = self._extractDocumentContext(inputDocuments) + + # Create task analysis to understand the requirements + documentationPlan = await self._analyzeTask(prompt, documentContext, outputSpecs) + logger.debug(f"Documentation plan: {documentationPlan}") + + # Generate all required output documents + documents = [] + + # If no output specs provided, create default document + if not outputSpecs: + defaultFormat = documentationPlan.get("recommendedFormat", "markdown") + defaultTitle = documentationPlan.get("title", "Documentation") + safeTitle = self._sanitizeFilename(defaultTitle) + + outputSpecs = [ + {"label": f"{safeTitle}.{defaultFormat}", "description": "Comprehensive documentation"} + ] + + # Process each output specification + for spec in outputSpecs: + outputLabel = spec.get("label", "") + outputDescription = spec.get("description", "") + + # Generate the document using multi-step approach + document = await self._createDocumentMultiStep( + prompt, + documentContext, + outputLabel, + outputDescription, + documentationPlan + ) + + documents.append(document) + + # Generate feedback + feedback = documentationPlan.get("feedback", f"Created {len(documents)} documents based on your requirements.") + + return { + "feedback": feedback, + "documents": documents + } + + except Exception as e: + logger.error(f"Error in documentation generation: {str(e)}", exc_info=True) + return { + "feedback": 
f"Error during documentation generation: {str(e)}", + "documents": [] + } + + def _extractDocumentContext(self, documents: List[Dict[str, Any]]) -> str: + """ + Extract context from input documents, focusing on dataExtracted. + + Args: + documents: List of document objects + + Returns: + Extracted context as text + """ + contextParts = [] + + for doc in documents: + docName = doc.get("name", "unnamed") + if doc.get("ext"): + docName = f"{docName}.{doc.get('ext')}" + + contextParts.append(f"\n\n--- {docName} ---\n") + + # Process contents for dataExtracted + for content in doc.get("contents", []): + if content.get("dataExtracted"): + contextParts.append(content.get("dataExtracted", "")) + + return "\n".join(contextParts) + + def _sanitizeFilename(self, filename: str) -> str: + """ + Sanitize a filename by removing invalid characters. + + Args: + filename: Filename to sanitize + + Returns: + Sanitized filename + """ + # Replace invalid characters with underscores + invalidChars = r'<>:"/\|?*' + for char in invalidChars: + filename = filename.replace(char, '_') + + # Trim filename if too long + if len(filename) > 100: + filename = filename[:97] + "..." + + return filename + + async def _analyzeTask(self, prompt: str, context: str, outputSpecs: List) -> Dict: + """ + Use AI to analyze the task and create a documentation plan. + + Args: + prompt: The task prompt + context: Document context + outputSpecs: Output specifications + + Returns: + Documentation plan dictionary + """ + analysisPrompt = f""" + Analyze this documentation task and create a detailed plan. + + TASK: {prompt} + + DOCUMENT CONTEXT SAMPLE: + {context[:1000]}... 
(truncated) + + OUTPUT REQUIREMENTS: + {json.dumps(outputSpecs, indent=2)} + + Create a detailed documentation plan in JSON format with the following structure: + {{ + "title": "Document Title", + "documentType": "report|manual|guide|whitepaper|etc", + "audience": "technical|general|executive|etc", + "detailedStructure": [ + {{ + "title": "Chapter/Section Title", + "keyPoints": ["point1", "point2", ...], + "subsections": ["subsection1", "subsection2", ...], + "importance": "high|medium|low", + "estimatedLength": "short|medium|long" + }}, + ... more sections ... + ], + "keyTopics": ["topic1", "topic2", ...], + "tone": "formal|conversational|instructional|etc", + "recommendedFormat": "markdown|html|text|etc", + "formattingRequirements": ["requirement1", "requirement2", ...], + "executiveSummary": "Brief description of what the document will cover", + "feedback": "Brief message explaining the documentation approach" + }} + + Only return valid JSON. No preamble or explanations. + """ + + try: + response = await self.service.base.callAi([ + {"role": "system", "content": "You are a documentation expert. 
Respond with valid JSON only."}, + {"role": "user", "content": analysisPrompt} + ]) + + # Extract JSON from response + jsonStart = response.find('{') + jsonEnd = response.rfind('}') + 1 + + if jsonStart >= 0 and jsonEnd > jsonStart: + plan = json.loads(response[jsonStart:jsonEnd]) + return plan + else: + # Fallback if JSON not found + return { + "title": "Documentation (DEFAULT)", + "documentType": "report", + "audience": "general", + "detailedStructure": [ + { + "title": "Introduction", + "keyPoints": ["Purpose", "Scope"], + "subsections": [], + "importance": "high", + "estimatedLength": "short" + }, + { + "title": "Main Content", + "keyPoints": ["Core Information"], + "subsections": ["Key Findings", "Analysis"], + "importance": "high", + "estimatedLength": "long" + }, + { + "title": "Conclusion", + "keyPoints": ["Summary", "Next Steps"], + "subsections": [], + "importance": "medium", + "estimatedLength": "short" + } + ], + "keyTopics": ["General Information"], + "tone": "formal", + "recommendedFormat": "markdown", + "formattingRequirements": ["Clear headings", "Professional formatting"], + "executiveSummary": "A comprehensive documentation covering the requested topics.", + "feedback": "Created documentation based on your requirements." 
+ } + + except Exception as e: + logger.warning(f"Error creating documentation plan: {str(e)}") + return { + "title": "Documentation", + "documentType": "report", + "audience": "general", + "detailedStructure": [ + { + "title": "Introduction", + "keyPoints": ["Purpose", "Scope"], + "subsections": [], + "importance": "high", + "estimatedLength": "short" + }, + { + "title": "Main Content", + "keyPoints": ["Core Information"], + "subsections": ["Key Findings", "Analysis"], + "importance": "high", + "estimatedLength": "long" + }, + { + "title": "Conclusion", + "keyPoints": ["Summary", "Next Steps"], + "subsections": [], + "importance": "medium", + "estimatedLength": "short" + } + ], + "keyTopics": ["General Information"], + "tone": "formal", + "recommendedFormat": "markdown", + "formattingRequirements": ["Clear headings", "Professional formatting"], + "executiveSummary": "A comprehensive documentation covering the requested topics.", + "feedback": "Created documentation based on your requirements." + } + + async def _createDocumentMultiStep(self, prompt: str, context: str, outputLabel: str, + outputDescription: str, documentationPlan: Dict) -> ChatContent: + """ + Create a document using a multi-step approach with separate AI calls for each section. + + Args: + prompt: Original task prompt + context: Document context + outputLabel: Output filename + outputDescription: Description of desired output + documentationPlan: Documentation plan from AI + + Returns: + ChatContent object + """ + try: + # Determine format from filename + formatType = outputLabel.split('.')[-1].lower() if '.' 
in outputLabel else "md" + + # Map format to contentType + contentTypeMap = { + "md": "text/markdown", + "markdown": "text/markdown", + "html": "text/html", + "txt": "text/plain", + "text": "text/plain", + "json": "application/json", + "csv": "text/csv" + } + + contentType = contentTypeMap.get(formatType, "text/plain") + + # Get document information + title = documentationPlan.get("title", "Documentation") + documentType = documentationPlan.get("documentType", "document") + audience = documentationPlan.get("audience", "general") + tone = documentationPlan.get("tone", "formal") + keyTopics = documentationPlan.get("keyTopics", []) + formattingRequirements = documentationPlan.get("formattingRequirements", []) + + # Get the detailed structure + detailedStructure = documentationPlan.get("detailedStructure", []) + + # Step 1: Generate executive summary + summaryPrompt = f""" + Create an executive summary for a {documentType} titled "{title}". + + DOCUMENT OVERVIEW: + - Type: {documentType} + - Audience: {audience} + - Key Topics: {', '.join(keyTopics)} + + TASK CONTEXT: {prompt} + + The executive summary should: + 1. Provide a concise overview of the document's purpose + 2. Highlight key points and findings + 3. Be clear and engaging for the target audience + 4. Set expectations for the document's content + + Keep the summary brief but comprehensive. + """ + + executiveSummary = await self.service.base.callAi([ + {"role": "system", "content": f"You are a documentation expert creating an executive summary in {formatType} format."}, + {"role": "user", "content": summaryPrompt} + ], produceUserAnswer = True) + + # Step 2: Generate introduction + introPrompt = f""" + Create an introduction for a {documentType} titled "{title}". + + DOCUMENT OVERVIEW: + - Type: {documentType} + - Audience: {audience} + - Key Topics: {', '.join(keyTopics)} + + TASK CONTEXT: {prompt} + + The introduction should: + 1. Set the context and purpose of the document + 2. 
Outline the scope and objectives + 3. Preview the main topics to be covered + 4. Engage the reader's interest + + Format the introduction according to {formatType} standards. + """ + + introduction = await self.service.base.callAi([ + {"role": "system", "content": f"You are a documentation expert creating an introduction in {formatType} format."}, + {"role": "user", "content": introPrompt} + ], produceUserAnswer = True) + + # Step 3: Generate main sections + sections = [] + for section in detailedStructure: + sectionTitle = section.get("title", "Section") + keyPoints = section.get("keyPoints", []) + subsections = section.get("subsections", []) + importance = section.get("importance", "medium") + estimatedLength = section.get("estimatedLength", "medium") + + sectionPrompt = f""" + Create the {sectionTitle} section for a {documentType} titled "{title}". + + SECTION DETAILS: + - Title: {sectionTitle} + - Key Points: {', '.join(keyPoints)} + - Subsections: {', '.join(subsections)} + - Importance: {importance} + - Estimated Length: {estimatedLength} + + DOCUMENT CONTEXT: + - Type: {documentType} + - Audience: {audience} + - Key Topics: {', '.join(keyTopics)} + + TASK CONTEXT: {prompt} + + The section should: + 1. Cover all key points thoroughly + 2. Include relevant subsections + 3. Maintain appropriate depth based on importance + 4. Follow the document's tone and style + + Format the section according to {formatType} standards. + """ + + sectionContent = await self.service.base.callAi([ + {"role": "system", "content": f"You are a documentation expert creating a section in {formatType} format."}, + {"role": "user", "content": sectionPrompt} + ], produceUserAnswer = True) + + sections.append(sectionContent) + + # Step 4: Generate conclusion + conclusionPrompt = f""" + Create the conclusion for a {documentType} titled "{title}". 
+ + DOCUMENT OVERVIEW: + - Type: {documentType} + - Audience: {audience} + - Key Topics: {', '.join(keyTopics)} + + TASK CONTEXT: {prompt} + + This conclusion should: + 1. Summarize the key points covered in the document + 2. Provide closure to the topics discussed + 3. Include any relevant recommendations or next steps + 4. Leave the reader with a clear understanding of the document's significance + + The conclusion should be professional and impactful, formatted according to {formatType} standards. + """ + + conclusion = await self.service.base.callAi([ + {"role": "system", "content": f"You are a documentation expert creating a conclusion in {formatType} format."}, + {"role": "user", "content": conclusionPrompt} + ], produceUserAnswer = True) + + # Step 5: Assemble the complete document + if formatType in ["md", "markdown"]: + # Markdown format + documentContent = f"# {title}\n\n" + + if executiveSummary: + documentContent += f"## Executive Summary\n\n{executiveSummary}\n\n" + + documentContent += f"{introduction}\n\n" + + for i, sectionContent in enumerate(sections): + # Ensure section starts with heading if not already + sectionTitle = detailedStructure[i].get("title", f"Section {i+1}") + if not sectionContent.strip().startswith("#"): + documentContent += f"## {sectionTitle}\n\n" + documentContent += f"{sectionContent}\n\n" + + documentContent += f"## Conclusion\n\n{conclusion}\n" + + elif formatType == "html": + # HTML format + documentContent = f"\n\n{title}\n\n\n" + documentContent += f"

{title}

\n\n" + + if executiveSummary: + documentContent += f"

Executive Summary

\n
{executiveSummary}
\n\n" + + documentContent += f"
{introduction}
\n\n" + + for i, sectionContent in enumerate(sections): + sectionTitle = detailedStructure[i].get("title", f"Section {i+1}") + documentContent += f"

{sectionTitle}

\n
{sectionContent}
\n\n" + + documentContent += f"

Conclusion

\n
{conclusion}
\n" + documentContent += "\n" + + else: + # Plain text format + documentContent = f"{title}\n{'=' * len(title)}\n\n" + + if executiveSummary: + documentContent += f"EXECUTIVE SUMMARY\n{'-' * 17}\n\n{executiveSummary}\n\n" + + documentContent += f"{introduction}\n\n" + + for i, sectionContent in enumerate(sections): + sectionTitle = detailedStructure[i].get("title", f"Section {i+1}") + documentContent += f"{sectionTitle}\n{'-' * len(sectionTitle)}\n\n{sectionContent}\n\n" + + documentContent += f"CONCLUSION\n{'-' * 10}\n\n{conclusion}\n" + + # Create document object + return self.formatAgentDocumentOutput(outputLabel, documentContent, contentType) + + except Exception as e: + logger.error(f"Error creating document: {str(e)}", exc_info=True) + + # Create a simple error document + if formatType in ["md", "markdown"]: + content = f"# Error in Documentation\n\nThere was an error generating the documentation: {str(e)}" + elif formatType == "html": + content = f"

Error in Documentation

There was an error generating the documentation: {str(e)}

" + else: + content = f"Error in Documentation\n\nThere was an error generating the documentation: {str(e)}" + + return self.formatAgentDocumentOutput(outputLabel, content, contentType) + + +# Factory function for the Documentation agent +def getAgentDocumentation(): + """Returns an instance of the Documentation agent.""" + return AgentDocumentation() \ No newline at end of file diff --git a/modules/historic_data_agents/agentEmail.py b/modules/historic_data_agents/agentEmail.py new file mode 100644 index 00000000..6c6e2f5f --- /dev/null +++ b/modules/historic_data_agents/agentEmail.py @@ -0,0 +1,380 @@ +""" +Email Agent Module. +Handles email-related tasks using Microsoft Graph API. +""" + +import logging +import json +from typing import Dict, Any, List, Optional, Tuple +import uuid +import os + +from modules.workflow.agentBase import AgentBase +from modules.interfaces.serviceChatModel import Task, ChatDocument, ChatContent + +logger = logging.getLogger(__name__) + +class AgentEmail(AgentBase): + """Agent for handling email-related tasks.""" + + def __init__(self): + """Initialize the email agent.""" + super().__init__() + self.name = "email" + self.label = "Email Agent" + self.description = "Handles email composition and sending using Microsoft Graph API" + self.capabilities = [ + "email_composition", + "email_draft_creation", + "email_template_generation" + ] + self.serviceBase = None + + def setDependencies(self, serviceBase=None): + """Set external dependencies for the agent.""" + self.serviceBase = serviceBase + + async def processTask(self, task: Task) -> Dict[str, Any]: + """ + Process an email-related task. 
+ + Args: + task: Task object containing: + - prompt: Instructions for the agent + - inputDocuments: List of documents to process + - outputSpecifications: List of required output documents + - context: Additional context including workflow info + + Returns: + Dictionary containing: + - feedback: Text response explaining what was done + - documents: List of created documents + """ + try: + # Extract task information + prompt = task.prompt + inputDocuments = task.filesInput + outputSpecs = task.filesOutput + + # Check AI service + if not self.service.base: + return { + "feedback": "The Email agent requires an AI service to function.", + "documents": [] + } + + # Check if Microsoft connector is available + if not hasattr(self.service, 'msft'): + return { + "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.", + "documents": [] + } + + # Get Microsoft token + token_data = self.service.msft.getMsftToken() + if not token_data: + # Create authentication trigger document + auth_doc = self._createFrontendAuthTriggerDocument() + return { + "feedback": "Microsoft authentication required. 
Please authenticate to continue.", + "documents": [auth_doc] + } + + # Extract document data from input + documentContents, attachments = self._processInputDocuments(inputDocuments) + + # Generate email subject and body using AI + emailTemplate = await self._generateEmailTemplate(prompt, documentContents) + + # Create HTML preview of the email + htmlPreview = self._createHtmlPreview(emailTemplate) + + # Attempt to create a draft email using Microsoft Graph API + draft_result = self.service.msft.createDraftEmail( + emailTemplate["recipient"], + emailTemplate["subject"], + emailTemplate["htmlBody"], + attachments + ) + + # Prepare output documents + documents = [] + + # Process output specifications + for spec in outputSpecs: + label = spec.get("label", "") + description = spec.get("description", "") + + if label.endswith(".html"): + # Create the HTML template file + templateDoc = self.formatAgentDocumentOutput( + label, + emailTemplate["htmlBody"], # Use the actual HTML body, not the preview + "text/html" + ) + documents.append(templateDoc) + elif label.endswith(".json"): + # Create JSON template if requested + templateJson = json.dumps(emailTemplate, indent=2) + templateDoc = self.formatAgentDocumentOutput( + label, + templateJson, + "application/json" + ) + documents.append(templateDoc) + else: + # Default to preview for other cases + previewDoc = self.formatAgentDocumentOutput( + label, + htmlPreview, + "text/html" + ) + documents.append(previewDoc) + + # Prepare feedback message + if draft_result: + feedback = f"Email draft created successfully for {emailTemplate.get('recipient')}. The subject is: '{emailTemplate['subject']}'" + if attachments: + feedback += f" with {len(attachments)} attachment(s)" + feedback += ". You can open and edit it in your Outlook draft folder." + else: + feedback = "Email template created but could not save as draft. HTML preview and template are available as documents." 
+ + return { + "feedback": feedback, + "documents": documents + } + + except Exception as e: + logger.error(f"Error in email agent: {str(e)}") + return { + "feedback": f"Error processing email task: {str(e)}", + "documents": [] + } + + def _createFrontendAuthTriggerDocument(self) -> ChatDocument: + """Create a document that triggers Microsoft authentication in the frontend.""" + return ChatDocument( + id=str(uuid.uuid4()), + name="microsoft_auth", + ext="html", + data=""" +
+

Microsoft Authentication Required

+

Please click the button below to authenticate with Microsoft:

+ +
+ """, + contents=[ + ChatContent( + name="main", + data=""" +
+

Microsoft Authentication Required

+

Please click the button below to authenticate with Microsoft:

+ +
+ """, + summary="Microsoft authentication trigger page", + metadata={ + "contentType": "text/html", + "isText": True + } + ) + ] + ) + + def _processInputDocuments(self, input_docs: List[ChatDocument]) -> Tuple[str, List[Dict[str, Any]]]: + """ + Process input documents to extract content and prepare attachments. + + Args: + input_docs: List of input documents + + Returns: + Tuple of (document content text, list of attachments) + """ + documentContents = [] + attachments = [] + + for doc in input_docs: + docName = doc.name + if doc.ext: + docName = f"{docName}.{doc.ext}" + + # Add document name to contents + documentContents.append(f"\n\n--- {docName} ---\n") + + # Process document data directly + if doc.data: + # Add to attachments with proper metadata + attachments.append({ + "name": docName, + "document": { + "data": doc.data, + "mimeType": doc.contents[0].metadata.get("contentType", "application/octet-stream") if doc.contents else "application/octet-stream", + "base64Encoded": doc.contents[0].metadata.get("base64Encoded", False) if doc.contents else False + } + }) + documentContents.append(f"Document attached: {docName}") + else: + documentContents.append(f"Document referenced: {docName}") + + return "\n".join(documentContents), attachments + + def formatAgentDocumentOutput(self, filename: str, content: str, contentType: str) -> ChatDocument: + """ + Format a document for agent output. 
+ + Args: + filename: Output filename + content: Document content + contentType: MIME type of the content + + Returns: + ChatDocument object + """ + # Split filename into name and extension + name, ext = os.path.splitext(filename) + if ext.startswith('.'): + ext = ext[1:] + + # Create document object + return ChatDocument( + id=str(uuid.uuid4()), + name=name, + ext=ext, + data=content, + contents=[ + ChatContent( + name="main", + data=content, + summary=f"Generated {filename}", + metadata={"contentType": contentType} + ) + ] + ) + + async def _generateEmailTemplate(self, prompt: str, documentContents: str) -> Dict[str, Any]: + """ + Generate email template using AI. + + Args: + prompt: The task prompt + documentContents: Extracted document content + + Returns: + Email template dictionary with recipient, subject, body + """ + emailPrompt = f""" + Create an email based on the following request: + + REQUEST: {prompt} + + DOCUMENT CONTENTS: + {documentContents[:2000]}... (truncated if longer) + + Generate an email template with: + 1. A relevant recipient (use placeholder or derive from content if possible) + 2. A concise but descriptive subject line + 3. A professional HTML-formatted email body + 4. Appropriate greeting and closing + + Format your response as JSON with these fields: + - recipient: email address + - subject: subject line + - plainBody: plain text version + - htmlBody: HTML formatted version + + Only return valid JSON. No preamble or explanations. + """ + + try: + response = await self.service.base.callAi([ + {"role": "system", "content": "You are an email template specialist. Create professional emails. 
Respond with valid JSON only."}, + {"role": "user", "content": emailPrompt} + ]) + + # Extract JSON from response + jsonStart = response.find('{') + jsonEnd = response.rfind('}') + 1 + + if jsonStart >= 0 and jsonEnd > jsonStart: + template = json.loads(response[jsonStart:jsonEnd]) + return template + else: + # Fallback plan + logger.warning(f"Not able creating email template, generating fallback plan") + return { + "recipient": "recipient@example.com", + "subject": "Information Regarding Your Request", + "plainBody": f"This email is regarding your request: {prompt}", + "htmlBody": f"

This email is regarding your request: {prompt}

" + } + + except Exception as e: + logger.warning(f"Error generating email template: {str(e)}") + return { + "recipient": "recipient@example.com", + "subject": "Information Regarding Your Request", + "plainBody": f"This email is regarding your request: {prompt}", + "htmlBody": f"

This email is regarding your request: {prompt}

" + } + + def _createHtmlPreview(self, emailTemplate: Dict[str, Any]) -> str: + """ + Create an HTML preview of the email template. + + Args: + emailTemplate: Email template dictionary + + Returns: + HTML string for preview + """ + html = f""" + + + + + Email Preview: {emailTemplate.get('subject', 'Email Template')} + + + +
+ + + +
+ + + """ + return html + +def getAgentEmail() -> AgentEmail: + """Factory function to create and return an EmailAgent instance.""" + return AgentEmail() \ No newline at end of file diff --git a/modules/historic_data_agents/agentSharepoint.py b/modules/historic_data_agents/agentSharepoint.py new file mode 100644 index 00000000..a0fa0b0d --- /dev/null +++ b/modules/historic_data_agents/agentSharepoint.py @@ -0,0 +1,348 @@ +""" +SharePoint Agent Module. +Handles SharePoint document search and data extraction using Microsoft Graph API. +""" + +import logging +import json +from typing import Dict, Any, List, Optional +from modules.workflow.agentBase import AgentBase + +logger = logging.getLogger(__name__) + +class AgentSharepoint(AgentBase): + """Agent for handling SharePoint document operations.""" + + def __init__(self): + """Initialize the SharePoint agent.""" + super().__init__() + self.name = "sharepoint" + self.label = "SharePoint Agent" + self.description = "Searches and extracts data from SharePoint documents using Microsoft Graph API" + self.capabilities = [ + "document_search", + "content_extraction", + "metadata_analysis", + "document_processing" + ] + + async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]: + """ + Process a SharePoint-related task. 
+ + Args: + task: Task object containing: + - prompt: Instructions for the agent + - inputDocuments: List of documents to process + - outputSpecifications: List of required output documents + - context: Additional context including workflow info + + Returns: + Dictionary containing: + - feedback: Text response explaining what was done + - documents: List of created documents + """ + try: + # Extract task information + prompt = task.get("prompt", "") + inputDocuments = task.get("inputDocuments", []) + outputSpecs = task.get("outputSpecifications", []) + + # Check AI service + if not self.service.base: + return { + "feedback": "The SharePoint agent requires an AI service to function.", + "documents": [] + } + + # Check if Microsoft connector is available + if not hasattr(self.service, 'msft'): + return { + "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.", + "documents": [] + } + + # Get Microsoft token + token_data = self.service.msft.getMsftToken() + if not token_data: + # Create authentication trigger document + auth_doc = self._createFrontendAuthTriggerDocument() + return { + "feedback": "Microsoft authentication required. 
Please authenticate to continue.", + "documents": [auth_doc] + } + + # Parse the search query from the prompt + searchQuery = await self._parseSearchQuery(prompt) + + # Search SharePoint documents + searchResults = await self._searchSharePointDocuments(searchQuery) + + # Process search results + documents = [] + for spec in outputSpecs: + label = spec.get("label", "") + description = spec.get("description", "") + + if label.endswith(".json"): + # Create JSON summary of search results + summaryDoc = self._createSearchSummaryJson(searchResults, description) + documents.append(summaryDoc) + elif label.endswith(".csv"): + # Create CSV summary of search results + summaryDoc = self._createSearchSummaryCsv(searchResults, description) + documents.append(summaryDoc) + else: + # Create text summary of search results + summaryDoc = self._createSearchSummaryText(searchResults, description) + documents.append(summaryDoc) + + # Prepare feedback message + feedback = f"Found {len(searchResults)} documents matching your search criteria. " + if searchResults: + feedback += "The results have been saved as documents." + else: + feedback += "No matching documents were found." + + return { + "feedback": feedback, + "documents": documents + } + + except Exception as e: + logger.error(f"Error in SharePoint agent: {str(e)}") + return { + "feedback": f"Error processing SharePoint task: {str(e)}", + "documents": [] + } + + def _createFrontendAuthTriggerDocument(self) -> Dict[str, Any]: + """Create a document that triggers Microsoft authentication in the frontend.""" + return self.formatAgentDocumentOutput( + "microsoft_auth.html", + """ +
+

Microsoft Authentication Required

+

Please click the button below to authenticate with Microsoft:

+ +
+ """, + "text/html" + ) + + async def _parseSearchQuery(self, prompt: str) -> Dict[str, Any]: + """ + Parse the search query from the prompt using AI. + + Args: + prompt: The task prompt + + Returns: + Dictionary containing search parameters + """ + try: + # Use AI to parse the search query + response = await self.service.base.callAi([ + {"role": "system", "content": "You are a SharePoint search query parser. Extract search parameters from the user's request."}, + {"role": "user", "content": f""" + Parse the following SharePoint search request into structured parameters: + + {prompt} + + Return a JSON object with these fields: + - query: The main search query + - site: Optional SharePoint site name + - folder: Optional folder path + - fileTypes: List of file types to search for + - dateRange: Optional date range for filtering + - maxResults: Maximum number of results to return + + Only return valid JSON. No preamble or explanations. + """} + ]) + + # Extract JSON from response + jsonStart = response.find('{') + jsonEnd = response.rfind('}') + 1 + + if jsonStart >= 0 and jsonEnd > jsonStart: + return json.loads(response[jsonStart:jsonEnd]) + else: + # Fallback to simple query + return { + "query": prompt, + "maxResults": 10 + } + + except Exception as e: + logger.warning(f"Error parsing search query: {str(e)}") + return { + "query": prompt, + "maxResults": 10 + } + + async def _searchSharePointDocuments(self, searchParams: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + Search SharePoint documents using Microsoft Graph API. 
+ + Args: + searchParams: Search parameters + + Returns: + List of search results + """ + try: + # Get Microsoft token + token = self.service.msft.getMsftToken() + if not token: + return [] + + # Prepare search query + query = searchParams.get("query", "") + site = searchParams.get("site", "") + folder = searchParams.get("folder", "") + fileTypes = searchParams.get("fileTypes", []) + maxResults = searchParams.get("maxResults", 10) + + # Build search URL + searchUrl = "https://graph.microsoft.com/v1.0/sites/root/drives" + if site: + searchUrl = f"https://graph.microsoft.com/v1.0/sites/{site}/drives" + + # Get drives (document libraries) + response = self.service.msft.makeGraphRequest("GET", searchUrl) + if not response or "value" not in response: + return [] + + results = [] + for drive in response["value"]: + # Search in each drive + driveId = drive["id"] + searchEndpoint = f"https://graph.microsoft.com/v1.0/drives/{driveId}/root/search(q='{query}')" + + # Add file type filters if specified + if fileTypes: + typeFilter = " or ".join([f"fileType eq '{ft}'" for ft in fileTypes]) + searchEndpoint += f"&filter={typeFilter}" + + # Add folder filter if specified + if folder: + searchEndpoint += f"&filter=parentReference/path eq '/{folder}'" + + # Add result limit + searchEndpoint += f"&top={maxResults}" + + # Make the search request + searchResponse = self.service.msft.makeGraphRequest("GET", searchEndpoint) + if searchResponse and "value" in searchResponse: + for item in searchResponse["value"]: + # Get file content + fileContent = await self._getFileContent(driveId, item["id"]) + + results.append({ + "name": item["name"], + "id": item["id"], + "driveId": driveId, + "webUrl": item["webUrl"], + "lastModified": item["lastModifiedDateTime"], + "size": item["size"], + "content": fileContent + }) + + return results + + except Exception as e: + logger.error(f"Error searching SharePoint: {str(e)}") + return [] + + async def _getFileContent(self, driveId: str, fileId: str) -> 
str: + """ + Get file content from SharePoint. + + Args: + driveId: Drive ID + fileId: File ID + + Returns: + File content as string + """ + try: + # Get file content URL + contentUrl = f"https://graph.microsoft.com/v1.0/drives/{driveId}/items/{fileId}/content" + + # Download file content + response = self.service.msft.makeGraphRequest("GET", contentUrl, raw=True) + if response: + return response.decode('utf-8') + return "" + + except Exception as e: + logger.error(f"Error getting file content: {str(e)}") + return "" + + def _createSearchSummaryJson(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]: + """Create a JSON summary of search results.""" + summary = { + "description": description, + "totalResults": len(results), + "results": [] + } + + for result in results: + summary["results"].append({ + "name": result["name"], + "url": result["webUrl"], + "lastModified": result["lastModified"], + "size": result["size"] + }) + + return self.formatAgentDocumentOutput( + "sharepoint_search_results.json", + json.dumps(summary, indent=2), + "application/json" + ) + + def _createSearchSummaryCsv(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]: + """Create a CSV summary of search results.""" + csvLines = ["Name,URL,Last Modified,Size (bytes)"] + + for result in results: + name = result["name"].replace('"', '""') + url = result["webUrl"].replace('"', '""') + lastModified = result["lastModified"].replace('"', '""') + size = str(result["size"]) + + csvLines.append(f'"{name}","{url}","{lastModified}",{size}') + + return self.formatAgentDocumentOutput( + "sharepoint_search_results.csv", + "\n".join(csvLines), + "text/csv" + ) + + def _createSearchSummaryText(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]: + """Create a text summary of search results.""" + textLines = [ + f"SharePoint Search Results", + f"Description: {description}", + f"Total Results: {len(results)}", + "\nResults:" + ] + + for result in 
results: + textLines.extend([ + f"\nName: {result['name']}", + f"URL: {result['webUrl']}", + f"Last Modified: {result['lastModified']}", + f"Size: {result['size']} bytes" + ]) + + return self.formatAgentDocumentOutput( + "sharepoint_search_results.txt", + "\n".join(textLines), + "text/plain" + ) + +def getAgentSharepoint() -> AgentSharepoint: + """Factory function to create and return a SharePointAgent instance.""" + return AgentSharepoint() \ No newline at end of file diff --git a/modules/historic_data_agents/agentWebcrawler.py b/modules/historic_data_agents/agentWebcrawler.py new file mode 100644 index 00000000..0f9768f4 --- /dev/null +++ b/modules/historic_data_agents/agentWebcrawler.py @@ -0,0 +1,814 @@ +""" +Web crawler agent for gathering and analyzing web content. +Provides web research and content extraction capabilities. +""" + +import logging +import json +import re +import time +import os +from typing import Dict, Any, List +from urllib.parse import quote_plus, unquote + +from bs4 import BeautifulSoup +import requests +import markdown + +from modules.workflow.agentBase import AgentBase +from modules.shared.configuration import APP_CONFIG + +logger = logging.getLogger(__name__) + +class AgentWebcrawler(AgentBase): + """AI-driven agent for web research and information retrieval""" + + def __init__(self): + """Initialize the web crawler agent""" + super().__init__() + self.name = "webcrawler" + self.label = "Web Crawler" + self.description = "Gathers and analyzes web content using AI with multi-step research" + self.capabilities = [ + "web_research", + "content_gathering", + "data_extraction", + "information_synthesis", + "source_verification" + ] + + # Web crawling configuration + self.srcApikey = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_APIKEY","") + self.srcEngine = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_ENGINE","google") + self.srcCountry = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_COUNTRY","auto") + self.maxUrl = 
int(APP_CONFIG.get("Agent_Webcrawler_SERPAPI_MAX_URLS", "5")) + self.maxSearchTerms = int(APP_CONFIG.get("Agent_Webcrawler_SERPAPI_MAX_SEARCH_KEYWORDS", "3")) + self.maxResults = int(APP_CONFIG.get("Agent_Webcrawler_SERPAPI_MAX_SEARCH_RESULTS", "5")) + self.timeout = int(APP_CONFIG.get("Agent_Webcrawler_SERPAPI_TIMEOUT", "30")) + self.userAgent = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36") + + if not self.srcApikey: + logger.error("SerpAPI key not configured") + + def setDependencies(self, serviceBase=None): + """Set external dependencies for the agent.""" + self.setService(serviceBase) + + async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]: + """ + Process a task by focusing on required outputs and using AI to guide the research process. + + Args: + task: Task dictionary with prompt, inputDocuments, outputSpecifications + + Returns: + Dictionary with feedback and documents + """ + try: + # Extract task information + prompt = task.get("prompt", "") + inputDocuments = task.get("inputDocuments", []) + outputSpecs = task.get("outputSpecifications", []) + workflow = task.get("context", {}).get("workflow", {}) + + # Check AI service + if not self.service or not self.service.base: + return { + "feedback": "The Web Crawler agent requires an AI service to function.", + "documents": [] + } + + # Create research plan + if workflow: + self.service.logAdd(workflow, "Creating research plan...", level="info", progress=35) + researchPlan = await self._createResearchPlan(prompt) + + # Check if this is truly a web research task + if not researchPlan.get("requiresWebResearch", True): + return { + "feedback": "This task doesn't appear to require web research. 
Please try a different agent.", + "documents": [] + } + + # Gather raw material through web research + if workflow: + self.service.logAdd(workflow, "Gathering research material...", level="info", progress=45) + rawResults = await self._gatherResearchMaterial(researchPlan, workflow) + + # Format results into requested output documents + if workflow: + self.service.logAdd(workflow, "Creating output documents...", level="info", progress=55) + documents = await self._createOutputDocuments( + prompt, + rawResults, + outputSpecs, + researchPlan + ) + + # Generate feedback + feedback = researchPlan.get("feedback", f"I conducted web research on '{prompt[:50]}...' and gathered information from {len(rawResults)} relevant sources.") + + return { + "feedback": feedback, + "documents": documents + } + + except Exception as e: + logger.error(f"Error during web research: {str(e)}", exc_info=True) + return { + "feedback": f"Error during web research: {str(e)}", + "documents": [] + } + + async def _createResearchPlan(self, prompt: str) -> Dict[str, Any]: + """ + Use AI to create a detailed research plan. 
+ + Args: + prompt: The research query + + Returns: + Research plan dictionary + """ + researchPrompt = f""" + Create a detailed web research plan for this task: "{prompt}" + + Analyze the request carefully and create a structured plan in JSON format with the following elements: + {{ + "requiresWebResearch": true/false, # Whether this genuinely requires web research + "researchQuestions": ["question1", "question2", ...], # 2-4 specific questions to answer + "searchTerms": ["term1", "term2", ...], # Up to {self.maxSearchTerms} effective search terms + "directUrls": ["url1", "url2", ...], # Any URLs directly mentioned in the request (up to {self.maxUrl}) + "expectedSources": ["type1", "type2", ...], # Types of sources that would be most valuable + "contentFocus": "what specific content to extract or focus on", + "feedback": "explanation of how the research will be conducted" + }} + + Respond with ONLY the JSON object, no additional text or explanations. + """ + + try: + # Get research plan from AI + response = await self.service.base.callAi([ + {"role": "system", "content": "You are a research expert. 
Respond with valid JSON only."}, + {"role": "user", "content": researchPrompt} + ]) + + # Extract JSON + jsonStart = response.find('{') + jsonEnd = response.rfind('}') + 1 + + if jsonStart >= 0 and jsonEnd > jsonStart: + plan = json.loads(response[jsonStart:jsonEnd]) + + # Ensure we have the expected fields with defaults if missing + if "searchTerms" not in plan: + plan["searchTerms"] = [prompt] + if "directUrls" not in plan: + plan["directUrls"] = [] + if "researchQuestions" not in plan: + plan["researchQuestions"] = ["What information can be found about this topic?"] + + return plan + else: + # Fallback plan + logger.warning(f"Not able creating research plan, generating fallback plan") + return { + "requiresWebResearch": True, + "researchQuestions": ["What information can be found about this topic?"], + "searchTerms": [prompt], + "directUrls": [], + "expectedSources": ["Web pages", "Articles"], + "contentFocus": "Relevant information about the topic", + "feedback": f"I'll conduct web research on '{prompt}' and gather relevant information." + } + + except Exception as e: + logger.warning(f"Error creating research plan: {str(e)}") + # Simple fallback plan + return { + "requiresWebResearch": True, + "researchQuestions": ["What information can be found about this topic?"], + "searchTerms": [prompt], + "directUrls": [], + "expectedSources": ["Web pages", "Articles"], + "contentFocus": "Relevant information about the topic", + "feedback": f"I'll conduct web research on '{prompt}' and gather relevant information." + } + + async def _gatherResearchMaterial(self, researchPlan: Dict[str, Any], workflow: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + Gather research material based on the research plan. 
+ + Args: + researchPlan: Research plan dictionary + workflow: Current workflow object + + Returns: + List of research results + """ + allResults = [] + + # Process direct URLs + directUrls = researchPlan.get("directUrls", [])[:self.maxUrl] + for i, url in enumerate(directUrls): + progress = 45 + int((i / len(directUrls)) * 5) # Progress from 45% to 50% + self.service.logAdd(workflow, f"Processing direct URL {i+1}/{len(directUrls)}...", level="info", progress=progress) + logger.info(f"Processing direct URL: {url}") + try: + # Fetch and extract content + soup = self._readUrl(url) + + if soup: + # Extract title and content + title = self._extractTitle(soup, url) + content = self._extractMainContent(soup) + + # Add to results + allResults.append({ + "title": title, + "url": url, + "sourceType": "directUrl", + "content": content, + "summary": "" # Will be filled later + }) + except Exception as e: + logger.warning(f"Error processing URL {url}: {str(e)}") + + # Process search terms + searchTerms = researchPlan.get("searchTerms", [])[:self.maxSearchTerms] + for i, term in enumerate(searchTerms): + progress = 50 + int((i / len(searchTerms)) * 5) # Progress from 50% to 55% + self.service.logAdd(workflow, f"Searching term {i+1}/{len(searchTerms)}...", level="info", progress=progress) + logger.info(f"Searching for: {term}") + try: + # Perform search + searchResults = self._searchWeb(term) + + # Process each search result + for result in searchResults: + # Check if URL is already in results + if not any(r["url"] == result["url"] for r in allResults): + allResults.append({ + "title": result["title"], + "url": result["url"], + "sourceType": "searchResult", + "content": result["data"], + "snippet": result["snippet"], + "summary": "" # Will be filled later + }) + + # Stop if we've reached the maximum results + if len(allResults) >= self.maxResults: + break + except Exception as e: + logger.warning(f"Error searching for {term}: {str(e)}") + + # Stop if we've reached the maximum 
results + if len(allResults) >= self.maxResults: + break + + # Create summaries for all results + allResults = await self._summarizeAllResults(allResults, researchPlan) + + return allResults + + async def _summarizeAllResults(self, results: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + Create summaries for all research results. + + Args: + results: List of research results + researchPlan: Research plan with questions and focus + + Returns: + Results with added summaries + """ + for i, result in enumerate(results): + logger.info(f"Summarizing result {i+1}/{len(results)}: {result['title'][:30]}...") + + try: + # Limit content length to avoid token issues + content = self._limitText(result.get("content", ""), maxChars=8000) + researchQuestions = researchPlan.get("researchQuestions", ["What relevant information does this page contain?"]) + contentFocus = researchPlan.get("contentFocus", "Relevant information") + + # Create summary using AI + summaryPrompt = f""" + Summarize this web page content based on these research questions: + {', '.join(researchQuestions)} + + Focus on: {contentFocus} + + Web page: {result['url']} + Title: {result['title']} + + Content: + {content} + + Create a concise summary that: + 1. Directly answers the research questions if possible + 2. Extracts the most relevant information from the page + 3. Includes specific facts, figures, or quotes if available + 4. Is around 2000 characters long + + Only include information actually found in the content. No fabrications or assumptions. + """ + + # Get summary from AI + summary = await self.service.base.callAi([ + {"role": "system", "content": "You are a research expert. 
Respond with valid JSON only."}, + {"role": "user", "content": summaryPrompt} + ]) + + # Add summary to result + result["summary"] = summary.strip() + + except Exception as e: + logger.warning(f"Error summarizing result {i+1}: {str(e)}") + result["summary"] = f"Error creating summary: {str(e)}" + + return results + + async def _createOutputDocuments(self, prompt: str, results: List[Dict[str, Any]], + outputSpecs: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + Create output documents based on research results and specifications. + + Args: + prompt: Original research prompt + results: List of research results + outputSpecs: Output specifications + researchPlan: Research plan + + Returns: + List of output documents + """ + # If no output specs provided, create default output + if not outputSpecs: + outputSpecs = [{ + "label": "webResearchResults.md", + "description": "Comprehensive web research results" + }] + + # Generate documents + documents = [] + + # Process each output specification + for spec in outputSpecs: + outputLabel = spec.get("label", "") + outputDescription = spec.get("description", "") + + # Determine format based on file extension + formatType = self._determineFormatType(outputLabel) + + # Create appropriate document based on format + if formatType == "json": + # JSON output - structured data + document = await self._createJsonDocument(prompt, results, researchPlan, outputLabel) + elif formatType == "csv": + # CSV output - tabular data + document = await self._createCsvDocument(results, outputLabel) + else: + # Text-based output (markdown, html, text) - narrative report + document = await self._createNarrativeDocument( + prompt, results, researchPlan, formatType, outputLabel, outputDescription + ) + + documents.append(document) + + return documents + + async def _createNarrativeDocument(self, prompt: str, results: List[Dict[str, Any]], + researchPlan: Dict[str, Any], formatType: str, + outputLabel: str, 
outputDescription: str) -> Dict[str, Any]: + """ + Create a narrative document (markdown, html, text) from research results. + + Args: + prompt: Original research prompt + results: Research results + researchPlan: Research plan + formatType: Output format (markdown, html, text) + outputLabel: Output filename + outputDescription: Output description + + Returns: + Document object + """ + # Create content based on format + if formatType == "markdown": + contentType = "text/markdown" + templateFormat = "markdown" + elif formatType == "html": + contentType = "text/html" + templateFormat = "html" + else: + contentType = "text/plain" + templateFormat = "text" + + # Prepare research context + researchQuestions = researchPlan.get("researchQuestions", []) + searchTerms = researchPlan.get("searchTerms", []) + + # Create document structure based on results + sourcesSummary = [] + for result in results: + sourcesSummary.append({ + "title": result.get("title", "Untitled"), + "url": result.get("url", ""), + "summary": result.get("summary", ""), + "snippet": result.get("snippet", "") + }) + + # Truncate content for prompt + sourcesJson = json.dumps(sourcesSummary, indent=2) + if len(sourcesJson) > 10000: + # Logic to truncate each summary while preserving structure + for i in range(len(sourcesSummary)): + if len(sourcesJson) <= 10000: + break + # Gradually truncate summaries + sourcesSummary[i]["summary"] = sourcesSummary[i]["summary"][:500] + "..." + sourcesJson = json.dumps(sourcesSummary, indent=2) + + # Create report prompt + reportPrompt = f""" + Create a comprehensive {formatType} research report based on the following web research: + + TASK: {prompt} + + RESEARCH QUESTIONS: + {', '.join(researchQuestions)} + + SEARCH TERMS USED: + {', '.join(searchTerms)} + + SOURCES AND FINDINGS: + {sourcesJson} + + REPORT DETAILS: + - Format: {templateFormat} + - Filename: {outputLabel} + - Description: {outputDescription} + + Create a well-structured report that: + 1. 
Includes an executive summary of key findings + 2. Addresses each research question directly + 3. Integrates information from all relevant sources + 4. Cites sources appropriately for each piece of information + 5. Provides a comprehensive synthesis of the research + 6. Is formatted professionally and appropriately for {templateFormat} + + The report should be scholarly, accurate, and focused on the original research task. + """ + + try: + # Generate report with AI + reportContent = await self.service.base.callAi([ + {"role": "system", "content": "You are a research expert. Respond with valid JSON only."}, + {"role": "user", "content": reportPrompt} + ]) + + # Convert to HTML if needed + if formatType == "html" and not reportContent.lower().startswith("Web Research Results{reportContent}" + + return self.formatAgentDocumentOutput(outputLabel, reportContent, contentType) + + except Exception as e: + logger.error(f"Error creating narrative document: {str(e)}") + # Create error document + if formatType == "markdown": + content = f"# Web Research Error\n\nAn error occurred: {str(e)}" + elif formatType == "html": + content = f"

Web Research Error

An error occurred: {str(e)}

" + else: + content = f"WEB RESEARCH ERROR\n\nAn error occurred: {str(e)}" + + return self.formatAgentDocumentOutput(outputLabel, content, contentType) + + async def _createJsonDocument(self, prompt: str, results: List[Dict[str, Any]], + researchPlan: Dict[str, Any], outputLabel: str) -> Dict[str, Any]: + """ + Create a JSON document from research results. + + Args: + prompt: Original research prompt + results: Research results + researchPlan: Research plan + outputLabel: Output filename + + Returns: + Document object + """ + try: + # Create structured data + sourcesData = [] + for result in results: + sourcesData.append({ + "title": result.get("title", "Untitled"), + "url": result.get("url", ""), + "summary": result.get("summary", ""), + "snippet": result.get("snippet", ""), + "sourceType": result.get("sourceType", "") + }) + + # Create metadata + metadata = { + "query": prompt, + "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), + "researchQuestions": researchPlan.get("researchQuestions", []), + "searchTerms": researchPlan.get("searchTerms", []) + } + + # Compile complete report object + jsonContent = { + "metadata": metadata, + "summary": researchPlan.get("feedback", "Web research results"), + "sources": sourcesData + } + + # Convert to JSON string + content = json.dumps(jsonContent, indent=2) + + return self.formatAgentDocumentOutput(outputLabel, content, "application/json") + + except Exception as e: + logger.error(f"Error creating JSON document: {str(e)}") + return self.formatAgentDocumentOutput(outputLabel, json.dumps({"error": str(e)}), "application/json") + + async def _createCsvDocument(self, results: List[Dict[str, Any]], outputLabel: str) -> Dict[str, Any]: + """ + Create a CSV document from research results. 
+ + Args: + results: Research results + outputLabel: Output filename + + Returns: + Document object + """ + try: + # Create CSV header + csvLines = ["Title,URL,Source Type,Snippet"] + + # Add results + for result in results: + # Escape CSV fields + title = result.get("title", "").replace('"', '""') + url = result.get("url", "").replace('"', '""') + sourceType = result.get("sourceType", "").replace('"', '""') + snippet = result.get("snippet", "").replace('"', '""') + + csvLines.append(f'"{title}","{url}","{sourceType}","{snippet}"') + + # Combine into CSV content + content = "\n".join(csvLines) + + return self.formatAgentDocumentOutput(outputLabel, content, "text/csv") + + except Exception as e: + logger.error(f"Error creating CSV document: {str(e)}") + return self.formatAgentDocumentOutput(outputLabel, "Error,Error\nFailed to create CSV,{0}".format(str(e)), "text/csv") + + def _determineFormatType(self, outputLabel: str) -> str: + """ + Determine the format type based on the filename. + + Args: + outputLabel: Output filename + + Returns: + Format type (markdown, html, text, json, csv) + """ + outputLabelLower = outputLabel.lower() + + if outputLabelLower.endswith(".md"): + return "markdown" + elif outputLabelLower.endswith(".html"): + return "html" + elif outputLabelLower.endswith(".txt"): + return "text" + elif outputLabelLower.endswith(".json"): + return "json" + elif outputLabelLower.endswith(".csv"): + return "csv" + else: + # Default to markdown + return "markdown" + + def _searchWeb(self, query: str) -> List[Dict[str, str]]: + """ + Conduct a web search using SerpAPI and return the results. 
+ + Args: + query: The search query + + Returns: + List of search results + """ + if not self.srcApikey: + return [] + + # Get user language from serviceBase if available + userLanguage = "en" # Default language + if self.service.base.userLanguage: + userLanguage = self.service.base.userLanguage + + try: + # Format the search request for SerpAPI + params = { + "engine": self.srcEngine, + "q": query, + "api_key": self.srcApikey, + "num": self.maxResults, # Number of results to return + "hl": userLanguage # Identified user language + } + + # Make the API request + response = requests.get("https://serpapi.com/search", params=params, timeout=self.timeout) + response.raise_for_status() + + # Parse JSON response + search_results = response.json() + + # Extract organic results + results = [] + + if "organic_results" in search_results: + for result in search_results["organic_results"][:self.maxResults]: + # Extract title + title = result.get("title", "No title") + + # Extract URL + url = result.get("link", "No URL") + + # Extract snippet + snippet = result.get("snippet", "No description") + + # Get actual page content + try: + targetPageSoup = self._readUrl(url) + content = self._extractMainContent(targetPageSoup) + except Exception as e: + logger.warning(f"Error extracting content from {url}: {str(e)}") + content = f"Error extracting content: {str(e)}" + + results.append({ + 'title': title, + 'url': url, + 'snippet': snippet, + 'data': content + }) + + # Limit number of results + if len(results) >= self.maxResults: + break + else: + logger.warning(f"No organic results found in SerpAPI response for: {query}") + + return results + + except Exception as e: + logger.error(f"Error searching with SerpAPI for {query}: {str(e)}") + return [] + + def _readUrl(self, url: str) -> BeautifulSoup: + """ + Read a URL and return a BeautifulSoup parser for the content. 
+ + Args: + url: The URL to read + + Returns: + BeautifulSoup object with the content or None on errors + """ + if not url or not url.startswith(('http://', 'https://')): + return None + + headers = { + 'User-Agent': self.userAgent, + 'Accept': 'text/html,application/xhtml+xml,application/xml', + 'Accept-Language': 'en-US,en;q=0.9', + } + + try: + # Initial request + response = requests.get(url, headers=headers, timeout=self.timeout) + + # Handling for status 202 + if response.status_code == 202: + # Retry with backoff + backoffTimes = [0.5, 1.0, 2.0, 5.0] + + for waitTime in backoffTimes: + time.sleep(waitTime) + response = requests.get(url, headers=headers, timeout=self.timeout) + + if response.status_code != 202: + break + + # Raise for error status codes + response.raise_for_status() + + # Parse HTML + return BeautifulSoup(response.text, 'html.parser') + + except Exception as e: + logger.error(f"Error reading URL {url}: {str(e)}") + return None + + def _extractTitle(self, soup: BeautifulSoup, url: str) -> str: + """ + Extract the title from a webpage. + + Args: + soup: BeautifulSoup object of the webpage + url: URL of the webpage + + Returns: + Extracted title + """ + if not soup: + return f"Error with {url}" + + # Extract title from title tag + titleTag = soup.find('title') + title = titleTag.text.strip() if titleTag else "No title" + + # Alternative: Also look for h1 tags if title tag is missing + if title == "No title": + h1Tag = soup.find('h1') + if h1Tag: + title = h1Tag.text.strip() + + return title + + def _extractMainContent(self, soup: BeautifulSoup, maxChars: int = 10000) -> str: + """ + Extract the main content from an HTML page. 
+ + Args: + soup: BeautifulSoup object of the webpage + maxChars: Maximum number of characters + + Returns: + Extracted main content as a string + """ + if not soup: + return "" + + # Try to find main content elements in priority order + mainContent = None + for selector in ['main', 'article', '#content', '.content', '#main', '.main']: + content = soup.select_one(selector) + if content: + mainContent = content + break + + # If no main content found, use the body + if not mainContent: + mainContent = soup.find('body') or soup + + # Remove script, style, nav, footer elements that don't contribute to main content + for element in mainContent.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'): + element.extract() + + # Extract text content + textContent = mainContent.get_text(separator=' ', strip=True) + + # Limit to maxChars + return textContent[:maxChars] + + def _limitText(self, text: str, maxChars: int = 10000) -> str: + """ + Limit text to a maximum number of characters. + + Args: + text: Input text + maxChars: Maximum number of characters + + Returns: + Limited text + """ + if not text: + return "" + + # If text is already under the limit, return unchanged + if len(text) <= maxChars: + return text + + # Otherwise limit text to maxChars + return text[:maxChars] + "... 
[Content truncated due to length]" + + +# Factory function for the Webcrawler agent +def getAgentWebcrawler(): + """Returns an instance of the Webcrawler agent.""" + return AgentWebcrawler() \ No newline at end of file diff --git a/modules/interfaces/interfaceChatModel.py b/modules/interfaces/interfaceChatModel.py index b7d2a488..53c5f4d6 100644 --- a/modules/interfaces/interfaceChatModel.py +++ b/modules/interfaces/interfaceChatModel.py @@ -12,18 +12,18 @@ from modules.shared.attributeUtils import register_model_labels, ModelMixin # ===== Method Models ===== -class MethodResult(BaseModel, ModelMixin): - """Model for method results""" +class ActionResult(BaseModel, ModelMixin): + """Model for action results from a methods action""" success: bool = Field(description="Whether the method execution was successful") data: Dict[str, Any] = Field(description="Result data") metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata") validation: List[str] = Field(default_factory=list, description="Validation messages") error: Optional[str] = Field(None, description="Error message if any") -# Register labels for MethodResult +# Register labels for ActionResult register_model_labels( - "MethodResult", - {"en": "Method Result", "fr": "Résultat de méthode"}, + "ActionResult", + {"en": "Action Result", "fr": "Résultat de l'action"}, { "success": {"en": "Success", "fr": "Succès"}, "data": {"en": "Data", "fr": "Données"}, @@ -174,6 +174,8 @@ class TaskAction(BaseModel, ModelMixin): retryMax: int = Field(default=3, description="Maximum number of retries") processingTime: Optional[float] = Field(None, description="Processing time in seconds") timestamp: datetime = Field(default_factory=lambda: datetime.now(UTC), description="When the action was executed") + result: Optional[str] = Field(None, description="Result of the action") + resultDocuments: Optional[List[ChatDocument]] = Field(None, description="Result documents from the action") def 
isSuccessful(self) -> bool: """Check if action was successful""" @@ -206,14 +208,36 @@ register_model_labels( "execMethod": {"en": "Method", "fr": "Méthode"}, "execAction": {"en": "Action", "fr": "Action"}, "execParameters": {"en": "Parameters", "fr": "Paramètres"}, + "execResultLabel": {"en": "Result Label", "fr": "Label du résultat"}, "status": {"en": "Status", "fr": "Statut"}, "error": {"en": "Error", "fr": "Erreur"}, "retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"}, "retryMax": {"en": "Max Retries", "fr": "Tentatives max"}, - "resultDocuments": {"en": "Result Documents", "fr": "Documents du résultat"}, - "execResultLabel": {"en": "Document Label", "fr": "Label du document"}, "processingTime": {"en": "Processing Time", "fr": "Temps de traitement"}, - "timestamp": {"en": "Timestamp", "fr": "Horodatage"} + "timestamp": {"en": "Timestamp", "fr": "Horodatage"}, + "result": {"en": "Result", "fr": "Résultat"}, + "resultDocuments": {"en": "Result Documents", "fr": "Documents de résultat"} + } +) + +class TaskResult(BaseModel, ModelMixin): + """Model for task results""" + taskId: str = Field(..., description="Task ID") + status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status") + success: bool = Field(..., description="Whether the task was successful") + feedback: Optional[str] = Field(None, description="Task feedback message") + error: Optional[str] = Field(None, description="Error message if task failed") + +# Register labels for TaskResult +register_model_labels( + "TaskResult", + {"en": "Task Result", "fr": "Résultat de tâche"}, + { + "taskId": {"en": "Task ID", "fr": "ID de la tâche"}, + "status": {"en": "Status", "fr": "Statut"}, + "success": {"en": "Success", "fr": "Succès"}, + "feedback": {"en": "Feedback", "fr": "Retour"}, + "error": {"en": "Error", "fr": "Erreur"} } ) diff --git a/modules/interfaces/interfaceComponentObjects.py b/modules/interfaces/interfaceComponentObjects.py index 87ec4e7c..272344c9 100644 --- 
a/modules/interfaces/interfaceComponentObjects.py +++ b/modules/interfaces/interfaceComponentObjects.py @@ -5,7 +5,6 @@ Uses the JSON connector for data access with added language support. import os import logging -import uuid from datetime import datetime, UTC from typing import Dict, Any, List, Optional, Union @@ -15,11 +14,10 @@ from modules.interfaces.interfaceComponentAccess import ComponentAccess from modules.interfaces.interfaceComponentModel import ( FilePreview, Prompt, FileItem, FileData ) -from modules.interfaces.interfaceAppModel import User, Mandate, UserPrivilege +from modules.interfaces.interfaceAppModel import User # DYNAMIC PART: Connectors to the Interface from modules.connectors.connectorDbJson import DatabaseConnector -from modules.connectors.connectorAiOpenai import ChatService # Basic Configurations from modules.shared.configuration import APP_CONFIG @@ -61,7 +59,6 @@ class ComponentObjects: self.currentUser: Optional[User] = None self.userId: Optional[str] = None self.access: Optional[ComponentAccess] = None # Will be set when user context is provided - self.aiService: Optional[ChatService] = None # Will be set when user context is provided # Initialize database self._initializeDatabase() @@ -87,9 +84,6 @@ class ComponentObjects: # Initialize access control with user context self.access = ComponentAccess(self.currentUser, self.db) - # Initialize AI service - self.aiService = ChatService() - # Update database context self.db.updateContext(self.userId) diff --git a/modules/methods/methodBase.py b/modules/methods/methodBase.py index 824be505..ad5c3942 100644 --- a/modules/methods/methodBase.py +++ b/modules/methods/methodBase.py @@ -3,7 +3,7 @@ from typing import Dict, List, Optional, Any, Literal from datetime import datetime, UTC from pydantic import BaseModel, Field import logging -from modules.interfaces.interfaceChatModel import MethodResult +from modules.interfaces.interfaceChatModel import ActionResult from functools import wraps logger = 
logging.getLogger(__name__) @@ -11,8 +11,8 @@ logger = logging.getLogger(__name__) def action(func): """Decorator to mark a method as an available action""" @wraps(func) - async def wrapper(self, *args, **kwargs): - return await func(self, *args, **kwargs) + async def wrapper(self, parameters: Dict[str, Any], *args, **kwargs): + return await func(self, parameters, *args, **kwargs) wrapper.is_action = True return wrapper @@ -31,7 +31,7 @@ class MethodBase: """Available actions and their parameters""" raise NotImplementedError - async def execute(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: + async def execute(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> ActionResult: """ Execute method action with authentication data @@ -41,7 +41,7 @@ class MethodBase: authData: Authentication data Returns: - MethodResult containing execution results + ActionResult containing execution results Raises: ValueError: If action is not supported @@ -79,7 +79,7 @@ class MethodBase: error=str(e) ) - async def _executeAction(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: + async def _executeAction(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> ActionResult: """Execute specific action - to be implemented by subclasses""" raise NotImplementedError @@ -109,9 +109,9 @@ class MethodBase: """Rollback specific action - to be implemented by subclasses""" pass - def _createResult(self, success: bool, data: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None, error: Optional[str] = None) -> MethodResult: + def _createResult(self, success: bool, data: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None, error: Optional[str] = None) -> ActionResult: """Create a method result""" - return MethodResult( + return ActionResult( success=success, data=data, metadata=metadata or {}, @@ 
-119,6 +119,6 @@ class MethodBase: error=error ) - def _addValidationMessage(self, result: MethodResult, message: str) -> None: + def _addValidationMessage(self, result: ActionResult, message: str) -> None: """Add a validation message to the result""" result.validation.append(message) \ No newline at end of file diff --git a/modules/methods/methodCoder.py b/modules/methods/methodCoder.py index c99b93fb..c570e456 100644 --- a/modules/methods/methodCoder.py +++ b/modules/methods/methodCoder.py @@ -1,208 +1,246 @@ from typing import Dict, Any, Optional import logging -import ast -import re +from datetime import datetime, UTC -from modules.methods.methodBase import MethodBase, MethodResult, action +from modules.methods.methodBase import MethodBase, ActionResult, action logger = logging.getLogger(__name__) +class CoderService: + """Service for code analysis, generation, and refactoring operations""" + + def __init__(self, serviceContainer: Any): + self.serviceContainer = serviceContainer + + async def analyzeCode(self, code: str, language: str = "python", checks: list = None) -> Dict[str, Any]: + """Analyze code quality and structure""" + if checks is None: + checks = ["complexity", "style", "security"] + + try: + # Create analysis prompt + analysis_prompt = f""" + Analyze this {language} code for quality, structure, and potential issues. + + Code to analyze: + {code} + + Please check for: + {', '.join(checks)} + + Provide a detailed analysis including: + 1. Code quality assessment + 2. Potential issues and improvements + 3. Security considerations + 4. Performance optimizations + 5. 
Best practices compliance + """ + + # Use AI service for analysis + analysis_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(analysis_prompt) + + return { + "language": language, + "checks": checks, + "analysis": analysis_result, + "timestamp": datetime.now(UTC).isoformat() + } + + except Exception as e: + logger.error(f"Error analyzing code: {str(e)}") + return { + "error": str(e), + "language": language, + "checks": checks + } + + async def generateCode(self, requirements: str, language: str = "python", template: str = None) -> Dict[str, Any]: + """Generate code based on requirements""" + try: + # Create generation prompt + generation_prompt = f""" + Generate {language} code based on the following requirements: + + Requirements: + {requirements} + + {f'Template to follow: {template}' if template else ''} + + Please provide: + 1. Complete, working code + 2. Clear comments and documentation + 3. Error handling where appropriate + 4. Best practices implementation + """ + + # Use AI service for code generation + generated_code = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(generation_prompt) + + return { + "language": language, + "requirements": requirements, + "code": generated_code, + "timestamp": datetime.now(UTC).isoformat() + } + + except Exception as e: + logger.error(f"Error generating code: {str(e)}") + return { + "error": str(e), + "language": language, + "requirements": requirements + } + + async def refactorCode(self, code: str, language: str = "python", improvements: list = None) -> Dict[str, Any]: + """Refactor code for better quality""" + if improvements is None: + improvements = ["style", "complexity"] + + try: + # Create refactoring prompt + refactor_prompt = f""" + Refactor this {language} code to improve: + {', '.join(improvements)} + + Original code: + {code} + + Please provide: + 1. Refactored code with improvements + 2. Explanation of changes made + 3. Benefits of the refactoring + 4. 
Any potential trade-offs + """ + + # Use AI service for refactoring + refactored_code = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(refactor_prompt) + + return { + "language": language, + "improvements": improvements, + "original_code": code, + "refactored_code": refactored_code, + "timestamp": datetime.now(UTC).isoformat() + } + + except Exception as e: + logger.error(f"Error refactoring code: {str(e)}") + return { + "error": str(e), + "language": language, + "improvements": improvements + } + class MethodCoder(MethodBase): """Coder method implementation for code operations""" def __init__(self, serviceContainer: Any): super().__init__(serviceContainer) self.name = "coder" - self.description = "Handle code operations like analysis, generation, and refactoring" + self.description = "Handle code operations like analysis and generation" + self.coderService = CoderService(serviceContainer) @action - async def analyze(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """Analyze code structure and quality""" + async def analyze(self, parameters: Dict[str, Any]) -> ActionResult: + """Analyze code quality and structure""" try: - code = parameters["code"] + code = parameters.get("code") language = parameters.get("language", "python") - metrics = parameters.get("metrics", ["complexity", "style", "documentation"]) + checks = parameters.get("checks", ["complexity", "style", "security"]) - analysis = {} - - if language.lower() == "python": - # Parse Python code - try: - tree = ast.parse(code) - - # Calculate basic metrics - analysis["metrics"] = { - "lines": len(code.splitlines()), - "classes": len([node for node in ast.walk(tree) if isinstance(node, ast.ClassDef)]), - "functions": len([node for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)]), - "imports": len([node for node in ast.walk(tree) if isinstance(node, ast.Import) or isinstance(node, ast.ImportFrom)]) - } - - # Check for common issues - 
analysis["issues"] = [] - - # Check for missing docstrings - if "documentation" in metrics: - for node in ast.walk(tree): - if isinstance(node, (ast.ClassDef, ast.FunctionDef)) and not ast.get_docstring(node): - analysis["issues"].append({ - "type": "missing_docstring", - "line": node.lineno, - "name": node.name - }) - - # Check for long functions - if "complexity" in metrics: - for node in ast.walk(tree): - if isinstance(node, ast.FunctionDef): - bodyLines = len(node.body) - if bodyLines > 20: # Arbitrary threshold - analysis["issues"].append({ - "type": "long_function", - "line": node.lineno, - "name": node.name, - "lines": bodyLines - }) - - # Check for style issues - if "style" in metrics: - # Check line length - for i, line in enumerate(code.splitlines(), 1): - if len(line) > 100: # PEP 8 recommendation - analysis["issues"].append({ - "type": "line_too_long", - "line": i, - "length": len(line) - }) - - # Check for mixed tabs and spaces - if "\t" in code and " " in code: - analysis["issues"].append({ - "type": "mixed_tabs_spaces", - "message": "Code mixes tabs and spaces" - }) - - except SyntaxError as e: - return self._createResult( - success=False, - data={"error": f"Syntax error: {str(e)}"} - ) - else: - # TODO: Implement analysis for other languages + if not code: return self._createResult( success=False, - data={"error": f"Unsupported language: {language}"} + data={}, + error="Code is required" ) + # Analyze code + results = await self.coderService.analyzeCode( + code=code, + language=language, + checks=checks + ) + return self._createResult( success=True, - data={ - "language": language, - "analysis": analysis - } + data=results ) + except Exception as e: - logger.error(f"Error analyzing code: {e}") + logger.error(f"Error analyzing code: {str(e)}") return self._createResult( success=False, - data={"error": f"Analysis failed: {str(e)}"} + data={}, + error=str(e) ) @action - async def generate(self, parameters: Dict[str, Any], authData: Optional[Dict[str, 
Any]] = None) -> MethodResult: + async def generate(self, parameters: Dict[str, Any]) -> ActionResult: """Generate code based on requirements""" try: - requirements = parameters["requirements"] + requirements = parameters.get("requirements") language = parameters.get("language", "python") - style = parameters.get("style", "standard") + template = parameters.get("template") - # TODO: Implement code generation using AI or templates - # This is a placeholder implementation - if language.lower() == "python": - # Generate a simple Python class based on requirements - className = re.sub(r'[^a-zA-Z0-9]', '', requirements.split()[0].title()) - code = f"""class {className}: - \"\"\" - {requirements} - \"\"\" - - def __init__(self): - pass - - def process(self): - pass -""" - else: + if not requirements: return self._createResult( success=False, - data={"error": f"Unsupported language: {language}"} + data={}, + error="Requirements are required" ) + # Generate code + code = await self.coderService.generateCode( + requirements=requirements, + language=language, + template=template + ) + return self._createResult( success=True, - data={ - "language": language, - "code": code - } + data=code ) + except Exception as e: - logger.error(f"Error generating code: {e}") + logger.error(f"Error generating code: {str(e)}") return self._createResult( success=False, - data={"error": f"Generation failed: {str(e)}"} + data={}, + error=str(e) ) @action - async def refactor(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: + async def refactor(self, parameters: Dict[str, Any]) -> ActionResult: """Refactor code for better quality""" try: - code = parameters["code"] + code = parameters.get("code") language = parameters.get("language", "python") improvements = parameters.get("improvements", ["style", "complexity"]) - if language.lower() == "python": - # Parse Python code - try: - tree = ast.parse(code) - - # Apply improvements - if "style" in 
improvements: - # Format code (placeholder) - code = code.strip() - - if "complexity" in improvements: - # TODO: Implement complexity reduction - pass - - if "documentation" in improvements: - # Add missing docstrings - for node in ast.walk(tree): - if isinstance(node, (ast.ClassDef, ast.FunctionDef)) and not ast.get_docstring(node): - # TODO: Generate docstring - pass - - except SyntaxError as e: - return self._createResult( - success=False, - data={"error": f"Syntax error: {str(e)}"} - ) - else: + if not code: return self._createResult( success=False, - data={"error": f"Unsupported language: {language}"} + data={}, + error="Code is required" ) + # Refactor code + result = await self.coderService.refactorCode( + code=code, + language=language, + improvements=improvements + ) + return self._createResult( success=True, - data={ - "language": language, - "code": code, - "improvements": improvements - } + data=result ) + except Exception as e: - logger.error(f"Error refactoring code: {e}") + logger.error(f"Error refactoring code: {str(e)}") return self._createResult( success=False, - data={"error": f"Refactoring failed: {str(e)}"} + data={}, + error=str(e) ) \ No newline at end of file diff --git a/modules/methods/methodDocument.py b/modules/methods/methodDocument.py index 34bafb43..c4a38209 100644 --- a/modules/methods/methodDocument.py +++ b/modules/methods/methodDocument.py @@ -5,198 +5,281 @@ Handles document operations using the document service. 
import logging from typing import Dict, Any, List, Optional -from datetime import datetime -from modules.interfaces.interfaceChatModel import ( - ChatDocument, - TaskDocument, - ExtractedContent, - ContentItem -) from modules.workflow.managerDocument import DocumentManager -from modules.methods.methodBase import MethodBase, MethodResult, action +from modules.methods.methodBase import MethodBase, ActionResult, action logger = logging.getLogger(__name__) -class MethodDocument(MethodBase): - """Document processing method implementation""" +class DocumentService: + """Service for document content extraction, analysis, and summarization""" - def __init__(self, serviceContainer): + def __init__(self, serviceContainer: Any): + self.serviceContainer = serviceContainer + + async def extractContent(self, fileId: str, format: str = "text", includeMetadata: bool = True) -> Dict[str, Any]: + """Extract content from document using prompt-based extraction""" + try: + # Get file data + file_data = self.serviceContainer.getFileData(fileId) + file_info = self.serviceContainer.getFileInfo(fileId) + + if not file_data: + return { + "error": "File not found or empty", + "fileId": fileId + } + + # Create extraction prompt based on format + extraction_prompt = f""" + Extract and structure the content from this document. + + File information: + - Name: {file_info.get('name', 'Unknown')} + - Type: {file_info.get('mimeType', 'Unknown')} + - Size: {len(file_data)} bytes + + Please extract: + 1. Main content and key information + 2. Structured data if present (tables, lists, etc.) + 3. Important facts and figures + 4. 
Key insights and takeaways + + Format the output as: {format} + Include metadata: {includeMetadata} + """ + + # Use the new direct file data extraction method + extracted_content = await self.serviceContainer.extractContentFromFileData( + prompt=extraction_prompt, + fileData=file_data, + filename=file_info.get('name', 'document'), + mimeType=file_info.get('mimeType', 'application/octet-stream'), + base64Encoded=False + ) + + result = { + "fileId": fileId, + "format": format, + "content": extracted_content, + "fileInfo": file_info if includeMetadata else None + } + + return result + + except Exception as e: + logger.error(f"Error extracting content: {str(e)}") + return { + "error": str(e), + "fileId": fileId + } + + async def analyzeContent(self, fileId: str, analysis: list = None) -> Dict[str, Any]: + """Analyze document content for entities, topics, and sentiment""" + if analysis is None: + analysis = ["entities", "topics", "sentiment"] + + try: + # First extract content + content_result = await self.extractContent(fileId, "text", True) + + if "error" in content_result: + return content_result + + content = content_result.get("content", "") + + # Create analysis prompt + analysis_prompt = f""" + Analyze this document content for the following aspects: + {', '.join(analysis)} + + Document content: + {content[:5000]} # Limit content length + + Please provide a detailed analysis including: + 1. Key entities (people, organizations, locations, dates) + 2. Main topics and themes + 3. Sentiment analysis (positive, negative, neutral) + 4. Key insights and patterns + 5. Important relationships between entities + 6. 
Document structure and organization + """ + + # Use AI service for analysis + analysis_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(analysis_prompt) + + return { + "fileId": fileId, + "analysis": analysis, + "results": analysis_result, + "content": content_result + } + + except Exception as e: + logger.error(f"Error analyzing content: {str(e)}") + return { + "error": str(e), + "fileId": fileId, + "analysis": analysis + } + + async def summarizeContent(self, fileId: str, maxLength: int = 200, format: str = "text") -> Dict[str, Any]: + """Summarize document content""" + try: + # First extract content + content_result = await self.extractContent(fileId, "text", False) + + if "error" in content_result: + return content_result + + content = content_result.get("content", "") + + # Create summarization prompt + summary_prompt = f""" + Create a comprehensive summary of this document content. + + Document content: + {content[:8000]} # Limit content length + + Requirements: + - Maximum length: {maxLength} words + - Format: {format} + - Include key points and main ideas + - Maintain accuracy and completeness + - Use clear, professional language + - Highlight important insights and conclusions + """ + + # Use AI service for summarization + summary = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(summary_prompt) + + return { + "fileId": fileId, + "maxLength": maxLength, + "format": format, + "summary": summary, + "wordCount": len(summary.split()), + "originalContent": content_result + } + + except Exception as e: + logger.error(f"Error summarizing content: {str(e)}") + return { + "error": str(e), + "fileId": fileId, + "maxLength": maxLength + } + +class MethodDocument(MethodBase): + """Document method implementation for document operations""" + + def __init__(self, serviceContainer: Any): """Initialize the document method""" super().__init__(serviceContainer) + self.name = "document" + self.description = "Handle document operations 
like extraction and analysis" + self.documentService = DocumentService(serviceContainer) self.documentManager = DocumentManager(serviceContainer) @action - async def extract(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Extract content from document - - Args: - parameters: - documentId: ID of the document to extract from - documentType: Type of document - extractionType: Type of extraction to perform - """ + async def extract(self, parameters: Dict[str, Any]) -> ActionResult: + """Extract content from document""" try: - documentId = parameters["documentId"] - documentType = parameters.get("documentType", "text") - extractionType = parameters.get("extractionType", "full") + fileId = parameters.get("fileId") + format = parameters.get("format", "text") + includeMetadata = parameters.get("includeMetadata", True) - # Get document from service - document = await self.service.interfaceComponent.getDocument(documentId) - if not document: + if not fileId: return self._createResult( success=False, - data={"error": f"Document not found: {documentId}"} + data={}, + error="File ID is required" ) - # Extract content based on type - if documentType == "text": - content = await self.documentManager.extractTextContent(document, extractionType) - elif documentType == "table": - content = await self.documentManager.extractTableContent(document, extractionType) - elif documentType == "image": - content = await self.documentManager.extractImageContent(document, extractionType) - else: - return self._createResult( - success=False, - data={"error": f"Unsupported document type: {documentType}"} - ) + # Extract content + content = await self.documentService.extractContent( + fileId=fileId, + format=format, + includeMetadata=includeMetadata + ) return self._createResult( success=True, - data={ - "documentId": documentId, - "type": documentType, - "content": content - } + data=content ) except Exception as e: logger.error(f"Error 
extracting content: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def analyze(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Analyze document content - - Args: - parameters: - documentId: ID of the document to analyze - documentType: Type of document - analysisType: Type of analysis to perform - """ + async def analyze(self, parameters: Dict[str, Any]) -> ActionResult: + """Analyze document content""" try: - # Extract content first - contentResult = await self.extract(parameters) - if not contentResult.success: - return contentResult + fileId = parameters.get("fileId") + analysis = parameters.get("analysis", ["entities", "topics", "sentiment"]) - # Perform analysis based on type - analysisType = parameters.get("analysisType", "basic") - content = ExtractedContent(**contentResult.data["content"]) - - if analysisType == "basic": - # Basic analysis: count items, calculate statistics - stats = { - "totalItems": len(content.contents), - "totalSize": sum(item.metadata.size for item in content.contents), - "itemTypes": {} - } - - for item in content.contents: - itemType = item.label - if itemType not in stats["itemTypes"]: - stats["itemTypes"][itemType] = 0 - stats["itemTypes"][itemType] += 1 - - return self._createResult( - success=True, - data={ - "documentId": parameters["documentId"], - "analysis": stats - } - ) - else: + if not fileId: return self._createResult( success=False, - data={"error": f"Unsupported analysis type: {analysisType}"} + data={}, + error="File ID is required" ) - + + # Analyze content + results = await self.documentService.analyzeContent( + fileId=fileId, + analysis=analysis + ) + + return self._createResult( + success=True, + data=results + ) + except Exception as e: - logger.error(f"Error analyzing document: {str(e)}") + logger.error(f"Error analyzing content: {str(e)}") return self._createResult( success=False, - 
data={"error": str(e)} + data={}, + error=str(e) ) @action - async def summarize(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Summarize document content - - Args: - parameters: - documentId: ID of the document to summarize - documentType: Type of document - summaryType: Type of summary to generate - """ + async def summarize(self, parameters: Dict[str, Any]) -> ActionResult: + """Summarize document content""" try: - # Extract content first - contentResult = await self.extract(parameters) - if not contentResult.success: - return contentResult + fileId = parameters.get("fileId") + maxLength = parameters.get("maxLength", 200) + format = parameters.get("format", "text") - # Generate summary based on type - summaryType = parameters.get("summaryType", "basic") - content = ExtractedContent(**contentResult.data["content"]) - - if summaryType == "basic": - # Basic summary: concatenate all text content - summary = "\n".join(item.content for item in content.contents if item.content) - - return self._createResult( - success=True, - data={ - "documentId": parameters["documentId"], - "summary": summary - } - ) - else: + if not fileId: return self._createResult( success=False, - data={"error": f"Unsupported summary type: {summaryType}"} + data={}, + error="File ID is required" ) - + + # Summarize content + summary = await self.documentService.summarizeContent( + fileId=fileId, + maxLength=maxLength, + format=format + ) + + return self._createResult( + success=True, + data=summary + ) + except Exception as e: - logger.error(f"Error summarizing document: {str(e)}") + logger.error(f"Error summarizing content: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) - - async def _getChatDocument(self, documentId: str) -> Optional[ChatDocument]: - """Get ChatDocument from database""" - try: - documentData = self.service.db.getRecord("chatDocuments", documentId) - if documentData: 
- return ChatDocument(**documentData) - return None - except Exception as e: - logger.error(f"Error getting ChatDocument {documentId}: {str(e)}") - return None - - async def _getTaskDocument(self, documentId: str) -> Optional[TaskDocument]: - """Get TaskDocument from database""" - try: - documentData = self.service.db.getRecord("taskDocuments", documentId) - if documentData: - return TaskDocument(**documentData) - return None - except Exception as e: - logger.error(f"Error getting TaskDocument {documentId}: {str(e)}") - return None \ No newline at end of file diff --git a/modules/methods/methodExcel.py b/modules/methods/methodExcel.py index 898c25bd..442204cd 100644 --- a/modules/methods/methodExcel.py +++ b/modules/methods/methodExcel.py @@ -5,184 +5,415 @@ Handles Excel operations using the Excel service. import logging from typing import Dict, Any, List, Optional -from datetime import datetime +from datetime import datetime, UTC +import json +import base64 -from modules.interfaces.interfaceExcel import ExcelService -from modules.methods.methodBase import MethodBase, MethodResult, action +from modules.methods.methodBase import MethodBase, ActionResult, action logger = logging.getLogger(__name__) -class MethodExcel(MethodBase): - """Excel method implementation""" +class ExcelService: + """Service for Microsoft Excel operations using Graph API""" - def __init__(self, serviceContainer): + def __init__(self, serviceContainer: Any): + self.serviceContainer = serviceContainer + + def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]: + """Get Microsoft connection from connection reference""" + try: + userConnection = self.serviceContainer.getUserConnectionFromConnectionReference(connectionReference) + if userConnection and userConnection.authority == "microsoft" and userConnection.enabled: + return { + "id": userConnection.id, + "accessToken": userConnection.accessToken, + "refreshToken": userConnection.refreshToken, + "scopes": 
userConnection.scopes + } + return None + except Exception as e: + logger.error(f"Error getting Microsoft connection: {str(e)}") + return None + + async def readFile(self, fileId: str, connectionReference: str, sheetName: str = "Sheet1", range: str = None) -> Dict[str, Any]: + """Read data from Excel file using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "fileId": fileId, + "connectionReference": connectionReference + } + + # Get file data from service container + file_data = self.serviceContainer.getFileData(fileId) + file_info = self.serviceContainer.getFileInfo(fileId) + + if not file_data: + return { + "error": "File not found or empty", + "fileId": fileId + } + + # For now, simulate Excel reading with AI analysis + # In a real implementation, you would use Microsoft Graph API + excel_prompt = f""" + Analyze this Excel file data and extract structured information. + + File: {file_info.get('name', 'Unknown')} + Sheet: {sheetName} + Range: {range or 'All data'} + + File content (first 5000 characters): + {file_data.decode('utf-8', errors='ignore')[:5000] if isinstance(file_data, bytes) else str(file_data)[:5000]} + + Please extract: + 1. All data from the specified sheet and range + 2. Column headers and data types + 3. Key metrics and calculations + 4. Any charts or visualizations described + 5. Summary statistics + + Return the data in a structured JSON format. 
+ """ + + # Use AI to analyze Excel content + analysis_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(excel_prompt) + + return { + "fileId": fileId, + "sheetName": sheetName, + "range": range, + "data": analysis_result, + "fileInfo": file_info, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error reading Excel file: {str(e)}") + return { + "error": str(e), + "fileId": fileId + } + + async def writeFile(self, fileId: str, connectionReference: str, sheetName: str, data: Any, range: str = None) -> Dict[str, Any]: + """Write data to Excel file using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "fileId": fileId, + "connectionReference": connectionReference + } + + # For now, simulate Excel writing + # In a real implementation, you would use Microsoft Graph API + write_prompt = f""" + Prepare data for writing to Excel file. + + File: {fileId} + Sheet: {sheetName} + Range: {range or 'Auto-detect'} + + Data to write: + {json.dumps(data, indent=2)} + + Please format this data appropriately for Excel and provide: + 1. Structured data ready for Excel + 2. Column headers and formatting + 3. Any formulas or calculations needed + 4. 
Data validation rules if applicable + """ + + # Use AI to prepare Excel data + prepared_data = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(write_prompt) + + return { + "fileId": fileId, + "sheetName": sheetName, + "range": range, + "data": prepared_data, + "status": "prepared", + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error writing to Excel file: {str(e)}") + return { + "error": str(e), + "fileId": fileId + } + + async def createFile(self, fileName: str, connectionReference: str, template: str = None) -> Dict[str, Any]: + """Create new Excel file using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "connectionReference": connectionReference + } + + # For now, simulate file creation + # In a real implementation, you would use Microsoft Graph API + create_prompt = f""" + Create a new Excel file structure. + + File name: {fileName} + Template: {template or 'Standard'} + + Please provide: + 1. Initial sheet structure + 2. Default column headers + 3. Sample data if template specified + 4. 
Formatting guidelines + """ + + # Use AI to create Excel structure + file_structure = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(create_prompt) + + # Create file using service container + file_id = self.serviceContainer.createFile( + fileName=fileName, + mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + content=file_structure, + base64encoded=False + ) + + return { + "fileId": file_id, + "fileName": fileName, + "template": template, + "structure": file_structure, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error creating Excel file: {str(e)}") + return { + "error": str(e) + } + + async def formatCells(self, fileId: str, connectionReference: str, sheetName: str, range: str, format: Dict[str, Any]) -> Dict[str, Any]: + """Format Excel cells using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "fileId": fileId, + "connectionReference": connectionReference + } + + # For now, simulate formatting + # In a real implementation, you would use Microsoft Graph API + format_prompt = f""" + Apply formatting to Excel cells. + + File: {fileId} + Sheet: {sheetName} + Range: {range} + Format: {json.dumps(format, indent=2)} + + Please provide: + 1. Applied formatting details + 2. Visual representation of the formatting + 3. 
Any conditional formatting rules + """ + + # Use AI to describe formatting + formatting_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(format_prompt) + + return { + "fileId": fileId, + "sheetName": sheetName, + "range": range, + "format": format, + "result": formatting_result, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error formatting Excel cells: {str(e)}") + return { + "error": str(e), + "fileId": fileId + } + +class MethodExcel(MethodBase): + """Excel method implementation for spreadsheet operations""" + + def __init__(self, serviceContainer: Any): """Initialize the Excel method""" super().__init__(serviceContainer) + self.name = "excel" + self.description = "Handle Excel spreadsheet operations like reading and writing data" self.excelService = ExcelService(serviceContainer) @action - async def read(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Read data from Excel file - - Args: - parameters: - fileId: ID of the Excel file - sheetName: Name of the sheet to read - range: Cell range to read (e.g. 
"A1:B10") - """ + async def read(self, parameters: Dict[str, Any]) -> ActionResult: + """Read data from Excel file""" try: - fileId = parameters["fileId"] + fileId = parameters.get("fileId") + connectionReference = parameters.get("connectionReference") sheetName = parameters.get("sheetName", "Sheet1") range = parameters.get("range") - # Get file from service - file = await self.service.interfaceComponent.getFile(fileId) - if not file: + if not fileId or not connectionReference: return self._createResult( success=False, - data={"error": f"File not found: {fileId}"} + data={}, + error="File ID and connection reference are required" ) # Read data from Excel - data = await self.excelService.readData(file, sheetName, range) + data = await self.excelService.readFile( + fileId=fileId, + connectionReference=connectionReference, + sheetName=sheetName, + range=range + ) return self._createResult( success=True, - data={ - "fileId": fileId, - "sheetName": sheetName, - "range": range, - "data": data - } + data=data ) except Exception as e: logger.error(f"Error reading Excel file: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def write(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Write data to Excel file - - Args: - parameters: - fileId: ID of the Excel file - sheetName: Name of the sheet to write to - range: Cell range to write to (e.g. 
"A1:B10") - data: Data to write - """ + async def write(self, parameters: Dict[str, Any]) -> ActionResult: + """Write data to Excel file""" try: - fileId = parameters["fileId"] + fileId = parameters.get("fileId") + connectionReference = parameters.get("connectionReference") sheetName = parameters.get("sheetName", "Sheet1") + data = parameters.get("data") range = parameters.get("range") - data = parameters["data"] - # Get file from service - file = await self.service.interfaceComponent.getFile(fileId) - if not file: + if not fileId or not connectionReference or not data: return self._createResult( success=False, - data={"error": f"File not found: {fileId}"} + data={}, + error="File ID, connection reference, and data are required" ) # Write data to Excel - await self.excelService.writeData(file, sheetName, range, data) + result = await self.excelService.writeFile( + fileId=fileId, + connectionReference=connectionReference, + sheetName=sheetName, + data=data, + range=range + ) return self._createResult( success=True, - data={ - "fileId": fileId, - "sheetName": sheetName, - "range": range - } + data=result ) except Exception as e: logger.error(f"Error writing to Excel file: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def create(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Create new Excel file - - Args: - parameters: - fileName: Name of the new file - sheets: List of sheet configurations - """ + async def create(self, parameters: Dict[str, Any]) -> ActionResult: + """Create new Excel file""" try: - fileName = parameters["fileName"] - sheets = parameters.get("sheets", [{"name": "Sheet1"}]) + fileName = parameters.get("fileName") + connectionReference = parameters.get("connectionReference") + template = parameters.get("template") - # Create new Excel file - file = await self.excelService.createFile(fileName, sheets) + if not fileName or 
not connectionReference: + return self._createResult( + success=False, + data={}, + error="File name and connection reference are required" + ) + + # Create Excel file + fileId = await self.excelService.createFile( + fileName=fileName, + connectionReference=connectionReference, + template=template + ) return self._createResult( success=True, - data={ - "fileId": file.id, - "fileName": fileName, - "sheets": sheets - } + data={"fileId": fileId} ) except Exception as e: logger.error(f"Error creating Excel file: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def format(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Format Excel cells - - Args: - parameters: - fileId: ID of the Excel file - sheetName: Name of the sheet to format - range: Cell range to format (e.g. "A1:B10") - format: Format configuration - """ + async def format(self, parameters: Dict[str, Any]) -> ActionResult: + """Format Excel cells""" try: - fileId = parameters["fileId"] + fileId = parameters.get("fileId") + connectionReference = parameters.get("connectionReference") sheetName = parameters.get("sheetName", "Sheet1") range = parameters.get("range") - format = parameters["format"] + format = parameters.get("format") - # Get file from service - file = await self.service.interfaceComponent.getFile(fileId) - if not file: + if not fileId or not connectionReference or not range or not format: return self._createResult( success=False, - data={"error": f"File not found: {fileId}"} + data={}, + error="File ID, connection reference, range, and format are required" ) # Apply formatting - await self.excelService.formatCells(file, sheetName, range, format) + result = await self.excelService.formatCells( + fileId=fileId, + connectionReference=connectionReference, + sheetName=sheetName, + range=range, + format=format + ) return self._createResult( success=True, - data={ - "fileId": 
fileId, - "sheetName": sheetName, - "range": range - } + data=result ) except Exception as e: logger.error(f"Error formatting Excel cells: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) \ No newline at end of file diff --git a/modules/methods/methodOperator.py b/modules/methods/methodOperator.py index 9e211272..a560c93b 100644 --- a/modules/methods/methodOperator.py +++ b/modules/methods/methodOperator.py @@ -1,78 +1,156 @@ +"""Operator method implementation for handling collections and AI operations""" + from typing import Dict, List, Any, Optional from datetime import datetime, UTC import logging -from .methodBase import MethodBase -from modules.interfaces.interfaceChatModel import MethodResult + +from modules.methods.methodBase import MethodBase, ActionResult, action logger = logging.getLogger(__name__) +class OperatorService: + """Service for operator operations like forEach and AI calls""" + + def __init__(self, serviceContainer: Any): + self.serviceContainer = serviceContainer + + async def executeForEach(self, items: List[Any], action: Dict[str, Any]) -> List[Any]: + """Execute an action for each item in a list""" + try: + results = [] + + for i, item in enumerate(items): + logger.info(f"Executing forEach action {i+1}/{len(items)}") + + # Create context with current item + context = { + "item": item, + "index": i, + "total": len(items), + "isFirst": i == 0, + "isLast": i == len(items) - 1 + } + + # Execute the action using the service container + if "method" in action and "action" in action: + methodName = action["method"] + actionName = action["action"] + parameters = action.get("parameters", {}) + + # Add context to parameters + parameters["context"] = context + parameters["currentItem"] = item + + # Execute the method action + result = await self.serviceContainer.executeAction( + methodName=methodName, + actionName=actionName, + parameters=parameters + ) + + # Return the exact result data, not 
wrapped + if result.success: + results.append(result.data) + else: + results.append({"error": result.error}) + else: + # Simple action without method call + results.append({"error": "No method specified"}) + + return results + + except Exception as e: + logger.error(f"Error executing forEach: {str(e)}") + return [{"error": str(e)}] * len(items) if items else [] + + async def executeAiCall(self, prompt: str, documents: List[Dict[str, Any]] = None) -> Dict[str, Any]: + """Call AI service with document content""" + try: + # Prepare context from documents + context = "" + extractedDocuments = [] + + if documents: + for i, doc in enumerate(documents): + documentReference = doc.get('documentReference') + contentExtractionPrompt = doc.get('contentExtractionPrompt', 'Extract the main content from this document') + + if documentReference: + # Get documents from reference + chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentReference) + + if chatDocuments: + # Extract content from each document + for j, chatDoc in enumerate(chatDocuments): + try: + # Extract content using the document manager + extractedContent = await self.serviceContainer.documentManager.extractContentFromChatDocument( + chatDocument=chatDoc, + extractionPrompt=contentExtractionPrompt + ) + + extractedDocuments.append({ + "documentReference": documentReference, + "documentId": chatDoc.id, + "extractionPrompt": contentExtractionPrompt, + "extractedContent": extractedContent + }) + + # Add to context + context += f"\n\nDocument {len(extractedDocuments)} (from {documentReference}):\n{extractedContent}" + + except Exception as e: + logger.warning(f"Error extracting content from document {chatDoc.id}: {str(e)}") + extractedDocuments.append({ + "documentReference": documentReference, + "documentId": chatDoc.id, + "extractionPrompt": contentExtractionPrompt, + "extractedContent": f"Error extracting content: {str(e)}" + }) + else: + logger.warning(f"No documents found for 
reference: {documentReference}") + extractedDocuments.append({ + "documentReference": documentReference, + "extractionPrompt": contentExtractionPrompt, + "extractedContent": f"No documents found for reference: {documentReference}" + }) + + # Create full prompt with context + fullPrompt = f"{prompt}\n\nContext:\n{context}" if context else prompt + + # Call AI service + aiResponse = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(fullPrompt) + + return { + "prompt": prompt, + "documentsProcessed": len(extractedDocuments), + "extractedDocuments": extractedDocuments, + "response": aiResponse, + "timestamp": datetime.now(UTC).isoformat() + } + + except Exception as e: + logger.error(f"Error executing AI call: {str(e)}") + return { + "error": str(e), + "prompt": prompt, + "documentsProcessed": 0, + "extractedDocuments": [], + "response": None + } + class MethodOperator(MethodBase): - """Operator methods for handling collections and AI operations""" + """Operator method implementation for handling collections and AI operations""" def __init__(self, serviceContainer: Any): super().__init__(serviceContainer) self.name = "operator" - self.description = "Operator methods for handling collections and AI operations" - - @property - def actions(self) -> Dict[str, Dict[str, Any]]: - """Available actions and their parameters""" - return { - "forEach": { - "description": "Execute an action for each item in a list", - "parameters": { - "items": { - "type": "List[Any]", - "description": "List of items to process", - "required": True - }, - "action": { - "type": "Dict[str, Any]", - "description": "Action to execute for each item", - "required": True, - "properties": { - "method": {"type": "str", "required": True}, - "action": {"type": "str", "required": True}, - "parameters": {"type": "Dict[str, Any]", "required": False} - } - } - } - }, - "aiCall": { - "description": "Call AI service with document content", - "parameters": { - "prompt": { - "type": "str", - 
"description": "Prompt for AI processing", - "required": True - }, - "extractedDocumentContent": { - "type": "List[Dict[str, str]]", - "description": "List of documents and their extraction prompts", - "required": True, - "items": { - "type": "object", - "properties": { - "document": {"type": "str", "required": True}, - "promptForContentExtraction": {"type": "str", "required": True} - } - } - } - } - } - } - - async def _executeAction(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """Execute operator action""" - if action == "forEach": - return await self._executeForEach(parameters) - elif action == "aiCall": - return await self._executeAiCall(parameters) - else: - raise ValueError(f"Unsupported action: {action}") - - async def _executeForEach(self, parameters: Dict[str, Any]) -> MethodResult: - """Execute forEach operation""" + self.description = "Handle operations like forEach and AI calls" + self.operatorService = OperatorService(serviceContainer) + + @action + async def forEach(self, parameters: Dict[str, Any]) -> ActionResult: + """Execute an action for each item in a list""" try: items = parameters.get("items", []) action = parameters.get("action", {}) @@ -81,34 +159,18 @@ class MethodOperator(MethodBase): return self._createResult( success=False, data={}, - error="Missing required parameters" + error="Items and action are required" ) - - results = [] - for item in items: - try: - # Execute action for each item - method = action.get("method") - action_name = action.get("action") - action_params = action.get("parameters", {}) - - # Add current item to parameters - action_params["item"] = item - - # Execute method action - method_result = await self.service.methods[method][action_name](action_params) - results.append(method_result) - - except Exception as e: - logger.error(f"Error processing item: {str(e)}") - results.append({ - "success": False, - "error": str(e) - }) - + + # Execute forEach 
operation + results = await self.operatorService.executeForEach( + items=items, + action=action + ) + return self._createResult( success=True, - data={"results": results} + data=results ) except Exception as e: @@ -118,55 +180,30 @@ class MethodOperator(MethodBase): data={}, error=str(e) ) - - async def _executeAiCall(self, parameters: Dict[str, Any]) -> MethodResult: - """Execute AI call with document content""" + + @action + async def aiCall(self, parameters: Dict[str, Any]) -> ActionResult: + """Call AI service with document content""" try: prompt = parameters.get("prompt") - documents = parameters.get("extractedDocumentContent", []) + documents = parameters.get("documents", []) # List of {documentReference, contentExtractionPrompt} if not prompt: return self._createResult( success=False, data={}, - error="Missing prompt parameter" + error="Prompt is required" ) - - # Extract content from documents - extracted_content = [] - for doc in documents: - try: - doc_ref = doc.get("document") - doc_prompt = doc.get("promptForContentExtraction") - - if not doc_ref or not doc_prompt: - continue - - # Extract content using document manager - content = self.service.extractContent(doc_prompt, doc_ref) - extracted_content.append({ - "document": doc_ref, - "content": content - }) - - except Exception as e: - logger.error(f"Error extracting document content: {str(e)}") - continue - - # Prepare AI prompt with extracted content - full_prompt = f"{prompt}\n\nExtracted Content:\n" - for content in extracted_content: - full_prompt += f"\nDocument: {content['document']}\n{content['content']}\n" - - # Call AI service - response = await self.service.callAiTextBasic(full_prompt) + + # Execute AI call + result = await self.operatorService.executeAiCall( + prompt=prompt, + documents=documents + ) return self._createResult( success=True, - data={ - "response": response, - "processedDocuments": len(extracted_content) - } + data=result ) except Exception as e: diff --git 
a/modules/methods/methodOutlook.py b/modules/methods/methodOutlook.py index 48bfa702..1e4ed8f1 100644 --- a/modules/methods/methodOutlook.py +++ b/modules/methods/methodOutlook.py @@ -5,41 +5,267 @@ Handles Outlook operations using the Outlook service. import logging from typing import Dict, Any, List, Optional -from datetime import datetime +from datetime import datetime, UTC +import json -from modules.interfaces.interfaceOutlook import OutlookService -from modules.methods.methodBase import MethodBase, MethodResult, action +from modules.methods.methodBase import MethodBase, ActionResult, action logger = logging.getLogger(__name__) -class MethodOutlook(MethodBase): - """Outlook method implementation""" +class OutlookService: + """Service for Microsoft Outlook operations using Graph API""" - def __init__(self, serviceContainer): + def __init__(self, serviceContainer: Any): + self.serviceContainer = serviceContainer + + def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]: + """Get Microsoft connection from connection reference""" + try: + userConnection = self.serviceContainer.getUserConnectionFromConnectionReference(connectionReference) + if userConnection and userConnection.authority == "microsoft" and userConnection.enabled: + return { + "id": userConnection.id, + "accessToken": userConnection.accessToken, + "refreshToken": userConnection.refreshToken, + "scopes": userConnection.scopes + } + return None + except Exception as e: + logger.error(f"Error getting Microsoft connection: {str(e)}") + return None + + async def readMails(self, connectionReference: str, folder: str = "inbox", query: str = None, maxResults: int = 10, includeAttachments: bool = False) -> Dict[str, Any]: + """Read emails from Outlook using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + 
"connectionReference": connectionReference + } + + # For now, simulate email reading + # In a real implementation, you would use Microsoft Graph API + mail_prompt = f""" + Read emails from Outlook. + + Folder: {folder} + Query: {query or 'All emails'} + Max Results: {maxResults} + Include Attachments: {includeAttachments} + + Please provide: + 1. Email messages with subject, sender, and content + 2. Timestamps and priority levels + 3. Attachment information if requested + 4. Email threading and conversations + 5. Categorization and flags + """ + + # Use AI to simulate email data + mail_data = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(mail_prompt) + + return { + "folder": folder, + "query": query, + "maxResults": maxResults, + "includeAttachments": includeAttachments, + "messages": mail_data, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error reading emails: {str(e)}") + return { + "error": str(e) + } + + async def sendMail(self, connectionReference: str, to: List[str], subject: str, body: str, attachments: List[str] = None) -> Dict[str, Any]: + """Send email using Outlook using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "connectionReference": connectionReference + } + + # For now, simulate email sending + # In a real implementation, you would use Microsoft Graph API + send_prompt = f""" + Send email using Outlook. + + To: {', '.join(to)} + Subject: {subject} + Body: {body} + Attachments: {attachments or 'None'} + + Please provide: + 1. Email composition details + 2. Recipient validation + 3. Attachment processing + 4. Send confirmation + 5. 
Message tracking information + """ + + # Use AI to simulate email sending + send_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(send_prompt) + + return { + "to": to, + "subject": subject, + "body": body, + "attachments": attachments, + "result": send_result, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error sending email: {str(e)}") + return { + "error": str(e) + } + + async def createFolder(self, connectionReference: str, name: str, parentFolderId: str = None) -> Dict[str, Any]: + """Create folder in Outlook using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "connectionReference": connectionReference + } + + # For now, simulate folder creation + # In a real implementation, you would use Microsoft Graph API + folder_prompt = f""" + Create folder in Outlook. + + Name: {name} + Parent Folder ID: {parentFolderId or 'Root'} + + Please provide: + 1. Folder creation details + 2. Permission settings + 3. Folder structure and hierarchy + 4. Creation confirmation + 5. 
Folder properties and metadata + """ + + # Use AI to simulate folder creation + folder_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(folder_prompt) + + return { + "name": name, + "parentFolderId": parentFolderId, + "result": folder_result, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error creating folder: {str(e)}") + return { + "error": str(e) + } + + async def moveMail(self, connectionReference: str, messageId: str, targetFolderId: str) -> Dict[str, Any]: + """Move email to different folder using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "connectionReference": connectionReference + } + + # For now, simulate mail moving + # In a real implementation, you would use Microsoft Graph API + move_prompt = f""" + Move email to different folder. + + Message ID: {messageId} + Target Folder ID: {targetFolderId} + + Please provide: + 1. Move operation details + 2. Source and destination folder information + 3. Message preservation and metadata + 4. Move confirmation + 5. 
Updated folder structure + """ + + # Use AI to simulate mail moving + move_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(move_prompt) + + return { + "messageId": messageId, + "targetFolderId": targetFolderId, + "result": move_result, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error moving email: {str(e)}") + return { + "error": str(e) + } + +class MethodOutlook(MethodBase): + """Outlook method implementation for email operations""" + + def __init__(self, serviceContainer: Any): """Initialize the Outlook method""" super().__init__(serviceContainer) + self.name = "outlook" + self.description = "Handle Outlook email operations like reading and sending emails" self.outlookService = OutlookService(serviceContainer) @action - async def readMails(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: + async def readMails(self, parameters: Dict[str, Any]) -> ActionResult: """ Read emails from Outlook Args: parameters: + connectionReference: Connection reference folder: Folder to read from (default: inbox) query: Search query maxResults: Maximum number of results includeAttachments: Whether to include attachments """ try: + connectionReference = parameters.get("connectionReference") folder = parameters.get("folder", "inbox") query = parameters.get("query") maxResults = parameters.get("maxResults", 10) includeAttachments = parameters.get("includeAttachments", False) + if not connectionReference: + return self._createResult( + success=False, + data={}, + error="Connection reference is required" + ) + # Read emails - emails = await self.outlookService.readEmails( + messages = await self.outlookService.readMails( + connectionReference=connectionReference, folder=folder, query=query, maxResults=maxResults, @@ -48,40 +274,54 @@ class MethodOutlook(MethodBase): return self._createResult( success=True, 
- data={ - "folder": folder, - "query": query, - "emails": emails - } + data=messages ) except Exception as e: logger.error(f"Error reading emails: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def sendMail(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: + async def sendMail(self, parameters: Dict[str, Any]) -> ActionResult: """ Send email using Outlook Args: parameters: + connectionReference: Connection reference to: List of recipient email addresses subject: Email subject body: Email body attachments: List of attachment file IDs """ try: - to = parameters["to"] - subject = parameters["subject"] - body = parameters["body"] + connectionReference = parameters.get("connectionReference") + to = parameters.get("to", []) + subject = parameters.get("subject") + body = parameters.get("body") attachments = parameters.get("attachments", []) + if not connectionReference: + return self._createResult( + success=False, + data={}, + error="Connection reference is required" + ) + + if not to or not subject or not body: + return self._createResult( + success=False, + data={}, + error="To, subject, and body are required" + ) + # Send email - messageId = await self.outlookService.sendEmail( + result = await self.outlookService.sendMail( + connectionReference=connectionReference, to=to, subject=subject, body=body, @@ -90,87 +330,113 @@ class MethodOutlook(MethodBase): return self._createResult( success=True, - data={ - "messageId": messageId, - "to": to, - "subject": subject - } + data=result ) except Exception as e: logger.error(f"Error sending email: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def createFolder(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: + async def createFolder(self, parameters: Dict[str, Any]) -> ActionResult: """ 
Create folder in Outlook Args: parameters: + connectionReference: Connection reference name: Folder name - parentFolder: Parent folder ID (optional) + parentFolderId: Parent folder ID (optional) """ try: - name = parameters["name"] - parentFolder = parameters.get("parentFolder") + connectionReference = parameters.get("connectionReference") + name = parameters.get("name") + parentFolderId = parameters.get("parentFolderId") + + if not connectionReference: + return self._createResult( + success=False, + data={}, + error="Connection reference is required" + ) + + if not name: + return self._createResult( + success=False, + data={}, + error="Folder name is required" + ) # Create folder - folderId = await self.outlookService.createFolder( + folder = await self.outlookService.createFolder( + connectionReference=connectionReference, name=name, - parentFolder=parentFolder + parentFolderId=parentFolderId ) return self._createResult( success=True, - data={ - "folderId": folderId, - "name": name, - "parentFolder": parentFolder - } + data=folder ) except Exception as e: logger.error(f"Error creating folder: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def moveMail(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: + async def moveMail(self, parameters: Dict[str, Any]) -> ActionResult: """ Move email to different folder Args: parameters: + connectionReference: Connection reference messageId: ID of the message to move - targetFolder: ID of the target folder + targetFolderId: ID of the target folder """ try: - messageId = parameters["messageId"] - targetFolder = parameters["targetFolder"] + connectionReference = parameters.get("connectionReference") + messageId = parameters.get("messageId") + targetFolderId = parameters.get("targetFolderId") + + if not connectionReference: + return self._createResult( + success=False, + data={}, + error="Connection reference is 
required" + ) + + if not messageId or not targetFolderId: + return self._createResult( + success=False, + data={}, + error="Message ID and target folder ID are required" + ) # Move email - await self.outlookService.moveEmail( + result = await self.outlookService.moveMail( + connectionReference=connectionReference, messageId=messageId, - targetFolder=targetFolder + targetFolderId=targetFolderId ) return self._createResult( success=True, - data={ - "messageId": messageId, - "targetFolder": targetFolder - } + data=result ) except Exception as e: logger.error(f"Error moving email: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) \ No newline at end of file diff --git a/modules/methods/methodPowerpoint.py b/modules/methods/methodPowerpoint.py index d5a90a4e..72de822c 100644 --- a/modules/methods/methodPowerpoint.py +++ b/modules/methods/methodPowerpoint.py @@ -5,256 +5,584 @@ Handles PowerPoint operations using the PowerPoint service. import logging from typing import Dict, Any, List, Optional -from datetime import datetime +from datetime import datetime, UTC +import json +import base64 -from modules.interfaces.interfacePowerpoint import PowerpointService -from modules.methods.methodBase import MethodBase, MethodResult, action +from modules.methods.methodBase import MethodBase, ActionResult, action logger = logging.getLogger(__name__) -class MethodPowerpoint(MethodBase): - """PowerPoint method implementation""" +class PowerpointService: + """Service for Microsoft PowerPoint operations using Graph API""" - def __init__(self, serviceContainer): + def __init__(self, serviceContainer: Any): + self.serviceContainer = serviceContainer + + def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]: + """Get Microsoft connection from connection reference""" + try: + userConnection = self.serviceContainer.getUserConnectionFromConnectionReference(connectionReference) + if userConnection and 
userConnection.authority == "microsoft" and userConnection.enabled: + return { + "id": userConnection.id, + "accessToken": userConnection.accessToken, + "refreshToken": userConnection.refreshToken, + "scopes": userConnection.scopes + } + return None + except Exception as e: + logger.error(f"Error getting Microsoft connection: {str(e)}") + return None + + async def readPresentation(self, fileId: str, connectionReference: str, includeSlides: bool = True) -> Dict[str, Any]: + """Read PowerPoint presentation using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "fileId": fileId, + "connectionReference": connectionReference + } + + # Get file data from service container + file_data = self.serviceContainer.getFileData(fileId) + file_info = self.serviceContainer.getFileInfo(fileId) + + if not file_data: + return { + "error": "File not found or empty", + "fileId": fileId + } + + # For now, simulate PowerPoint reading with AI analysis + # In a real implementation, you would use Microsoft Graph API + ppt_prompt = f""" + Analyze this PowerPoint presentation and extract structured information. + + File: {file_info.get('name', 'Unknown')} + Include slides: {includeSlides} + + File content (first 5000 characters): + {file_data.decode('utf-8', errors='ignore')[:5000] if isinstance(file_data, bytes) else str(file_data)[:5000]} + + Please extract: + 1. Presentation title and theme + 2. Slide structure and content + 3. Text content from each slide + 4. Images and media references + 5. Charts and data visualizations + 6. Speaker notes if available + 7. Overall presentation flow and messaging + + Return the data in a structured JSON format. 
+ """ + + # Use AI to analyze PowerPoint content + analysis_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(ppt_prompt) + + return { + "fileId": fileId, + "includeSlides": includeSlides, + "data": analysis_result, + "fileInfo": file_info, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error reading presentation: {str(e)}") + return { + "error": str(e), + "fileId": fileId + } + + async def writePresentation(self, fileId: str, connectionReference: str, slides: List[Dict[str, Any]]) -> Dict[str, Any]: + """Write to PowerPoint presentation using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "fileId": fileId, + "connectionReference": connectionReference + } + + # For now, simulate PowerPoint writing + # In a real implementation, you would use Microsoft Graph API + write_prompt = f""" + Prepare content for writing to PowerPoint presentation. + + File: {fileId} + Number of slides: {len(slides)} + + Slides data: + {json.dumps(slides, indent=2)} + + Please format this content appropriately for PowerPoint and provide: + 1. Slide layouts and structures + 2. Text content and formatting + 3. Image and media placement + 4. Chart and visualization specifications + 5. 
Animation and transition suggestions + """ + + # Use AI to prepare PowerPoint content + prepared_content = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(write_prompt) + + return { + "fileId": fileId, + "slides": slides, + "content": prepared_content, + "status": "prepared", + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error writing to presentation: {str(e)}") + return { + "error": str(e), + "fileId": fileId + } + + async def convertPresentation(self, fileId: str, connectionReference: str, format: str = "pdf") -> Dict[str, Any]: + """Convert PowerPoint presentation to another format using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "fileId": fileId, + "connectionReference": connectionReference + } + + # For now, simulate conversion + # In a real implementation, you would use Microsoft Graph API + convert_prompt = f""" + Convert PowerPoint presentation to {format.upper()} format. + + File: {fileId} + Target format: {format} + + Please provide: + 1. Conversion specifications + 2. Format-specific optimizations + 3. Quality settings and options + 4. 
Any special considerations for the target format + """ + + # Use AI to describe conversion process + conversion_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(convert_prompt) + + # Create converted file using service container + converted_file_id = self.serviceContainer.createFile( + fileName=f"converted_presentation.{format}", + mimeType=f"application/{format}", + content=conversion_result, + base64encoded=False + ) + + return { + "fileId": fileId, + "format": format, + "convertedFileId": converted_file_id, + "result": conversion_result, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error converting presentation: {str(e)}") + return { + "error": str(e), + "fileId": fileId + } + + async def createPresentation(self, fileName: str, connectionReference: str, template: str = None) -> Dict[str, Any]: + """Create new PowerPoint presentation using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "connectionReference": connectionReference + } + + # For now, simulate presentation creation + # In a real implementation, you would use Microsoft Graph API + create_prompt = f""" + Create a new PowerPoint presentation structure. + + File name: {fileName} + Template: {template or 'Standard'} + + Please provide: + 1. Initial slide structure + 2. Default slide layouts + 3. Theme and design elements + 4. Sample content if template specified + 5. 
Presentation guidelines + """ + + # Use AI to create PowerPoint structure + presentation_structure = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(create_prompt) + + # Create file using service container + file_id = self.serviceContainer.createFile( + fileName=fileName, + mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation", + content=presentation_structure, + base64encoded=False + ) + + return { + "fileId": file_id, + "fileName": fileName, + "template": template, + "structure": presentation_structure, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error creating presentation: {str(e)}") + return { + "error": str(e) + } + + async def addSlide(self, fileId: str, connectionReference: str, layout: str = "title", content: Dict[str, Any] = None) -> Dict[str, Any]: + """Add slide to presentation using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "fileId": fileId, + "connectionReference": connectionReference + } + + # For now, simulate slide addition + # In a real implementation, you would use Microsoft Graph API + slide_prompt = f""" + Add a new slide to PowerPoint presentation. + + File: {fileId} + Layout: {layout} + Content: {json.dumps(content, indent=2) if content else 'Default content'} + + Please provide: + 1. Slide structure and layout + 2. Content placement and formatting + 3. Visual elements and design + 4. 
Slide number and positioning + """ + + # Use AI to create slide content + slide_content = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(slide_prompt) + + return { + "fileId": fileId, + "layout": layout, + "content": content, + "slideContent": slide_content, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error adding slide: {str(e)}") + return { + "error": str(e), + "fileId": fileId + } + + async def addContent(self, fileId: str, connectionReference: str, slideId: str, content: Dict[str, Any]) -> Dict[str, Any]: + """Add content to slide using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "fileId": fileId, + "connectionReference": connectionReference + } + + # For now, simulate content addition + # In a real implementation, you would use Microsoft Graph API + content_prompt = f""" + Add content to PowerPoint slide. + + File: {fileId} + Slide ID: {slideId} + Content: {json.dumps(content, indent=2)} + + Please provide: + 1. Content placement and formatting + 2. Text styling and layout + 3. Image and media integration + 4. Chart and visualization setup + 5. 
Animation and effects + """ + + # Use AI to format slide content + formatted_content = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(content_prompt) + + return { + "fileId": fileId, + "slideId": slideId, + "content": content, + "formattedContent": formatted_content, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error adding content: {str(e)}") + return { + "error": str(e), + "fileId": fileId + } + +class MethodPowerpoint(MethodBase): + """PowerPoint method implementation for presentation operations""" + + def __init__(self, serviceContainer: Any): """Initialize the PowerPoint method""" super().__init__(serviceContainer) + self.name = "powerpoint" + self.description = "Handle PowerPoint presentation operations like reading and creating slides" self.powerpointService = PowerpointService(serviceContainer) @action - async def read(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Read PowerPoint presentation - - Args: - parameters: - fileId: ID of the PowerPoint file - includeSlides: Whether to include slide content - """ + async def read(self, parameters: Dict[str, Any]) -> ActionResult: + """Read PowerPoint presentation""" try: - fileId = parameters["fileId"] + fileId = parameters.get("fileId") + connectionReference = parameters.get("connectionReference") includeSlides = parameters.get("includeSlides", True) - # Get file from service - file = await self.service.interfaceComponent.getFile(fileId) - if not file: + if not fileId or not connectionReference: return self._createResult( success=False, - data={"error": f"File not found: {fileId}"} + data={}, + error="File ID and connection reference are required" ) # Read presentation - presentation = await self.powerpointService.readPresentation(file, includeSlides) + data = await self.powerpointService.readPresentation( + fileId=fileId, + 
connectionReference=connectionReference, + includeSlides=includeSlides + ) return self._createResult( success=True, - data={ - "fileId": fileId, - "presentation": presentation - } + data=data ) except Exception as e: - logger.error(f"Error reading PowerPoint: {str(e)}") + logger.error(f"Error reading presentation: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def write(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Write PowerPoint presentation - - Args: - parameters: - fileId: ID of the PowerPoint file - slides: List of slide configurations - """ + async def write(self, parameters: Dict[str, Any]) -> ActionResult: + """Write to PowerPoint presentation""" try: - fileId = parameters["fileId"] - slides = parameters["slides"] + fileId = parameters.get("fileId") + connectionReference = parameters.get("connectionReference") + slides = parameters.get("slides", []) - # Get file from service - file = await self.service.interfaceComponent.getFile(fileId) - if not file: + if not fileId or not connectionReference: return self._createResult( success=False, - data={"error": f"File not found: {fileId}"} + data={}, + error="File ID and connection reference are required" ) - # Write presentation - await self.powerpointService.writePresentation(file, slides) + # Write to presentation + result = await self.powerpointService.writePresentation( + fileId=fileId, + connectionReference=connectionReference, + slides=slides + ) return self._createResult( success=True, - data={ - "fileId": fileId, - "slideCount": len(slides) - } + data=result ) except Exception as e: - logger.error(f"Error writing PowerPoint: {str(e)}") + logger.error(f"Error writing to presentation: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def convert(self, parameters: Dict[str, Any], authData: Optional[Dict[str, 
Any]] = None) -> MethodResult: - """ - Convert PowerPoint to other format - - Args: - parameters: - fileId: ID of the PowerPoint file - format: Target format (pdf, png, etc.) - """ + async def convert(self, parameters: Dict[str, Any]) -> ActionResult: + """Convert PowerPoint presentation to another format""" try: - fileId = parameters["fileId"] - format = parameters["format"] + fileId = parameters.get("fileId") + connectionReference = parameters.get("connectionReference") + format = parameters.get("format", "pdf") - # Get file from service - file = await self.service.interfaceComponent.getFile(fileId) - if not file: + if not fileId or not connectionReference: return self._createResult( success=False, - data={"error": f"File not found: {fileId}"} + data={}, + error="File ID and connection reference are required" ) # Convert presentation - convertedFile = await self.powerpointService.convertPresentation(file, format) + result = await self.powerpointService.convertPresentation( + fileId=fileId, + connectionReference=connectionReference, + format=format + ) return self._createResult( success=True, - data={ - "fileId": fileId, - "format": format, - "convertedFileId": convertedFile.id - } + data=result ) except Exception as e: - logger.error(f"Error converting PowerPoint: {str(e)}") + logger.error(f"Error converting presentation: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def createPresentation(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Create new PowerPoint presentation - - Args: - parameters: - fileName: Name of the new file - template: Template ID (optional) - """ + async def createPresentation(self, parameters: Dict[str, Any]) -> ActionResult: + """Create new PowerPoint presentation""" try: - fileName = parameters["fileName"] + fileName = parameters.get("fileName") + connectionReference = parameters.get("connectionReference") 
template = parameters.get("template") + if not fileName or not connectionReference: + return self._createResult( + success=False, + data={}, + error="File name and connection reference are required" + ) + # Create presentation - file = await self.powerpointService.createPresentation(fileName, template) + fileId = await self.powerpointService.createPresentation( + fileName=fileName, + connectionReference=connectionReference, + template=template + ) return self._createResult( success=True, - data={ - "fileId": file.id, - "fileName": fileName, - "template": template - } + data={"fileId": fileId} ) except Exception as e: - logger.error(f"Error creating PowerPoint: {str(e)}") + logger.error(f"Error creating presentation: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def addSlide(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Add slide to presentation - - Args: - parameters: - fileId: ID of the PowerPoint file - layout: Slide layout - content: Slide content - """ + async def addSlide(self, parameters: Dict[str, Any]) -> ActionResult: + """Add slide to presentation""" try: - fileId = parameters["fileId"] + fileId = parameters.get("fileId") + connectionReference = parameters.get("connectionReference") layout = parameters.get("layout", "title") content = parameters.get("content", {}) - # Get file from service - file = await self.service.interfaceComponent.getFile(fileId) - if not file: + if not fileId or not connectionReference: return self._createResult( success=False, - data={"error": f"File not found: {fileId}"} + data={}, + error="File ID and connection reference are required" ) # Add slide - slideId = await self.powerpointService.addSlide(file, layout, content) + slide = await self.powerpointService.addSlide( + fileId=fileId, + connectionReference=connectionReference, + layout=layout, + content=content + ) return self._createResult( 
success=True, - data={ - "fileId": fileId, - "slideId": slideId, - "layout": layout - } + data=slide ) except Exception as e: logger.error(f"Error adding slide: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def addContent(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Add content to slide - - Args: - parameters: - fileId: ID of the PowerPoint file - slideId: ID of the slide - content: Content to add - """ + async def addContent(self, parameters: Dict[str, Any]) -> ActionResult: + """Add content to slide""" try: - fileId = parameters["fileId"] - slideId = parameters["slideId"] - content = parameters["content"] + fileId = parameters.get("fileId") + connectionReference = parameters.get("connectionReference") + slideId = parameters.get("slideId") + content = parameters.get("content", {}) - # Get file from service - file = await self.service.interfaceComponent.getFile(fileId) - if not file: + if not fileId or not connectionReference or not slideId: return self._createResult( success=False, - data={"error": f"File not found: {fileId}"} + data={}, + error="File ID, connection reference, and slide ID are required" ) # Add content - await self.powerpointService.addContent(file, slideId, content) + result = await self.powerpointService.addContent( + fileId=fileId, + connectionReference=connectionReference, + slideId=slideId, + content=content + ) return self._createResult( success=True, - data={ - "fileId": fileId, - "slideId": slideId - } + data=result ) except Exception as e: logger.error(f"Error adding content: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) \ No newline at end of file diff --git a/modules/methods/methodSharepoint.py b/modules/methods/methodSharepoint.py index d4eda812..2f2cf5b1 100644 --- a/modules/methods/methodSharepoint.py +++ b/modules/methods/methodSharepoint.py 
@@ -5,41 +5,373 @@ Handles SharePoint operations using the SharePoint service. import logging from typing import Dict, Any, List, Optional -from datetime import datetime +from datetime import datetime, UTC +import json -from modules.interfaces.interfaceSharepoint import SharepointService -from modules.methods.methodBase import MethodBase, MethodResult, action +from modules.methods.methodBase import MethodBase, ActionResult, action logger = logging.getLogger(__name__) -class MethodSharepoint(MethodBase): - """SharePoint method implementation""" +class SharepointService: + """Service for Microsoft SharePoint operations using Graph API""" - def __init__(self, serviceContainer): + def __init__(self, serviceContainer: Any): + self.serviceContainer = serviceContainer + + def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]: + """Get Microsoft connection from connection reference""" + try: + userConnection = self.serviceContainer.getUserConnectionFromConnectionReference(connectionReference) + if userConnection and userConnection.authority == "microsoft" and userConnection.enabled: + return { + "id": userConnection.id, + "accessToken": userConnection.accessToken, + "refreshToken": userConnection.refreshToken, + "scopes": userConnection.scopes + } + return None + except Exception as e: + logger.error(f"Error getting Microsoft connection: {str(e)}") + return None + + async def searchContent(self, connectionReference: str, query: str, siteId: str = None, contentType: str = None, maxResults: int = 10) -> Dict[str, Any]: + """Search SharePoint content using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "connectionReference": connectionReference + } + + # For now, simulate SharePoint search + # In a real implementation, you would use Microsoft Graph API + search_prompt = f""" 
+ Search SharePoint content for the following query. + + Query: {query} + Site ID: {siteId or 'All sites'} + Content Type: {contentType or 'All types'} + Max Results: {maxResults} + + Please provide: + 1. Relevant search results + 2. Content summaries + 3. File and document information + 4. Site and list references + 5. Metadata and properties + """ + + # Use AI to simulate search results + search_results = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(search_prompt) + + return { + "query": query, + "siteId": siteId, + "contentType": contentType, + "maxResults": maxResults, + "results": search_results, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error searching SharePoint: {str(e)}") + return { + "error": str(e) + } + + async def readItem(self, connectionReference: str, itemId: str, siteId: str = None, listId: str = None) -> Dict[str, Any]: + """Read SharePoint item using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "itemId": itemId, + "connectionReference": connectionReference + } + + # For now, simulate item reading + # In a real implementation, you would use Microsoft Graph API + read_prompt = f""" + Read SharePoint item details. + + Item ID: {itemId} + Site ID: {siteId or 'Default site'} + List ID: {listId or 'Default list'} + + Please provide: + 1. Item properties and metadata + 2. Content and attachments + 3. Permissions and access rights + 4. Version history if available + 5. 
Related items and links + """ + + # Use AI to simulate item data + item_data = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(read_prompt) + + return { + "itemId": itemId, + "siteId": siteId, + "listId": listId, + "data": item_data, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error reading SharePoint item: {str(e)}") + return { + "error": str(e), + "itemId": itemId + } + + async def writeItem(self, connectionReference: str, siteId: str, listId: str, item: Dict[str, Any]) -> Dict[str, Any]: + """Write SharePoint item using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "connectionReference": connectionReference + } + + # For now, simulate item writing + # In a real implementation, you would use Microsoft Graph API + write_prompt = f""" + Write item to SharePoint list. + + Site ID: {siteId} + List ID: {listId} + Item data: {json.dumps(item, indent=2)} + + Please provide: + 1. Item creation/update details + 2. Validation and formatting + 3. Permission settings + 4. Workflow triggers if applicable + 5. 
Success confirmation + """ + + # Use AI to simulate item creation + write_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(write_prompt) + + return { + "siteId": siteId, + "listId": listId, + "item": item, + "result": write_result, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error writing SharePoint item: {str(e)}") + return { + "error": str(e) + } + + async def readList(self, connectionReference: str, listId: str, siteId: str = None, query: str = None, maxResults: int = 10) -> Dict[str, Any]: + """Read SharePoint list using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "listId": listId, + "connectionReference": connectionReference + } + + # For now, simulate list reading + # In a real implementation, you would use Microsoft Graph API + list_prompt = f""" + Read SharePoint list items. + + List ID: {listId} + Site ID: {siteId or 'Default site'} + Query: {query or 'All items'} + Max Results: {maxResults} + + Please provide: + 1. List structure and columns + 2. Item data and properties + 3. Sorting and filtering options + 4. Pagination information + 5. 
List metadata and settings + """ + + # Use AI to simulate list data + list_data = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(list_prompt) + + return { + "listId": listId, + "siteId": siteId, + "query": query, + "maxResults": maxResults, + "data": list_data, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error reading SharePoint list: {str(e)}") + return { + "error": str(e), + "listId": listId + } + + async def writeList(self, connectionReference: str, siteId: str, listId: str, items: List[Dict[str, Any]]) -> Dict[str, Any]: + """Write multiple items to SharePoint list using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "connectionReference": connectionReference + } + + # For now, simulate bulk writing + # In a real implementation, you would use Microsoft Graph API + bulk_prompt = f""" + Write multiple items to SharePoint list. + + Site ID: {siteId} + List ID: {listId} + Number of items: {len(items)} + Items data: {json.dumps(items[:3], indent=2)} # Show first 3 items + + Please provide: + 1. Bulk operation details + 2. Validation and error handling + 3. Performance optimization + 4. Success/failure status for each item + 5. 
Batch processing results + """ + + # Use AI to simulate bulk operation + bulk_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(bulk_prompt) + + return { + "siteId": siteId, + "listId": listId, + "items": items, + "result": bulk_result, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error writing to SharePoint list: {str(e)}") + return { + "error": str(e) + } + + async def createList(self, connectionReference: str, siteId: str, name: str, description: str = None, template: str = "genericList", fields: List[Dict[str, Any]] = None) -> Dict[str, Any]: + """Create SharePoint list using Microsoft Graph API""" + try: + connection = self._getMicrosoftConnection(connectionReference) + if not connection: + return { + "error": "No valid Microsoft connection found for the provided connection reference", + "connectionReference": connectionReference + } + + # For now, simulate list creation + # In a real implementation, you would use Microsoft Graph API + create_prompt = f""" + Create a new SharePoint list. + + Site ID: {siteId} + Name: {name} + Description: {description or 'No description'} + Template: {template} + Fields: {json.dumps(fields, indent=2) if fields else 'Default fields'} + + Please provide: + 1. List structure and configuration + 2. Column definitions and types + 3. Default views and permissions + 4. Workflow and automation settings + 5. 
Creation confirmation and next steps + """ + + # Use AI to simulate list creation + creation_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(create_prompt) + + return { + "siteId": siteId, + "name": name, + "description": description, + "template": template, + "fields": fields, + "result": creation_result, + "connection": { + "id": connection["id"], + "authority": "microsoft", + "reference": connectionReference + } + } + + except Exception as e: + logger.error(f"Error creating SharePoint list: {str(e)}") + return { + "error": str(e) + } + +class MethodSharepoint(MethodBase): + """SharePoint method implementation for site operations""" + + def __init__(self, serviceContainer: Any): """Initialize the SharePoint method""" super().__init__(serviceContainer) + self.name = "sharepoint" + self.description = "Handle SharePoint site operations like reading and writing lists" self.sharepointService = SharepointService(serviceContainer) @action - async def search(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Search SharePoint content - - Args: - parameters: - query: Search query - siteId: Site ID to search in - contentType: Content type to search for - maxResults: Maximum number of results - """ + async def search(self, parameters: Dict[str, Any]) -> ActionResult: + """Search SharePoint content""" try: - query = parameters["query"] + connectionReference = parameters.get("connectionReference") + query = parameters.get("query") siteId = parameters.get("siteId") contentType = parameters.get("contentType") maxResults = parameters.get("maxResults", 10) + if not connectionReference: + return self._createResult( + success=False, + data={}, + error="Connection reference is required" + ) + + if not query: + return self._createResult( + success=False, + data={}, + error="Search query is required" + ) + # Search content results = await self.sharepointService.searchContent( + 
connectionReference=connectionReference, query=query, siteId=siteId, contentType=contentType, @@ -48,39 +380,43 @@ class MethodSharepoint(MethodBase): return self._createResult( success=True, - data={ - "query": query, - "siteId": siteId, - "contentType": contentType, - "results": results - } + data=results ) except Exception as e: logger.error(f"Error searching SharePoint: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def read(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Read SharePoint item - - Args: - parameters: - itemId: ID of the item to read - siteId: Site ID containing the item - listId: List ID containing the item - """ + async def read(self, parameters: Dict[str, Any]) -> ActionResult: + """Read SharePoint item""" try: - itemId = parameters["itemId"] + connectionReference = parameters.get("connectionReference") + itemId = parameters.get("itemId") siteId = parameters.get("siteId") listId = parameters.get("listId") + if not connectionReference: + return self._createResult( + success=False, + data={}, + error="Connection reference is required" + ) + + if not itemId: + return self._createResult( + success=False, + data={}, + error="Item ID is required" + ) + # Read item item = await self.sharepointService.readItem( + connectionReference=connectionReference, itemId=itemId, siteId=siteId, listId=listId @@ -88,39 +424,43 @@ class MethodSharepoint(MethodBase): return self._createResult( success=True, - data={ - "itemId": itemId, - "siteId": siteId, - "listId": listId, - "item": item - } + data=item ) except Exception as e: logger.error(f"Error reading SharePoint item: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def write(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Write SharePoint item - - Args: - 
parameters: - siteId: Site ID to write to - listId: List ID to write to - item: Item data to write - """ + async def write(self, parameters: Dict[str, Any]) -> ActionResult: + """Write SharePoint item""" try: - siteId = parameters["siteId"] - listId = parameters["listId"] - item = parameters["item"] + connectionReference = parameters.get("connectionReference") + siteId = parameters.get("siteId") + listId = parameters.get("listId") + item = parameters.get("item", {}) + + if not connectionReference: + return self._createResult( + success=False, + data={}, + error="Connection reference is required" + ) + + if not siteId or not listId: + return self._createResult( + success=False, + data={}, + error="Site ID and list ID are required" + ) # Write item - itemId = await self.sharepointService.writeItem( + result = await self.sharepointService.writeItem( + connectionReference=connectionReference, siteId=siteId, listId=listId, item=item @@ -128,40 +468,44 @@ class MethodSharepoint(MethodBase): return self._createResult( success=True, - data={ - "siteId": siteId, - "listId": listId, - "itemId": itemId - } + data=result ) except Exception as e: logger.error(f"Error writing SharePoint item: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def readList(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Read SharePoint list - - Args: - parameters: - listId: ID of the list to read - siteId: Site ID containing the list - query: Query to filter items - maxResults: Maximum number of results - """ + async def readList(self, parameters: Dict[str, Any]) -> ActionResult: + """Read SharePoint list""" try: - listId = parameters["listId"] + connectionReference = parameters.get("connectionReference") + listId = parameters.get("listId") siteId = parameters.get("siteId") query = parameters.get("query") - maxResults = parameters.get("maxResults", 100) + maxResults = 
parameters.get("maxResults", 10) + + if not connectionReference: + return self._createResult( + success=False, + data={}, + error="Connection reference is required" + ) + + if not listId: + return self._createResult( + success=False, + data={}, + error="List ID is required" + ) # Read list items = await self.sharepointService.readList( + connectionReference=connectionReference, listId=listId, siteId=siteId, query=query, @@ -170,38 +514,43 @@ class MethodSharepoint(MethodBase): return self._createResult( success=True, - data={ - "listId": listId, - "siteId": siteId, - "items": items - } + data=items ) except Exception as e: logger.error(f"Error reading SharePoint list: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def writeList(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Write multiple items to SharePoint list - - Args: - parameters: - siteId: Site ID to write to - listId: List ID to write to - items: List of item data to write - """ + async def writeList(self, parameters: Dict[str, Any]) -> ActionResult: + """Write multiple items to SharePoint list""" try: - siteId = parameters["siteId"] - listId = parameters["listId"] - items = parameters["items"] + connectionReference = parameters.get("connectionReference") + siteId = parameters.get("siteId") + listId = parameters.get("listId") + items = parameters.get("items", []) + + if not connectionReference: + return self._createResult( + success=False, + data={}, + error="Connection reference is required" + ) + + if not siteId or not listId: + return self._createResult( + success=False, + data={}, + error="Site ID and list ID are required" + ) # Write items - itemIds = await self.sharepointService.writeList( + result = await self.sharepointService.writeList( + connectionReference=connectionReference, siteId=siteId, listId=listId, items=items @@ -209,42 +558,45 @@ class 
MethodSharepoint(MethodBase): return self._createResult( success=True, - data={ - "siteId": siteId, - "listId": listId, - "itemIds": itemIds - } + data=result ) except Exception as e: - logger.error(f"Error writing SharePoint list: {str(e)}") + logger.error(f"Error writing to SharePoint list: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def createList(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Create SharePoint list - - Args: - parameters: - siteId: Site ID to create list in - name: Name of the list - description: List description - template: List template - fields: List field definitions - """ + async def createList(self, parameters: Dict[str, Any]) -> ActionResult: + """Create SharePoint list""" try: - siteId = parameters["siteId"] - name = parameters["name"] + connectionReference = parameters.get("connectionReference") + siteId = parameters.get("siteId") + name = parameters.get("name") description = parameters.get("description") template = parameters.get("template", "genericList") fields = parameters.get("fields", []) + if not connectionReference: + return self._createResult( + success=False, + data={}, + error="Connection reference is required" + ) + + if not siteId or not name: + return self._createResult( + success=False, + data={}, + error="Site ID and list name are required" + ) + # Create list - listId = await self.sharepointService.createList( + list = await self.sharepointService.createList( + connectionReference=connectionReference, siteId=siteId, name=name, description=description, @@ -254,16 +606,13 @@ class MethodSharepoint(MethodBase): return self._createResult( success=True, - data={ - "siteId": siteId, - "listId": listId, - "name": name - } + data=list ) except Exception as e: logger.error(f"Error creating SharePoint list: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + 
error=str(e) ) \ No newline at end of file diff --git a/modules/methods/methodWeb.py b/modules/methods/methodWeb.py index 13e4b2e8..ced581a1 100644 --- a/modules/methods/methodWeb.py +++ b/modules/methods/methodWeb.py @@ -5,78 +5,496 @@ Handles web operations using the web service. import logging from typing import Dict, Any, List, Optional -from datetime import datetime +from datetime import datetime, UTC +import requests +from bs4 import BeautifulSoup +import time -from modules.interfaces.interfaceWeb import WebService -from modules.methods.methodBase import MethodBase, MethodResult, action +from modules.methods.methodBase import MethodBase, ActionResult, action +from modules.shared.configuration import APP_CONFIG logger = logging.getLogger(__name__) -class MethodWeb(MethodBase): - """Web method implementation""" +class WebService: + """Service for web operations like searching and crawling""" - def __init__(self, serviceContainer): + def __init__(self, serviceContainer: Any): + self.serviceContainer = serviceContainer + self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + self.timeout = 30 + + # Web search configuration from agentWebcrawler + self.srcApikey = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_APIKEY", "") + self.srcEngine = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_ENGINE", "google") + self.srcCountry = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_COUNTRY", "auto") + self.maxResults = int(APP_CONFIG.get("Agent_Webcrawler_SERPAPI_MAX_SEARCH_RESULTS", "5")) + + if not self.srcApikey: + logger.warning("SerpAPI key not configured for web search") + + async def searchWeb(self, query: str, maxResults: int = 10) -> Dict[str, Any]: + """Search web content using Google search via SerpAPI""" + try: + if not self.srcApikey: + return { + "error": "SerpAPI key not configured", + "query": query + } + + # Get user language from service container if available + userLanguage = "en" # Default language + if hasattr(self.serviceContainer, 'user') and 
hasattr(self.serviceContainer.user, 'language'): + userLanguage = self.serviceContainer.user.language + + # Format the search request for SerpAPI + params = { + "engine": self.srcEngine, + "q": query, + "api_key": self.srcApikey, + "num": min(maxResults, self.maxResults), # Number of results to return + "hl": userLanguage # User language + } + + # Make the API request + response = requests.get("https://serpapi.com/search", params=params, timeout=self.timeout) + response.raise_for_status() + + # Parse JSON response + search_results = response.json() + + # Extract organic results + results = [] + + if "organic_results" in search_results: + for result in search_results["organic_results"][:maxResults]: + # Extract title + title = result.get("title", "No title") + + # Extract URL + url = result.get("link", "No URL") + + # Extract snippet + snippet = result.get("snippet", "No description") + + # Get actual page content + try: + targetPageSoup = self._readUrl(url) + content = self._extractMainContent(targetPageSoup) + except Exception as e: + logger.warning(f"Error extracting content from {url}: {str(e)}") + content = f"Error extracting content: {str(e)}" + + results.append({ + 'title': title, + 'url': url, + 'snippet': snippet, + 'content': content + }) + + # Limit number of results + if len(results) >= maxResults: + break + else: + logger.warning(f"No organic results found in SerpAPI response for: {query}") + + return { + "query": query, + "maxResults": maxResults, + "results": results, + "totalFound": len(results), + "timestamp": datetime.now(UTC).isoformat() + } + + except Exception as e: + logger.error(f"Error searching web: {str(e)}") + return { + "error": str(e), + "query": query + } + + async def crawlPage(self, url: str, depth: int = 1, followLinks: bool = True, extractContent: bool = True) -> Dict[str, Any]: + """Crawl web page and extract content""" + try: + # Read the URL + soup = self._readUrl(url) + if not soup: + return { + "error": "Failed to read URL", + 
"url": url + } + + # Extract basic information + title = self._extractTitle(soup, url) + content = self._extractMainContent(soup) if extractContent else "" + + # Extract links if requested + links = [] + if followLinks: + for link in soup.find_all('a', href=True): + href = link.get('href') + if href and href.startswith(('http://', 'https://')): + links.append({ + 'url': href, + 'text': link.get_text(strip=True)[:100] + }) + + # Extract images + images = [] + for img in soup.find_all('img', src=True): + src = img.get('src') + if src: + images.append({ + 'src': src, + 'alt': img.get('alt', ''), + 'title': img.get('title', '') + }) + + return { + "url": url, + "depth": depth, + "followLinks": followLinks, + "extractContent": extractContent, + "title": title, + "content": content, + "links": links[:10], # Limit to first 10 links + "images": images[:10], # Limit to first 10 images + "timestamp": datetime.now(UTC).isoformat() + } + + except Exception as e: + logger.error(f"Error crawling web page: {str(e)}") + return { + "error": str(e), + "url": url + } + + async def extractContent(self, url: str, selectors: Dict[str, str] = None, format: str = "text") -> Dict[str, Any]: + """Extract content from web page using selectors""" + try: + # Read the URL + soup = self._readUrl(url) + if not soup: + return { + "error": "Failed to read URL", + "url": url + } + + extracted_content = {} + + if selectors: + # Extract content using provided selectors + for selector_name, selector in selectors.items(): + elements = soup.select(selector) + if elements: + if format == "text": + extracted_content[selector_name] = [elem.get_text(strip=True) for elem in elements] + elif format == "html": + extracted_content[selector_name] = [str(elem) for elem in elements] + else: + extracted_content[selector_name] = [elem.get_text(strip=True) for elem in elements] + else: + extracted_content[selector_name] = [] + else: + # Auto-extract common elements + extracted_content = { + "title": 
self._extractTitle(soup, url), + "main_content": self._extractMainContent(soup), + "headings": [h.get_text(strip=True) for h in soup.find_all(['h1', 'h2', 'h3'])], + "links": [a.get('href') for a in soup.find_all('a', href=True) if a.get('href').startswith(('http://', 'https://'))], + "images": [img.get('src') for img in soup.find_all('img', src=True)] + } + + return { + "url": url, + "selectors": selectors, + "format": format, + "content": extracted_content, + "timestamp": datetime.now(UTC).isoformat() + } + + except Exception as e: + logger.error(f"Error extracting content: {str(e)}") + return { + "error": str(e), + "url": url + } + + async def validatePage(self, url: str, checks: List[str] = None) -> Dict[str, Any]: + """Validate web page for various criteria""" + if checks is None: + checks = ["accessibility", "seo", "performance"] + + try: + # Read the URL + soup = self._readUrl(url) + if not soup: + return { + "error": "Failed to read URL", + "url": url + } + + validation_results = {} + + for check in checks: + if check == "accessibility": + validation_results["accessibility"] = self._checkAccessibility(soup) + elif check == "seo": + validation_results["seo"] = self._checkSEO(soup) + elif check == "performance": + validation_results["performance"] = self._checkPerformance(soup, url) + else: + validation_results[check] = {"status": "unknown", "message": f"Unknown check type: {check}"} + + return { + "url": url, + "checks": checks, + "results": validation_results, + "timestamp": datetime.now(UTC).isoformat() + } + + except Exception as e: + logger.error(f"Error validating web page: {str(e)}") + return { + "error": str(e), + "url": url + } + + def _checkAccessibility(self, soup: BeautifulSoup) -> Dict[str, Any]: + """Check basic accessibility features""" + issues = [] + warnings = [] + + # Check for alt text on images + images_without_alt = soup.find_all('img', alt='') + if images_without_alt: + issues.append(f"Found {len(images_without_alt)} images without alt 
text") + + # Check for proper heading structure + headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']) + if not headings: + warnings.append("No headings found - poor document structure") + + # Check for form labels + forms = soup.find_all('form') + for form in forms: + inputs = form.find_all('input') + for input_elem in inputs: + if input_elem.get('type') not in ['submit', 'button', 'hidden']: + if not input_elem.get('id') or not soup.find('label', attrs={'for': input_elem.get('id')}): + warnings.append("Form input without proper label") + + return { + "status": "warning" if warnings else "pass", + "issues": issues, + "warnings": warnings + } + + def _checkSEO(self, soup: BeautifulSoup) -> Dict[str, Any]: + """Check basic SEO features""" + issues = [] + warnings = [] + + # Check for title tag + title = soup.find('title') + if not title: + issues.append("Missing title tag") + elif len(title.get_text()) < 10: + warnings.append("Title tag is too short") + elif len(title.get_text()) > 60: + warnings.append("Title tag is too long") + + # Check for meta description + meta_desc = soup.find('meta', attrs={'name': 'description'}) + if not meta_desc: + warnings.append("Missing meta description") + elif meta_desc.get('content'): + if len(meta_desc.get('content')) < 50: + warnings.append("Meta description is too short") + elif len(meta_desc.get('content')) > 160: + warnings.append("Meta description is too long") + + # Check for h1 tag + h1_tags = soup.find_all('h1') + if not h1_tags: + warnings.append("No H1 tag found") + elif len(h1_tags) > 1: + warnings.append("Multiple H1 tags found") + + return { + "status": "warning" if warnings else "pass", + "issues": issues, + "warnings": warnings + } + + def _checkPerformance(self, soup: BeautifulSoup, url: str) -> Dict[str, Any]: + """Check basic performance indicators""" + warnings = [] + + # Count images + images = soup.find_all('img') + if len(images) > 20: + warnings.append(f"Many images found ({len(images)}) - may 
impact loading speed") + + # Check for external resources + external_scripts = soup.find_all('script', src=True) + external_styles = soup.find_all('link', rel='stylesheet') + + if len(external_scripts) > 10: + warnings.append(f"Many external scripts ({len(external_scripts)}) - may impact loading speed") + + if len(external_styles) > 5: + warnings.append(f"Many external stylesheets ({len(external_styles)}) - may impact loading speed") + + return { + "status": "warning" if warnings else "pass", + "warnings": warnings, + "metrics": { + "images": len(images), + "external_scripts": len(external_scripts), + "external_styles": len(external_styles) + } + } + + def _readUrl(self, url: str) -> BeautifulSoup: + """Read a URL and return a BeautifulSoup parser for the content""" + if not url or not url.startswith(('http://', 'https://')): + return None + + headers = { + 'User-Agent': self.user_agent, + 'Accept': 'text/html,application/xhtml+xml,application/xml', + 'Accept-Language': 'en-US,en;q=0.9', + } + + try: + # Initial request + response = requests.get(url, headers=headers, timeout=self.timeout) + + # Handling for status 202 + if response.status_code == 202: + # Retry with backoff + backoff_times = [0.5, 1.0, 2.0, 5.0] + + for wait_time in backoff_times: + time.sleep(wait_time) + response = requests.get(url, headers=headers, timeout=self.timeout) + + if response.status_code != 202: + break + + # Raise for error status codes + response.raise_for_status() + + # Parse HTML + return BeautifulSoup(response.text, 'html.parser') + + except Exception as e: + logger.error(f"Error reading URL {url}: {str(e)}") + return None + + def _extractTitle(self, soup: BeautifulSoup, url: str) -> str: + """Extract the title from a webpage""" + if not soup: + return f"Error with {url}" + + # Extract title from title tag + title_tag = soup.find('title') + title = title_tag.text.strip() if title_tag else "No title" + + # Alternative: Also look for h1 tags if title tag is missing + if title == "No 
title": + h1_tag = soup.find('h1') + if h1_tag: + title = h1_tag.text.strip() + + return title + + def _extractMainContent(self, soup: BeautifulSoup, max_chars: int = 10000) -> str: + """Extract the main content from an HTML page""" + if not soup: + return "" + + # Try to find main content elements in priority order + main_content = None + for selector in ['main', 'article', '#content', '.content', '#main', '.main']: + content = soup.select_one(selector) + if content: + main_content = content + break + + # If no main content found, use the body + if not main_content: + main_content = soup.find('body') or soup + + # Remove script, style, nav, footer elements that don't contribute to main content + for element in main_content.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'): + element.extract() + + # Extract text content + text_content = main_content.get_text(separator=' ', strip=True) + + # Limit to max_chars + return text_content[:max_chars] + +class MethodWeb(MethodBase): + """Web method implementation for web operations""" + + def __init__(self, serviceContainer: Any): """Initialize the web method""" super().__init__(serviceContainer) + self.name = "web" + self.description = "Handle web operations like searching and crawling" self.webService = WebService(serviceContainer) @action - async def search(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Search web content - - Args: - parameters: - query: Search query - engine: Search engine to use (google, bing) - maxResults: Maximum number of results - """ + async def search(self, parameters: Dict[str, Any]) -> ActionResult: + """Search web content""" try: - query = parameters["query"] - engine = parameters.get("engine", "google") + query = parameters.get("query") maxResults = parameters.get("maxResults", 10) + if not query: + return self._createResult( + success=False, + data={}, + 
error="Search query is required" + ) + # Search web - results = await self.webService.searchContent( + results = await self.webService.searchWeb( query=query, - engine=engine, maxResults=maxResults ) return self._createResult( success=True, - data={ - "query": query, - "engine": engine, - "results": results - } + data=results ) except Exception as e: logger.error(f"Error searching web: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def crawl(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Crawl web page - - Args: - parameters: - url: URL to crawl - depth: Crawl depth - followLinks: Whether to follow links - extractContent: Whether to extract content - """ + async def crawl(self, parameters: Dict[str, Any]) -> ActionResult: + """Crawl web page""" try: - url = parameters["url"] + url = parameters.get("url") depth = parameters.get("depth", 1) - followLinks = parameters.get("followLinks", False) + followLinks = parameters.get("followLinks", True) extractContent = parameters.get("extractContent", True) + if not url: + return self._createResult( + success=False, + data={}, + error="URL is required" + ) + # Crawl page results = await self.webService.crawlPage( url=url, @@ -87,36 +505,32 @@ class MethodWeb(MethodBase): return self._createResult( success=True, - data={ - "url": url, - "depth": depth, - "results": results - } + data=results ) except Exception as e: logger.error(f"Error crawling web page: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def extract(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Extract content from web page - - Args: - parameters: - url: URL to extract from - selectors: CSS selectors to extract - format: Output format (text, html, json) - """ + async def extract(self, parameters: Dict[str, 
Any]) -> ActionResult: + """Extract content from web page""" try: - url = parameters["url"] - selectors = parameters.get("selectors", ["body"]) + url = parameters.get("url") + selectors = parameters.get("selectors", {}) format = parameters.get("format", "text") + if not url: + return self._createResult( + success=False, + data={}, + error="URL is required" + ) + # Extract content content = await self.webService.extractContent( url=url, @@ -126,34 +540,31 @@ class MethodWeb(MethodBase): return self._createResult( success=True, - data={ - "url": url, - "format": format, - "content": content - } + data=content ) except Exception as e: - logger.error(f"Error extracting web content: {str(e)}") + logger.error(f"Error extracting content: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) @action - async def validate(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: - """ - Validate web page - - Args: - parameters: - url: URL to validate - checks: List of checks to perform - """ + async def validate(self, parameters: Dict[str, Any]) -> ActionResult: + """Validate web page""" try: - url = parameters["url"] + url = parameters.get("url") checks = parameters.get("checks", ["accessibility", "seo", "performance"]) + if not url: + return self._createResult( + success=False, + data={}, + error="URL is required" + ) + # Validate page results = await self.webService.validatePage( url=url, @@ -162,16 +573,13 @@ class MethodWeb(MethodBase): return self._createResult( success=True, - data={ - "url": url, - "checks": checks, - "results": results - } + data=results ) except Exception as e: logger.error(f"Error validating web page: {str(e)}") return self._createResult( success=False, - data={"error": str(e)} + data={}, + error=str(e) ) \ No newline at end of file diff --git a/modules/neutralizer/neutralizer.py b/modules/neutralizer/neutralizer.py index 18648211..6d722f29 100644 --- 
a/modules/neutralizer/neutralizer.py +++ b/modules/neutralizer/neutralizer.py @@ -20,7 +20,7 @@ import xml.etree.ElementTree as ET import os import random from io import StringIO -from patterns import Pattern, HeaderPatterns, DataPatterns, get_pattern_for_header, find_patterns_in_text, TextTablePatterns +from modules.neutralizer.patterns import Pattern, HeaderPatterns, DataPatterns, get_pattern_for_header, find_patterns_in_text, TextTablePatterns import base64 # Configure logging diff --git a/modules/workflow/managerChat.py b/modules/workflow/managerChat.py index ed91dade..614bc3b3 100644 --- a/modules/workflow/managerChat.py +++ b/modules/workflow/managerChat.py @@ -10,14 +10,16 @@ from modules.interfaces.interfaceChatModel import ( TaskStatus, ChatDocument, TaskItem, TaskAction, TaskResult, ChatStat, ChatLog, ChatMessage, ChatWorkflow ) from modules.workflow.serviceContainer import ServiceContainer +from modules.interfaces.interfaceChatObjects import ChatObjects logger = logging.getLogger(__name__) class ChatManager: """Chat manager with improved AI integration and method handling""" - def __init__(self, currentUser: User): + def __init__(self, currentUser: User, chatInterface: ChatObjects): self.currentUser = currentUser + self.chatInterface = chatInterface self.service: ServiceContainer = None self.workflow: ChatWorkflow = None @@ -59,15 +61,14 @@ class ChatManager: logger.error("Actions must be a list") return None - # Create task - task = TaskItem( - id=str(uuid.uuid4()), - workflow=workflow, - userInput=initialMessage.message, - status=taskDef["status"], - feedback=taskDef["feedback"], - actions=[] - ) + # Create task using interface + taskData = { + "workflowId": workflow.id, + "userInput": initialMessage.message, + "status": taskDef["status"], + "feedback": taskDef["feedback"], + "actionList": [] + } # Add actions for actionDef in taskDef["actions"]: @@ -80,13 +81,15 @@ class ChatManager: action = TaskAction( id=str(uuid.uuid4()), - 
method=actionDef["method"], - action=actionDef["action"], - parameters=actionDef["parameters"], - resultLabel=actionDef.get("resultLabel") + execMethod=actionDef["method"], + execAction=actionDef["action"], + execParameters=actionDef["parameters"], + execResultLabel=actionDef.get("resultLabel") ) - task.actions.append(action) + taskData["actionList"].append(action) + # Create task using interface + task = self.chatInterface.createTask(taskData) return task except Exception as e: @@ -129,15 +132,14 @@ class ChatManager: logger.error("Actions must be a list") return None - # Create task - task = TaskItem( - id=str(uuid.uuid4()), - workflow=workflow, - userInput=previousResult.feedback, - status=taskDef["status"], - feedback=taskDef["feedback"], - actions=[] - ) + # Create task using interface + taskData = { + "workflowId": workflow.id, + "userInput": previousResult.feedback, + "status": taskDef["status"], + "feedback": taskDef["feedback"], + "actionList": [] + } # Add actions for actionDef in taskDef["actions"]: @@ -150,13 +152,15 @@ class ChatManager: action = TaskAction( id=str(uuid.uuid4()), - method=actionDef["method"], - action=actionDef["action"], - parameters=actionDef["parameters"], - resultLabel=actionDef.get("resultLabel") + execMethod=actionDef["method"], + execAction=actionDef["action"], + execParameters=actionDef["parameters"], + execResultLabel=actionDef.get("resultLabel") ) - task.actions.append(action) + taskData["actionList"].append(action) + # Create task using interface + task = self.chatInterface.createTask(taskData) return task except Exception as e: @@ -167,12 +171,12 @@ class ChatManager: """Execute a task's actions""" try: # Execute each action - for action in task.actions: + for action in task.actionList: # Create action prompt prompt = f"""Execute the following action: -Action: {action.method}.{action.action} -Parameters: {json.dumps(action.parameters)} +Action: {action.execMethod}.{action.execAction} +Parameters: 
{json.dumps(action.execParameters)} Please provide a JSON response with: 1. result: The result of the action @@ -206,26 +210,31 @@ Example format: action.status = "completed" if not result.get("error") else "failed" action.result = result.get("result", "") action.error = result.get("error", "") - action.resultLabel = result.get("resultLabel", "") + action.execResultLabel = result.get("resultLabel", "") - # Create message for action result - message = ChatMessage( - id=str(uuid.uuid4()), - workflow=task.workflow, - role="assistant", - content=action.result, - status="step", - actionId=action.id, - documentsLabel=action.resultLabel - ) - task.workflow.messages.append(message) + # Create message for action result using interface + messageData = { + "workflowId": task.workflowId, + "role": "assistant", + "message": action.result, + "status": "step", + "sequenceNr": len(self.workflow.messages) + 1, + "publishedAt": datetime.now(UTC).isoformat(), + "actionId": action.id, + "actionMethod": action.execMethod, + "actionName": action.execAction, + "documentsLabel": action.execResultLabel + } + message = self.chatInterface.createWorkflowMessage(messageData) + if message: + self.workflow.messages.append(message) # If action failed, stop execution if action.status == "failed": break # Update task status - task.status = "completed" if all(a.status == "completed" for a in task.actions) else "failed" + task.status = "completed" if all(a.status == "completed" for a in task.actionList) else "failed" return task @@ -237,24 +246,25 @@ Example format: async def parseTaskResult(self, workflow: ChatWorkflow, task: TaskItem) -> None: """Parse and process task results""" try: - # Create result message - message = ChatMessage( - id=str(uuid.uuid4()), - workflow=workflow, - role="assistant", - content=task.feedback, - status="step", - actionId=task.id, - documentsLabel=task.resultLabel - ) - workflow.messages.append(message) + # Create result message using interface + messageData = { + 
"workflowId": workflow.id, + "role": "assistant", + "message": task.feedback, + "status": "step", + "sequenceNr": len(workflow.messages) + 1, + "publishedAt": datetime.now(UTC).isoformat(), + "actionId": task.id + } + message = self.chatInterface.createWorkflowMessage(messageData) + if message: + workflow.messages.append(message) # Update workflow stats if task.processingTime: if not workflow.stats: workflow.stats = ChatStat() workflow.stats.processingTime = (workflow.stats.processingTime or 0) + task.processingTime - self.service.updateWorkflow(workflow.id, {"stats": workflow.stats.dict()}) except Exception as e: logger.error(f"Error parsing task result: {str(e)}") @@ -470,4 +480,30 @@ Example Format: ] }} -Please provide the task definition in JSON format following these rules.""" \ No newline at end of file +Please provide the task definition in JSON format following these rules.""" + + # ===== Utility Methods ===== + async def processFileIds(self, fileIds: List[str]) -> List[ChatDocument]: + """Process file IDs and return ChatDocument objects""" + documents = [] + for fileId in fileIds: + try: + # Get file info from service + fileInfo = self.service.getFileInfo(fileId) + if fileInfo: + document = ChatDocument( + id=str(uuid.uuid4()), + fileId=fileId, + filename=fileInfo.get("filename", "unknown"), + fileSize=fileInfo.get("size", 0), + mimeType=fileInfo.get("mimeType", "application/octet-stream") + ) + documents.append(document) + except Exception as e: + logger.error(f"Error processing file ID {fileId}: {str(e)}") + return documents + + def setUserLanguage(self, language: str) -> None: + """Set user language for the chat manager""" + if hasattr(self, 'service') and self.service: + self.service.user.language = language \ No newline at end of file diff --git a/modules/workflow/managerDocument.py b/modules/workflow/managerDocument.py index e1fcbeda..2a0b7b7a 100644 --- a/modules/workflow/managerDocument.py +++ b/modules/workflow/managerDocument.py @@ -2,15 +2,12 
@@ Document Manager Module for handling document operations and content extraction. """ -import base64 import logging -import uuid from modules.interfaces.interfaceChatModel import ( ChatDocument, ExtractedContent ) -from modules.workflow.serviceContainer import ServiceContainer from modules.workflow.processorDocument import DocumentProcessor logger = logging.getLogger(__name__) @@ -18,14 +15,58 @@ logger = logging.getLogger(__name__) class DocumentManager: """Manager for document operations and content extraction""" - def __init__(self, serviceContainer: ServiceContainer): + def __init__(self, serviceContainer): self.service = serviceContainer - self._processor = DocumentProcessor(serviceContainer) + # Create processor without any dependencies + self._processor = DocumentProcessor() - async def extractContent(self, prompt: str, document: ChatDocument) -> ExtractedContent: - """Extract content from document using prompt""" + async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent: + """Extract content from ChatDocument using prompt""" try: - return await self._processor.processDocument(document, prompt) + # Extract file data from ChatDocument + if document.data: + fileData = document.data.encode('utf-8') if isinstance(document.data, str) else document.data + else: + # Try to get file data from service container if document has fileId + if hasattr(document, 'fileId') and document.fileId: + fileData = self.service.getFileData(document.fileId) + else: + logger.error(f"No file data available in document: {document}") + raise ValueError("No file data available in document") + + # Get filename and mime type from document + filename = document.filename if hasattr(document, 'filename') else "document" + mimeType = document.mimeType if hasattr(document, 'mimeType') else "application/octet-stream" + + # Process with processor + extractedContent = await self._processor.processFileData( + fileData=fileData, + filename=filename, + 
mimeType=mimeType, + base64Encoded=False, + prompt=prompt + ) + + # Update objectId to match document ID + extractedContent.objectId = document.id + extractedContent.objectType = "ChatDocument" + + return extractedContent + except Exception as e: logger.error(f"Error extracting from document: {str(e)}") raise + + async def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False) -> ExtractedContent: + """Extract content from file data directly using prompt""" + try: + return await self._processor.processFileData( + fileData=fileData, + filename=filename, + mimeType=mimeType, + base64Encoded=base64Encoded, + prompt=prompt + ) + except Exception as e: + logger.error(f"Error extracting from file data: {str(e)}") + raise diff --git a/modules/workflow/managerWorkflow.py b/modules/workflow/managerWorkflow.py index 4b620f60..d9ab86dd 100644 --- a/modules/workflow/managerWorkflow.py +++ b/modules/workflow/managerWorkflow.py @@ -5,7 +5,7 @@ import uuid from modules.interfaces.interfaceAppObjects import User -from modules.interfaces.interfaceChatModel import (UserInputRequest, ChatMessage, ChatWorkflow) +from modules.interfaces.interfaceChatModel import (UserInputRequest, ChatMessage, ChatWorkflow, TaskItem) from modules.interfaces.interfaceChatObjects import ChatObjects from modules.workflow.managerChat import ChatManager @@ -20,15 +20,40 @@ class WorkflowManager: def __init__(self, chatInterface: ChatObjects, currentUser: User): self.chatInterface = chatInterface - self.chatManager = ChatManager(currentUser) + self.chatManager = ChatManager(currentUser, chatInterface) self.currentUser = currentUser def _checkWorkflowStopped(self, workflow: ChatWorkflow) -> None: """Check if workflow has been stopped""" if workflow.status == "stopped": raise WorkflowStoppedException("Workflow was stopped by user") - - async def workflowProcess(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> None: + + async def 
workflowProcess(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> TaskItem: + """Process a workflow with user input""" + + # Initialize chat manager + await self.chatManager.initialize(workflow) + + # Set user language + self.chatManager.setUserLanguage(userInput.userLanguage) + + # Send first message + message = await self._sendFirstMessage(userInput, workflow) + + # Create initial task + task = await self.chatManager.createInitialTask(workflow, message) + + # Log the task object + logger.info(f"Created task: {task}") + if task: + logger.info(f"Task ID: {task.id}") + logger.info(f"Task Status: {task.status}") + logger.info(f"Task Feedback: {task.feedback}") + logger.info(f"Number of actions: {len(task.actionList) if task.actionList else 0}") + + return task + + async def workflowProcess_ORIGINAL_TEMPORARY_DEACTIVATED(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> None: """Process a workflow with user input""" try: # Initialize chat manager @@ -78,24 +103,29 @@ class WorkflowManager: async def _sendFirstMessage(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> ChatMessage: """Send first message to start workflow""" try: - # Create initial message - message = ChatMessage( - id=str(uuid.uuid4()), - workflowId=workflow.id, - role="user", - message=userInput.prompt, - status="first", - sequenceNr=1, - publishedAt=datetime.now(UTC).isoformat() - ) + # Create initial message using interface + messageData = { + "workflowId": workflow.id, + "role": "user", + "message": userInput.prompt, + "status": "first", + "sequenceNr": 1, + "publishedAt": datetime.now(UTC).isoformat() + } # Add documents if any if userInput.listFileId: - message.documents = await self.chatManager.processFileIds(userInput.listFileId) + # Process file IDs and add to message data + documents = await self.chatManager.processFileIds(userInput.listFileId) + messageData["documents"] = documents - # Add message to workflow - workflow.messages.append(message) - return 
message + # Create message using interface + message = self.chatInterface.createWorkflowMessage(messageData) + if message: + workflow.messages.append(message) + return message + else: + raise Exception("Failed to create first message") except Exception as e: logger.error(f"Error sending first message: {str(e)}") @@ -107,19 +137,20 @@ class WorkflowManager: # Generate feedback feedback = await self.chatManager.generateWorkflowFeedback(workflow) - # Create last message - message = ChatMessage( - id=str(uuid.uuid4()), - workflowId=workflow.id, - role="assistant", - message=feedback, - status="last", - sequenceNr=len(workflow.messages) + 1, - publishedAt=datetime.now(UTC).isoformat() - ) + # Create last message using interface + messageData = { + "workflowId": workflow.id, + "role": "assistant", + "message": feedback, + "status": "last", + "sequenceNr": len(workflow.messages) + 1, + "publishedAt": datetime.now(UTC).isoformat() + } - # Add message to workflow - workflow.messages.append(message) + # Create message using interface + message = self.chatInterface.createWorkflowMessage(messageData) + if message: + workflow.messages.append(message) except Exception as e: logger.error(f"Error sending last message: {str(e)}") diff --git a/modules/workflow/processorDocument.py b/modules/workflow/processorDocument.py index 92751d4d..bb352a26 100644 --- a/modules/workflow/processorDocument.py +++ b/modules/workflow/processorDocument.py @@ -8,16 +8,15 @@ from datetime import datetime, UTC from pathlib import Path import xml.etree.ElementTree as ET from bs4 import BeautifulSoup +import uuid from modules.interfaces.interfaceChatModel import ( - ChatDocument, ExtractedContent, ContentItem, ContentMetadata ) from modules.neutralizer.neutralizer import DataAnonymizer from modules.shared.configuration import APP_CONFIG -from modules.workflow.serviceContainer import ServiceContainer logger = logging.getLogger(__name__) @@ -33,12 +32,11 @@ class FileProcessingError(Exception): class 
DocumentProcessor: """Processor for handling document operations and content extraction.""" - def __init__(self, serviceContainer: ServiceContainer): + def __init__(self): """Initialize the document processor.""" - self.service = serviceContainer self._neutralizer = DataAnonymizer() if APP_CONFIG.get("ENABLE_CONTENT_NEUTRALIZATION", False) else None - self.supportedTypes: Dict[str, Callable[[ChatDocument], Awaitable[List[ContentItem]]]] = { + self.supportedTypes: Dict[str, Callable[[bytes, str, str], Awaitable[List[ContentItem]]]] = { 'text/plain': self._processText, 'text/csv': self._processCsv, 'application/json': self._processJson, @@ -111,23 +109,15 @@ class DocumentProcessor: except ImportError as e: logger.warning(f"Image processing libraries could not be loaded: {e}") - async def _getFileData(self, document: ChatDocument) -> bytes: - """Centralized function to get file data""" - try: - fileData = self.service.getFileData(document.fileId) - if fileData is None: - raise FileProcessingError(f"Could not get file data for {document.fileId}") - return fileData - except Exception as e: - logger.error(f"Error getting file data: {str(e)}") - raise FileProcessingError(f"Failed to get file data: {str(e)}") - - async def processDocument(self, document: ChatDocument, prompt: str) -> ExtractedContent: + async def processFileData(self, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, prompt: str = None) -> ExtractedContent: """ - Process a document and extract its contents with AI processing. + Process file data directly and extract its contents with AI processing. 
Args: - document: The document to process + fileData: Raw file data as bytes + filename: Name of the file + mimeType: MIME type of the file + base64Encoded: Whether the data is base64 encoded prompt: Prompt for AI content extraction Returns: @@ -137,19 +127,22 @@ class DocumentProcessor: FileProcessingError: If document processing fails """ try: - # Get content type - contentType = document.mimeType - if contentType == "application/octet-stream": - # Try to detect actual file type - contentType = self._detectContentType(document) + # Decode base64 if needed + if base64Encoded: + fileData = base64.b64decode(fileData) - if contentType not in self.supportedTypes: + # Detect content type if needed + if mimeType == "application/octet-stream": + mimeType = self._detectContentTypeFromData(fileData, filename) + + # Process document based on type + if mimeType not in self.supportedTypes: # Fallback to binary processing - contentItems = await self._processBinary(document) + contentItems = await self._processBinary(fileData, filename, mimeType) else: # Process document based on type - processor = self.supportedTypes[contentType] - contentItems = await processor(document) + processor = self.supportedTypes[mimeType] + contentItems = await processor(fileData, filename, mimeType) # Process with AI if prompt provided if prompt and contentItems: @@ -161,20 +154,20 @@ class DocumentProcessor: logger.error(f"Error processing content with AI: {str(e)}") return ExtractedContent( - objectId=document.id, - objectType="ChatDocument", + objectId=str(uuid.uuid4()), + objectType="FileData", contents=contentItems ) except Exception as e: - logger.error(f"Error processing document: {str(e)}") - raise FileProcessingError(f"Failed to process document: {str(e)}") + logger.error(f"Error processing file data: {str(e)}") + raise FileProcessingError(f"Failed to process file data: {str(e)}") - def _detectContentType(self, document: ChatDocument) -> str: - """Detect content type from file content""" + 
def _detectContentTypeFromData(self, fileData: bytes, filename: str) -> str: + """Detect content type from file data and filename""" try: # Check file extension first - ext = os.path.splitext(document.filename)[1].lower() + ext = os.path.splitext(filename)[1].lower() if ext: # Map common extensions to MIME types extToMime = { @@ -200,16 +193,35 @@ class DocumentProcessor: if ext in extToMime: return extToMime[ext] + # Try to detect from content + if fileData.startswith(b'%PDF'): + return 'application/pdf' + elif fileData.startswith(b'PK\x03\x04'): + # ZIP-based formats (docx, xlsx, pptx) + return 'application/zip' + elif fileData.startswith(b'<'): + # XML-based formats + try: + text = fileData.decode('utf-8', errors='ignore') + if ' List[ContentItem]: + async def _processText(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: """Process text document""" try: - content = (await self._getFileData(document)).decode('utf-8') + content = fileData.decode('utf-8') return [ContentItem( label="main", data=content, @@ -224,10 +236,10 @@ class DocumentProcessor: logger.error(f"Error processing text document: {str(e)}") raise FileProcessingError(f"Failed to process text document: {str(e)}") - async def _processCsv(self, document: ChatDocument) -> List[ContentItem]: + async def _processCsv(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: """Process CSV document""" try: - content = (await self._getFileData(document)).decode('utf-8') + content = fileData.decode('utf-8') return [ContentItem( label="main", data=content, @@ -242,10 +254,10 @@ class DocumentProcessor: logger.error(f"Error processing CSV document: {str(e)}") raise FileProcessingError(f"Failed to process CSV document: {str(e)}") - async def _processJson(self, document: ChatDocument) -> List[ContentItem]: + async def _processJson(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: """Process JSON document""" try: - content = (await 
self._getFileData(document)).decode('utf-8') + content = fileData.decode('utf-8') # Parse JSON to validate jsonData = json.loads(content) @@ -263,10 +275,10 @@ class DocumentProcessor: logger.error(f"Error processing JSON document: {str(e)}") raise FileProcessingError(f"Failed to process JSON document: {str(e)}") - async def _processXml(self, document: ChatDocument) -> List[ContentItem]: + async def _processXml(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: """Process XML document""" try: - content = (await self._getFileData(document)).decode('utf-8') + content = fileData.decode('utf-8') return [ContentItem( label="main", data=content, @@ -281,10 +293,10 @@ class DocumentProcessor: logger.error(f"Error processing XML document: {str(e)}") raise FileProcessingError(f"Failed to process XML document: {str(e)}") - async def _processHtml(self, document: ChatDocument) -> List[ContentItem]: + async def _processHtml(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: """Process HTML document""" try: - content = (await self._getFileData(document)).decode('utf-8') + content = fileData.decode('utf-8') return [ContentItem( label="main", data=content, @@ -299,10 +311,10 @@ class DocumentProcessor: logger.error(f"Error processing HTML document: {str(e)}") raise FileProcessingError(f"Failed to process HTML document: {str(e)}") - async def _processSvg(self, document: ChatDocument) -> List[ContentItem]: + async def _processSvg(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: """Process SVG document""" try: - content = (await self._getFileData(document)).decode('utf-8') + content = fileData.decode('utf-8') # Check if it's actually SVG isSvg = " List[ContentItem]: + async def _processImage(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: """Process image document""" try: self._loadImageProcessor() if not imageProcessorLoaded: raise FileProcessingError("Image processing libraries not 
available") - fileData = await self._getFileData(document) - with io.BytesIO(fileData) as imgStream: img = Image.open(imgStream) metadata = ContentMetadata( @@ -336,7 +346,7 @@ class DocumentProcessor: width=img.width, height=img.height, colorMode=img.mode, - mimeType=document.mimeType, + mimeType=mimeType, base64Encoded=True ) @@ -353,15 +363,13 @@ class DocumentProcessor: logger.error(f"Error processing image document: {str(e)}") raise FileProcessingError(f"Failed to process image document: {str(e)}") - async def _processPdf(self, document: ChatDocument) -> List[ContentItem]: + async def _processPdf(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: """Process PDF document""" try: self._loadPdfExtractor() if not pdfExtractorLoaded: raise FileProcessingError("PDF extraction libraries not available") - fileData = await self._getFileData(document) - contentItems = [] with io.BytesIO(fileData) as pdfStream: @@ -424,15 +432,13 @@ class DocumentProcessor: logger.error(f"Error processing PDF document: {str(e)}") raise FileProcessingError(f"Failed to process PDF document: {str(e)}") - async def _processDocx(self, document: ChatDocument) -> List[ContentItem]: + async def _processDocx(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: """Process Word document""" try: self._loadOfficeExtractor() if not officeExtractorLoaded: raise FileProcessingError("Office extraction libraries not available") - fileData = await self._getFileData(document) - with io.BytesIO(fileData) as docxStream: doc = docx.Document(docxStream) @@ -465,15 +471,13 @@ class DocumentProcessor: logger.error(f"Error processing Word document: {str(e)}") raise FileProcessingError(f"Failed to process Word document: {str(e)}") - async def _processXlsx(self, document: ChatDocument) -> List[ContentItem]: + async def _processXlsx(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: """Process Excel document""" try: self._loadOfficeExtractor() if 
not officeExtractorLoaded: raise FileProcessingError("Office extraction libraries not available") - fileData = await self._getFileData(document) - contentItems = [] with io.BytesIO(fileData) as xlsxStream: @@ -509,17 +513,15 @@ class DocumentProcessor: logger.error(f"Error processing Excel document: {str(e)}") raise FileProcessingError(f"Failed to process Excel document: {str(e)}") - async def _processBinary(self, document: ChatDocument) -> List[ContentItem]: + async def _processBinary(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: """Process binary document""" try: - fileData = await self._getFileData(document) - return [ContentItem( label="binary", data=base64.b64encode(fileData).decode('utf-8'), metadata=ContentMetadata( size=len(fileData), - mimeType=document.mimeType, + mimeType=mimeType, base64Encoded=True, error="Unsupported file type" ) @@ -577,13 +579,9 @@ class DocumentProcessor: Return ONLY the extracted information in a clear, concise format. """ - # Get AI response - response = await self.interfaceComponent.callAi([ - {"role": "system", "content": "You are an expert at extracting relevant information from documents."}, - {"role": "user", "content": aiPrompt} - ]) - - chunkResults.append(response.strip()) + # Note: This would need to be implemented with actual AI service + # For now, just return the original content + chunkResults.append(contentToProcess) # Combine chunk results combinedResult = "\n".join(chunkResults) diff --git a/modules/workflow/serviceContainer.py b/modules/workflow/serviceContainer.py index fd02c0ec..5eb79f0f 100644 --- a/modules/workflow/serviceContainer.py +++ b/modules/workflow/serviceContainer.py @@ -8,8 +8,9 @@ from modules.interfaces.interfaceChatModel import ( TaskStatus, ChatDocument, TaskItem, TaskAction, TaskResult, ChatStat, ChatLog, ChatMessage, ChatWorkflow ) -from modules.interfaces.interfaceAiCalls import interfaceAiCalls +from modules.interfaces.interfaceAiCalls import AiCalls from 
modules.interfaces.interfaceChatObjects import getInterface as getChatObjects +from modules.interfaces.interfaceChatModel import ActionResult from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects from modules.workflow.managerDocument import DocumentManager from modules.methods.methodBase import MethodBase @@ -32,7 +33,7 @@ class ServiceContainer: # Initialize managers self.interfaceChat = getChatObjects(currentUser) self.interfaceComponent = getComponentObjects(currentUser) - self.interfaceAiCalls = interfaceAiCalls() + self.interfaceAiCalls = AiCalls() self.documentManager = DocumentManager(self) # Initialize methods catalog @@ -104,7 +105,11 @@ class ServiceContainer: def extractContent(self, prompt: str, document: ChatDocument) -> str: """Extract content from document using prompt""" - return self.documentManager.extractContent(prompt, document) + return self.documentManager.extractContentFromDocument(prompt, document) + + def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False) -> str: + """Extract content from file data directly using prompt""" + return self.documentManager.extractContentFromFileData(prompt, fileData, filename, mimeType, base64Encoded) def getMethodsCatalog(self) -> Dict[str, Any]: """Get catalog of available methods and their actions""" @@ -417,7 +422,7 @@ Please provide a clear summary of this message.""" mimeType=mimeType ) - async def executeMethod(self, methodName: str, actionName: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: + async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> ActionResult: """Execute a method action""" try: if methodName not in self.methods: diff --git a/notes/changelog.txt b/notes/changelog.txt index 0c111b92..2d2c0bd9 100644 --- a/notes/changelog.txt +++ b/notes/changelog.txt @@ 
-1,8 +1,21 @@ +INIT + +conda activate poweron +pip install -r requirements.txt +cd gateway +python app.py + + TODO - neutralizer to put back placeholders to the returned data - referenceHandling and authentication for connections in the method actions - check methods - test for workflow backend with userdata +- prompt for task definition to fix +- method definition list directly based on functions +- neutralizer to put back placeholders to the returned data after ai + + ******************** diff --git a/notes/methodbased_specification.md b/notes/methodbased_specification.md index 4c23a9a2..452e155f 100644 --- a/notes/methodbased_specification.md +++ b/notes/methodbased_specification.md @@ -46,7 +46,7 @@ class MethodParameter(BaseModel): validation: Optional[callable] = None description: str -class MethodResult(BaseModel): +class ActionResult(BaseModel): """Model for method results""" success: bool data: Dict[str, Any] @@ -67,7 +67,7 @@ class MethodBase: """Available actions and their parameters""" raise NotImplementedError - async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult: + async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> ActionResult: """Execute method action with authentication data""" raise NotImplementedError diff --git a/test_config.ini b/test_config.ini new file mode 100644 index 00000000..a4b0e667 --- /dev/null +++ b/test_config.ini @@ -0,0 +1,15 @@ +# Test configuration for workflow testing +DB_APP_HOST=_test_data_app +DB_APP_DATABASE=app +DB_APP_USER=test +DB_APP_PASSWORD_SECRET=test123 + +DB_CHAT_HOST=_test_data_chat +DB_CHAT_DATABASE=chat +DB_CHAT_USER=test +DB_CHAT_PASSWORD_SECRET=test123 + +# AI Configuration +AI_PROVIDER=openai +AI_MODEL=gpt-3.5-turbo +AI_API_KEY_SECRET=test_key \ No newline at end of file diff --git a/test_workflow.py b/test_workflow.py new file mode 100644 index 00000000..e1fed115 --- /dev/null 
+++ b/test_workflow.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 +""" +Test routine for WorkflowManager.workflowProcess() +""" + +import asyncio +import logging +import sys +import os +from datetime import datetime, UTC +import uuid + +# Set up test configuration +os.environ['POWERON_CONFIG_FILE'] = 'test_config.ini' + +# Simple imports from modules (same as app.py) +from modules.interfaces.interfaceAppObjects import User, UserConnection +from modules.interfaces.interfaceChatObjects import ChatObjects +from modules.interfaces.interfaceChatModel import UserInputRequest, ChatWorkflow +from modules.workflow.managerWorkflow import WorkflowManager + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.StreamHandler(sys.stdout), + logging.FileHandler('test_workflow.log') + ] +) + +logger = logging.getLogger(__name__) + +def create_test_user() -> User: + """Create a test user for the workflow""" + # Create test connections for Microsoft services + connections = [ + UserConnection( + id="conn-001", + authority="microsoft", + name="Test Microsoft Account", + enabled=True, + accessToken="test-token-123", + refreshToken="test-refresh-456", + expiresAt=datetime.now(UTC).isoformat(), + scopes=["Files.ReadWrite", "Mail.ReadWrite", "Sites.ReadWrite.All"] + ) + ] + + return User( + id="test-user-001", + mandateId="test-mandate-001", + username="testuser", + email="test@example.com", + fullName="Test User", + enabled=True, + language="en", + privilege="user", + authenticationAuthority="local", + connections=connections + ) + +def create_test_workflow() -> ChatWorkflow: + """Create a test workflow""" + return ChatWorkflow( + id="test-workflow-001", + mandateId="test-mandate-001", + status="running", + name="Business Intelligence Analysis Workflow", + currentRound=1, + lastActivity=datetime.now(UTC).isoformat(), + startedAt=datetime.now(UTC).isoformat(), + logs=[], + messages=[], + 
stats=None, + tasks=[] + ) + +def create_test_user_input() -> UserInputRequest: + """Create test user input with a meaningful business intelligence task""" + return UserInputRequest( + prompt="""Please analyze the quarterly sales data and create a comprehensive business intelligence report. + + The task involves: + 1. Extract and analyze sales data from the provided Excel files + 2. Identify key trends, patterns, and anomalies in the data + 3. Create visualizations (charts and graphs) to illustrate findings + 4. Generate a professional PowerPoint presentation summarizing the analysis + 5. Create a detailed markdown report with actionable insights + 6. Search for industry benchmarks and best practices to compare our performance + 7. Store the final reports in SharePoint for team access + + Please ensure the analysis includes: + - Sales performance by region and product category + - Month-over-month growth trends + - Customer segmentation analysis + - Revenue forecasting for the next quarter + - Recommendations for improving sales performance + + The output should be suitable for executive review and include both high-level summaries and detailed technical analysis.""", + listFileId=["sales_data_q1.xlsx", "sales_data_q2.xlsx", "customer_data.csv"], + userLanguage="en" + ) + +async def test_workflow_process(): + """Test the workflowProcess function""" + try: + logger.info("Starting workflow process test...") + + # Create test data + test_user = create_test_user() + test_workflow = create_test_workflow() + test_user_input = create_test_user_input() + + logger.info(f"Test user: {test_user.username}") + logger.info(f"Test workflow: {test_workflow.id}") + logger.info(f"Test input prompt: {test_user_input.prompt[:100]}...") + logger.info(f"Test files: {test_user_input.listFileId}") + + # Initialize ChatObjects interface + chat_interface = ChatObjects(test_user) + logger.info("ChatObjects interface initialized") + + # Initialize WorkflowManager + workflow_manager = 
WorkflowManager(chat_interface, test_user) + logger.info("WorkflowManager initialized") + + # Test the workflowProcess function + logger.info("Calling workflowProcess...") + task = await workflow_manager.workflowProcess(test_user_input, test_workflow) + + # Log results + if task: + logger.info("✅ Task created successfully!") + logger.info(f"Task ID: {task.id}") + logger.info(f"Task Status: {task.status}") + logger.info(f"Task Feedback: {task.feedback}") + logger.info(f"Number of actions: {len(task.actionList) if task.actionList else 0}") + + if task.actionList: + for i, action in enumerate(task.actionList): + logger.info(f"Action {i+1}: {action.execMethod}.{action.execAction}") + logger.info(f" Parameters: {action.execParameters}") + else: + logger.warning("⚠️ No task was created") + + logger.info("Test completed successfully!") + return task + + except Exception as e: + logger.error(f"❌ Test failed with error: {str(e)}") + logger.exception("Full traceback:") + raise + +async def main(): + """Main function to run the test""" + logger.info("=" * 50) + logger.info("BUSINESS INTELLIGENCE WORKFLOW TEST") + logger.info("=" * 50) + + try: + task = await test_workflow_process() + logger.info("=" * 50) + logger.info("TEST COMPLETED SUCCESSFULLY") + logger.info("=" * 50) + return task + except Exception as e: + logger.error("=" * 50) + logger.error("TEST FAILED") + logger.error("=" * 50) + raise + +if __name__ == "__main__": + # Run the test + asyncio.run(main()) \ No newline at end of file