model based chat workflow

This commit is contained in:
ValueOn AG 2025-06-10 01:25:32 +02:00
parent f3860723af
commit 739f22785c
29 changed files with 5285 additions and 5019 deletions

View file

@ -1,902 +0,0 @@
"""
Data analyst agent for analysis and interpretation of data.
Focuses on output-first design with AI-powered analysis.
"""
import logging
import json
import io
import base64
import os
import time
from typing import Dict, Any, List, Optional
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import hashlib
import uuid
import re
import shutil
from pathlib import Path
import traceback
import sys
import importlib.util
import inspect
from pydantic import BaseModel
from modules.workflow.agentBase import AgentBase
from modules.interfaces.serviceChatModel import ChatContent
logger = logging.getLogger(__name__)
class AgentAnalyst(AgentBase):
    """AI-driven agent for data analysis and visualization.

    The agent asks the AI service for an analysis plan, extracts tabular data
    from the task's input documents and then builds one output document per
    requested output specification (image, data file or text report).
    """

    # Output-label extensions rendered as images, mapped to their MIME types.
    _IMAGE_MIME_TYPES = {
        "png": "image/png",
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "svg": "image/svg+xml",
    }
    # Output-label extensions rendered as data files, mapped to their MIME types.
    _DATA_MIME_TYPES = {
        "csv": "text/csv",
        "json": "application/json",
        "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    }
    # Text formats with a dedicated MIME type; everything else is text/plain.
    _TEXT_MIME_TYPES = {
        "md": "text/markdown",
        "markdown": "text/markdown",
        "html": "text/html",
    }

    def __init__(self):
        """Initialize the data analysis agent"""
        super().__init__()
        self.name = "analyst"
        self.label = "Data Analysis"
        self.description = "Analyzes data using AI-powered insights and visualizations, produce diagrams and visualizations"
        self.capabilities = [
            "dataAnalysis",
            "statistics",
            "visualization",
            "dataInterpretation",
            "reportGeneration"
        ]
        # Set default visualization settings
        plt.style.use('seaborn-v0_8-whitegrid')

    def setDependencies(self, serviceBase=None):
        """Set external dependencies for the agent."""
        self.setService(serviceBase)

    async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a task by focusing on required outputs and using AI to guide the analysis process.

        Args:
            task: Task dictionary with prompt, inputDocuments, outputSpecifications

        Returns:
            Dictionary with feedback and documents
        """
        try:
            # Extract task information
            prompt = task.get("prompt", "")
            outputSpecs = task.get("outputSpecifications") or []
            workflow = task.get("context", {}).get("workflow", {})

            # Check AI service
            if not self.service or not self.service.base:
                return {
                    "feedback": "The Analyst agent requires an AI service to function effectively.",
                    "documents": []
                }

            # Create analysis plan
            if workflow:
                self.service.logAdd(workflow, "Extracting data from documents...", level="info", progress=35)
            analysisPlan = await self._createAnalysisPlan(prompt)

            # Check if this is truly an analysis task
            if not analysisPlan.get("requiresAnalysis", True):
                return {
                    "feedback": "This task doesn't appear to require analysis. Please try a different agent.",
                    "documents": []
                }

            # Analyze data
            if workflow:
                self.service.logAdd(workflow, "Analyzing task requirements...", level="info", progress=45)
            analysisResults = await self._analyzeData(task, analysisPlan)

            # Announce the outputs about to be created (progress 50% to 90%).
            # Guarded like the other progress logs so nothing is logged
            # against an empty workflow.
            totalSpecs = len(outputSpecs)
            if workflow:
                for i in range(totalSpecs):
                    progress = 50 + int((i / totalSpecs) * 40)
                    self.service.logAdd(workflow, f"Creating output {i+1}/{totalSpecs}...", level="info", progress=progress)

            documents = await self._createOutputDocuments(
                prompt,
                analysisResults,
                outputSpecs,
                analysisPlan
            )

            # Generate feedback
            feedback = analysisPlan.get("feedback", f"I analyzed '{prompt[:50]}...' and generated {len(documents)} output documents.")
            return {
                "feedback": feedback,
                "documents": documents
            }
        except Exception as e:
            # Top-level boundary: report the failure instead of raising.
            logger.error(f"Error during analysis: {str(e)}", exc_info=True)
            return {
                "feedback": f"Error during analysis: {str(e)}",
                "documents": []
            }

    def _extractData(self, documents: List[Dict[str, Any]]) -> tuple:
        """
        Extract data from documents, focusing on dataExtracted fields.

        Args:
            documents: List of input documents

        Returns:
            Tuple of (datasets dictionary, document context text)
        """
        datasets = {}
        contextParts = []
        for doc in documents:
            docName = doc.get("name", "unnamed")
            if doc.get("ext"):
                docName = f"{docName}.{doc.get('ext')}"
            contextParts.append(f"\n\n--- {docName} ---\n")
            for content in doc.get("contents", []):
                # Focus only on dataExtracted
                extractedText = content.get("dataExtracted")
                if not extractedText:
                    continue
                contextParts.append(extractedText)
                self._collectDatasets(docName, extractedText, datasets)
        return datasets, "".join(contextParts)

    def _collectDatasets(self, docName: str, extractedText: str, datasets: Dict[str, Any]) -> None:
        """Parse extractedText into DataFrames and store them in datasets (best effort)."""
        lowerName = docName.lower()
        if lowerName.endswith(('.csv', '.tsv')):
            # .tsv content is tab separated; parsing it with the default comma
            # separator would yield a useless single-column frame.
            sep = '\t' if lowerName.endswith('.tsv') else ','
            df = self._tryReadCsv(extractedText, sep, docName)
            if df is not None:
                datasets[docName] = df
        elif lowerName.endswith('.json'):
            try:
                jsonData = json.loads(extractedText)
                if isinstance(jsonData, list):
                    datasets[docName] = pd.DataFrame(jsonData)
                elif isinstance(jsonData, dict):
                    # Handle nested JSON structures: one dataset per non-empty list value
                    if any(isinstance(v, list) for v in jsonData.values()):
                        for key, value in jsonData.items():
                            if isinstance(value, list) and len(value) > 0:
                                datasets[f"{docName}:{key}"] = pd.DataFrame(value)
                    else:
                        datasets[docName] = pd.DataFrame([jsonData])
            except Exception as e:
                # Best effort: unparsable JSON simply yields no dataset.
                logger.debug(f"Could not parse '{docName}' as JSON data: {str(e)}")

        # Try to detect tabular data in text content
        lines = extractedText.splitlines()
        if docName not in datasets and len(lines) > 2:
            head = lines[:5]
            if any(',' in line for line in head):
                sep = ','
            elif any('\t' in line for line in head):
                sep = '\t'
            else:
                sep = None
            if sep is not None:
                df = self._tryReadCsv(extractedText, sep, docName)
                # A single column means the separator did not really split anything.
                if df is not None and len(df.columns) > 1:
                    datasets[docName] = df

    @staticmethod
    def _tryReadCsv(text: str, sep: str, docName: str):
        """Return text parsed as delimited data, or None when parsing fails."""
        try:
            return pd.read_csv(io.StringIO(text), sep=sep)
        except Exception as e:
            # Best effort: the text is still available through the document context.
            logger.debug(f"Could not parse '{docName}' as delimited data: {str(e)}")
            return None

    @staticmethod
    def _describeDatasets(datasets: Dict[str, Any]) -> Dict[str, Any]:
        """Return a JSON-friendly summary (shape and columns) of each dataset."""
        return {
            name: {"shape": df.shape, "columns": df.columns.tolist()}
            for name, df in datasets.items()
        }

    @staticmethod
    def _extractJson(response: Any) -> Optional[Dict[str, Any]]:
        """Return the outermost JSON object embedded in response, or None."""
        if not isinstance(response, str):
            return None
        jsonStart = response.find('{')
        jsonEnd = response.rfind('}') + 1
        if jsonStart < 0 or jsonEnd <= jsonStart:
            return None
        try:
            parsed = json.loads(response[jsonStart:jsonEnd])
        except ValueError:
            return None
        return parsed if isinstance(parsed, dict) else None

    @staticmethod
    def _fallbackPlan(prompt: str, datasetNames: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        Build the generic plan used when the AI does not deliver a usable one.

        Args:
            prompt: The task prompt (echoed in the feedback text)
            datasetNames: When given, each plan entry lists these datasets;
                when None, the plan carries "requiresAnalysis": True instead

        Returns:
            Analysis plan dictionary
        """
        step = {
            "step": "Basic data analysis",
            "purpose": "Understand the data structure and content",
            "techniques": ["summary statistics", "data visualization"],
            "outputs": ["summary report", "basic visualizations"]
        }
        visualization = {
            "type": "basic charts",
            "purpose": "Show data distribution and relationships",
            "settings": {}
        }
        insight = {
            "type": "basic insights",
            "description": "Key findings from the data"
        }
        plan: Dict[str, Any] = {}
        if datasetNames is None:
            plan["requiresAnalysis"] = True
        else:
            for entry in (step, visualization, insight):
                entry["datasets"] = list(datasetNames)
        plan["analysisSteps"] = [step]
        plan["visualizations"] = [visualization]
        plan["insights"] = [insight]
        plan["feedback"] = f"I'll analyze the data and provide insights about {prompt}"
        return plan

    async def _requestPlan(self, analysisPrompt: str) -> Optional[Dict[str, Any]]:
        """Ask the AI for a plan; return the parsed JSON object or None on any failure."""
        try:
            response = await self.service.base.callAi([
                {"role": "system", "content": "You are a data analysis expert. Create detailed analysis plans. Respond with valid JSON only."},
                {"role": "user", "content": analysisPrompt}
            ], produceUserAnswer=True)
        except Exception as e:
            logger.warning(f"Error creating analysis plan: {str(e)}")
            return None
        plan = self._extractJson(response)
        if plan is None:
            logger.warning(f"Not able creating analysis plan, generating fallback plan")
        return plan

    async def _analyzeTask(self, prompt: str, documentContext: str, datasets: Dict[str, Any], outputSpecs: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Analyze the task requirements using AI.

        Args:
            prompt: The task prompt
            documentContext: Context from input documents
            datasets: Available datasets
            outputSpecs: Output specifications

        Returns:
            Analysis plan dictionary
        """
        # DataFrames are not JSON serializable, so describe them by shape and
        # columns instead of dumping the dataset dictionary itself.
        datasetInfo = json.dumps(self._describeDatasets(datasets), indent=2, default=str)
        outputInfo = json.dumps(outputSpecs, indent=2, default=str)
        analysisPrompt = f"""
        Analyze this data analysis task and create a detailed plan:
        TASK: {prompt}
        DOCUMENT CONTEXT:
        {documentContext}
        AVAILABLE DATASETS:
        {datasetInfo}
        REQUIRED OUTPUTS:
        {outputInfo}
        Create a detailed analysis plan in JSON format with:
        {{
            "analysisSteps": [
                {{
                    "step": "step description",
                    "purpose": "why this step is needed",
                    "datasets": ["dataset1", "dataset2"],
                    "techniques": ["technique1", "technique2"],
                    "outputs": ["output1", "output2"]
                }}
            ],
            "visualizations": [
                {{
                    "type": "visualization type",
                    "purpose": "what it shows",
                    "datasets": ["dataset1"],
                    "settings": {{"key": "value"}}
                }}
            ],
            "insights": [
                {{
                    "type": "insight type",
                    "description": "what to look for",
                    "datasets": ["dataset1"]
                }}
            ],
            "feedback": "explanation of the analysis approach"
        }}
        Respond with ONLY the JSON object, no additional text or explanations.
        """
        plan = await self._requestPlan(analysisPrompt)
        if plan is None:
            return self._fallbackPlan(prompt, list(datasets.keys()))
        return plan

    async def _createAnalysisPlan(self, prompt: str) -> Dict[str, Any]:
        """
        Create an analysis plan based on the task prompt.

        Args:
            prompt: The task prompt

        Returns:
            Analysis plan dictionary
        """
        analysisPrompt = f"""
        Analyze this data analysis task and create a detailed plan:
        TASK: {prompt}
        Create a detailed analysis plan in JSON format with:
        {{
            "requiresAnalysis": true/false,
            "analysisSteps": [
                {{
                    "step": "step description",
                    "purpose": "why this step is needed",
                    "techniques": ["technique1", "technique2"],
                    "outputs": ["output1", "output2"]
                }}
            ],
            "visualizations": [
                {{
                    "type": "visualization type",
                    "purpose": "what it shows",
                    "settings": {{"key": "value"}}
                }}
            ],
            "insights": [
                {{
                    "type": "insight type",
                    "description": "what to look for"
                }}
            ],
            "feedback": "explanation of the analysis approach"
        }}
        Respond with ONLY the JSON object, no additional text or explanations.
        """
        plan = await self._requestPlan(analysisPrompt)
        if plan is None:
            return self._fallbackPlan(prompt)
        return plan

    async def _recommendVisualizations(self, datasets: Dict, prompt: str, description: str,
                                       analysisPlan: Dict) -> List[Dict[str, Any]]:
        """Ask the AI which visualizations fit the data; return [] when none can be parsed."""
        workflowId = analysisPlan.get("workflowId")
        if workflowId:
            # NOTE(review): processTask logs through self.service.logAdd with a
            # workflow object, this call goes through self.service.base with an
            # id - confirm which one is intended. A failing progress log must
            # not abort the visualization.
            try:
                self.service.base.logAdd(workflowId, "Generating visualization recommendations...", level="info", progress=50)
            except Exception as e:
                logger.debug(f"Could not log visualization progress: {str(e)}")

        datasetInfo = json.dumps(self._describeDatasets(datasets), indent=2, default=str)
        vizPrompt = f"""
        Based on this data and task, recommend appropriate visualizations.
        TASK: {prompt}
        DESCRIPTION: {description}
        DATASETS:
        {datasetInfo}
        Recommend visualizations in JSON format:
        {{
            "visualizations": [
                {{
                    "type": "chart_type",
                    "dataSource": "dataset_name",
                    "variables": ["col1", "col2"],
                    "purpose": "explanation"
                }}
            ]
        }}
        """
        response = await self.service.base.callAi([
            {"role": "system", "content": "You are a data visualization expert. Recommend appropriate visualizations based on the data and task."},
            {"role": "user", "content": vizPrompt}
        ])
        vizData = self._extractJson(response)
        recommendations = vizData.get("visualizations", []) if vizData else []
        return recommendations if isinstance(recommendations, list) else []

    def _renderMessageImage(self, message: str, formatType: str, fontsize: int = 12) -> str:
        """Render message centered on an empty figure and return it base64 encoded."""
        plt.figure(figsize=(10, 6))
        try:
            plt.text(0.5, 0.5, message, ha='center', va='center', fontsize=fontsize)
            plt.tight_layout()
            return self._getImageBase64(formatType)
        finally:
            plt.close()

    def _runVisualizationCode(self, vizCode: str, datasets: Dict, formatType: str) -> str:
        """Execute generated plotting code and return the current figure base64 encoded."""
        plt.figure(figsize=(10, 6))
        try:
            # Make local variables available to the code
            localVars = {
                "plt": plt,
                "sns": sns,
                "pd": pd,
                "np": __import__('numpy')
            }
            for name, df in datasets.items():
                # Expose each dataset under a sanitized variable name
                varName = ''.join(c if c.isalnum() else '_' for c in name)
                localVars[varName] = df
                # Also add with standard names for simpler code
                if "df" not in localVars:
                    localVars["df"] = df
                elif "df2" not in localVars:
                    localVars["df2"] = df
            # SECURITY: this executes AI-generated code with full interpreter
            # privileges. Flagged for review - it should run in a sandbox or be
            # replaced by a declarative chart specification.
            exec(vizCode, globals(), localVars)
            return self._getImageBase64(formatType)
        finally:
            # The generated code may open further figures; close them all so
            # neither the success nor the failure path leaks figures.
            plt.close('all')

    async def _createVisualization(self, datasets: Dict, prompt: str, outputLabel: str,
                                   analysisPlan: Dict, description: str) -> Dict:
        """
        Create a visualization based on the analysis plan.

        Args:
            datasets: Dictionary of datasets
            prompt: Original task prompt
            outputLabel: Output file label
            analysisPlan: Analysis plan
            description: Output description

        Returns:
            Document with the visualization (or an image describing the
            error), as built by formatAgentDocumentOutput
        """
        # Determine the format before entering the try block: the error
        # handler needs it even when the very first step fails.
        formatType = outputLabel.split('.')[-1].lower()
        if formatType not in self._IMAGE_MIME_TYPES:
            formatType = 'png'
        contentType = self._IMAGE_MIME_TYPES[formatType]

        try:
            # If no datasets available, create error message image
            if not datasets:
                imgData = self._renderMessageImage("No data available for visualization", formatType, fontsize=14)
                return self.formatAgentDocumentOutput(outputLabel, imgData, contentType)

            # Get visualization recommendations, asking the AI if the plan has none
            vizRecommendations = analysisPlan.get("visualizations", [])
            if not vizRecommendations:
                vizRecommendations = await self._recommendVisualizations(
                    datasets, prompt, description, analysisPlan
                )
            # Fall back to the first dataset if there is still no recommendation
            if not vizRecommendations:
                name, df = next(iter(datasets.items()))
                vizRecommendations = [{
                    "type": "auto",
                    "dataSource": name,
                    "variables": df.columns.tolist()[:5],
                    "purpose": "general analysis"
                }]

            # Create visualization code prompt
            vizPrompt = f"""
            Generate Python matplotlib/seaborn code to create a visualization for:
            TASK: {prompt}
            VISUALIZATION REQUIREMENTS:
            - Output format: {formatType}
            - Filename: {outputLabel}
            - Description: {description}
            RECOMMENDED VISUALIZATION:
            {json.dumps(vizRecommendations, indent=2, default=str)}
            AVAILABLE DATASETS:
            """
            # Add dataset info for recommended sources
            for viz in vizRecommendations:
                dataSource = viz.get("dataSource") if isinstance(viz, dict) else None
                if isinstance(dataSource, str) and dataSource in datasets:
                    df = datasets[dataSource]
                    vizPrompt += f"\nDataset '{dataSource}':\n"
                    vizPrompt += f"- Shape: {df.shape}\n"
                    vizPrompt += f"- Columns: {df.columns.tolist()}\n"
                    vizPrompt += f"- Sample data: {df.head(3).to_dict(orient='records')}\n"
            vizPrompt += """
            Generate ONLY Python code that:
            1. Uses matplotlib and/or seaborn to create a clear visualization
            2. Sets figure size to (10, 6)
            3. Includes appropriate titles, labels, and legend
            4. Uses professional color schemes
            5. Handles any missing data gracefully
            Return ONLY executable Python code, no explanations or markdown.
            """

            # Get visualization code from AI
            vizCode = await self.service.base.callAi([
                {"role": "system", "content": "You are a data visualization expert. Provide only executable Python code."},
                {"role": "user", "content": vizPrompt}
            ], produceUserAnswer=True)
            # Strip markdown code fences
            vizCode = vizCode.replace("```python", "").replace("```", "").strip()

            imgData = self._runVisualizationCode(vizCode, datasets, formatType)
            return self.formatAgentDocumentOutput(outputLabel, imgData, contentType)
        except Exception as e:
            logger.error(f"Error creating visualization: {str(e)}", exc_info=True)
            # Create error message image
            imgData = self._renderMessageImage(f"Visualization error: {str(e)}", formatType, fontsize=12)
            return self.formatAgentDocumentOutput(outputLabel, imgData, contentType)

    async def _createDataDocument(self, datasets: Dict, prompt: str, outputLabel: str,
                                  analysisPlan: Dict, description: str) -> ChatContent:
        """
        Create a data document (CSV, JSON, Excel) from analysis results.

        Args:
            datasets: Dictionary of datasets
            prompt: Original task prompt
            outputLabel: Output filename
            analysisPlan: Analysis plan
            description: Output description

        Returns:
            ChatContent object
        """
        # Determine format from filename (outside the try so the error
        # handler can always refer to it)
        formatType = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "csv"
        try:
            # Process data based on format
            # NOTE(review): _convertToCsv / _convertToExcel are not defined in
            # this class - presumably provided by AgentBase; verify.
            if formatType == "csv":
                result = self._convertToCsv(datasets)
            elif formatType == "json":
                # DataFrames are not JSON serializable; export them as records.
                serializable = {
                    name: df.to_dict(orient="records") if isinstance(df, pd.DataFrame) else df
                    for name, df in datasets.items()
                }
                result = json.dumps(serializable, indent=2, default=str)
            elif formatType == "xlsx":
                result = self._convertToExcel(datasets)
            else:
                result = str(datasets)
            contentType = self._DATA_MIME_TYPES.get(formatType, "text/plain")
            return self.formatAgentDocumentOutput(outputLabel, result, contentType)
        except Exception as e:
            logger.error(f"Error creating data document: {str(e)}", exc_info=True)
            errorContent = f"Error generating {formatType} document: {str(e)}"
            return self.formatAgentDocumentOutput(outputLabel, errorContent, "text/plain")

    async def _createTextDocument(self, datasets: Dict, context: str, prompt: str,
                                  outputLabel: str, formatType: str,
                                  analysisPlan: Dict, description: str) -> ChatContent:
        """
        Create a text document (markdown, HTML, text) from analysis results.

        Args:
            datasets: Dictionary of datasets
            context: Document context
            prompt: Original task prompt
            outputLabel: Output filename
            formatType: Output format
            analysisPlan: Analysis plan
            description: Output description

        Returns:
            ChatContent object
        """
        # Determine content type up front: the error handler needs it even
        # when the AI call fails before any content exists.
        contentType = self._TEXT_MIME_TYPES.get(formatType, "text/plain")
        try:
            # Generate dataset summaries
            datasetSummaries = []
            for name, df in datasets.items():
                summary = f"\nDataset: {name}\n"
                summary += f"Shape: {df.shape}\n"
                # Column labels are not necessarily strings (e.g. integers)
                summary += f"Columns: {', '.join(map(str, df.columns))}\n"
                if not df.empty:
                    summary += f"Sample data:\n{df.head(3).to_string()}\n"
                datasetSummaries.append(summary)

            # Generate analysis prompt
            analysisPrompt = f"""
            Create a detailed {formatType} document for:
            TASK: {prompt}
            OUTPUT REQUIREMENTS:
            - Format: {formatType}
            - Filename: {outputLabel}
            - Description: {description}
            ANALYSIS CONTEXT:
            {json.dumps(analysisPlan, indent=2, default=str)}
            DATASET SUMMARIES:
            {"".join(datasetSummaries)}
            DOCUMENT CONTEXT:
            {context[:2000]}... (truncated)
            Create a comprehensive, professional analysis document that addresses the task requirements.
            The document should:
            1. Have a clear structure with headings and sections
            2. Include relevant data findings and insights
            3. Provide appropriate interpretations and recommendations
            4. Format the content according to the required output format
            Your response should be the complete document content in the specified format.
            """
            # Get document content from AI
            documentContent = await self.service.base.callAi([
                {"role": "system", "content": f"You are a data analysis expert creating a {formatType} document."},
                {"role": "user", "content": analysisPrompt}
            ], produceUserAnswer=True)

            # Add the minimal wrapper the format expects if the AI left it out
            if formatType in ["md", "markdown"] and not documentContent.strip().startswith("#"):
                documentContent = f"# Analysis Report\n\n{documentContent}"
            elif formatType == "html" and "<html" not in documentContent.lower():
                documentContent = f"<html><body>{documentContent}</body></html>"
            return self.formatAgentDocumentOutput(outputLabel, documentContent, contentType)
        except Exception as e:
            logger.error(f"Error creating text document: {str(e)}", exc_info=True)
            # Create a simple error document
            if formatType in ["md", "markdown"]:
                content = f"# Error in Analysis\n\nThere was an error generating the analysis: {str(e)}"
            elif formatType == "html":
                content = f"<html><body><h1>Error in Analysis</h1><p>There was an error generating the analysis: {str(e)}</p></body></html>"
            else:
                content = f"Error in Analysis\n\nThere was an error generating the analysis: {str(e)}"
            return self.formatAgentDocumentOutput(outputLabel, content, contentType)

    def _getImageBase64(self, formatType: str = 'png') -> str:
        """
        Convert current matplotlib figure to base64 string.

        Args:
            formatType: Image format

        Returns:
            Base64 encoded string of the image
        """
        with io.BytesIO() as buffer:
            plt.savefig(buffer, format=formatType, dpi=100)
            return base64.b64encode(buffer.getvalue()).decode('utf-8')

    async def _analyzeData(self, task: Dict[str, Any], analysisPlan: Dict[str, Any]) -> Dict[str, Any]:
        """
        Analyze data based on the analysis plan.

        Args:
            task: Task dictionary with input documents and specifications
            analysisPlan: Analysis plan from _createAnalysisPlan

        Returns:
            Analysis results dictionary; on failure a dictionary with an
            "error" entry and empty "datasets" / "documentContext"
        """
        try:
            # Extract data from input documents
            inputDocuments = task.get("inputDocuments") or []
            datasets, documentContext = self._extractData(inputDocuments)

            # Get task information
            prompt = task.get("prompt", "")
            outputSpecs = task.get("outputSpecifications") or []

            # Analyze task requirements
            analysisResults = await self._analyzeTask(prompt, documentContext, datasets, outputSpecs)

            # Add datasets and context to results
            analysisResults["datasets"] = datasets
            analysisResults["documentContext"] = documentContext
            return analysisResults
        except Exception as e:
            logger.error(f"Error analyzing data: {str(e)}", exc_info=True)
            return {
                "error": str(e),
                "datasets": {},
                "documentContext": ""
            }

    async def _createOutputDocuments(self, prompt: str, analysisResults: Dict[str, Any],
                                     outputSpecs: List[Dict[str, Any]], analysisPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Create output documents based on analysis results.

        Args:
            prompt: Original task prompt
            analysisResults: Results from data analysis
            outputSpecs: List of output specifications
            analysisPlan: Analysis plan from _createAnalysisPlan

        Returns:
            List of document objects
        """
        documents = []
        datasets = analysisResults.get("datasets", {})
        documentContext = analysisResults.get("documentContext", "")

        # Process each output specification
        for spec in outputSpecs:
            outputLabel = spec.get("label", "")
            outputDescription = spec.get("description", "")
            # Determine format from filename
            formatType = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "txt"
            try:
                if formatType in self._IMAGE_MIME_TYPES:
                    # Visualization output
                    document = await self._createVisualization(
                        datasets, prompt, outputLabel, analysisPlan, outputDescription
                    )
                elif formatType in self._DATA_MIME_TYPES:
                    # Data document output
                    document = await self._createDataDocument(
                        datasets, prompt, outputLabel, analysisPlan, outputDescription
                    )
                else:
                    # Text document output (markdown, html, text)
                    document = await self._createTextDocument(
                        datasets, documentContext, prompt, outputLabel, formatType,
                        analysisPlan, outputDescription
                    )
                documents.append(document)
            except Exception as e:
                logger.error(f"Error creating output document {outputLabel}: {str(e)}", exc_info=True)
                # One failing output must not prevent the remaining ones
                errorDoc = self.formatAgentDocumentOutput(
                    outputLabel,
                    f"Error creating document: {str(e)}",
                    "text/plain"
                )
                documents.append(errorDoc)
        return documents
# Factory entry point used to obtain the Analyst agent
def getAgentAnalyst():
    """Create and return a new AgentAnalyst instance."""
    analystAgent = AgentAnalyst()
    return analystAgent

View file

@ -3,12 +3,31 @@ Chat model classes for the chat system.
"""
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from typing import List, Dict, Any, Optional, Union
from datetime import datetime
import uuid
from modules.shared.attributeUtils import register_model_labels, ModelMixin
# USER MODELS
class UserInputRequest(BaseModel, ModelMixin):
    """Data model for a user input request"""

    # Text of the prompt (required).
    prompt: str = Field(
        description="Prompt for the user",
    )
    # IDs of the files attached to the request; empty list when omitted.
    listFileId: List[str] = Field(
        default_factory=list,
        description="List of file IDs",
    )
    # Language code of the user; "en" when omitted.
    userLanguage: str = Field(
        default="en",
        description="User's preferred language",
    )


# Register the display labels (English / French) of UserInputRequest
register_model_labels(
    "UserInputRequest",
    {"en": "User Input Request", "fr": "Demande de saisie utilisateur"},
    {
        "prompt": {"en": "Prompt", "fr": "Invite"},
        "listFileId": {"en": "File IDs", "fr": "IDs des fichiers"},
        "userLanguage": {"en": "User Language", "fr": "Langue de l'utilisateur"},
    },
)
# WORKFLOW MODELS
class ChatContent(BaseModel, ModelMixin):
@ -18,7 +37,6 @@ class ChatContent(BaseModel, ModelMixin):
data: str = Field(description="The actual content data")
mimeType: str = Field(description="MIME type of the content")
metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata")
# Register labels for ChatContent
register_model_labels(
"ChatContent",
@ -40,7 +58,6 @@ class ChatDocument(BaseModel, ModelMixin):
fileSize: int = Field(description="Size of the file")
mimeType: str = Field(description="MIME type of the file")
contents: List[ChatContent] = Field(default_factory=list, description="List of chat contents")
# Register labels for ChatDocument
register_model_labels(
"ChatDocument",
@ -64,7 +81,6 @@ class ChatStat(BaseModel, ModelMixin):
bytesReceived: Optional[int] = Field(None, description="Number of bytes received")
successRate: Optional[float] = Field(None, description="Success rate of operations")
errorCount: Optional[int] = Field(None, description="Number of errors encountered")
# Register labels for ChatStat
register_model_labels(
"ChatStat",
@ -91,7 +107,6 @@ class ChatLog(BaseModel, ModelMixin):
status: str = Field(description="Status of the log entry")
progress: Optional[int] = Field(None, description="Progress percentage")
performance: Optional[Dict[str, Any]] = Field(None, description="Performance metrics")
# Register labels for ChatLog
register_model_labels(
"ChatLog",
@ -124,7 +139,6 @@ class ChatMessage(BaseModel, ModelMixin):
finishedAt: Optional[str] = Field(None, description="When the message processing finished")
stats: Optional[ChatStat] = Field(None, description="Statistics for this message")
success: Optional[bool] = Field(None, description="Whether the message processing was successful")
# Register labels for ChatMessage
register_model_labels(
"ChatMessage",
@ -146,7 +160,7 @@ register_model_labels(
}
)
class Task(BaseModel, ModelMixin):
class AgentTask(BaseModel, ModelMixin):
"""Data model for a task"""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
workflowId: str = Field(description="Foreign key to workflow")
@ -162,10 +176,9 @@ class Task(BaseModel, ModelMixin):
startedAt: str = Field(description="When the task started")
finishedAt: Optional[str] = Field(None, description="When the task finished")
performance: Optional[Dict[str, Any]] = Field(None, description="Performance metrics")
# Register labels for Task
# Register labels for AgentTask
register_model_labels(
"Task",
"AgentTask",
{"en": "Task", "fr": "Tâche"},
{
"id": {"en": "ID", "fr": "ID"},
@ -185,6 +198,28 @@ register_model_labels(
}
)
class Agent(BaseModel, ModelMixin):
    """Data model for an agent"""

    # Primary key; a fresh UUID4 string is generated when omitted.
    id: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Primary key",
    )
    name: str = Field(
        description="Name of the agent",
    )
    description: str = Field(
        description="Description of the agent",
    )
    # Empty list when omitted.
    capabilities: List[str] = Field(
        default_factory=list,
        description="List of agent capabilities",
    )
    # Optional free-form metrics; None when omitted.
    performance: Optional[Dict[str, Any]] = Field(
        None,
        description="Performance metrics",
    )


# Register the display labels (English / French) of Agent
register_model_labels(
    "Agent",
    {"en": "Agent", "fr": "Agent"},
    {
        "id": {"en": "ID", "fr": "ID"},
        "name": {"en": "Name", "fr": "Nom"},
        "description": {"en": "Description", "fr": "Description"},
        "capabilities": {"en": "Capabilities", "fr": "Capacités"},
        "performance": {"en": "Performance", "fr": "Performance"},
    },
)
# WORKFLOW MODELS
class ChatWorkflow(BaseModel, ModelMixin):
"""Data model for a chat workflow"""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
@ -197,8 +232,7 @@ class ChatWorkflow(BaseModel, ModelMixin):
logs: List[ChatLog] = Field(default_factory=list, description="Workflow logs")
messages: List[ChatMessage] = Field(default_factory=list, description="Messages in the workflow")
stats: Optional[ChatStat] = Field(None, description="Workflow statistics")
tasks: List[Task] = Field(default_factory=list, description="List of tasks in the workflow")
tasks: List[AgentTask] = Field(default_factory=list, description="List of tasks in the workflow")
# Register labels for ChatWorkflow
register_model_labels(
"ChatWorkflow",
@ -218,151 +252,124 @@ register_model_labels(
}
)
# AGENT AND TASK MODELS
# DOCUMENT MODELS
class Agent(BaseModel, ModelMixin):
    """Data model for an agent"""

    # Primary key; a fresh UUID4 string is generated when omitted.
    id: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Primary key",
    )
    name: str = Field(
        description="Name of the agent",
    )
    description: str = Field(
        description="Description of the agent",
    )
    # Empty list when omitted.
    capabilities: List[str] = Field(
        default_factory=list,
        description="List of agent capabilities",
    )
    # Optional free-form metrics; None when omitted.
    performance: Optional[Dict[str, Any]] = Field(
        None,
        description="Performance metrics",
    )
class DocumentExtraction(BaseModel, ModelMixin):
    """Data model for document extraction history"""

    # Stored as a string; the expected format is not fixed by this model.
    timestamp: str = Field(
        description="Timestamp of extraction",
    )
    type: str = Field(
        description="Type of document",
    )
    # Empty list when omitted.
    sections: List[str] = Field(
        default_factory=list,
        description="Extracted sections",
    )
    # Empty dict when omitted.
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Extraction metadata",
    )
# Register labels for Agent
# Register labels for DocumentExtraction
register_model_labels(
"Agent",
{"en": "Agent", "fr": "Agent"},
"DocumentExtraction",
{"en": "Document Extraction", "fr": "Extraction de document"},
{
"timestamp": {"en": "Timestamp", "fr": "Horodatage"},
"type": {"en": "Type", "fr": "Type"},
"sections": {"en": "Sections", "fr": "Sections"},
"metadata": {"en": "Metadata", "fr": "Métadonnées"}
}
)
class DocumentContext(BaseModel, ModelMixin):
    """Data model for document context"""

    # Required; unlike other models here no default ID is generated.
    id: str = Field(
        description="Document ID",
    )
    # Empty list when omitted.
    extractionHistory: List[DocumentExtraction] = Field(
        default_factory=list,
        description="History of extractions",
    )
    # Empty list when omitted.
    relevantSections: List[str] = Field(
        default_factory=list,
        description="Relevant sections",
    )
    # String-to-string mapping; empty dict when omitted.
    processingStatus: Dict[str, str] = Field(
        default_factory=dict,
        description="Processing status",
    )


# Register the display labels (English / French) of DocumentContext
register_model_labels(
    "DocumentContext",
    {"en": "Document Context", "fr": "Contexte de document"},
    {
        "id": {"en": "ID", "fr": "ID"},
        "extractionHistory": {"en": "Extraction History", "fr": "Historique d'extraction"},
        "relevantSections": {"en": "Relevant Sections", "fr": "Sections pertinentes"},
        "processingStatus": {"en": "Processing Status", "fr": "Statut de traitement"},
    },
)
class DocumentMetadata(BaseModel, ModelMixin):
    """Data model for document metadata"""

    type: str = Field(
        description="Document type",
    )
    format: str = Field(
        description="Document format",
    )
    size: int = Field(
        description="Document size in bytes",
    )
    # The remaining fields are optional and default to None.
    pages: Optional[int] = Field(
        None,
        description="Number of pages",
    )
    sections: Optional[List[str]] = Field(
        None,
        description="Document sections",
    )
    error: Optional[str] = Field(
        None,
        description="Processing error if any",
    )


# Register the display labels (English / French) of DocumentMetadata
register_model_labels(
    "DocumentMetadata",
    {"en": "Document Metadata", "fr": "Métadonnées de document"},
    {
        "type": {"en": "Type", "fr": "Type"},
        "format": {"en": "Format", "fr": "Format"},
        "size": {"en": "Size", "fr": "Taille"},
        "pages": {"en": "Pages", "fr": "Pages"},
        "sections": {"en": "Sections", "fr": "Sections"},
        "error": {"en": "Error", "fr": "Erreur"},
    },
)
class ImageData(BaseModel, ModelMixin):
    """Data model for image data"""

    # Required payload: the encoded image and its format.
    data: str = Field(description="Base64 encoded image data")
    format: str = Field(description="Image format")

    # Optional position information; both default to None.
    page: Optional[int] = Field(
        None,
        description="Page number if from a multi-page document",
    )
    index: Optional[int] = Field(
        None,
        description="Image index in the document",
    )


# Display labels (English / French) for the model itself and for each field.
register_model_labels(
    "ImageData",
    {"en": "Image Data", "fr": "Données d'image"},
    {
        "data": {"en": "Image Data", "fr": "Données d'image"},
        "format": {"en": "Format", "fr": "Format"},
        "page": {"en": "Page", "fr": "Page"},
        "index": {"en": "Index", "fr": "Index"},
    },
)
class DocumentContent(BaseModel, ModelMixin):
    """Data model for document content"""

    # The three content carriers are optional and default to None.
    text: Optional[str] = Field(
        None,
        description="Extracted text content",
    )
    data: Optional[Dict[str, Any]] = Field(
        None,
        description="Structured data content",
    )
    images: Optional[List[ImageData]] = Field(
        None,
        description="Extracted images",
    )
    # Metadata is the only required field of this model.
    metadata: DocumentMetadata = Field(description="Document metadata")


# Display labels (English / French) for the model itself and for each field.
register_model_labels(
    "DocumentContent",
    {"en": "Document Content", "fr": "Contenu de document"},
    {
        "text": {"en": "Text", "fr": "Texte"},
        "data": {"en": "Data", "fr": "Données"},
        "images": {"en": "Images", "fr": "Images"},
        "metadata": {"en": "Metadata", "fr": "Métadonnées"},
    },
)
class ProcessedDocument(BaseModel, ModelMixin):
    """Data model for processed document"""

    # A random UUID4 string is generated when no ID is supplied.
    id: str = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Document ID",
    )
    name: str = Field(description="Document name")
    contentType: str = Field(description="Content type")
    content: DocumentContent = Field(description="Document content")
    # Optional; defaults to None when no context is attached.
    context: Optional[DocumentContext] = Field(
        None,
        description="Document context",
    )


# Display labels (English / French) for the model itself and for each field.
# The label map lists exactly the fields declared on ProcessedDocument. The
# previous version also carried "description", "capabilities" and
# "performance" entries (none of which are fields of this model), and the last
# of them lacked a trailing comma, which made the whole module a SyntaxError.
register_model_labels(
    "ProcessedDocument",
    {"en": "Processed Document", "fr": "Document traité"},
    {
        "id": {"en": "ID", "fr": "ID"},
        "name": {"en": "Name", "fr": "Nom"},
        "contentType": {"en": "Content Type", "fr": "Type de contenu"},
        "content": {"en": "Content", "fr": "Contenu"},
        "context": {"en": "Context", "fr": "Contexte"},
    },
)
class AgentResponse(BaseModel, ModelMixin):
    """Data model for an agent response"""

    success: bool = Field(description="Whether the agent execution was successful")
    message: ChatMessage = Field(description="Response message from the agent")
    # Defaults to a fresh empty dict per instance.
    performance: Dict[str, Any] = Field(
        default_factory=dict,
        description="Performance metrics",
    )
    # Required; the description states a 0-100 scale but no bounds are enforced here.
    progress: float = Field(description="Task progress (0-100)")


# Display labels (English / French) for the model itself and for each field.
register_model_labels(
    "AgentResponse",
    {"en": "Agent Response", "fr": "Réponse de l'agent"},
    {
        "success": {"en": "Success", "fr": "Succès"},
        "message": {"en": "Message", "fr": "Message"},
        "performance": {"en": "Performance", "fr": "Performance"},
        "progress": {"en": "Progress", "fr": "Progression"},
    },
)
class TaskPlan(BaseModel, ModelMixin):
    """Data model for a task plan"""

    # Both lists default to a fresh empty list per instance.
    fileList: List[str] = Field(
        default_factory=list,
        description="List of files",
    )
    tasks: List[Task] = Field(
        default_factory=list,
        description="List of tasks in the plan",
    )
    # Both strings are required (no default).
    userLanguage: str = Field(description="User's preferred language")
    userResponse: str = Field(description="User's response or feedback")


# Display labels (English / French) for the model itself and for each field.
register_model_labels(
    "TaskPlan",
    {"en": "Task Plan", "fr": "Plan de tâches"},
    {
        "fileList": {"en": "File List", "fr": "Liste de fichiers"},
        "tasks": {"en": "Tasks", "fr": "Tâches"},
        "userLanguage": {"en": "User Language", "fr": "Langue de l'utilisateur"},
        "userResponse": {"en": "User Response", "fr": "Réponse de l'utilisateur"},
    },
)
class UserInputRequest(BaseModel, ModelMixin):
    """Data model for a user input request"""

    prompt: str = Field(description="Prompt for the user")
    # Integer file IDs; defaults to a fresh empty list per instance.
    listFileId: List[int] = Field(
        default_factory=list,
        description="List of file IDs",
    )
    # Falls back to "en" when the caller does not specify a language.
    userLanguage: str = Field(
        default="en",
        description="User's preferred language",
    )


# Display labels (English / French) for the model itself and for each field.
register_model_labels(
    "UserInputRequest",
    {"en": "User Input Request", "fr": "Demande de saisie utilisateur"},
    {
        "prompt": {"en": "Prompt", "fr": "Invite"},
        "listFileId": {"en": "File IDs", "fr": "IDs des fichiers"},
        "userLanguage": {"en": "User Language", "fr": "Langue de l'utilisateur"},
    },
)
class AgentProfile(BaseModel, ModelMixin):
    """Model for agent profile information."""

    # Required identity fields.
    id: str
    name: str
    description: str

    # Defaults to a fresh empty list per instance.
    capabilities: List[str] = Field(default_factory=list)
    # A profile counts as available unless stated otherwise.
    isAvailable: bool = True
    # Optional details; both default to None.
    lastActive: Optional[datetime] = None
    stats: Optional[Dict[str, Any]] = None


# Display labels (English / French) for the model itself and for each field.
register_model_labels(
    "AgentProfile",
    {"en": "Agent Profile", "fr": "Profil de l'agent"},
    {
        "id": {"en": "ID", "fr": "ID"},
        "name": {"en": "Name", "fr": "Nom"},
        "description": {"en": "Description", "fr": "Description"},
        "capabilities": {"en": "Capabilities", "fr": "Capacités"},
        "isAvailable": {"en": "Available", "fr": "Disponible"},
        "lastActive": {"en": "Last Active", "fr": "Dernière activité"},
        "stats": {"en": "Statistics", "fr": "Statistiques"},
    },
)
class AgentHandover(BaseModel, ModelMixin):
    """Data model for agent handover information."""

    # --- Status -----------------------------------------------------------
    # Plain string; the allowed values are only documented, not validated.
    status: str = Field(
        default="pending",
        description="One of: pending, success, failed, retry",
    )
    error: Optional[str] = Field(
        None,
        description="Error message if any",
    )
    progress: float = Field(
        default=0.0,
        description="Progress percentage",
    )

    # --- Documents (each defaults to a fresh empty list) --------------------
    documentsUserInitial: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="Initial user documents",
    )
    documentsInput: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="Input documents",
    )
    documentsOutput: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="Output documents",
    )

    # --- Prompts (each defaults to the empty string) ------------------------
    promptUserInitial: str = Field(
        default="",
        description="Initial user prompt",
    )
    promptFromFinishedAgent: str = Field(
        default="",
        description="Prompt from finished agent",
    )
    promptForNextAgent: str = Field(
        default="",
        description="Prompt for next agent",
    )

    # --- Agents -------------------------------------------------------------
    currentAgent: Optional[str] = Field(
        None,
        description="Current agent name",
    )
    nextAgent: Optional[str] = Field(
        None,
        description="Next agent name",
    )

    # --- Timing (stored as strings, not datetime objects) -------------------
    startedAt: Optional[str] = Field(
        None,
        description="Start timestamp",
    )
    finishedAt: Optional[str] = Field(
        None,
        description="Finish timestamp",
    )


# Display labels (English / French) for the model itself and for each field.
register_model_labels(
    "AgentHandover",
    {"en": "Agent Handover", "fr": "Transfert d'agent"},
    {
        "status": {"en": "Status", "fr": "Statut"},
        "error": {"en": "Error", "fr": "Erreur"},
        "progress": {"en": "Progress", "fr": "Progression"},
        "documentsUserInitial": {"en": "Initial User Documents", "fr": "Documents utilisateur initiaux"},
        "documentsInput": {"en": "Input Documents", "fr": "Documents d'entrée"},
        "documentsOutput": {"en": "Output Documents", "fr": "Documents de sortie"},
        "promptUserInitial": {"en": "Initial User Prompt", "fr": "Invite utilisateur initiale"},
        "promptFromFinishedAgent": {"en": "Finished Agent Prompt", "fr": "Invite de l'agent terminé"},
        "promptForNextAgent": {"en": "Next Agent Prompt", "fr": "Invite pour le prochain agent"},
        "currentAgent": {"en": "Current Agent", "fr": "Agent actuel"},
        "nextAgent": {"en": "Next Agent", "fr": "Prochain agent"},
        "startedAt": {"en": "Started At", "fr": "Démarré le"},
        "finishedAt": {"en": "Finished At", "fr": "Terminé le"},
    },
)

View file

@ -0,0 +1,74 @@
from datetime import UTC, datetime
from enum import Enum
from typing import Any, Callable, Dict, List, Literal, Optional

from pydantic import BaseModel, Field
class AuthSource(str, Enum):
    """Identity providers a method can require credentials from.

    Members are also ``str`` instances, so a member compares equal to its
    plain string value (for example ``AuthSource.MSFT == "msft"``).
    """

    LOCAL = "local"    # handled locally
    MSFT = "msft"      # Microsoft
    GOOGLE = "google"  # Google
    # Add more auth sources as needed
class MethodParameter(BaseModel):
    """Model for method parameters"""

    # Parameter name as used by callers.
    name: str
    # Type name of the parameter, kept as a plain string.
    type: str
    # Whether the parameter must be supplied.
    required: bool
    # Optional validator hook. Annotated with ``typing.Callable`` because the
    # builtin ``callable`` is a function, not a type, and pydantic cannot build
    # a field from it.
    validation: Optional[Callable[..., Any]] = None
    # Human-readable explanation of the parameter (required, no default).
    description: str
class MethodResult(BaseModel):
    """Model for method results"""

    # Outcome flag and payload are both required.
    success: bool
    data: Dict[str, Any]
    # Each of the following defaults to a fresh empty container per instance.
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
    )
    validation: List[str] = Field(
        default_factory=list,
    )
class MethodBase:
    """Common base for every method implementation.

    Subclasses provide ``actions`` and ``execute``; the helpers defined here
    cover parameter checking, auth checking and result construction.
    """

    def __init__(self):
        """Declare the attributes every method carries."""
        # Annotation-only declarations: no value is assigned here.
        self.name: str
        self.description: str
        self.auth_source: AuthSource = AuthSource.LOCAL  # Default to local auth

    @property
    def actions(self) -> Dict[str, Dict[str, Any]]:
        """Return the action catalogue; must be overridden."""
        raise NotImplementedError

    async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
        """Run ``action`` with ``parameters``; must be overridden."""
        raise NotImplementedError

    async def validate_parameters(self, action: str, parameters: Dict[str, Any]) -> bool:
        """Check that ``action`` exists and all of its required parameters are present.

        Returns:
            ``False`` for an unknown action or when a required parameter is
            missing from ``parameters``; ``True`` otherwise.
        """
        if action not in self.actions:
            return False

        declared = self.actions[action]['parameters']
        # Collect every required name first, then look for absentees.
        mandatory = [key for key, spec in declared.items() if spec['required']]
        absent = [key for key in mandatory if key not in parameters]
        return not absent

    async def rollback(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> None:
        """Undo ``action``; the base implementation does nothing."""
        return None

    def _validate_auth(self, auth_data: Optional[Dict[str, Any]] = None) -> bool:
        """Tell whether ``auth_data`` satisfies this method's auth source.

        Local methods always pass. Otherwise ``auth_data`` must be non-empty
        and its ``'source'`` entry must equal ``self.auth_source``.
        """
        if self.auth_source == AuthSource.LOCAL:
            return True
        if not auth_data:
            return False
        return bool(auth_data.get('source') == self.auth_source)

    def _create_result(self, success: bool, data: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None) -> MethodResult:
        """Build a ``MethodResult`` with an empty validation list."""
        meta = metadata if metadata else {}
        return MethodResult(success=success, data=data, metadata=meta, validation=[])

    def _add_validation_message(self, result: MethodResult, message: str) -> None:
        """Append ``message`` to ``result.validation`` in place."""
        result.validation.append(message)

View file

@ -0,0 +1,272 @@
from typing import Dict, Any, Optional
import logging
import ast
import re
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
logger = logging.getLogger(__name__)
class MethodCoder(MethodBase):
    """Coder method: analyse, generate and refactor source code.

    Only Python is supported; every action returns an unsuccessful result for
    any other language. ``generate`` and ``refactor`` are placeholders.
    """

    # A function spanning more source lines than this is reported as "long_function".
    _MAX_FUNCTION_LINES = 20
    # A line with more characters than this is reported as "line_too_long".
    # This is a project limit; PEP 8 itself recommends 79.
    _MAX_LINE_LENGTH = 100

    def __init__(self):
        """Register the method under the name ``coder`` with local auth."""
        super().__init__()
        self.name = "coder"
        self.description = "Handle code operations like analysis, generation, and refactoring"
        self.auth_source = AuthSource.LOCAL  # Code operations typically don't need auth

    @property
    def actions(self) -> Dict[str, Dict[str, Any]]:
        """Available actions and their parameters"""
        return {
            "analyze": {
                "description": "Analyze code structure and quality",
                "retryMax": 2,
                "timeout": 30,
                "parameters": {
                    "code": {"type": "string", "required": True},
                    "language": {"type": "string", "required": False},
                    "metrics": {"type": "array", "items": "string", "required": False}
                }
            },
            "generate": {
                "description": "Generate code based on requirements",
                "retryMax": 2,
                "timeout": 60,
                "parameters": {
                    "requirements": {"type": "string", "required": True},
                    "language": {"type": "string", "required": False},
                    "style": {"type": "string", "required": False}
                }
            },
            "refactor": {
                "description": "Refactor code for better quality",
                "retryMax": 2,
                "timeout": 60,
                "parameters": {
                    "code": {"type": "string", "required": True},
                    "language": {"type": "string", "required": False},
                    "improvements": {"type": "array", "items": "string", "required": False}
                }
            }
        }

    @staticmethod
    def _option(parameters: Dict[str, Any], key: str, default: Any) -> Any:
        """Return ``parameters[key]``, or ``default`` when it is missing or ``None``."""
        value = parameters.get(key)
        return default if value is None else value

    async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
        """Dispatch ``action`` to its handler.

        Args:
            action: One of ``analyze``, ``generate`` or ``refactor``.
            parameters: Action parameters as declared in ``actions``.
            auth_data: Accepted for interface compatibility; not used.

        Returns:
            The handler's result, or an unsuccessful result whose ``data``
            holds an ``"error"`` message. Exceptions are logged, not raised.
        """
        try:
            handlers = {
                "analyze": self._analyze_code,
                "generate": self._generate_code,
                "refactor": self._refactor_code,
            }
            handler = handlers.get(action)
            # Unknown actions are reported first: validate_parameters() also
            # rejects them, which used to hide this more precise message.
            if handler is None:
                return self._create_result(
                    success=False,
                    data={"error": f"Unknown action: {action}"}
                )

            if not await self.validate_parameters(action, parameters):
                return self._create_result(
                    success=False,
                    data={"error": f"Invalid parameters for {action}"}
                )

            return await handler(parameters)

        except Exception as e:
            logger.error(f"Error executing coder {action}: {e}")
            return self._create_result(
                success=False,
                data={"error": str(e)}
            )

    async def _analyze_code(self, parameters: Dict[str, Any]) -> MethodResult:
        """Analyze code structure and quality.

        Reads ``code`` (required), ``language`` (default ``"python"``) and
        ``metrics`` (default: complexity, style and documentation).

        Returns:
            On success ``data`` holds ``language`` and ``analysis``, the latter
            with ``metrics`` (line/class/function/import counts) and
            ``issues`` (a list of dicts, each with a ``type`` key).
        """
        try:
            code = parameters["code"]
            language = self._option(parameters, "language", "python")
            metrics = self._option(parameters, "metrics", ["complexity", "style", "documentation"])

            if language.lower() != "python":
                # TODO: Implement analysis for other languages
                return self._create_result(
                    success=False,
                    data={"error": f"Unsupported language: {language}"}
                )

            try:
                tree = ast.parse(code)
            except SyntaxError as e:
                return self._create_result(
                    success=False,
                    data={"error": f"Syntax error: {str(e)}"}
                )

            # Walk the tree once and reuse the node list for every check.
            nodes = list(ast.walk(tree))
            # ``async def`` is a separate node type and must be included explicitly.
            function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
            functions = [node for node in nodes if isinstance(node, function_types)]
            source_lines = code.splitlines()

            analysis = {
                "metrics": {
                    "lines": len(source_lines),
                    "classes": sum(isinstance(node, ast.ClassDef) for node in nodes),
                    "functions": len(functions),
                    "imports": sum(isinstance(node, (ast.Import, ast.ImportFrom)) for node in nodes)
                },
                "issues": []
            }
            issues = analysis["issues"]

            # Check for missing docstrings
            if "documentation" in metrics:
                documentable = (ast.ClassDef,) + function_types
                for node in nodes:
                    if isinstance(node, documentable) and not ast.get_docstring(node):
                        issues.append({
                            "type": "missing_docstring",
                            "line": node.lineno,
                            "name": node.name
                        })

            # Check for long functions
            if "complexity" in metrics:
                for node in functions:
                    # Measure real source lines (``def`` line through last body
                    # line); len(node.body) would only count top-level statements.
                    last_line = getattr(node, "end_lineno", None) or node.lineno
                    span = last_line - node.lineno + 1
                    if span > self._MAX_FUNCTION_LINES:
                        issues.append({
                            "type": "long_function",
                            "line": node.lineno,
                            "name": node.name,
                            "lines": span
                        })

            # Check for style issues
            if "style" in metrics:
                for number, line in enumerate(source_lines, 1):
                    if len(line) > self._MAX_LINE_LENGTH:
                        issues.append({
                            "type": "line_too_long",
                            "line": number,
                            "length": len(line)
                        })

                # Only leading whitespace matters: nearly all code contains a
                # space somewhere, so testing the whole text would flag any
                # file that merely contains one tab.
                indents = [line[:len(line) - len(line.lstrip(" \t"))] for line in source_lines]
                uses_tabs = any("\t" in indent for indent in indents)
                uses_spaces = any(" " in indent for indent in indents)
                if uses_tabs and uses_spaces:
                    issues.append({
                        "type": "mixed_tabs_spaces",
                        "message": "Code mixes tabs and spaces"
                    })

            return self._create_result(
                success=True,
                data={
                    "language": language,
                    "analysis": analysis
                }
            )

        except Exception as e:
            logger.error(f"Error analyzing code: {e}")
            return self._create_result(
                success=False,
                data={"error": f"Analysis failed: {str(e)}"}
            )

    async def _generate_code(self, parameters: Dict[str, Any]) -> MethodResult:
        """Generate code based on requirements.

        Placeholder: emits a Python class skeleton named after the first word
        of ``requirements``, with the requirements text as its docstring.
        """
        try:
            requirements = parameters["requirements"]
            language = self._option(parameters, "language", "python")

            # TODO: Implement code generation using AI or templates
            if language.lower() != "python":
                return self._create_result(
                    success=False,
                    data={"error": f"Unsupported language: {language}"}
                )

            words = requirements.split()
            if not words:
                # Previously surfaced as an opaque "list index out of range".
                return self._create_result(
                    success=False,
                    data={"error": "Generation failed: requirements must not be empty"}
                )

            class_name = re.sub(r'[^a-zA-Z0-9]', '', words[0].title())
            # An empty name or one starting with a digit would make the
            # generated code invalid, so prefix it.
            if not class_name.isidentifier():
                class_name = f"Generated{class_name}"

            code = (
                f"class {class_name}:\n"
                f'    """\n'
                f"    {requirements}\n"
                f'    """\n'
                "\n"
                "    def __init__(self):\n"
                "        pass\n"
                "\n"
                "    def process(self):\n"
                "        pass\n"
            )

            return self._create_result(
                success=True,
                data={
                    "language": language,
                    "code": code
                }
            )

        except Exception as e:
            logger.error(f"Error generating code: {e}")
            return self._create_result(
                success=False,
                data={"error": f"Generation failed: {str(e)}"}
            )

    async def _refactor_code(self, parameters: Dict[str, Any]) -> MethodResult:
        """Refactor code for better quality.

        Placeholder: the code is parsed to confirm it is valid Python, and the
        only transformation applied is stripping surrounding whitespace when
        ``"style"`` is requested.
        """
        try:
            code = parameters["code"]
            language = self._option(parameters, "language", "python")
            improvements = self._option(parameters, "improvements", ["style", "complexity"])

            if language.lower() != "python":
                return self._create_result(
                    success=False,
                    data={"error": f"Unsupported language: {language}"}
                )

            try:
                # Parsed only to reject code that is not valid Python.
                ast.parse(code)
            except SyntaxError as e:
                return self._create_result(
                    success=False,
                    data={"error": f"Syntax error: {str(e)}"}
                )

            if "style" in improvements:
                # Format code (placeholder)
                code = code.strip()

            # TODO: Implement complexity reduction ("complexity")
            # TODO: Generate missing docstrings ("documentation")

            return self._create_result(
                success=True,
                data={
                    "language": language,
                    "code": code,
                    "improvements": improvements
                }
            )

        except Exception as e:
            logger.error(f"Error refactoring code: {e}")
            return self._create_result(
                success=False,
                data={"error": f"Refactoring failed: {str(e)}"}
            )

View file

@ -0,0 +1,287 @@
from typing import Dict, Any, Optional
import logging
import os
from pathlib import Path
import docx
import PyPDF2
import json
import yaml
import xml.etree.ElementTree as ET
from datetime import datetime, UTC
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
logger = logging.getLogger(__name__)
class MethodDocument(MethodBase):
    """Document method: read, write and convert files on the local filesystem.

    Formats handled: txt, md, docx, pdf (read only), json, yaml/yml and xml.
    """

    def __init__(self):
        """Register the method under the name ``document`` with local auth."""
        super().__init__()
        self.name = "document"
        self.description = "Handle document operations like reading, writing, and converting documents"
        self.auth_source = AuthSource.LOCAL  # Document operations typically don't need auth

    @property
    def actions(self) -> Dict[str, Dict[str, Any]]:
        """Available actions and their parameters"""
        return {
            "read": {
                "description": "Read document content",
                "retryMax": 2,
                "timeout": 30,
                "parameters": {
                    "path": {"type": "string", "required": True},
                    "format": {"type": "string", "required": False},
                    "encoding": {"type": "string", "required": False},
                    "includeMetadata": {"type": "boolean", "required": False}
                }
            },
            "write": {
                "description": "Write content to document",
                "retryMax": 2,
                "timeout": 30,
                "parameters": {
                    "path": {"type": "string", "required": True},
                    "content": {"type": "string", "required": True},
                    "format": {"type": "string", "required": False},
                    "encoding": {"type": "string", "required": False},
                    "template": {"type": "string", "required": False}
                }
            },
            "convert": {
                "description": "Convert document between formats",
                "retryMax": 2,
                "timeout": 60,
                "parameters": {
                    "sourcePath": {"type": "string", "required": True},
                    "targetPath": {"type": "string", "required": True},
                    "sourceFormat": {"type": "string", "required": False},
                    "targetFormat": {"type": "string", "required": False},
                    "options": {"type": "object", "required": False}
                }
            }
        }

    @staticmethod
    def _resolve_format(explicit: Optional[str], path: Path) -> str:
        """Return ``explicit`` if given, else the file suffix without its dot, else ``"txt"``."""
        if explicit:
            return explicit
        return path.suffix[1:] if path.suffix else "txt"

    async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
        """Dispatch ``action`` to its handler.

        Args:
            action: One of ``read``, ``write`` or ``convert``.
            parameters: Action parameters as declared in ``actions``.
            auth_data: Accepted for interface compatibility; not used.

        Returns:
            The handler's result, or an unsuccessful result whose ``data``
            holds an ``"error"`` message. Exceptions are logged, not raised.
        """
        try:
            handlers = {
                "read": self._read_document,
                "write": self._write_document,
                "convert": self._convert_document,
            }
            handler = handlers.get(action)
            # Unknown actions are reported first: validate_parameters() also
            # rejects them, which used to hide this more precise message.
            if handler is None:
                return self._create_result(
                    success=False,
                    data={"error": f"Unknown action: {action}"}
                )

            if not await self.validate_parameters(action, parameters):
                return self._create_result(
                    success=False,
                    data={"error": f"Invalid parameters for {action}"}
                )

            return await handler(parameters)

        except Exception as e:
            logger.error(f"Error executing document {action}: {e}")
            return self._create_result(
                success=False,
                data={"error": str(e)}
            )

    async def _read_document(self, parameters: Dict[str, Any]) -> MethodResult:
        """Read document content.

        Reads ``path`` (required), ``format`` (default: from the file suffix),
        ``encoding`` (default ``utf-8``) and ``includeMetadata``.

        Returns:
            On success ``data`` holds ``path``, ``format`` and ``content``
            (text, or the parsed object for json/yaml), plus ``metadata`` when
            requested.
        """
        try:
            path = Path(parameters["path"])
            if not path.exists():
                return self._create_result(
                    success=False,
                    data={"error": f"File not found: {path}"}
                )

            doc_format = self._resolve_format(parameters.get("format"), path)
            # Lower-cased once for dispatch; the value reported back keeps the
            # caller's spelling.
            kind = doc_format.lower()
            encoding = parameters.get("encoding") or "utf-8"
            include_metadata = parameters.get("includeMetadata", False)

            if kind in ("txt", "md"):
                with open(path, "r", encoding=encoding) as f:
                    content = f.read()

            elif kind == "docx":
                doc = docx.Document(path)
                content = "\n".join(paragraph.text for paragraph in doc.paragraphs)

            elif kind == "pdf":
                with open(path, "rb") as f:
                    pdf = PyPDF2.PdfReader(f)
                    # Guard against pages that yield no text: a None entry
                    # would make str.join raise TypeError.
                    content = "\n".join((page.extract_text() or "") for page in pdf.pages)

            elif kind == "json":
                with open(path, "r", encoding=encoding) as f:
                    content = json.load(f)

            elif kind in ("yaml", "yml"):
                with open(path, "r", encoding=encoding) as f:
                    content = yaml.safe_load(f)

            elif kind == "xml":
                root = ET.parse(path).getroot()
                content = ET.tostring(root, encoding=encoding).decode(encoding)

            else:
                return self._create_result(
                    success=False,
                    data={"error": f"Unsupported format: {doc_format}"}
                )

            result = {
                "path": str(path),
                "format": doc_format,
                "content": content
            }

            if include_metadata:
                info = path.stat()
                result["metadata"] = {
                    "size": info.st_size,
                    "modified": datetime.fromtimestamp(info.st_mtime, UTC).isoformat(),
                    # NOTE(review): st_ctime is platform dependent (creation
                    # time on Windows, metadata-change time on Unix) -- confirm
                    # that "created" is the intended meaning.
                    "created": datetime.fromtimestamp(info.st_ctime, UTC).isoformat()
                }

            return self._create_result(
                success=True,
                data=result
            )

        except Exception as e:
            logger.error(f"Error reading document: {e}")
            return self._create_result(
                success=False,
                data={"error": f"Read failed: {str(e)}"}
            )

    async def _write_document(self, parameters: Dict[str, Any]) -> MethodResult:
        """Write content to document.

        Reads ``path`` and ``content`` (required), ``format`` (default: from
        the file suffix), ``encoding`` (default ``utf-8``) and ``template``
        (docx only). Missing parent directories are created. PDF writing is
        not implemented.

        Returns:
            On success ``data`` holds ``path``, ``format``, ``size`` and
            ``modified``.
        """
        try:
            path = Path(parameters["path"])

            # Create directory if it doesn't exist
            path.parent.mkdir(parents=True, exist_ok=True)

            doc_format = self._resolve_format(parameters.get("format"), path)
            kind = doc_format.lower()
            encoding = parameters.get("encoding") or "utf-8"
            content = parameters["content"]
            template = parameters.get("template")

            if kind in ("txt", "md", "xml"):
                with open(path, "w", encoding=encoding) as f:
                    f.write(content)

            elif kind == "docx":
                doc = docx.Document(template) if template else docx.Document()
                doc.add_paragraph(content)
                doc.save(path)

            elif kind == "pdf":
                # TODO: Implement PDF writing
                return self._create_result(
                    success=False,
                    data={"error": "PDF writing not implemented yet"}
                )

            elif kind == "json":
                with open(path, "w", encoding=encoding) as f:
                    json.dump(content, f, indent=2)

            elif kind in ("yaml", "yml"):
                with open(path, "w", encoding=encoding) as f:
                    yaml.dump(content, f)

            else:
                return self._create_result(
                    success=False,
                    data={"error": f"Unsupported format: {doc_format}"}
                )

            return self._create_result(
                success=True,
                data={
                    "path": str(path),
                    "format": doc_format,
                    "size": path.stat().st_size,
                    "modified": datetime.now(UTC).isoformat()
                }
            )

        except Exception as e:
            logger.error(f"Error writing document: {e}")
            return self._create_result(
                success=False,
                data={"error": f"Write failed: {str(e)}"}
            )

    async def _convert_document(self, parameters: Dict[str, Any]) -> MethodResult:
        """Convert document between formats.

        Implemented as a read in the source format followed by a write in the
        target format; a failure of either step is returned unchanged.
        """
        try:
            source_path = Path(parameters["sourcePath"])
            target_path = Path(parameters["targetPath"])

            if not source_path.exists():
                return self._create_result(
                    success=False,
                    data={"error": f"Source file not found: {source_path}"}
                )

            source_format = self._resolve_format(parameters.get("sourceFormat"), source_path)
            target_format = self._resolve_format(parameters.get("targetFormat"), target_path)

            # Read source content
            read_result = await self._read_document({
                "path": str(source_path),
                "format": source_format
            })
            if not read_result.success:
                return read_result

            # Write target content
            write_result = await self._write_document({
                "path": str(target_path),
                "content": read_result.data["content"],
                "format": target_format
            })
            if not write_result.success:
                return write_result

            return self._create_result(
                success=True,
                data={
                    "sourcePath": str(source_path),
                    "targetPath": str(target_path),
                    "sourceFormat": source_format,
                    "targetFormat": target_format,
                    "size": target_path.stat().st_size,
                    "modified": datetime.now(UTC).isoformat()
                }
            )

        except Exception as e:
            logger.error(f"Error converting document: {e}")
            return self._create_result(
                success=False,
                data={"error": f"Conversion failed: {str(e)}"}
            )

View file

@ -0,0 +1,203 @@
from typing import Dict, Any, Optional
import logging
from datetime import datetime, UTC
from O365 import Account, MSGraphProtocol
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
from modules.models.userConnection import UserConnection
logger = logging.getLogger(__name__)
class MethodOutlook(MethodBase):
    """Outlook method implementation for email operations"""

    def __init__(self):
        """Register the method under the name ``outlook`` with Microsoft auth."""
        super().__init__()
        self.name = "outlook"
        self.description = "Handle Outlook email operations like reading and sending emails"
        # The enum member is MSFT; AuthSource has no MICROSOFT member, so the
        # previous spelling raised AttributeError on instantiation.
        self.auth_source = AuthSource.MSFT

    @property
    def actions(self) -> Dict[str, Dict[str, Any]]:
        """Available actions and their parameters"""
        return {
            "readMails": {
                "description": "Read emails from Outlook",
                "retryMax": 2,
                "timeout": 30,
                "parameters": {
                    "folder": {"type": "string", "required": False},
                    "query": {"type": "string", "required": False},
                    "maxResults": {"type": "number", "required": False},
                    "includeAttachments": {"type": "boolean", "required": False}
                }
            },
            "sendMail": {
                "description": "Send email through Outlook",
                "retryMax": 2,
                "timeout": 30,
                "parameters": {
                    "to": {"type": "array", "items": "string", "required": True},
                    "subject": {"type": "string", "required": True},
                    "body": {"type": "string", "required": True},
                    "cc": {"type": "array", "items": "string", "required": False},
                    "bcc": {"type": "array", "items": "string", "required": False},
                    "attachments": {"type": "array", "items": "string", "required": False}
                }
            }
        }

    @staticmethod
    def _create_account(user_connection: UserConnection) -> Account:
        """Build the O365 account object used by both actions."""
        # NOTE(review): O365 documents ``credentials`` as (client_id,
        # client_secret); here the connection's auth/refresh tokens are passed
        # instead -- confirm this authenticates as intended.
        return Account(
            credentials=(user_connection.authToken, user_connection.refreshToken),
            protocol=MSGraphProtocol()
        )

    async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
        """Dispatch ``action`` to its handler.

        Args:
            action: ``readMails`` or ``sendMail``.
            parameters: Action parameters as declared in ``actions``.
            auth_data: Must contain a ``"userConnection"`` entry.

        Returns:
            The handler's result, or an unsuccessful result whose ``data``
            holds an ``"error"`` message. Exceptions are logged, not raised.
        """
        try:
            handlers = {
                "readMails": self._read_mails,
                "sendMail": self._send_mail,
            }
            handler = handlers.get(action)
            # Unknown actions are reported first: validate_parameters() also
            # rejects them, which used to hide this more precise message.
            if handler is None:
                return self._create_result(
                    success=False,
                    data={"error": f"Unknown action: {action}"}
                )

            if not await self.validate_parameters(action, parameters):
                return self._create_result(
                    success=False,
                    data={"error": f"Invalid parameters for {action}"}
                )

            # Get UserConnection from auth_data
            if not auth_data or "userConnection" not in auth_data:
                return self._create_result(
                    success=False,
                    data={"error": "UserConnection required for Outlook operations"}
                )
            user_connection: UserConnection = auth_data["userConnection"]

            return await handler(parameters, user_connection)

        except Exception as e:
            logger.error(f"Error executing Outlook {action}: {e}")
            return self._create_result(
                success=False,
                data={"error": str(e)}
            )

    async def _read_mails(self, parameters: Dict[str, Any], user_connection: UserConnection) -> MethodResult:
        """Read emails from Outlook.

        Reads ``folder`` (default ``"inbox"``), ``query``, ``maxResults``
        (default 10) and ``includeAttachments``.

        Returns:
            On success ``data`` holds ``folder``, ``query`` and ``messages``
            (one dict per message).
        """
        try:
            folder = parameters.get("folder") or "inbox"
            query = parameters.get("query")
            max_results = parameters.get("maxResults", 10)
            include_attachments = parameters.get("includeAttachments", False)

            mailbox = self._create_account(user_connection).mailbox()

            # The O365 folder API offers inbox_folder() for the inbox and
            # get_folder(folder_name=...) for lookup by display name; it has
            # no plain ``folder`` method.
            if folder.lower() == "inbox":
                target_folder = mailbox.inbox_folder()
            else:
                target_folder = mailbox.get_folder(folder_name=folder)
            if target_folder is None:
                return self._create_result(
                    success=False,
                    data={"error": f"Read failed: folder not found: {folder}"}
                )

            if query:
                messages = target_folder.get_messages(query=query, limit=max_results)
            else:
                messages = target_folder.get_messages(limit=max_results)

            results = []
            for message in messages:
                sender = message.sender
                received = message.received
                msg_data = {
                    "id": message.object_id,
                    "subject": message.subject,
                    # Guarded so one message without sender or received date
                    # does not abort the whole read.
                    "from": sender.address if sender else None,
                    "to": [to.address for to in message.to],
                    "cc": [cc.address for cc in message.cc],
                    "received": received.strftime("%Y-%m-%d %H:%M:%S") if received else None,
                    "body": message.body,
                    "hasAttachments": message.has_attachments
                }

                if include_attachments and message.has_attachments:
                    msg_data["attachments"] = [
                        {
                            "name": attachment.name,
                            "contentType": attachment.content_type,
                            "size": attachment.size
                        }
                        for attachment in message.attachments
                    ]

                results.append(msg_data)

            return self._create_result(
                success=True,
                data={
                    "folder": folder,
                    "query": query,
                    "messages": results
                }
            )

        except Exception as e:
            logger.error(f"Error reading Outlook emails: {e}")
            return self._create_result(
                success=False,
                data={"error": f"Read failed: {str(e)}"}
            )

    async def _send_mail(self, parameters: Dict[str, Any], user_connection: UserConnection) -> MethodResult:
        """Send email through Outlook.

        Reads ``to``, ``subject`` and ``body`` (required) plus optional
        ``cc``, ``bcc`` and ``attachments`` (file paths).

        Returns:
            On success ``data`` holds ``to``, ``subject`` and the ``sent``
            timestamp (UTC, ISO 8601).
        """
        try:
            to_addresses = parameters["to"]
            subject = parameters["subject"]
            body = parameters["body"]
            cc_addresses = parameters.get("cc") or []
            bcc_addresses = parameters.get("bcc") or []
            attachments = parameters.get("attachments") or []

            mailbox = self._create_account(user_connection).mailbox()

            message = mailbox.new_message()
            message.to.add(to_addresses)
            if cc_addresses:
                message.cc.add(cc_addresses)
            if bcc_addresses:
                message.bcc.add(bcc_addresses)
            message.subject = subject
            message.body = body

            for attachment_path in attachments:
                message.attachments.add(attachment_path)

            # send() reports failure through its return value; ignoring it
            # would report success for a mail that was never sent.
            if message.send() is False:
                return self._create_result(
                    success=False,
                    data={"error": "Send failed: Outlook rejected the message"}
                )

            return self._create_result(
                success=True,
                data={
                    "to": to_addresses,
                    "subject": subject,
                    "sent": datetime.now(UTC).isoformat()
                }
            )

        except Exception as e:
            logger.error(f"Error sending Outlook email: {e}")
            return self._create_result(
                success=False,
                data={"error": f"Send failed: {str(e)}"}
            )

View file

@ -0,0 +1,199 @@
from typing import Dict, Any, Optional
import logging
import os
from pathlib import Path
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
logger = logging.getLogger(__name__)
class MethodPowerpoint(MethodBase):
"""Powerpoint method implementation for PowerPoint operations"""
def __init__(self):
    """Register the method under the name ``powerpoint`` with Microsoft auth."""
    super().__init__()
    self.name = "powerpoint"
    self.description = "Handle PowerPoint operations like reading, writing, and converting presentations"
    # The enum member is MSFT; AuthSource has no MICROSOFT member, so the
    # previous spelling raised AttributeError on instantiation.
    self.auth_source = AuthSource.MSFT  # PowerPoint operations need Microsoft auth
@property
def actions(self) -> Dict[str, Dict[str, Any]]:
    """Describe the ``read``, ``write`` and ``convert`` actions.

    Each entry carries a description, a retry limit, a timeout and the
    parameter specifications (type plus required flag).
    """
    def spec(kind: str, required: bool = False) -> Dict[str, Any]:
        # One parameter specification; key order matches the literal form.
        return {"type": kind, "required": required}

    read_action = {
        "description": "Read PowerPoint presentation content",
        "retryMax": 2,
        "timeout": 30,
        "parameters": {
            "path": spec("string", True),
            "format": spec("string"),
            "includeNotes": spec("boolean"),
        },
    }
    write_action = {
        "description": "Write content to PowerPoint presentation",
        "retryMax": 2,
        "timeout": 60,
        "parameters": {
            "path": spec("string", True),
            "content": spec("object", True),
            "template": spec("string"),
        },
    }
    convert_action = {
        "description": "Convert PowerPoint presentation between formats",
        "retryMax": 2,
        "timeout": 60,
        "parameters": {
            "sourcePath": spec("string", True),
            "targetPath": spec("string", True),
            "sourceFormat": spec("string"),
            "targetFormat": spec("string"),
        },
    }
    return {"read": read_action, "write": write_action, "convert": convert_action}
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
    """Dispatch ``action`` to its handler after parameter and auth checks.

    Args:
        action: One of ``read``, ``write`` or ``convert``.
        parameters: Action parameters as declared in ``actions``.
        auth_data: Authentication data checked by ``_validate_auth``.

    Returns:
        The handler's result, or an unsuccessful result whose ``data`` holds
        an ``"error"`` message. Exceptions are logged, not raised.
    """
    try:
        handlers = {
            "read": self._read_presentation,
            "write": self._write_presentation,
            "convert": self._convert_presentation,
        }
        handler = handlers.get(action)
        # Unknown actions are reported first: validate_parameters() also
        # rejects them, which used to hide this more precise message.
        if handler is None:
            return self._create_result(
                success=False,
                data={"error": f"Unknown action: {action}"}
            )

        # Validate parameters
        if not await self.validate_parameters(action, parameters):
            return self._create_result(
                success=False,
                data={"error": f"Invalid parameters for {action}"}
            )

        # Validate authentication. The base class provides the synchronous
        # helper ``_validate_auth``; the former ``await self.validate_auth(...)``
        # referred to a method that does not exist, so every call failed.
        if not self._validate_auth(auth_data):
            return self._create_result(
                success=False,
                data={"error": "Authentication required for PowerPoint operations"}
            )

        return await handler(parameters, auth_data)

    except Exception as e:
        logger.error(f"Error executing powerpoint {action}: {e}")
        return self._create_result(
            success=False,
            data={"error": str(e)}
        )
async def _read_presentation(self, parameters: Dict[str, Any], auth_data: Dict[str, Any]) -> MethodResult:
    """Return placeholder slide data for an existing presentation file.

    Args:
        parameters: Must contain "path"; may contain "format" and
            "includeNotes".
        auth_data: Accepted for signature parity; not used here.

    Returns:
        A failed result if the path does not exist or an exception is
        raised; otherwise a successful result with the path, the resolved
        format and a single example slide.
    """
    try:
        source = Path(parameters["path"])
        if not source.exists():
            return self._create_result(
                success=False,
                data={"error": f"File not found: {source}"}
            )

        # Explicit format wins; otherwise use the file extension, then "pptx".
        file_format = parameters.get("format") or (source.suffix[1:] if source.suffix else "pptx")

        # TODO: Implement PowerPoint reading using Microsoft Graph API
        # This is a placeholder implementation
        with_notes = parameters.get("includeNotes", False)
        example_slide = {
            "number": 1,
            "title": "Example Slide",
            "content": "Example content",
            "notes": "Example notes" if with_notes else None,
        }
        payload = {
            "path": str(source),
            "format": file_format,
            "slides": [example_slide],
        }
        return self._create_result(success=True, data=payload)
    except Exception as e:
        logger.error(f"Error reading presentation: {e}")
        return self._create_result(
            success=False,
            data={"error": f"Read failed: {str(e)}"}
        )
async def _write_presentation(self, parameters: Dict[str, Any], auth_data: Dict[str, Any]) -> MethodResult:
    """Prepare the target location and report placeholder write results.

    Args:
        parameters: Must contain "path" and "content" (a mapping that may
            hold a "slides" list); may contain "format".
        auth_data: Accepted for signature parity; not used here.

    Returns:
        A successful result with the path, the resolved format and the
        number of slides in the content, or a failed result with a
        "Write failed: ..." message if anything raises.
    """
    try:
        target = Path(parameters["path"])

        # Make sure the parent directory exists before any write happens.
        target.parent.mkdir(parents=True, exist_ok=True)

        # Explicit format wins; otherwise use the file extension, then "pptx".
        file_format = parameters.get("format") or (target.suffix[1:] if target.suffix else "pptx")

        # TODO: Implement PowerPoint writing using Microsoft Graph API
        # This is a placeholder implementation
        slide_count = len(parameters["content"].get("slides", []))
        payload = {
            "path": str(target),
            "format": file_format,
            "slides": slide_count,
        }
        return self._create_result(success=True, data=payload)
    except Exception as e:
        logger.error(f"Error writing presentation: {e}")
        return self._create_result(
            success=False,
            data={"error": f"Write failed: {str(e)}"}
        )
async def _convert_presentation(self, parameters: Dict[str, Any], auth_data: Dict[str, Any]) -> MethodResult:
    """Report placeholder conversion details for a presentation.

    Args:
        parameters: Must contain "sourcePath" and "targetPath"; may contain
            "sourceFormat" and "targetFormat".
        auth_data: Accepted for signature parity; not used here.

    Returns:
        A failed result if the source file does not exist or an exception
        is raised; otherwise a successful result echoing both paths and
        the resolved formats.
    """
    def resolve_format(explicit, location):
        # Explicit format wins; otherwise use the file extension, then "pptx".
        return explicit or (location.suffix[1:] if location.suffix else "pptx")

    try:
        origin = Path(parameters["sourcePath"])
        destination = Path(parameters["targetPath"])

        if not origin.exists():
            return self._create_result(
                success=False,
                data={"error": f"Source file not found: {origin}"}
            )

        origin_format = resolve_format(parameters.get("sourceFormat"), origin)
        destination_format = resolve_format(parameters.get("targetFormat"), destination)

        # TODO: Implement PowerPoint conversion using Microsoft Graph API
        # This is a placeholder implementation
        payload = {
            "sourcePath": str(origin),
            "targetPath": str(destination),
            "sourceFormat": origin_format,
            "targetFormat": destination_format,
        }
        return self._create_result(success=True, data=payload)
    except Exception as e:
        logger.error(f"Error converting presentation: {e}")
        return self._create_result(
            success=False,
            data={"error": f"Conversion failed: {str(e)}"}
        )

View file

@ -0,0 +1,217 @@
from typing import Dict, Any, Optional
import logging
from datetime import datetime, UTC
from office365.runtime.auth.user_credential import UserCredential
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
from office365.sharepoint.lists.list import List
from office365.sharepoint.lists.list_creation_information import ListCreationInformation
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
from modules.models.userConnection import UserConnection
logger = logging.getLogger(__name__)
class MethodSharepoint(MethodBase):
"""SharePoint method implementation for document operations"""
def __init__(self):
    """Initialise the base method, then set this method's identity fields."""
    super().__init__()
    # Name under which this method is addressed.
    self.name = "sharepoint"
    # Human-readable summary of what the method does.
    self.description = "Handle SharePoint document operations like search, read, and write"
    # SharePoint operations authenticate via the Microsoft auth source.
    self.auth_source = AuthSource.MICROSOFT
@property
def actions(self) -> Dict[str, Dict[str, Any]]:
    """Describe the SharePoint actions exposed by this method.

    Returns:
        Mapping of action name ("search", "read", "write") to a spec
        holding its "description", "retryMax", "timeout" and the
        per-parameter type/required schema.
    """
    def param(kind: str, required: bool) -> Dict[str, Any]:
        # Every parameter entry shares the same two-key shape.
        return {"type": kind, "required": required}

    search_spec = {
        "description": "Search SharePoint documents",
        "retryMax": 3,
        "timeout": 30,
        "parameters": {
            "query": param("string", True),
            "siteUrl": param("string", True),
            "listName": param("string", False),
            "maxResults": param("number", False),
        },
    }
    read_spec = {
        "description": "Read SharePoint document content",
        "retryMax": 2,
        "timeout": 30,
        "parameters": {
            "fileUrl": param("string", True),
            "siteUrl": param("string", True),
        },
    }
    write_spec = {
        "description": "Write content to SharePoint document",
        "retryMax": 2,
        "timeout": 30,
        "parameters": {
            "fileUrl": param("string", True),
            "siteUrl": param("string", True),
            "content": param("string", True),
            "contentType": param("string", False),
        },
    }
    return {"search": search_spec, "read": read_spec, "write": write_spec}
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
    """Validate a request and route it to the matching SharePoint handler.

    Args:
        action: Action name; "search", "read" and "write" are handled.
        parameters: Action parameters, checked via ``validate_parameters``.
        auth_data: Must contain a "userConnection" entry, which is passed
            on to the handler.

    Returns:
        The handler's result, or a failed result whose data carries an
        "error" message when validation fails, the connection is missing,
        the action is unknown, or an exception is raised.
    """
    def failure(message):
        # All error paths share the same result shape.
        return self._create_result(
            success=False,
            data={"error": message}
        )

    # (action name, handler attribute) pairs, checked in order.
    routes = (
        ("search", "_search_documents"),
        ("read", "_read_document"),
        ("write", "_write_document"),
    )

    try:
        if not await self.validate_parameters(action, parameters):
            return failure(f"Invalid parameters for {action}")

        # The handlers need the caller's UserConnection.
        if not auth_data or "userConnection" not in auth_data:
            return failure("UserConnection required for SharePoint operations")
        user_connection: UserConnection = auth_data["userConnection"]

        for route_name, handler_name in routes:
            if action == route_name:
                handler = getattr(self, handler_name)
                return await handler(parameters, user_connection)

        return failure(f"Unknown action: {action}")
    except Exception as e:
        logger.error(f"Error executing SharePoint {action}: {e}")
        return failure(str(e))
async def _search_documents(self, parameters: Dict[str, Any], user_connection: UserConnection) -> MethodResult:
    """Search SharePoint documents in one list or across the whole site.

    When "listName" is given, that list is filtered for items whose Title
    equals the query; otherwise a site-wide search is issued.

    Args:
        parameters: Must contain "siteUrl" and "query"; may contain
            "listName" and "maxResults" (defaults to 10).
        user_connection: Connection whose ``authToken`` and ``refreshToken``
            are used to build the client credentials.

    Returns:
        A successful result with the query and a list of
        title/url/modified/created entries, or a failed result with a
        "Search failed: ..." message if anything raises.
    """
    try:
        site_url = parameters["siteUrl"]
        query = parameters["query"]
        list_name = parameters.get("listName")
        max_results = parameters.get("maxResults", 10)

        # Create SharePoint context
        # NOTE(review): UserCredential receives authToken/refreshToken here;
        # confirm this is the credential pair the client library expects.
        ctx = ClientContext(site_url).with_credentials(
            UserCredential(user_connection.authToken, user_connection.refreshToken)
        )

        if list_name:
            # The query is caller-supplied text placed inside an OData string
            # literal. A single quote must be doubled, otherwise it ends the
            # literal early and breaks (or alters) the filter expression.
            escaped_query = query.replace("'", "''")
            target_list = ctx.web.lists.get_by_title(list_name)
            items = target_list.items.filter(f"Title eq '{escaped_query}'").top(max_results).get().execute_query()
            results = [{
                "title": item.properties["Title"],
                "url": item.properties["FileRef"],
                "modified": item.properties["Modified"],
                "created": item.properties["Created"]
            } for item in items]
        else:
            # Search entire site
            search_results = ctx.search(query).execute_query()
            results = [{
                "title": result.properties["Title"],
                "url": result.properties["Path"],
                "modified": result.properties["LastModifiedTime"],
                "created": result.properties["Created"]
            } for result in search_results[:max_results]]

        return self._create_result(
            success=True,
            data={
                "query": query,
                "results": results
            }
        )
    except Exception as e:
        logger.error(f"Error searching SharePoint documents: {e}")
        return self._create_result(
            success=False,
            data={"error": f"Search failed: {str(e)}"}
        )
async def _read_document(self, parameters: Dict[str, Any], user_connection: UserConnection) -> MethodResult:
    """Read a SharePoint file and return its content decoded as UTF-8.

    Args:
        parameters: Must contain "siteUrl" and "fileUrl".
        user_connection: Connection whose ``authToken`` and ``refreshToken``
            are used to build the client credentials.

    Returns:
        A successful result with the url, decoded content and the file's
        "TimeLastModified" / "Length" properties, or a failed result with
        a "Read failed: ..." message if anything raises.
    """
    try:
        site_url = parameters["siteUrl"]
        file_url = parameters["fileUrl"]

        credentials = UserCredential(user_connection.authToken, user_connection.refreshToken)
        ctx = ClientContext(site_url).with_credentials(credentials)

        # Resolve the file by its server-relative URL and fetch its content.
        file = ctx.web.get_file_by_server_relative_url(file_url)
        file_content = file.read().execute_query()

        payload = {
            "url": file_url,
            "content": file_content.content.decode('utf-8'),
            "modified": file.properties["TimeLastModified"],
            "size": file.properties["Length"],
        }
        return self._create_result(success=True, data=payload)
    except Exception as e:
        logger.error(f"Error reading SharePoint document: {e}")
        return self._create_result(
            success=False,
            data={"error": f"Read failed: {str(e)}"}
        )
async def _write_document(self, parameters: Dict[str, Any], user_connection: UserConnection) -> MethodResult:
    """Write UTF-8 encoded text to a SharePoint file.

    The file is looked up by its server-relative URL; if that lookup
    raises, the file is uploaded into its parent folder first. The content
    is then written to the file.

    Args:
        parameters: Must contain "siteUrl", "fileUrl" and "content" (text).
            "contentType" is accepted by the action schema but is not used
            by this implementation.
        user_connection: Connection whose ``authToken`` and ``refreshToken``
            are used to build the client credentials.

    Returns:
        A successful result with the url, the current UTC timestamp and
        the encoded size in bytes, or a failed result with a
        "Write failed: ..." message if anything raises.
    """
    try:
        site_url = parameters["siteUrl"]
        file_url = parameters["fileUrl"]
        content = parameters["content"]

        # Create SharePoint context
        ctx = ClientContext(site_url).with_credentials(
            UserCredential(user_connection.authToken, user_connection.refreshToken)
        )

        # Encode once; the same bytes are used for upload, write and size.
        encoded_content = content.encode('utf-8')

        # Get or create file
        try:
            file = ctx.web.get_file_by_server_relative_url(file_url)
        except Exception:
            # Catch Exception rather than using a bare except, so that
            # KeyboardInterrupt/SystemExit/task cancellation still propagate.
            folder_url, _, file_name = file_url.rpartition("/")
            folder = ctx.web.get_folder_by_server_relative_url(folder_url)
            file = folder.upload_file(file_name, encoded_content).execute_query()

        # Update file content
        file.write(encoded_content).execute_query()

        return self._create_result(
            success=True,
            data={
                "url": file_url,
                "modified": datetime.now(UTC).isoformat(),
                "size": len(encoded_content)
            }
        )
    except Exception as e:
        logger.error(f"Error writing SharePoint document: {e}")
        return self._create_result(
            success=False,
            data={"error": f"Write failed: {str(e)}"}
        )

View file

@ -0,0 +1,398 @@
from typing import Dict, Any, Optional
import logging
import aiohttp
import asyncio
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
import re
from datetime import datetime, UTC
import requests
import time
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
class MethodWeb(MethodBase):
"""Web method implementation for web operations"""
def __init__(self):
    """Initialise the base method and load the web crawler settings.

    Settings are read from ``APP_CONFIG`` under the
    ``Agent_Webcrawler_SERPAPI_*`` keys; an error is logged when no
    SerpAPI key is configured.
    """
    super().__init__()
    self.name = "web"
    self.description = "Handle web operations like search, crawl, and content extraction"
    self.auth_source = AuthSource.LOCAL  # Web operations typically don't need auth

    # Web crawling configuration from agentWebcrawler
    read_setting = APP_CONFIG.get
    self.srcApikey = read_setting("Agent_Webcrawler_SERPAPI_APIKEY", "")
    self.srcEngine = read_setting("Agent_Webcrawler_SERPAPI_ENGINE", "google")
    self.srcCountry = read_setting("Agent_Webcrawler_SERPAPI_COUNTRY", "auto")
    # Numeric settings are converted to int after lookup.
    self.maxResults = int(read_setting("Agent_Webcrawler_SERPAPI_MAX_SEARCH_RESULTS", "5"))
    self.timeout = int(read_setting("Agent_Webcrawler_SERPAPI_TIMEOUT", "30"))
    self.userAgent = read_setting("Agent_Webcrawler_SERPAPI_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")

    if not self.srcApikey:
        logger.error("SerpAPI key not configured")
@property
def actions(self) -> Dict[str, Dict[str, Any]]:
    """Describe the web actions exposed by this method.

    Returns:
        Mapping of action name ("search", "crawl", "extract") to a spec
        holding its "description", "retryMax", "timeout" and the
        per-parameter schema.
    """
    def param(kind: str, required: bool) -> Dict[str, Any]:
        # Most parameter entries share the same two-key shape.
        return {"type": kind, "required": required}

    search_spec = {
        "description": "Search web content",
        "retryMax": 3,
        "timeout": 30,
        "parameters": {
            "query": param("string", True),
            "maxResults": param("number", False),
            "filters": param("object", False),
            "searchEngine": param("string", False),
        },
    }
    crawl_spec = {
        "description": "Crawl web pages",
        "retryMax": 2,
        "timeout": 60,
        "parameters": {
            "url": param("string", True),
            "depth": param("number", False),
            "followLinks": param("boolean", False),
            "includeImages": param("boolean", False),
            "respectRobots": param("boolean", False),
        },
    }
    extract_spec = {
        "description": "Extract content from web page",
        "retryMax": 2,
        "timeout": 30,
        "parameters": {
            "url": param("string", True),
            # The only entry that also declares an item type.
            "selectors": {"type": "array", "items": "string", "required": False},
            "format": param("string", False),
            "includeMetadata": param("boolean", False),
        },
    }
    return {"search": search_spec, "crawl": crawl_spec, "extract": extract_spec}
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
    """Validate a request and route it to the matching web handler.

    Args:
        action: Action name; "search", "crawl" and "extract" are handled.
        parameters: Action parameters, checked via ``validate_parameters``.
        auth_data: Accepted for signature parity; not used here.

    Returns:
        The handler's result, or a failed result whose data carries an
        "error" message when validation fails, the action is unknown, or
        an exception is raised.
    """
    def failure(message):
        # All error paths share the same result shape.
        return self._create_result(
            success=False,
            data={"error": message}
        )

    # (action name, handler attribute) pairs, checked in order.
    routes = (
        ("search", "_search_web"),
        ("crawl", "_crawl_page"),
        ("extract", "_extract_content"),
    )

    try:
        if not await self.validate_parameters(action, parameters):
            return failure(f"Invalid parameters for {action}")

        for route_name, handler_name in routes:
            if action == route_name:
                handler = getattr(self, handler_name)
                return await handler(parameters)

        return failure(f"Unknown action: {action}")
    except Exception as e:
        logger.error(f"Error executing web {action}: {e}")
        return failure(str(e))
async def _search_web(self, parameters: Dict[str, Any]) -> MethodResult:
    """Run a web search through the requested search engine backend.

    Args:
        parameters: Must contain "query"; may contain "maxResults"
            (default 10), "filters" (default {}) and "searchEngine"
            (default "google"; "google" and "bing" are supported,
            matched case-insensitively).

    Returns:
        A successful result with the query, the engine as given and the
        backend's results; a failed result for an unsupported engine or
        when anything raises.
    """
    try:
        query = parameters["query"]
        max_results = parameters.get("maxResults", 10)
        filters = parameters.get("filters", {})
        search_engine = parameters.get("searchEngine", "google")

        engine_key = search_engine.lower()
        if engine_key not in ("google", "bing"):
            return self._create_result(
                success=False,
                data={"error": f"Unsupported search engine: {search_engine}"}
            )

        # TODO: Implement Google Custom Search API integration
        # TODO: Implement Bing Web Search API integration
        backend = self._google_search if engine_key == "google" else self._bing_search
        results = await backend(query, max_results, filters)

        payload = {
            "query": query,
            "engine": search_engine,
            "results": results,
        }
        return self._create_result(success=True, data=payload)
    except Exception as e:
        logger.error(f"Error searching web: {e}")
        return self._create_result(
            success=False,
            data={"error": f"Search failed: {str(e)}"}
        )
async def _google_search(self, query: str, max_results: int, filters: Dict[str, Any]) -> list:
"""Search using Google Custom Search API"""
# TODO: Implement Google Custom Search API
# This is a placeholder implementation
return [
{
"title": "Example Result",
"url": "https://example.com",
"snippet": "Example search result snippet",
"source": "google"
}
]
async def _bing_search(self, query: str, max_results: int, filters: Dict[str, Any]) -> list:
"""Search using Bing Web Search API"""
# TODO: Implement Bing Web Search API
# This is a placeholder implementation
return [
{
"title": "Example Result",
"url": "https://example.com",
"snippet": "Example search result snippet",
"source": "bing"
}
]
async def _crawl_page(self, parameters: Dict[str, Any]) -> MethodResult:
    """Fetch a single page and return its basic content.

    Args:
        parameters: Must contain "url"; may contain "followLinks"
            (default False), "includeImages" (default False) and
            "respectRobots" (default True). "depth" is part of the action
            schema but only the given page is fetched here.

    Returns:
        A successful result with url, title, meta description, links
        (filled only when "followLinks" is set), images (a list when
        "includeImages" is set, otherwise None), page text and a UTC
        timestamp. A failed result when robots.txt disallows the URL, the
        response status is not 200, or anything raises.
    """
    try:
        url = parameters["url"]
        follow_links = parameters.get("followLinks", False)
        include_images = parameters.get("includeImages", False)
        respect_robots = parameters.get("respectRobots", True)

        # Check robots.txt if required
        if respect_robots and not await self._check_robots_txt(url):
            return self._create_result(
                success=False,
                data={"error": "Crawling not allowed by robots.txt"}
            )

        # Send the configured User-Agent: the robots.txt check is made for
        # that agent, so the page request has to identify as the same one.
        request_headers = {"User-Agent": self.userAgent}

        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=request_headers) as response:
                if response.status != 200:
                    return self._create_result(
                        success=False,
                        data={"error": f"Failed to fetch URL: {response.status}"}
                    )
                html = await response.text()

        soup = BeautifulSoup(html, 'html.parser')

        # Extract basic information
        result = {
            "url": url,
            "title": soup.title.string if soup.title else None,
            "description": self._get_meta_description(soup),
            "links": [],
            "images": [] if include_images else None,
            "text": soup.get_text(strip=True),
            "crawled": datetime.now(UTC).isoformat()
        }

        # Extract links if followLinks is True
        if follow_links:
            for link in soup.find_all('a'):
                href = link.get('href')
                if not href:
                    continue
                # Resolve relative hrefs against the page URL.
                absolute_url = urljoin(url, href)
                if self._is_valid_url(absolute_url):
                    result["links"].append({
                        "url": absolute_url,
                        "text": link.get_text(strip=True)
                    })

        # Extract images if includeImages is True
        if include_images:
            for img in soup.find_all('img'):
                src = img.get('src')
                if not src:
                    continue
                result["images"].append({
                    "url": urljoin(url, src),
                    "alt": img.get('alt', ''),
                    "title": img.get('title', '')
                })

        return self._create_result(
            success=True,
            data=result
        )
    except Exception as e:
        logger.error(f"Error crawling page: {e}")
        return self._create_result(
            success=False,
            data={"error": f"Crawl failed: {str(e)}"}
        )
async def _extract_content(self, parameters: Dict[str, Any]) -> MethodResult:
    """Fetch a page and extract content, optionally by CSS selectors.

    Args:
        parameters: Must contain "url"; may contain "selectors" (list of
            CSS selectors) and "includeMetadata" (default False). "format"
            is part of the action schema but is not used by this
            implementation.

    Returns:
        A successful result with the url and the extracted content: one
        text list per selector when selectors are given, otherwise the
        page title, text and link hrefs. With "includeMetadata" a
        "metadata" entry is added. A failed result when the response
        status is not 200 or anything raises.
    """
    try:
        url = parameters["url"]
        selectors = parameters.get("selectors")
        include_metadata = parameters.get("includeMetadata", False)

        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                if response.status != 200:
                    return self._create_result(
                        success=False,
                        data={"error": f"Failed to fetch URL: {response.status}"}
                    )
                html = await response.text()

        soup = BeautifulSoup(html, 'html.parser')

        # Page-level text and links are computed independently of the
        # selector results, so metadata can be built in both modes. Reading
        # them back from `content` raised KeyError whenever selectors were
        # combined with includeMetadata.
        page_text = soup.get_text(strip=True)
        page_links = [a.get('href') for a in soup.find_all('a')]

        if selectors:
            # Extract content based on selectors
            content = {
                selector: [elem.get_text() for elem in soup.select(selector)]
                for selector in selectors
            }
        else:
            # Default extraction
            content = {
                "title": soup.title.string if soup.title else None,
                "text": page_text,
                "links": page_links
            }

        # Add metadata if requested
        if include_metadata:
            content["metadata"] = {
                "url": url,
                "crawled": datetime.now(UTC).isoformat(),
                "language": self._detect_language(soup),
                "wordCount": len(page_text.split()),
                "linksCount": len(page_links)
            }

        return self._create_result(
            success=True,
            data={
                "url": url,
                "content": content
            }
        )
    except Exception as e:
        logger.error(f"Error extracting content: {e}")
        return self._create_result(
            success=False,
            data={"error": f"Extraction failed: {str(e)}"}
        )
def _get_meta_description(self, soup: BeautifulSoup) -> Optional[str]:
    """Return the content of the page's ``<meta name="description">`` tag.

    Returns None when the document has no such tag.
    """
    description_tag = soup.find('meta', attrs={'name': 'description'})
    return description_tag.get('content') if description_tag else None
def _is_valid_url(self, url: str) -> bool:
"""Check if URL is valid"""
try:
result = urlparse(url)
return all([result.scheme, result.netloc])
except:
return False
async def _check_robots_txt(self, url: str) -> bool:
    """Check whether the site's robots.txt allows fetching *url*.

    The robots.txt of the URL's host is downloaded with the configured
    User-Agent and timeout and evaluated with the standard library's
    ``urllib.robotparser`` for that same User-Agent.

    Args:
        url: Absolute URL that is about to be crawled.

    Returns:
        False only when robots.txt was retrieved (status 200) and its
        rules disallow the URL. True otherwise, including when robots.txt
        is missing or the check itself fails (best-effort behaviour).
    """
    # Local import: the standard-library parser replaces the hand-rolled
    # one, which lowercased paths (so case-sensitive Disallow rules never
    # matched), ignored Allow rules and comments, and compared the
    # robots.txt agent token against the full User-Agent header string.
    from urllib.robotparser import RobotFileParser

    try:
        parsed_url = urlparse(url)
        robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt"

        async with aiohttp.ClientSession() as session:
            async with session.get(
                robots_url,
                headers={"User-Agent": self.userAgent},
                # aiohttp expects a ClientTimeout object; a bare number is
                # the deprecated form.
                timeout=aiohttp.ClientTimeout(total=self.timeout),
            ) as response:
                if response.status != 200:
                    # If robots.txt doesn't exist, assume crawling is allowed
                    return True
                robots_content = await response.text()

        rules = RobotFileParser()
        rules.parse(robots_content.splitlines())
        return rules.can_fetch(self.userAgent, url)
    except Exception as e:
        logger.warning(f"Error checking robots.txt for {url}: {str(e)}")
        # If there's an error, assume crawling is allowed
        return True
def _detect_language(self, soup: BeautifulSoup) -> str:
    """Guess the language of a parsed page.

    Sources are tried in order: the ``lang`` attribute of ``<html>``, the
    ``content-language`` meta tag, and finally a count of common English,
    Spanish, French and German words in the page text.

    Args:
        soup: Parsed document.

    Returns:
        The declared language value, or one of 'en', 'es', 'fr', 'de' from
        the word heuristic; 'en' when nothing matches or an error occurs.
    """
    try:
        # Try to get language from HTML lang attribute
        if soup.html and soup.html.get('lang'):
            return soup.html.get('lang')

        # Try to get language from meta tag
        meta_lang = soup.find('meta', attrs={'http-equiv': 'content-language'})
        if meta_lang:
            return meta_lang.get('content', 'en')

        # The declared charset is deliberately not consulted: an encoding
        # such as UTF-8 carries no language information, and treating it as
        # English kept the content heuristic below from ever running on
        # pages that declare a UTF-8 charset.

        # Simple heuristic based on common words. The text is padded once
        # so words at the very start or end can match " word " as well.
        padded_text = f" {soup.get_text().lower()} "
        common_words = {
            'en': ['the', 'and', 'of', 'to', 'in', 'is', 'that', 'for', 'it', 'with'],
            'es': ['el', 'la', 'los', 'las', 'de', 'y', 'en', 'que', 'por', 'con'],
            'fr': ['le', 'la', 'les', 'de', 'et', 'en', 'que', 'pour', 'avec', 'dans'],
            'de': ['der', 'die', 'das', 'und', 'in', 'den', 'von', 'zu', 'für', 'mit']
        }
        word_counts = {
            lang: sum(1 for word in words if f' {word} ' in padded_text)
            for lang, words in common_words.items()
        }

        # max() keeps the first language on ties, so 'en' wins a draw.
        best_lang, best_hits = max(word_counts.items(), key=lambda x: x[1])
        if best_hits:
            return best_lang

        return 'en'  # Default to English if no language detected
    except Exception as e:
        logger.warning(f"Error detecting language: {str(e)}")
        return 'en'  # Default to English on error

View file

@ -1,214 +0,0 @@
"""
Agent Base Module.
Provides the base class for all chat agents.
Defines the standardized interface for task processing.
"""
import os
import logging
import uuid
from datetime import datetime, UTC
from typing import Dict, Any, List, Optional
from modules.shared.mimeUtils import isTextMimeType, determineContentEncoding
from modules.interfaces.serviceChatModel import ChatContent, Task, AgentResponse, ChatMessage
logger = logging.getLogger(__name__)
class AgentBase:
"""
Base class for all chat agents.
Defines the standardized interface for task processing.
"""
def __init__(self):
"""Initialize the base agent."""
self.name = "base"
self.label = "Base Agent"
self.description = "Base agent functionality"
self.capabilities = []
self.service = None
def setService(self, service):
    """Attach the service container after checking its interfaces.

    Args:
        service: Service container; it must expose the attributes
            ``base``, ``msft`` and ``google``.

    Returns:
        True when the container was stored on the agent; False (with a
        logged warning) when it is falsy or lacks a required attribute.
    """
    if not service:
        logger.warning("Attempted to set null service container")
        return False

    # Collect every required interface the container does not provide.
    missing_interfaces = [
        interface
        for interface in ('base', 'msft', 'google')
        if not hasattr(service, interface)
    ]
    if missing_interfaces:
        logger.warning(f"Service container missing required interfaces: {', '.join(missing_interfaces)}")
        return False

    self.service = service
    return True
def getAgentInfo(self) -> Dict[str, Any]:
    """Summarise the agent in a plain dictionary.

    Returns:
        Dictionary with the agent's "name", "label", "description" and
        "capabilities", in that order.
    """
    exported_fields = ("name", "label", "description", "capabilities")
    return {field: getattr(self, field) for field in exported_fields}
async def execute(self, task: Task) -> AgentResponse:
    """Run ``processTask`` for a task and wrap the outcome in a response.

    Args:
        task: Task to process; its ``workflowId`` is copied into the
            response message.

    Returns:
        AgentResponse with a "completed" assistant message built from the
        result's "feedback", "performance" and "progress" entries; or an
        unsuccessful response with a system "error" message when the
        service container is not set or processing raises.
    """
    def errorResponse(text):
        # Shared shape for both failure paths.
        return AgentResponse(
            success=False,
            message=ChatMessage(
                id=str(uuid.uuid4()),
                workflowId=task.workflowId,
                agentName=self.name,
                message=text,
                role="system",
                status="error",
                sequenceNr=0,
                startedAt=datetime.now(UTC).isoformat(),
                finishedAt=datetime.now(UTC).isoformat(),
                success=False
            ),
            performance={},
            progress=0.0
        )

    # Nothing can be processed without a service container.
    if not self.service:
        logger.error("Service container not initialized")
        return errorResponse("Error: Service container not initialized")

    try:
        # Delegate the real work to the concrete agent.
        outcome = await self.processTask(task)

        reply = ChatMessage(
            id=str(uuid.uuid4()),
            workflowId=task.workflowId,
            agentName=self.name,
            message=outcome.get("feedback", ""),
            role="assistant",
            status="completed",
            sequenceNr=0,
            startedAt=datetime.now(UTC).isoformat(),
            finishedAt=datetime.now(UTC).isoformat(),
            success=True
        )

        return AgentResponse(
            success=True,
            message=reply,
            performance=outcome.get("performance", {}),
            progress=outcome.get("progress", 100.0)
        )
    except Exception as e:
        logger.error(f"Error processing task: {str(e)}", exc_info=True)
        return errorResponse(f"Error processing task: {str(e)}")
async def processTask(self, task: Task) -> Dict[str, Any]:
    """Process a task; concrete agents must override this method.

    Args:
        task: Task object containing all necessary information.

    Returns:
        Dictionary read by ``execute`` with these keys:
            - feedback: text response explaining what the agent did
            - performance: optional performance metrics
            - progress: task progress (0-100)

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError("processTask must be implemented by concrete agent classes")
def determineBase64EncodingFlag(self, filename: str, content: Any, mimeType: str = None) -> bool:
    """Decide whether content should be base64 encoded.

    Thin wrapper that forwards to ``determineContentEncoding``.

    Args:
        filename: Name of the file.
        content: Content to check.
        mimeType: Optional MIME type.

    Returns:
        The value returned by ``determineContentEncoding``.
    """
    shouldEncode = determineContentEncoding(filename, content, mimeType)
    return shouldEncode
def isTextMimeType(self, mimeType: str) -> bool:
    """Tell whether a MIME type is text-based.

    Thin wrapper that forwards to the module-level ``isTextMimeType``
    helper of the same name.

    Args:
        mimeType: MIME type to check.

    Returns:
        The value returned by the module-level helper.
    """
    textual = isTextMimeType(mimeType)
    return textual
def formatAgentDocumentOutput(self, label: str, content: str, contentType: str, base64Encoded: bool = False) -> ChatContent:
    """Wrap agent document output in a ChatContent object.

    Args:
        label: Document label/filename, stored as ``name``.
        content: Document content, stored as ``data``.
        contentType: MIME type of the content, stored as ``mimeType``.
        base64Encoded: Whether the content is base64 encoded; recorded
            under ``metadata["base64Encoded"]``.

    Returns:
        ChatContent with ``sequenceNr`` fixed to 1 and the fields above.
    """
    documentMetadata = {"base64Encoded": base64Encoded}
    document = ChatContent(
        sequenceNr=1,
        name=label,
        mimeType=contentType,
        data=content,
        metadata=documentMetadata
    )
    return document

View file

@ -1,212 +0,0 @@
"""
Agent Manager Module for managing agent operations and execution.
"""
import os
import logging
import importlib
from typing import Dict, Any, List, Optional, Tuple
from datetime import datetime, UTC
import uuid
from modules.interfaces.serviceChatModel import (
ChatMessage, ChatDocument, UserInputRequest, ChatWorkflow, AgentResponse
)
logger = logging.getLogger(__name__)
class AgentManager:
"""Manager for agent operations and execution."""
_instance = None
@classmethod
def getInstance(cls):
    """Return the shared agent manager, creating it on first use."""
    if cls._instance is not None:
        return cls._instance
    # First call: build the instance and remember it on the class.
    cls._instance = cls()
    return cls._instance
# Internal Methods
def __init__(self):
    """Create the manager state and load the available agents.

    Raises:
        RuntimeError: If a singleton instance is already registered on
            the class; use ``getInstance()`` instead.
    """
    already_created = AgentManager._instance is not None
    if already_created:
        raise RuntimeError("Singleton instance already exists - use getInstance()")

    # Service reference is provided later through initialize().
    self.service = None
    # Registered agent instances, keyed by agent name.
    self.agents = {}
    # Discover and register agents right away.
    self._loadAgents()
def _loadAgents(self):
    """Discover, import and register all agent modules.

    Files named ``agent*.py`` in the sibling ``agents`` directory are
    imported as ``modules.agents.<module>``. For a module ``agentFoo`` the
    agent comes from ``getAgentFoo()`` if the module defines it, otherwise
    from instantiating ``AgentFoo``. Failures for one module are logged
    and do not stop the remaining modules from loading.
    """
    logger.info("Loading agent modules...")

    # Get the agents directory path
    agentDir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "agents")

    # A missing or unreadable directory is logged like every other load
    # problem instead of raising out of the manager's constructor.
    try:
        filenames = os.listdir(agentDir)
    except OSError as e:
        logger.warning("Agent directory not accessible: %s (%s)", agentDir, e)
        return

    # Search for agent modules (strip the ".py" extension)
    agentModules = [
        filename[:-3]
        for filename in filenames
        if filename.startswith("agent") and filename.endswith(".py")
    ]

    if not agentModules:
        logger.warning("No agent modules found in directory: %s", agentDir)
        return

    logger.info(f"Found {len(agentModules)} agent modules: {', '.join(agentModules)}")

    # Load each agent module
    for moduleName in agentModules:
        try:
            # Import the module
            module = importlib.import_module(f"modules.agents.{moduleName}")

            # Strip only the leading "agent" prefix. Splitting on "agent"
            # and taking the last piece gave a wrong name for modules whose
            # name contains "agent" a second time.
            agentName = moduleName.removeprefix("agent")
            className = f"Agent{agentName}"
            getterName = f"getAgent{agentName}"

            agent = None

            # Try to get the agent via the getter function first
            if hasattr(module, getterName):
                getterFunc = getattr(module, getterName)
                agent = getterFunc()
                logger.info(f"Agent '{agent.name}' loaded via {getterName}()")
            # If no getter, try to instantiate the agent class directly
            elif hasattr(module, className):
                agentClass = getattr(module, className)
                agent = agentClass()
                logger.info(f"Agent '{agent.name}' directly instantiated from {className}")

            if agent:
                # Register the agent
                if self._registerAgent(agent):
                    logger.info(f"Successfully registered agent: {agent.name}")
                else:
                    logger.error(f"Failed to register agent from module: {moduleName}")
            else:
                logger.warning(f"No agent class or getter function found in module: {moduleName}")

        except ImportError as e:
            logger.error(f"Failed to import module {moduleName}: {str(e)}")
        except Exception as e:
            logger.error(f"Error loading agent from module {moduleName}: {str(e)}")
def _registerAgent(self, agent: Any):
"""Register a new agent with the manager."""
if not hasattr(agent, 'name'):
logger.error("Agent must have a name attribute")
return False
self.agents[agent.name] = agent
if self.service and hasattr(agent, 'setService'):
agent.setService(self.service)
return True
# Public Methods
def initialize(self, service: Any):
    """Store the service reference and pass it to every registered agent.

    Args:
        service: Service object handed to each agent that has a
            ``setService`` method.

    Returns:
        Always True.
    """
    self.service = service

    notSet = object()
    for registered in self.agents.values():
        # Agents without a setService method are skipped.
        setter = getattr(registered, 'setService', notSet)
        if setter is not notSet:
            setter(service)

    return True
def getAgent(self, agentIdentifier: str) -> Optional[Any]:
    """Look up a registered agent by name.

    Args:
        agentIdentifier: Name under which the agent was registered.

    Returns:
        The agent instance, or None (with a logged warning) when no agent
        is registered under that name.
    """
    found = self.agents.get(agentIdentifier)
    if found:
        return found

    logger.warning(f"Agent '{agentIdentifier}' not found")
    return found
def getAllAgents(self) -> Dict[str, Any]:
    """Return a shallow copy of the agent registry.

    Returns:
        New dictionary mapping agent names to agent instances; changing
        it does not affect the manager's own registry.
    """
    snapshot = dict(self.agents)
    return snapshot
def getAgentInfos(self) -> List[Dict[str, Any]]:
    """Describe every registered agent.

    Returns:
        One dictionary per agent with its 'name' plus 'description',
        'capabilities', 'inputTypes' and 'outputTypes'; the latter four
        fall back to '' or [] when the agent lacks the attribute.
    """
    infos = []
    for registered in self.agents.values():
        entry = {'name': registered.name}
        entry['description'] = getattr(registered, 'description', '')
        entry['capabilities'] = getattr(registered, 'capabilities', [])
        entry['inputTypes'] = getattr(registered, 'inputTypes', [])
        entry['outputTypes'] = getattr(registered, 'outputTypes', [])
        infos.append(entry)
    return infos
async def executeAgent(self, handover: Any) -> AgentResponse:
    """Run the agent named in a handover and return its response.

    Args:
        handover: Execution context; ``currentAgent`` selects the agent,
            and ``workflowId`` / ``startedAt`` are used for error
            reporting.

    Returns:
        The agent's response. When it carries a message with documents,
        these are first passed to the service's 'agentOutputFilesSave'
        document function. If the agent is unknown or anything raises, an
        unsuccessful AgentResponse with a system error message is
        returned instead.
    """
    try:
        selected = self.agents.get(handover.currentAgent)
        if not selected:
            raise ValueError(f"Agent {handover.currentAgent} not found")

        response = await selected.execute(handover)

        # Save output files if any
        hasDocuments = response.message and response.message.documents
        if hasDocuments:
            saveOutputFiles = self.service.document['agentOutputFilesSave']
            saveOutputFiles(handover, response.message.documents)

        return response
    except Exception as e:
        logger.error(f"Error executing agent {handover.currentAgent}: {str(e)}")

        # Report the failure as a system message on the same workflow.
        failureNotice = ChatMessage(
            id=str(uuid.uuid4()),
            workflowId=handover.workflowId,
            agentName=handover.currentAgent,
            message=f"Error executing agent: {str(e)}",
            role="system",
            status="error",
            sequenceNr=0,
            startedAt=handover.startedAt,
            finishedAt=datetime.now(UTC).isoformat(),
            success=False
        )
        return AgentResponse(
            success=False,
            message=failureNotice,
            error=str(e),
            performance={},
            progress=0.0
        )
# Module-level accessor for the shared agent manager
def getAgentManager():
    """Return the singleton AgentManager via ``AgentManager.getInstance()``."""
    manager = AgentManager.getInstance()
    return manager

View file

@ -1,617 +0,0 @@
"""
Chat Manager Module for managing chat workflows and agent handovers.
"""
import logging
from typing import Dict, Any, List, Optional, Union
from datetime import datetime, UTC
import uuid
import json
from dataclasses import dataclass
from modules.interfaces.serviceChatModel import (
ChatLog, ChatMessage, ChatDocument, UserInputRequest, ChatWorkflow,
AgentHandover
)
from modules.workflow.agentManager import getAgentManager
from modules.workflow.documentManager import getDocumentManager
logger = logging.getLogger(__name__)
class ChatManager:
"""Manager for chat workflows and agent handovers."""
_instance = None
@classmethod
def getInstance(cls):
    """Build the shared chat manager on first use and hand back that same object afterwards."""
    existing = cls._instance
    if existing is not None:
        return existing
    cls._instance = cls()
    return cls._instance
# Core functions
def __init__(self):
    """
    Set up the chat manager's collaborators.

    Raises:
        RuntimeError: If the class already holds an instance in ``_instance``.
    """
    if ChatManager._instance is None:
        # NOTE(review): nothing in this class assigns a real service object;
        # presumably the owner sets ``service`` before calling initialize() - confirm.
        self.service = None
        self.agentManager = getAgentManager()
        self.documentManager = getDocumentManager()
        return
    raise RuntimeError("Singleton instance already exists - use getInstance()")
def initialize(self, workflow: ChatWorkflow):
    """
    Initialize the sub-managers and publish the service entries agents rely on.

    Sets ``workflow``, ``logAdd``, ``user``, ``functions``, ``model``,
    ``document`` and ``data`` on ``self.service``.

    Args:
        workflow: ChatWorkflow object stored as ``service.workflow``

    Returns:
        True once all entries have been assigned
    """
    def _runWhile(condition, action):
        # Stop on any falsy result. The earlier iter(callable, False) form
        # stopped only on a value equal to False, so a condition returning
        # None, [] or "" never ended the loop.
        results = []
        while condition():
            results.append(action())
        return results

    # Initialize managers
    self.agentManager.initialize(self.service)
    self.documentManager.initialize(self.service)
    # Add basic references to service
    self.service.workflow = workflow
    self.service.logAdd = self.logAdd
    self.service.user = {
        'id': None,
        'name': None,
        'language': 'en'
    }
    self.service.functions = {
        'forEach': lambda items, action: [action(item) for item in items],
        'while': _runWhile
    }
    self.service.model = {
        'callAiBasic': self._callAiBasic,
        'callAiComplex': self._callAiComplex,
        'callAiImage': self._callAiImage
    }
    # Initialize document operations
    self.service.document = {
        'extract': self.documentManager.extractContent,
        'convertFileRefToFileId': self.documentManager.convertFileRefToId,
        'convertFileIdToFileRef': self.documentManager.convertFileIdToRef,
        'convertDataFormat': self.documentManager.convertDataFormat,
        'agentInputFilesCreate': self.documentManager.createAgentInputFileList,
        'agentOutputFilesSave': self.documentManager.saveAgentOutputFiles
    }
    # Initialize data access (imported here, at call time, as in the original)
    from modules.workflow.dataAccessFunctions import get_data_access
    self.service.data = get_data_access().to_service_object()
    return True
def createInitialHandover(self, userInput: UserInputRequest) -> AgentHandover:
    """
    Build the first handover of a workflow from what the user submitted.

    Args:
        userInput: User input request; ``message`` and ``listFileId`` are read

    Returns:
        Handover seeded with the user's prompt and files, or a handover with
        status "failed" and the error text if construction raises
    """
    try:
        userPrompt = userInput.message
        userFiles = userInput.listFileId or []
        initial = AgentHandover(
            promptUserInitial=userPrompt,
            documentsUserInitial=userFiles,
            startedAt=datetime.now(UTC).isoformat()
        )
        # The user's files become the first agent's input only when there are any
        if initial.documentsUserInitial:
            initial.documentsInput = initial.documentsUserInitial
        # The first agent is prompted with the user's own text
        initial.promptForNextAgent = initial.promptUserInitial
        return initial
    except Exception as e:
        logger.error(f"Error creating initial handover: {str(e)}")
        return AgentHandover(status="failed", error=str(e))
async def defineNextHandover(self, currentHandover: AgentHandover) -> Optional[AgentHandover]:
    """
    Prepare the handover for the following agent and pick that agent.

    Args:
        currentHandover: Handover of the step that just finished

    Returns:
        New handover with ``nextAgent`` set, or None when there are no agents,
        no agent is selected, or an error occurs
    """
    try:
        agentInfos = self.agentManager.getAgentInfos()
        if not agentInfos:
            logger.warning("No available agents found")
            return None
        upcoming = AgentHandover(
            promptUserInitial=currentHandover.promptUserInitial,
            documentsUserInitial=currentHandover.documentsUserInitial,
            startedAt=datetime.now(UTC).isoformat()
        )
        if currentHandover.promptFromFinishedAgent:
            # A previous agent has reported: continue from its documents and prompt
            upcoming.documentsInput = currentHandover.documentsOutput
            upcoming.promptForNextAgent = currentHandover.promptFromFinishedAgent
        else:
            # Nothing has run yet: start from what the user supplied
            upcoming.documentsInput = currentHandover.documentsUserInitial
            upcoming.promptForNextAgent = currentHandover.promptUserInitial
        chosen = await self._selectNextAgent(agentInfos, upcoming)
        if not chosen:
            logger.info("No suitable next agent found")
            return None
        upcoming.nextAgent = chosen['name']
        return upcoming
    except Exception as e:
        logger.error(f"Error defining next handover: {str(e)}")
        return None
async def _selectNextAgent(self, availableAgents: List[Dict[str, Any]], handover: AgentHandover) -> Optional[Dict[str, Any]]:
    """
    Select the next agent using AI analysis of the current state and requirements.

    The handover state, a workflow summary and the agent list are sent to the
    complex AI model. Its JSON answer names one agent, which is then looked up
    in ``availableAgents`` by name. On a match, the handover's
    ``requiredCapabilities`` and ``analysis`` are overwritten with the answer.

    Args:
        availableAgents: List of available agents (dicts read via ``name``,
            ``capabilities`` and ``description``)
        handover: Current handover object; mutated when an agent is selected

    Returns:
        Selected agent or None if no suitable agent. None is also returned
        when there is no workflow, the answer is not valid JSON, the named
        agent is unknown, or any other exception occurs.
    """
    try:
        if not availableAgents:
            logger.warning("No available agents found")
            return None
        # Get current workflow state
        workflow = self.service.workflow
        if not workflow:
            logger.error("No workflow context available")
            return None
        # Detect user language if not already set
        # (the detected code is stored back on the workflow)
        if not workflow.userLanguage:
            workflow.userLanguage = await self._detectUserLanguage(handover.promptUserInitial)
        # Get workflow summary for context
        # A throwaway message with a fresh id stands in for the current user message.
        workflow_summary = await self.workflowSummarize(ChatMessage(
            id=str(uuid.uuid4()),
            workflowId=workflow.id,
            role="user",
            message=handover.promptUserInitial
        ))
        # Prepare context for AI analysis
        context = {
            "current_state": {
                "previous_agent": handover.currentAgent,
                "status": handover.status,
                "error": handover.error,
                "user_language": workflow.userLanguage,
                "input_documents": handover.documentsInput or [],
                "output_documents": handover.documentsOutput or [],
                "required_capabilities": handover.requiredCapabilities or []
            },
            "conversation_history": workflow_summary,
            "available_agents": [
                {
                    "name": agent.get("name", ""),
                    "capabilities": agent.get("capabilities", {}),
                    "description": agent.get("description", "")
                }
                for agent in availableAgents
            ]
        }
        # Create prompt for AI to analyze and select next agent
        # Doubled braces below are literal braces inside the f-string.
        prompt = f"""
Analyze the current workflow state, conversation history, and available agents to determine the most suitable next agent.
Consider the following factors:
1. Previous agent's status and any errors
2. Required capabilities for the task
3. Document type compatibility
4. Language requirements
5. Agent's capabilities and specializations
6. Conversation history and context
Current State:
{json.dumps(context['current_state'], indent=2)}
Conversation History:
{context['conversation_history']}
Available Agents:
{json.dumps(context['available_agents'], indent=2)}
Return a JSON object with the following structure:
{{
"selected_agent": "name of the most suitable agent",
"reasoning": "brief explanation of why this agent was selected",
"required_capabilities": ["list", "of", "required", "capabilities"],
"potential_risks": ["list", "of", "potential", "issues"],
"task": {{
"description": "clear description of what the agent needs to do",
"input_format": {{
"documents": ["list", "of", "required", "input", "documents"],
"data": ["list", "of", "required", "data", "fields"]
}},
"output_format": {{
"documents": ["list", "of", "expected", "output", "documents"],
"data": ["list", "of", "expected", "output", "fields"]
}},
"requirements": [
"list of specific requirements",
"format requirements",
"quality requirements"
],
"constraints": [
"list of constraints",
"time limits",
"resource limits"
]
}},
"prompt_template": "template for the agent's prompt with placeholders for dynamic content"
}}
Format your response as a valid JSON object.
"""
        # Get AI's analysis and selection
        response = await self._callAiComplex(prompt)
        try:
            # The reply is parsed as-is; anything that is not pure JSON lands
            # in the JSONDecodeError branch below.
            analysis = json.loads(response)
            selected_agent_name = analysis.get('selected_agent')
            # Find the selected agent in available agents
            selected_agent = next(
                (agent for agent in availableAgents if agent.get('name') == selected_agent_name),
                None
            )
            if selected_agent:
                logger.info(f"AI selected agent {selected_agent_name}: {analysis.get('reasoning')}")
                # Update handover with AI's analysis
                handover.requiredCapabilities = analysis.get('required_capabilities', [])
                handover.analysis = {
                    'reasoning': analysis.get('reasoning'),
                    'potential_risks': analysis.get('potential_risks', []),
                    'task': analysis.get('task', {}),
                    'prompt_template': analysis.get('prompt_template', '')
                }
                return selected_agent
            else:
                logger.warning(f"AI selected agent {selected_agent_name} not found in available agents")
                return None
        except json.JSONDecodeError as e:
            logger.error(f"Error parsing AI response: {str(e)}")
            return None
    except Exception as e:
        logger.error(f"Error selecting next agent: {str(e)}")
        return None
async def processNextAgent(self, handover: AgentHandover) -> AgentHandover:
    """
    Run the agent named in ``handover.nextAgent`` and record its outcome.

    Args:
        handover: Handover to execute; it is updated in place

    Returns:
        The same handover, marked "success" with the agent's documents and
        message, or marked "failed" with an error text
    """
    try:
        agentName = handover.nextAgent
        agent = self.agentManager.getAgent(agentName)
        if not agent:
            handover.update_status("failed", f"Agent {agentName} not found")
            return handover
        # Promote the pending agent to the current one before running it
        handover.currentAgent = agentName
        handover.nextAgent = None
        response = await agent.execute(handover)
        if not response.success:
            handover.update_status("failed", response.error)
            return handover
        handover.update_status("success")
        if response.message:
            handover.documentsOutput = response.message.documents
        else:
            handover.documentsOutput = []
        if response.message:
            handover.promptFromFinishedAgent = response.message.message
        else:
            handover.promptFromFinishedAgent = ""
        return handover
    except Exception as e:
        logger.error(f"Error processing next agent: {str(e)}")
        handover.update_status("failed", str(e))
        return handover
# Agent functions
async def _callAiBasic(self, prompt: str, context: Dict[str, Any] = None) -> str:
    """Send the prompt to the "aiBase" model; a failure is logged and yields an empty string."""
    try:
        payload = context or {}
        return await self.service.base.callAi(prompt, payload, model="aiBase")
    except Exception as e:
        logger.error(f"Error calling basic AI: {str(e)}")
        return ""
async def _callAiComplex(self, prompt: str, context: Dict[str, Any] = None) -> str:
    """Send the prompt to the "aiComplex" model; a failure is logged and yields an empty string."""
    try:
        payload = context or {}
        return await self.service.base.callAi(prompt, payload, model="aiComplex")
    except Exception as e:
        logger.error(f"Error calling complex AI: {str(e)}")
        return ""
async def _callAiImage(self, prompt: str, context: Dict[str, Any] = None) -> str:
    """Send the prompt to the "aiImage" model; a failure is logged and yields an empty string."""
    try:
        payload = context or {}
        return await self.service.base.callAi(prompt, payload, model="aiImage")
    except Exception as e:
        logger.error(f"Error calling image AI: {str(e)}")
        return ""
def logAdd(self, message: str, level: str = "info",
           progress: Optional[int] = None) -> str:
    """
    Add a log entry to the workflow.

    The entry is appended to ``workflow.logs``, mirrored to the Python logger
    and then handed to the persistence layer. Persistence is best effort: if
    it fails, the id of the appended entry is still returned.

    Args:
        message: Log message
        level: Log level (info, warning, error); unknown names are logged as INFO
        progress: Optional progress percentage

    Returns:
        str: ID of the created log entry, or "" if the entry could not be created
    """
    workflow = self.service.workflow
    try:
        # Generate log ID
        logId = str(uuid.uuid4())
        # Create log entry
        logEntry = ChatLog(
            id=logId,
            workflowId=workflow.id,
            message=message,
            level=level,
            progress=progress,
            timestamp=datetime.now().isoformat()
        )
        # Add to workflow logs
        workflow.logs.append(logEntry)
        # Also log to Python logger; fall back to INFO when the name does not
        # resolve to a numeric logging level.
        logLevel = getattr(logging, str(level).upper(), logging.INFO)
        if not isinstance(logLevel, int):
            logLevel = logging.INFO
        logger.log(logLevel, f"[Workflow {workflow.id}] {message}")
    except Exception as e:
        logger.error(f"Error adding log entry: {str(e)}")
        return ""
    try:
        # Save to database
        # NOTE(review): this class never assigns ``self.chatManager``; confirm
        # which object is meant to provide saveWorkflowLog.
        self.chatManager.saveWorkflowLog(workflow.id, logEntry.to_dict())
    except Exception as e:
        # The entry is already part of workflow.logs, so keep its id usable.
        logger.error(f"Error saving log entry {logId}: {str(e)}")
    return logId
async def chatMessageToWorkflow(self, role: str, agent: Union[str, Dict[str, Any]], chatMessage: UserInputRequest) -> ChatMessage:
    """
    Turn an incoming chat request into a workflow message with its files attached.

    Args:
        role: Role of the message sender (e.g., 'user', 'assistant')
        agent: Agent name, or a dict whose "name" entry is used
        chatMessage: UserInputRequest object containing message data and file references

    Returns:
        ChatMessage with status "completed" and the processed files as documents

    Raises:
        Exception: Any error is logged and then re-raised unchanged
    """
    try:
        fileIds = chatMessage.listFileId or []
        # NOTE(review): processFileIds is not defined anywhere in this class;
        # unless it is attached elsewhere this call fails - confirm.
        attachedFiles = await self.processFileIds(fileIds)
        if isinstance(agent, str):
            senderName = agent
        else:
            senderName = agent.get("name", "")
        return ChatMessage(
            id=str(uuid.uuid4()),
            workflowId=self.service.workflow.id,
            role=role,
            agentName=senderName,
            message=chatMessage.message,
            documents=attachedFiles,
            status="completed",
            startedAt=datetime.now().isoformat()
        )
    except Exception as e:
        logger.error(f"Error creating workflow message: {str(e)}")
        raise
async def sendFinalMessage(self, handover: AgentHandover) -> ChatMessage:
    """
    Send final message to user with workflow results.

    Args:
        handover: Final handover object

    Returns:
        Assistant message with the last agent's text (or a failure notice) and
        a list of generated documents. If building it fails, a system message
        with status "error" is returned instead.
    """
    def _docName(doc):
        # Documents may be plain dicts or objects exposing a ``name`` attribute.
        if isinstance(doc, dict):
            return doc.get('name', 'Unknown')
        return getattr(doc, 'name', 'Unknown')

    try:
        # A missing prompt must not break the "+=" below, so default to "".
        messageContent = handover.promptFromFinishedAgent or ""
        if handover.status == "failed":
            messageContent = f"Workflow failed: {handover.error}"
        # Add summary of generated documents
        if handover.documentsOutput:
            messageContent += "\n\nGenerated documents:"
            messageContent += "".join(
                f"\n- {_docName(doc)}" for doc in handover.documentsOutput
            )
        # Create message object
        finalMessage = ChatMessage(
            id=str(uuid.uuid4()),
            workflowId=self.service.workflow.id,
            agentName="Workflow Manager",
            message=messageContent,
            role="assistant",
            status="completed",
            sequenceNr=0,
            startedAt=datetime.now(UTC).isoformat(),
            finishedAt=datetime.now(UTC).isoformat(),
            success=handover.status == "success",
            documents=handover.documentsOutput
        )
        return finalMessage
    except Exception as e:
        logger.error(f"Error sending final message: {str(e)}")
        return ChatMessage(
            id=str(uuid.uuid4()),
            workflowId=self.service.workflow.id,
            agentName="Workflow Manager",
            message=f"Error in workflow: {str(e)}",
            role="system",
            status="error",
            sequenceNr=0,
            startedAt=datetime.now(UTC).isoformat(),
            finishedAt=datetime.now(UTC).isoformat(),
            success=False
        )
async def workflowSummarize(self, messageUser: ChatMessage) -> str:
    """
    Creates a summary of the workflow without the current user message.

    Workflow and messages may be dicts or attribute-style objects. Messages
    are ordered by ``sequenceNr`` (falling back to ``sequenceNo``).

    Args:
        messageUser: Current user message; the message with the same id is skipped

    Returns:
        Per-message summaries joined by blank lines, or "" when there is no
        workflow or it has no messages
    """
    def _read(item, field, default=None):
        # Uniform field access for dicts and attribute-style objects.
        if isinstance(item, dict):
            return item.get(field, default)
        return getattr(item, field, default)

    def _sequence(item):
        number = _read(item, "sequenceNr")
        if number is None:
            number = _read(item, "sequenceNo")
        return number or 0

    workflow = self.service.workflow
    history = _read(workflow, "messages") if workflow is not None else None
    if not history:
        return ""  # First message
    currentId = _read(messageUser, "id")
    summaryParts = []
    # Go through messages in chronological order
    for message in sorted(history, key=_sequence):
        if _read(message, "id") != currentId:
            summaryParts.append(await self.messageSummarize(message))
    return "\n\n".join(summaryParts)
async def messageSummarize(self, message: ChatMessage) -> str:
    """
    Creates a summary of a message including its documents.

    The message may be a dict or an attribute-style object. Its text is read
    from ``message`` and, if that is absent, from ``content``.

    Args:
        message: Message to summarize

    Returns:
        "[role agentName]: summary", followed by a "Documents:" bullet list
        when the message carries documents
    """
    def _read(item, field, default=None):
        # Uniform field access for dicts and attribute-style objects.
        if isinstance(item, dict):
            return item.get(field, default)
        return getattr(item, field, default)

    role = _read(message, "role")
    agentName = _read(message, "agentName")
    content = _read(message, "message")
    if content is None:
        content = _read(message, "content")
    content = content or ""
    contentSummary = ""
    try:
        prompt = f"Create a very concise summary (2-3 sentences, maximum 300 characters) of the following message:\n\n{content}"
        contentSummary = await self._callAiBasic(prompt)
    except Exception as e:
        logger.error(f"Error creating summary: {str(e)}")
    if not contentSummary:
        # An empty reply also means "no summary", so use the truncated text.
        contentSummary = content[:200] + "..."
    # Summarize documents
    docsSummary = ""
    docsList = [self.getFilename(doc) for doc in (_read(message, "documents") or [])]
    if docsList:
        # Every document gets its own bullet line, including the first one.
        docsSummary = "\nDocuments:\n- " + "\n- ".join(docsList)
    return f"[{role} {agentName}]: {contentSummary}{docsSummary}"
def getFilename(self, document: ChatDocument) -> str:
    """
    Build a document's filename from its ``name`` and ``ext`` attributes.

    Args:
        document: Document object

    Returns:
        "name.ext" when ``ext`` is truthy, otherwise just the name
    """
    base, suffix = document.name, document.ext
    return f"{base}.{suffix}" if suffix else base
async def _detectUserLanguage(self, text: str) -> str:
    """
    Detects the language of user input using AI.

    The reply is normalized before validation: surrounding whitespace, quotes,
    backticks and periods are removed, case is folded, and a region suffix
    such as "-CH" or "_ch" is dropped.

    Args:
        text: User input text to analyze

    Returns:
        Language code (e.g., 'en', 'de', 'fr'); 'en' when the reply is not one
        of the accepted codes or an error occurs
    """
    # Basic validation of common language codes
    valid_codes = {'en', 'de', 'fr', 'es', 'it', 'pt', 'nl', 'ru', 'zh', 'ja', 'ko'}
    try:
        # Use basic AI model for language detection
        prompt = f"""
Analyze the following text and identify its language.
Return only the ISO 639-1 language code (e.g., 'en' for English, 'de' for German).
Text: {text}
"""
        response = await self._callAiBasic(prompt)
        # Clean and validate response
        lang_code = (response or "").strip().strip("'\"`.").strip().lower()
        # Keep only the primary subtag of replies like "de-ch" or "pt_br"
        lang_code = lang_code.replace("_", "-").partition("-")[0]
        return lang_code if lang_code in valid_codes else 'en'
    except Exception as e:
        logger.error(f"Error detecting language: {str(e)}")
        return 'en'  # Default to English on error
# Singleton factory for the chat manager
def getChatManager():
    """Return the shared ChatManager obtained through ChatManager.getInstance()."""
    manager = ChatManager.getInstance()
    return manager

View file

@ -1,273 +0,0 @@
"""
Data access functions for Microsoft and Google services.
Provides standardized interfaces for SharePoint, Outlook, and other services.
"""
from typing import List, Dict, Any, Optional, Union
from datetime import datetime
from pydantic import BaseModel, Field
from enum import Enum
class ServiceType(str, Enum):
    """Service types for data access"""
    # The str mixin makes each member compare equal to its plain string value.
    # get_data_access_functions() stores these members as the "authority" of each entry.
    MSFT = "msft"
    GOOGLE = "google"
class FileRef(BaseModel):
    """Reference to a file in storage"""
    # Used as the annotated return type of Msft_Sharepoint_PutFile and as the
    # element type of OutlookMailParams.attachments.
    # Required fields (no default):
    id: str
    name: str
    path: str
    # Optional details; each is None unless supplied:
    url: Optional[str] = None
    size: Optional[int] = None
    lastModified: Optional[datetime] = None
# SharePoint Functions
class SharePointSearchParams(BaseModel):
    """Parameters for SharePoint search"""
    # NOTE(review): get_data_access_functions() exports this model's schema();
    # presumably the docstring above appears in it - confirm before rewording.
    # Required fields (no default):
    userName: str
    query: str
    # Optional filters; each is None unless supplied:
    site: Optional[str] = None
    folder: Optional[str] = None
    contentType: Optional[str] = None
    createdAfter: Optional[datetime] = None
    modifiedAfter: Optional[datetime] = None
    # Defaults to 100 when not supplied
    maxResults: Optional[int] = 100
class SharePointFolderParams(BaseModel):
    """Parameters for SharePoint folder operations"""
    # NOTE(review): get_data_access_functions() exports this model's schema();
    # presumably the docstring above appears in it - confirm before rewording.
    # Required fields (no default):
    userName: str
    folderPattern: str
    site: Optional[str] = None
    # Flags: recursive is off and includeFiles is on unless overridden
    recursive: bool = False
    includeFiles: bool = True
class SharePointFileParams(BaseModel):
    """Parameters for SharePoint file operations"""
    # One model is shared by Msft_Sharepoint_GetFiles, Msft_Sharepoint_GetFile
    # and Msft_Sharepoint_PutFile.
    # Required fields (no default):
    userName: str
    fileName: str
    # Optional fields; each is None unless supplied:
    site: Optional[str] = None
    folder: Optional[str] = None
    content: Optional[bytes] = None
    contentType: Optional[str] = None
# NOTE: the docstrings of the functions below are runtime data:
# get_data_access_functions() copies obj.__doc__ into each entry's
# "description", so they are left exactly as written.
async def Msft_Sharepoint_Search(params: SharePointSearchParams) -> List[Dict[str, Any]]:
    """Search SharePoint for files and folders matching criteria"""
    # Stub: the body is only `pass`, so awaiting it yields None, not the annotated list.
    # Implementation would go here
    pass


async def Msft_Sharepoint_GetFolders(params: SharePointFolderParams) -> Dict[str, Any]:
    """Get SharePoint folders matching pattern"""
    # Stub: the body is only `pass`, so awaiting it yields None, not the annotated dict.
    # Implementation would go here
    pass


async def Msft_Sharepoint_GetFiles(params: SharePointFileParams) -> Dict[str, Any]:
    """Get SharePoint files matching pattern"""
    # Stub: the body is only `pass`, so awaiting it yields None, not the annotated dict.
    # Implementation would go here
    pass


async def Msft_Sharepoint_GetFile(params: SharePointFileParams) -> Dict[str, Any]:
    """Get specific SharePoint file"""
    # Stub: the body is only `pass`, so awaiting it yields None, not the annotated dict.
    # Implementation would go here
    pass


async def Msft_Sharepoint_PutFile(params: SharePointFileParams) -> FileRef:
    """Upload file to SharePoint"""
    # Stub: the body is only `pass`, so awaiting it yields None, not a FileRef.
    # Implementation would go here
    pass
# Outlook Mail Functions
class OutlookMailParams(BaseModel):
    """Parameters for Outlook mail operations"""
    # One model is shared by all four Msft_Outlook_*Mail* functions.
    # Only userName is required:
    userName: str
    # Everything else is optional and None unless supplied:
    folder: Optional[str] = None
    messageId: Optional[str] = None
    subject: Optional[str] = None
    body: Optional[str] = None
    to: Optional[List[str]] = None
    cc: Optional[List[str]] = None
    bcc: Optional[List[str]] = None
    attachments: Optional[List[FileRef]] = None
    searchString: Optional[str] = None
    fromAddress: Optional[str] = None
    receivedAfter: Optional[datetime] = None
    # Defaults to 100 when not supplied
    maxResults: Optional[int] = 100
# NOTE: the docstrings of the functions below are runtime data:
# get_data_access_functions() copies obj.__doc__ into each entry's
# "description", so they are left exactly as written.
async def Msft_Outlook_ReadMails(params: OutlookMailParams) -> List[Dict[str, Any]]:
    """Read multiple emails from Outlook"""
    # Stub: the body is only `pass`, so awaiting it yields None, not the annotated list.
    # Implementation would go here
    pass


async def Msft_Outlook_ReadMail(params: OutlookMailParams) -> Dict[str, Any]:
    """Read specific email from Outlook"""
    # Stub: the body is only `pass`, so awaiting it yields None, not the annotated dict.
    # Implementation would go here
    pass


async def Msft_Outlook_DraftMail(params: OutlookMailParams) -> Dict[str, Any]:
    """Create draft email in Outlook"""
    # Stub: the body is only `pass`, so awaiting it yields None, not the annotated dict.
    # Implementation would go here
    pass


async def Msft_Outlook_SendMail(params: OutlookMailParams) -> Dict[str, Any]:
    """Send email through Outlook"""
    # Stub: the body is only `pass`, so awaiting it yields None, not the annotated dict.
    # Implementation would go here
    pass
# Outlook Calendar Functions
class OutlookCalendarParams(BaseModel):
    """Parameters for Outlook calendar operations"""
    # One model is shared by all five Msft_Outlook_*Appointment* functions.
    # Only userName is required:
    userName: str
    # Everything else is optional and None unless supplied:
    calendar: Optional[str] = None
    eventId: Optional[str] = None
    subject: Optional[str] = None
    body: Optional[str] = None
    startTime: Optional[datetime] = None
    endTime: Optional[datetime] = None
    location: Optional[str] = None
    organizer: Optional[str] = None
    attendees: Optional[List[str]] = None
    searchString: Optional[str] = None
    # Defaults to 100 when not supplied
    maxResults: Optional[int] = 100
# NOTE: the docstrings of the functions below are runtime data:
# get_data_access_functions() copies obj.__doc__ into each entry's
# "description", so they are left exactly as written.
async def Msft_Outlook_ReadAppointments(params: OutlookCalendarParams) -> List[Dict[str, Any]]:
    """Read multiple calendar appointments"""
    # Stub: the body is only `pass`, so awaiting it yields None, not the annotated list.
    # Implementation would go here
    pass


async def Msft_Outlook_CreateAppointment(params: OutlookCalendarParams) -> Dict[str, Any]:
    """Create new calendar appointment"""
    # Stub: the body is only `pass`, so awaiting it yields None, not the annotated dict.
    # Implementation would go here
    pass


async def Msft_Outlook_ReadAppointment(params: OutlookCalendarParams) -> Dict[str, Any]:
    """Read specific calendar appointment"""
    # Stub: the body is only `pass`, so awaiting it yields None, not the annotated dict.
    # Implementation would go here
    pass


async def Msft_Outlook_UpdateAppointment(params: OutlookCalendarParams) -> Dict[str, Any]:
    """Update existing calendar appointment"""
    # Stub: the body is only `pass`, so awaiting it yields None, not the annotated dict.
    # Implementation would go here
    pass


async def Msft_Outlook_DeleteAppointment(params: OutlookCalendarParams) -> bool:
    """Delete calendar appointment"""
    # Stub: the body is only `pass`, so awaiting it yields None (falsy), not a bool.
    # Implementation would go here
    pass
def get_data_access_functions() -> List[Dict[str, Any]]:
    """
    Describe every ``Msft_*`` / ``Google_*`` function defined in this module.

    Each entry holds the function's name, its docstring, the JSON schema of
    its parameter model, its return annotation as text and the owning service.
    The result is meant for use in agent prompts.

    Returns:
        One dict per function, in the name order produced by inspect.getmembers
    """
    import inspect
    import sys

    def _schema_of(model):
        # Prefer the pydantic v2 method and fall back to the v1 one. Classes
        # offering neither (e.g. an enum from this module) give an empty schema.
        for method_name in ('model_json_schema', 'schema'):
            method = getattr(model, method_name, None)
            if callable(method):
                return method()
        return {}

    functions = []
    current_module = sys.modules[__name__]
    # Get all functions in the module
    for name, obj in inspect.getmembers(current_module, inspect.isfunction):
        # Only the service-prefixed functions are published
        if not name.startswith(('Msft_', 'Google_')):
            continue
        # Get function signature
        sig = inspect.signature(obj)
        # Get return type annotation
        return_type = obj.__annotations__.get('return', 'Any')
        if hasattr(return_type, '__origin__'):
            return_type = str(return_type)
        # Get parameter model class: the first parameter whose annotation is a
        # class from this module. getattr guards annotations that carry no
        # __module__, such as string annotations.
        param_model = None
        for param in sig.parameters.values():
            if getattr(param.annotation, '__module__', None) == __name__:
                param_model = param.annotation
                break
        # Determine authority from function name
        authority = ServiceType.MSFT if name.startswith('Msft_') else ServiceType.GOOGLE
        # Create function entry
        functions.append({
            "name": name,
            "description": obj.__doc__ or "",
            "parameters": _schema_of(param_model) if param_model else {},
            "return_type": str(return_type),
            "authority": authority
        })
    return functions
class DataAccess:
    """Registry of the module's data access functions, grouped per service"""

    def __init__(self):
        """Load the function descriptions and resolve the callables they name"""
        self.functions = get_data_access_functions()
        self._initialize_functions()

    def _initialize_functions(self):
        """Sort every described function into the Microsoft or the Google table"""
        self.msft_functions, self.google_functions = {}, {}
        for entry in self.functions:
            name = entry['name']
            # Look the callable up among this module's globals by its name
            target = globals()[name]
            # Anything not tagged MSFT lands in the Google table
            if entry['authority'] == ServiceType.MSFT:
                self.msft_functions[name] = target
            else:
                self.google_functions[name] = target

    def _entries_for(self, service_type):
        """Return the function descriptions whose authority equals ``service_type``"""
        return [entry for entry in self.functions if entry['authority'] == service_type]

    @property
    def msft(self) -> Dict[str, Any]:
        """Microsoft callables plus descriptive metadata, rebuilt on each access"""
        metadata = {
            'name': 'Microsoft Services',
            'description': 'Microsoft Office 365 and SharePoint services',
            'functions': self._entries_for(ServiceType.MSFT)
        }
        return {'functions': self.msft_functions, 'metadata': metadata}

    @property
    def google(self) -> Dict[str, Any]:
        """Google callables plus descriptive metadata, rebuilt on each access"""
        metadata = {
            'name': 'Google Services',
            'description': 'Google Workspace services',
            'functions': self._entries_for(ServiceType.GOOGLE)
        }
        return {'functions': self.google_functions, 'metadata': metadata}

    @property
    def utils(self) -> Dict[str, Any]:
        """Lookup helpers over the function descriptions"""
        def find_by_name(name):
            # First description with a matching name, or None
            for entry in self.functions:
                if entry['name'] == name:
                    return entry
            return None

        return {
            'getAvailableFunctions': lambda: self.functions,
            'getFunctionInfo': find_by_name,
            'getServiceFunctions': self._entries_for
        }

    def to_service_object(self) -> Dict[str, Any]:
        """Bundle the msft, google and utils views into one dict"""
        return dict(msft=self.msft, google=self.google, utils=self.utils)
def get_data_access() -> DataAccess:
    """Return the DataAccess object cached on this function, creating it on first call"""
    cached = getattr(get_data_access, '_instance', None)
    if cached is None and not hasattr(get_data_access, '_instance'):
        get_data_access._instance = DataAccess()
    return get_data_access._instance

View file

@ -1,396 +0,0 @@
"""
Document Manager Module for handling document operations and content extraction.
"""
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime
from modules.interfaces.serviceChatModel import ChatDocument, ChatContent
from modules.workflow.documentProcessor import getDocumentContents
import uuid
import json
import base64
logger = logging.getLogger(__name__)
class DocumentManager:
"""Manager for document operations and content extraction."""
_instance = None
@classmethod
def getInstance(cls):
    """Return the shared document manager, constructing it the first time it is requested."""
    if cls._instance is None:
        created = cls()
        cls._instance = created
    return cls._instance
def __init__(self):
    """
    Create the document manager with no service attached yet.

    Raises:
        RuntimeError: If the class already holds an instance in ``_instance``.
    """
    alreadyBuilt = DocumentManager._instance is not None
    if alreadyBuilt:
        raise RuntimeError("Singleton instance already exists - use getInstance()")
    self.service = None
def initialize(self, service=None):
    """
    Attach a service container after checking it exposes the needed interfaces.

    Args:
        service: Container expected to have ``base``, ``msft`` and ``google``
            attributes; a falsy value leaves the current service untouched

    Returns:
        False when an interface is missing (the service is not stored),
        True otherwise
    """
    if not service:
        return True
    missing_interfaces = [
        interface for interface in ('base', 'msft', 'google')
        if not hasattr(service, interface)
    ]
    if missing_interfaces:
        logger.warning(f"Service container missing required interfaces: {', '.join(missing_interfaces)}")
        return False
    self.service = service
    return True
async def extractContent(self, fileId: str) -> Optional[ChatDocument]:
    """
    Load a stored file and wrap its decoded text in a ChatDocument.

    Args:
        fileId: ID of the file to extract content from

    Returns:
        ChatDocument with the content decoded as UTF-8 (undecodable bytes
        dropped), or None when content or metadata is missing or an error occurs
    """
    try:
        rawBytes = await self.getFileContent(fileId)
        if not rawBytes:
            return None
        meta = await self.getFileMetadata(fileId)
        if not meta:
            return None
        fields = {
            "id": str(uuid.uuid4()),
            "fileId": fileId,
            "filename": meta.get("name", "Unknown"),
            "fileSize": meta.get("size", 0),
            "content": rawBytes.decode('utf-8', errors='ignore'),
            "mimeType": meta.get("mimeType", "text/plain"),
        }
        return ChatDocument(**fields)
    except Exception as e:
        logger.error(f"Error extracting content from file {fileId}: {str(e)}")
        return None
async def getFileContent(self, fileId: str) -> Optional[bytes]:
    """Return ``service.functions.getFileData(fileId)``; any error is logged and gives None."""
    # NOTE(review): ChatManager.initialize() assigns a plain dict to
    # service.functions; if that is this same service object, the attribute
    # lookup below fails and None is returned - confirm the intended provider.
    try:
        fetch = self.service.functions.getFileData
        data = fetch(fileId)
    except Exception as e:
        logger.error(f"Error getting file content for {fileId}: {str(e)}")
        return None
    return data
async def getFileMetadata(self, fileId: str) -> Optional[Dict[str, Any]]:
    """Return ``service.functions.getFile(fileId)``; any error is logged and gives None."""
    # NOTE(review): ChatManager.initialize() assigns a plain dict to
    # service.functions; if that is this same service object, the attribute
    # lookup below fails and None is returned - confirm the intended provider.
    try:
        lookup = self.service.functions.getFile
        record = lookup(fileId)
    except Exception as e:
        logger.error(f"Error getting file metadata for {fileId}: {str(e)}")
        return None
    return record
async def saveFile(self, filename: str, content: bytes, mimeType: str) -> Optional[int]:
    """
    Store a new file through ``service.base.saveFile``.

    Args:
        filename: Name of the file
        content: File content as bytes
        mimeType: MIME type of the file

    Returns:
        Whatever the base service returns (annotated as the file ID), or None
        if the call raises
    """
    try:
        storage = self.service.base
        newId = await storage.saveFile(filename, content, mimeType)
    except Exception as e:
        logger.error(f"Error saving file {filename}: {str(e)}")
        return None
    return newId
async def deleteFile(self, fileId: str) -> bool:
    """Return ``service.functions.deleteFile(fileId)``; any error is logged and gives False."""
    try:
        remove = self.service.functions.deleteFile
        outcome = remove(fileId)
    except Exception as e:
        logger.error(f"Error deleting file {fileId}: {str(e)}")
        return False
    return outcome
async def convertFileRefToId(self, ref: str) -> Optional[int]:
    """
    Convert agent file reference to file ID.

    Args:
        ref: File reference in format 'filename;id' or just 'id'. The ID is
            taken after the last ';', so filenames containing ';' are handled.

    Returns:
        File ID if successful, None when the ID part is not an integer
    """
    try:
        if isinstance(ref, str):
            # rsplit leaves a reference without ';' intact and always yields
            # the trailing ID segment otherwise.
            ref = ref.rsplit(';', 1)[-1]
        return int(ref)
    except (TypeError, ValueError, OverflowError) as e:
        logger.error(f"Error converting file reference to ID: {str(e)}")
        return None
async def convertFileIdToRef(self, fileId: str) -> Optional[str]:
    """
    Build the agent-facing reference for a stored file.

    Args:
        fileId: File ID to convert

    Returns:
        'filename;id' built from the metadata's ``name``, or None when no
        metadata is found or an error occurs
    """
    try:
        metadata = await self.getFileMetadata(fileId)
        if metadata:
            return f"{metadata['name']};{fileId}"
        return None
    except Exception as e:
        logger.error(f"Error converting file ID to reference: {str(e)}")
        return None
async def convertDataFormat(self, data: Any, format: str) -> Any:
    """
    Convert data to or from a named format.

    Args:
        data: Data to convert
        format: 'json' parses a string and serializes anything else;
            'base64' encodes a string (as UTF-8) or bytes to base64 text;
            any other value returns the data unchanged

    Returns:
        Converted data, or the original data if the conversion raises
    """
    try:
        isText = isinstance(data, str)
        if format == 'json':
            return json.loads(data) if isText else json.dumps(data)
        if format == 'base64':
            rawBytes = data.encode('utf-8') if isText else data
            return base64.b64encode(rawBytes).decode('utf-8')
        return data
    except Exception as e:
        logger.error(f"Error converting data format: {str(e)}")
        return data
async def createAgentInputFileList(self, files: List[str]) -> List[Dict[str, Any]]:
    """
    Resolve file references into file objects for an agent.

    References whose ID is falsy, or whose metadata cannot be found, are skipped.

    Args:
        files: List of file references

    Returns:
        List of dicts with ``id``, ``name``, ``mimeType`` and ``content``;
        an empty list if any step raises
    """
    try:
        resolved = []
        for reference in files:
            fileId = await self.convertFileRefToId(reference)
            if not fileId:
                continue
            metadata = await self.getFileMetadata(fileId)
            if not metadata:
                continue
            payload = await self.getFileContent(fileId)
            resolved.append({
                'id': fileId,
                'name': metadata['name'],
                'mimeType': metadata['mimeType'],
                'content': payload
            })
        return resolved
    except Exception as e:
        logger.error(f"Error creating agent input file list: {str(e)}")
        return []
async def saveAgentOutputFiles(self, files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Persist the files an agent produced.

    Each file is stored through saveFile(); files for which saveFile returns
    a falsy value are left out of the result.

    Args:
        files: List of dicts with ``name``, ``content`` and optional ``mimeType``
            (defaults to 'application/octet-stream')

    Returns:
        List of dicts with ``id``, ``name`` and ``mimeType`` for the stored
        files; an empty list if any step raises
    """
    try:
        stored = []
        for item in files:
            newId = await self.saveFile(
                filename=item['name'],
                content=item['content'],
                mimeType=item.get('mimeType', 'application/octet-stream')
            )
            if not newId:
                continue
            stored.append({
                'id': newId,
                'name': item['name'],
                'mimeType': item.get('mimeType', 'application/octet-stream')
            })
        return stored
    except Exception as e:
        logger.error(f"Error saving agent output files: {str(e)}")
        return []
async def contentWithPrompt(self, document: Dict[str, Any], prompt: str) -> Optional[Dict[str, Any]]:
    """
    Run a prompt-driven AI extraction over a document's content.

    Content longer than 100000 characters is split into chunks that are
    processed one after another and merged; shorter content is processed in
    a single call.

    Args:
        document: Document dict; its ``id`` entry identifies the file
        prompt: Specific prompt for content extraction

    Returns:
        Dict with ``content`` and ``metadata`` (``original_size``,
        ``chunks_processed``, ``mime_type``), or None when the document cannot
        be loaded or an error occurs
    """
    try:
        loaded = await self.extractContent(document.get('id'))
        if not loaded:
            return None
        text = loaded.content
        mime = loaded.mimeType
        if len(text) <= 100000:
            # Small enough to go through in one call
            extracted = await self._processContentChunk(text, prompt)
            processedCount = 1
        else:
            pieces = self._splitContentIntoChunks(text, mime)
            partials = []
            for piece in pieces:
                partial = await self._processContentChunk(piece, prompt)
                # Chunks that produced nothing are left out of the merge
                if partial:
                    partials.append(partial)
            extracted = self._mergeChunkResults(partials)
            processedCount = len(pieces)
        return {
            "content": extracted,
            "metadata": {
                "original_size": len(text),
                "chunks_processed": processedCount,
                "mime_type": mime
            }
        }
    except Exception as e:
        logger.error(f"Error in contentWithPrompt: {str(e)}")
        return None
def _splitContentIntoChunks(self, content: str, mime_type: str) -> List[str]:
"""
Split content into manageable chunks based on mime type.
Args:
content: Content to split
mime_type: MIME type of the content
Returns:
List of content chunks
"""
try:
if mime_type.startswith('text/'):
# Split text content by paragraphs or sections
return [chunk.strip() for chunk in content.split('\n\n') if chunk.strip()]
elif mime_type == 'application/json':
# Split JSON content by objects
data = json.loads(content)
if isinstance(data, list):
return [json.dumps(item) for item in data]
return [content]
else:
# Default chunking
return [content[i:i+10000] for i in range(0, len(content), 10000)]
except Exception as e:
logger.error(f"Error splitting content: {str(e)}")
return [content]
async def _processContentChunk(self, chunk: str, prompt: str) -> Optional[str]:
"""
Process a single content chunk with AI.
Args:
chunk: Content chunk to process
prompt: Extraction prompt
Returns:
Processed content
"""
try:
# Create AI prompt
ai_prompt = f"""
Extract relevant information from this content based on the following prompt:
PROMPT: {prompt}
CONTENT:
{chunk}
Return ONLY the extracted information in a clear, concise format.
"""
# Get AI response
response = await self.service.base.callAi([
{"role": "system", "content": "You are an expert at extracting relevant information from documents."},
{"role": "user", "content": ai_prompt}
])
return response.strip()
except Exception as e:
logger.error(f"Error processing content chunk: {str(e)}")
return None
def _mergeChunkResults(self, chunks: List[str]) -> str:
"""
Merge processed content chunks into a single result.
Args:
chunks: List of processed chunks
Returns:
Merged content
"""
try:
# Remove duplicates and empty chunks
chunks = [chunk for chunk in chunks if chunk and chunk.strip()]
# Merge chunks with appropriate spacing
return "\n\n".join(chunks)
except Exception as e:
logger.error(f"Error merging chunk results: {str(e)}")
return ""
# Singleton factory for the document manager
def getDocumentManager():
    """Return the shared DocumentManager instance."""
    manager = DocumentManager.getInstance()
    return manager

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,395 @@
import logging
import importlib
import pkgutil
import inspect
from typing import Dict, Any, Optional, List, Type
from datetime import datetime, UTC
import json
import asyncio
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
from modules.workflow.serviceContainer import ServiceContainer
from modules.interfaces.serviceChatModel import AgentTask, AgentAction, AgentResult, Action, TaskStatus
from modules.workflow.managerPrompt import AIPromptManager
from modules.workflow.processorDocument import DocumentProcessor
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
class ChatManager:
"""Chat manager with improved AI integration and method handling"""
def __init__(self):
    """Create the service container, reset workflow state and register all discoverable methods."""
    self.service = ServiceContainer()
    # No workflow or task is attached yet
    self.workflow = None
    self.current_task = None
    self.workflow_history = []
    # Method discovery only needs the service container created above
    self._discover_methods()
def _discover_methods(self):
    """Import every 'method*' module of modules.methods and register its MethodBase subclasses."""
    try:
        package = importlib.import_module('modules.methods')
        for _, name, is_pkg in pkgutil.iter_modules(package.__path__):
            # Only plain modules whose name starts with 'method' are considered
            if is_pkg or not name.startswith('method'):
                continue
            try:
                module = importlib.import_module(f'modules.methods.{name}')
                for _member_name, member in inspect.getmembers(module, inspect.isclass):
                    # Skip unrelated classes and the base class itself
                    if not issubclass(member, MethodBase) or member == MethodBase:
                        continue
                    instance = member()
                    self.service.methods[instance.name] = instance
                    logger.info(f"Discovered method: {instance.name}")
            except Exception as e:
                # A broken module must not prevent the remaining ones from loading
                logger.error(f"Error loading method module {name}: {str(e)}")
    except Exception as e:
        logger.error(f"Error discovering methods: {str(e)}")
async def initialize(self, workflow: Any, context: Dict[str, Any]) -> None:
    """
    Initialize chat manager with workflow and context.

    Args:
        workflow: Workflow object this manager operates on
        context: Context dictionary handed to the service container and the
            document processor
    """
    # create_initial_task, _summarize_workflow and _create_next_task read
    # self.workflow, so it has to be kept in sync with the service container
    self.workflow = workflow
    self.service.workflow = workflow
    self.service.context = context
    # Initialize AI model
    self.service.model = {
        'callAiBasic': self._call_ai_basic,
        'callAiAdvanced': self._call_ai_advanced
    }
    # Initialize document processor
    self.service.document_processor.initialize(context)
async def create_initial_task(self, user_input: Dict[str, Any]) -> AgentTask:
    """
    Build the first task of a workflow from the raw user input.

    The input is interpreted by the AI against the catalog of available
    methods; the resulting task is stored as the service's 'current' task.
    """
    catalog = self.service.get_available_methods()
    interpreted = await self._process_user_input(user_input, catalog)
    action_list = await self._create_actions(interpreted['actions'])

    task_fields = {
        "id": f"task_{datetime.now(UTC).timestamp()}",
        "workflowId": self.workflow.id,
        "userInput": interpreted['objective'],
        "dataList": user_input.get('connections', []),
        "actionList": action_list,
        "status": TaskStatus.PENDING,
        "createdAt": datetime.now(UTC),
        "updatedAt": datetime.now(UTC),
    }
    initial_task = AgentTask(**task_fields)

    # Remember the task so execute_current_task / define_next_task can find it
    self.service.tasks['current'] = initial_task
    return initial_task
async def execute_current_task(self) -> None:
    """
    Run the task stored under 'current' in the service container.

    Raises:
        ValueError: If no current task is set
    """
    current = self.service.tasks.get('current')
    if current:
        await self.service.execute_task(current)
        return
    raise ValueError("No current task to execute")
async def define_next_task(self) -> Optional[AgentTask]:
    """
    Decide, from the AI analysis of the current task, whether a follow-up task is needed.

    Returns:
        The new task (also stored as 'current'), or None when there is no
        current task, the workflow is complete, or the analysis failed
    """
    current_task = self.service.tasks.get('current')
    if not current_task:
        return None

    try:
        analysis = await self._analyze_task_results(current_task)

        if analysis['isComplete']:
            # Nothing left to do: close the current task
            current_task.status = TaskStatus.COMPLETED
            current_task.updatedAt = datetime.now(UTC)
            return None

        # More work required: rotate current -> previous and install the follow-up
        follow_up = self._create_next_task(current_task, analysis)
        self.service.tasks['previous'] = current_task
        self.service.tasks['current'] = follow_up
        return follow_up
    except Exception as e:
        logger.error(f"Error defining next task: {e}")
        current_task.status = TaskStatus.FAILED
        current_task.updatedAt = datetime.now(UTC)
        return None
async def _process_user_input(self, user_input: Dict[str, Any], method_catalog: Dict[str, Any]) -> Dict[str, Any]:
"""Process user input with AI to extract objectives and actions"""
# Create prompt with available methods and actions
prompt = f"""Given the following user input and available methods/actions, extract the objective and required actions:
User Input: {user_input.get('message', '')}
Available Methods and Actions:
{json.dumps(method_catalog, indent=2)}
Please provide a JSON response with:
1. objective: The main goal or task to accomplish
2. actions: List of required actions with method and parameters
Example format:
{{
"objective": "Search for documents about project X",
"actions": [
{{
"method": "sharepoint",
"action": "search",
"parameters": {{
"query": "project X",
"site": "projects"
}}
}}
]
}}
"""
# Call AI service
response = await self.service.model['callAiBasic'](prompt)
return json.loads(response)
async def _create_actions(self, actions_data: List[Dict[str, Any]]) -> List[AgentAction]:
    """
    Create action objects from processed input.

    Args:
        actions_data: Action descriptions with 'method', 'action' and
            optional 'parameters' keys

    Returns:
        One pending AgentAction per description whose method is registered;
        descriptions naming an unknown method are skipped with a warning
    """
    actions = []
    for index, action_data in enumerate(actions_data):
        method_name = action_data['method']
        if not self.service.get_method(method_name):
            # Previously dropped silently; log it so missing methods are visible
            logger.warning(f"Skipping action for unknown method: {method_name}")
            continue
        now = datetime.now(UTC)
        actions.append(AgentAction(
            # The list index keeps ids unique even when several actions are
            # created within the same clock tick
            id=f"action_{now.timestamp()}_{index}",
            method=method_name,
            action=action_data['action'],
            parameters=action_data.get('parameters', {}),
            status=TaskStatus.PENDING,
            createdAt=now,
            updatedAt=now
        ))
    return actions
async def _summarize_workflow(self) -> str:
"""Summarize workflow history"""
if not self.workflow.messages:
return ""
prompt = f"""Summarize the following chat history:
{json.dumps([m.dict() for m in self.workflow.messages], indent=2)}
Please provide a concise summary focusing on:
1. Main objectives
2. Key actions taken
3. Current status
4. Any issues or blockers
"""
return await self.service.model['callAiBasic'](prompt)
async def _analyze_task_results(self, task: AgentTask) -> Dict[str, Any]:
    """
    Ask the basic AI model whether the workflow is complete and which actions should follow.

    Returns:
        The AI response parsed as JSON (expected keys: "isComplete",
        "nextActions", "issues")
    """
    # The workflow summary gives the AI the history behind the task results
    summary = await self._summarize_workflow()
    call_ai = self.service.model['callAiBasic']
    prompt = f"""Analyze the following task results and workflow history to determine next steps:
Task Results:
{json.dumps([a.dict() for a in task.actionList], indent=2)}
Workflow Summary:
{summary}
Please provide a JSON response with:
1. isComplete: Whether the workflow is complete
2. nextActions: List of next actions needed (if any)
3. issues: Any issues or blockers identified
Example format:
{{
"isComplete": false,
"nextActions": [
{{
"method": "sharepoint",
"action": "read",
"parameters": {{
"documentId": "doc123"
}}
}}
],
"issues": ["Need authentication for SharePoint"]
}}
"""
    raw_reply = await call_ai(prompt)
    return json.loads(raw_reply)
def _create_next_task(self, current_task: AgentTask, analysis: Dict[str, Any]) -> AgentTask:
    """
    Create next task based on analysis.

    Args:
        current_task: Task whose user input and data list are carried over
        analysis: Analysis result; its 'nextActions' list (if present)
            describes the actions of the new task

    Returns:
        A new pending AgentTask for the same workflow
    """
    now = datetime.now(UTC)
    actions = [
        AgentAction(
            # The list index keeps ids unique even when several actions are
            # created within the same clock tick
            id=f"action_{now.timestamp()}_{index}",
            method=action_data['method'],
            action=action_data['action'],
            parameters=action_data.get('parameters', {}),
            status=TaskStatus.PENDING,
            createdAt=now,
            updatedAt=now
        )
        for index, action_data in enumerate(analysis.get('nextActions', []))
    ]
    # Create and return next task
    return AgentTask(
        id=f"task_{now.timestamp()}",
        workflowId=self.workflow.id,
        userInput=current_task.userInput,
        dataList=current_task.dataList,
        actionList=actions,
        status=TaskStatus.PENDING,
        createdAt=now,
        updatedAt=now
    )
async def process_task(self, task: Any) -> Dict[str, Any]:
    """
    Execute a task and report the outcome as a status dictionary.

    Returns:
        On success: "status", "feedback" and "documents".
        Otherwise: "status", "error" and "feedback".
    """
    try:
        await self.service.execute_task(task)

        if task.status != 'success':
            # Execution finished without success: pass the task's own error on
            return {
                "status": task.status,
                "error": task.error,
                "feedback": f"Task failed: {task.error}"
            }

        # Feedback is generated first, then the output documents are processed
        feedback = await self._process_task_results(task)
        task.thisTaskFeedback = feedback

        documents = await self._create_output_documents(task)
        task.documentsOutput = documents

        return {
            "status": "success",
            "feedback": feedback,
            "documents": documents
        }
    except Exception as e:
        logger.error(f"Error processing task: {str(e)}")
        return {
            "status": "error",
            "error": str(e),
            "feedback": f"Error processing task: {str(e)}"
        }
async def _process_task_results(self, task: Any) -> str:
"""Process task results and generate feedback using AI"""
try:
# Create context for AI
context = {
"task": "Process task results",
"document": {"name": "Task Results", "type": "json"}
}
# Generate prompt
prompt = self.service.prompt_manager.generate_prompt(
context,
[
{"input": "Task results", "output": "Generate summary"}
]
)
# Call AI
response = await self.service.model['callAiBasic'](
f"""Process task results and generate feedback:
Task Input: {task.userInput}
Method Results: {task.result}
Generated Documents: {task.documentsOutput}
{prompt}
Please provide:
1. Summary of completed actions
2. Generated document descriptions
3. Next steps or completion status
Format your response as JSON:
{{
"summary": "string",
"documents": ["string"],
"nextSteps": ["string"]
}}
"""
)
# Parse and validate response
try:
result = json.loads(response)
return result.get("summary", "Task completed successfully")
except json.JSONDecodeError:
return response.strip()
except Exception as e:
logger.error(f"Error processing task results: {str(e)}")
return f"Error processing results: {str(e)}"
async def _create_output_documents(self, task: Any) -> List[Dict[str, Any]]:
"""Create output documents from task results"""
try:
documents = []
# Process each document
for doc in task.documentsOutput:
processed = self.service.document_processor.process_with_context(
doc,
{
"id": doc.get("id", ""),
"extractionHistory": doc.get("extractionHistory", []),
"relevantSections": doc.get("relevantSections", []),
"processingStatus": doc.get("processingStatus", {})
}
)
if processed:
documents.append(processed)
return documents
except Exception as e:
logger.error(f"Error creating output documents: {str(e)}")
return []
async def _call_ai_basic(self, prompt: str) -> str:
"""Call basic AI model"""
# TODO: Implement actual AI call
return "AI response placeholder"
async def _call_ai_advanced(self, prompt: str, context: Dict[str, Any]) -> str:
"""Call advanced AI model with context"""
# TODO: Implement actual AI call
return "AI response placeholder"

View file

@ -0,0 +1,478 @@
from typing import Dict, Any, Optional, List
import logging
import json
import os
from datetime import datetime, UTC
from pathlib import Path
import mimetypes
import hashlib
import shutil
import uuid
import base64
from modules.workflow.processorDocument import DocumentProcessor
from modules.shared.configuration import APP_CONFIG
from modules.interfaces.serviceChatModel import ChatDocument, ChatContent
logger = logging.getLogger(__name__)
class DocumentManager:
"""Document manager with enhanced operations and file handling"""
_instance = None
@classmethod
def getInstance(cls):
    """Return the shared document manager, creating it on first use."""
    instance = cls._instance
    if instance is None:
        # First access: build the instance and remember it on the class
        instance = cls()
        cls._instance = instance
    return instance
def __init__(self):
    """
    Set up processor, cache and working directories.

    Raises:
        RuntimeError: If the singleton instance already exists
    """
    if DocumentManager._instance is not None:
        raise RuntimeError("Singleton instance already exists - use getInstance()")

    # No service container until initialize() supplies one
    self.service = None
    self.document_cache = {}
    self.processor = DocumentProcessor()

    # Directory locations come from the application config, with local defaults
    temp_setting = APP_CONFIG.get('temp_dir', 'temp')
    output_setting = APP_CONFIG.get('output_dir', 'output')
    self.temp_dir = Path(temp_setting)
    self.output_dir = Path(output_setting)
async def initialize(self, context: Dict[str, Any], service=None) -> bool:
    """
    Initialize document manager with context and service.

    Args:
        context: Context dictionary passed to the document processor
        service: Optional service container; when given it must expose the
            'base', 'msft' and 'google' interfaces

    Returns:
        True when initialization completed; False when the supplied service
        lacks a required interface (the service is then not stored and the
        working directories are left untouched)
    """
    # Initialize processor
    self.processor.initialize(context)

    # Initialize service container
    if service:
        required_interfaces = ['base', 'msft', 'google']
        missing_interfaces = [name for name in required_interfaces if not hasattr(service, name)]
        if missing_interfaces:
            logger.warning(f"Service container missing required interfaces: {', '.join(missing_interfaces)}")
            return False
        self.service = service

    # Create directories if they don't exist
    self.temp_dir.mkdir(parents=True, exist_ok=True)
    self.output_dir.mkdir(parents=True, exist_ok=True)

    # Clear temporary directory
    self._clear_temp_directory()
    # Report success explicitly so every path returns a bool
    return True
def _clear_temp_directory(self) -> None:
"""Clear temporary directory"""
try:
if self.temp_dir.exists():
shutil.rmtree(self.temp_dir)
self.temp_dir.mkdir(parents=True)
except Exception as e:
logger.error(f"Error clearing temp directory: {str(e)}")
async def process_document(self, document: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
"""Process a document with context"""
try:
# Generate document ID if not present
if 'id' not in document:
document['id'] = self._generate_document_id(document)
# Process document content
processed = await self.processor.process_with_context(document, context)
# Add metadata
processed['metadata'] = {
'processedAt': datetime.now(UTC).isoformat(),
'processor': 'DocumentManager',
'version': '1.0'
}
# Cache document
self.document_cache[document['id']] = processed
return processed
except Exception as e:
logger.error(f"Error processing document: {str(e)}")
return {
'id': document.get('id', ''),
'error': str(e),
'status': 'error'
}
async def extract_content(self, file_id: str) -> Optional[ChatDocument]:
    """
    Load a file's bytes and metadata and wrap them in a ChatDocument.

    Returns:
        The ChatDocument with the content decoded as UTF-8 (undecodable bytes
        are ignored), or None if content or metadata is unavailable
    """
    try:
        raw_bytes = await self.get_file_content(file_id)
        if not raw_bytes:
            return None

        metadata = await self.get_file_metadata(file_id)
        if not metadata:
            return None

        fields = {
            "id": str(uuid.uuid4()),
            "fileId": file_id,
            "filename": metadata.get("name", "Unknown"),
            "fileSize": metadata.get("size", 0),
            "content": raw_bytes.decode('utf-8', errors='ignore'),
            "mimeType": metadata.get("mimeType", "text/plain"),
        }
        return ChatDocument(**fields)
    except Exception as e:
        logger.error(f"Error extracting content from file {file_id}: {str(e)}")
        return None
async def get_file_content(self, file_id: str) -> Optional[bytes]:
    """Return the file's data from the service's functions interface, or None if unavailable."""
    try:
        service = self.service
        if service and service.functions:
            return service.functions.getFileData(file_id)
        logger.error("Service or functions not initialized")
        return None
    except Exception as e:
        logger.error(f"Error getting file content for {file_id}: {str(e)}")
        return None
async def get_file_metadata(self, file_id: str) -> Optional[Dict[str, Any]]:
    """Return the file record from the service's functions interface, or None if unavailable."""
    try:
        service = self.service
        if service and service.functions:
            return service.functions.getFile(file_id)
        logger.error("Service or functions not initialized")
        return None
    except Exception as e:
        logger.error(f"Error getting file metadata for {file_id}: {str(e)}")
        return None
async def save_file(self, filename: str, content: bytes, mime_type: str) -> Optional[int]:
    """Store a new file through the service's base interface and return its result, or None on failure."""
    try:
        service = self.service
        if service and service.base:
            stored = await service.base.saveFile(filename, content, mime_type)
            return stored
        logger.error("Service or base interface not initialized")
        return None
    except Exception as e:
        logger.error(f"Error saving file {filename}: {str(e)}")
        return None
async def delete_file(self, file_id: str) -> bool:
    """Delete a file through the service's functions interface; False if unavailable or on error."""
    try:
        service = self.service
        if service and service.functions:
            return service.functions.deleteFile(file_id)
        logger.error("Service or functions not initialized")
        return False
    except Exception as e:
        logger.error(f"Error deleting file {file_id}: {str(e)}")
        return False
def convert_file_ref_to_id(self, ref: str) -> Optional[int]:
    """
    Turn a file reference into its numeric id.

    A reference containing ';' carries the id in its second ';'-separated
    field; anything else is converted to int directly.

    Returns:
        The id, or None if the reference cannot be converted
    """
    try:
        has_separator = isinstance(ref, str) and ';' in ref
        raw_id = ref.split(';')[1] if has_separator else ref
        return int(raw_id)
    except Exception as e:
        logger.error(f"Error converting file reference to ID: {str(e)}")
        return None
def convert_file_id_to_ref(self, file_id: str) -> Optional[str]:
    """
    Convert file ID to reference.

    Args:
        file_id: Id of the file to look up

    Returns:
        "<file name>;<file_id>", or None if the service is not initialized,
        the file is unknown, or the lookup fails
    """
    try:
        if not self.service or not self.service.functions:
            logger.error("Service or functions not initialized")
            return None
        file = self.service.functions.getFile(file_id)
        if not file:
            return None
        # The rest of this class reads getFile() results as dicts with a
        # 'name' key; accept that shape as well as an object with .filename
        if isinstance(file, dict):
            filename = file.get("name", file.get("filename"))
        else:
            filename = file.filename
        return f"{filename};{file_id}"
    except Exception as e:
        logger.error(f"Error converting file ID to reference: {str(e)}")
        return None
async def convert_data_format(self, data: Any, format: str) -> Any:
    """
    Convert data to or from the named format.

    'json' parses strings and serializes everything else; 'base64' encodes
    strings (as UTF-8) or bytes-like data. Other formats, and failed
    conversions, return the data unchanged.
    """
    try:
        is_text = isinstance(data, str)
        if format == 'json':
            return json.loads(data) if is_text else json.dumps(data)
        if format == 'base64':
            raw = data.encode('utf-8') if is_text else data
            return base64.b64encode(raw).decode('utf-8')
        return data
    except Exception as e:
        logger.error(f"Error converting data format: {str(e)}")
        return data
async def create_agent_input_file_list(self, files: List[str]) -> List[Dict[str, Any]]:
    """
    Create list of input files for agent processing.

    Args:
        files: File references accepted by convert_file_ref_to_id

    Returns:
        One dictionary ('id', 'name', 'mimeType', 'content') per reference
        that resolves to an id with available metadata; an empty list on error
    """
    try:
        input_files = []
        for file in files:
            # convert_file_ref_to_id is a plain function; awaiting its int
            # result raised TypeError and emptied the whole list
            file_id = self.convert_file_ref_to_id(file)
            # None marks a failed conversion; 0 is a legitimate id
            if file_id is None:
                continue
            file_data = await self.get_file_metadata(file_id)
            if not file_data:
                continue
            content = await self.get_file_content(file_id)
            input_files.append({
                'id': file_id,
                'name': file_data['name'],
                'mimeType': file_data['mimeType'],
                'content': content
            })
        return input_files
    except Exception as e:
        logger.error(f"Error creating agent input file list: {str(e)}")
        return []
async def save_agent_output_files(self, files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Save output files from agent processing.

    Args:
        files: File dictionaries with 'name', 'content' and optional
            'mimeType' (defaults to 'application/octet-stream')

    Returns:
        One dictionary ('id', 'name', 'mimeType') per file that was saved;
        an empty list on error
    """
    try:
        saved_files = []
        for file in files:
            mime_type = file.get('mimeType', 'application/octet-stream')
            # save_file's keyword is mime_type; the former mimeType= keyword
            # raised TypeError, so nothing was ever saved
            file_meta = await self.save_file(
                filename=file['name'],
                content=file['content'],
                mime_type=mime_type
            )
            # save_file signals failure with None; any other value is a valid id
            if file_meta is not None:
                saved_files.append({
                    'id': file_meta,
                    'name': file['name'],
                    'mimeType': mime_type
                })
        return saved_files
    except Exception as e:
        logger.error(f"Error saving agent output files: {str(e)}")
        return []
async def content_with_prompt(self, document: Dict[str, Any], prompt: str) -> Optional[Dict[str, Any]]:
    """
    Run an AI extraction prompt against the content of a document.

    Content longer than 100000 characters is split into chunks that are
    processed one by one and merged; shorter content is processed in one go.

    Returns:
        Dictionary with "content" and "metadata" (original_size,
        chunks_processed, mime_type), or None if the document cannot be
        loaded or an error occurs
    """
    try:
        chat_doc = await self.extract_content(document.get('id'))
        if not chat_doc:
            return None

        text = chat_doc.content
        mime_type = chat_doc.mimeType
        total_size = len(text)

        if total_size > 100000:
            pieces = self._split_content_into_chunks(text, mime_type)
            partials = []
            for piece in pieces:
                partial = await self._process_content_chunk(piece, prompt)
                if partial:
                    partials.append(partial)
            extracted = self._merge_chunk_results(partials)
            processed_count = len(pieces)
        else:
            extracted = await self._process_content_chunk(text, prompt)
            processed_count = 1

        return {
            "content": extracted,
            "metadata": {
                "original_size": total_size,
                "chunks_processed": processed_count,
                "mime_type": mime_type
            }
        }
    except Exception as e:
        logger.error(f"Error in content_with_prompt: {str(e)}")
        return None
def _split_content_into_chunks(self, content: str, mime_type: str) -> List[str]:
"""Split content into manageable chunks"""
try:
if mime_type.startswith('text/'):
return [chunk.strip() for chunk in content.split('\n\n') if chunk.strip()]
elif mime_type == 'application/json':
data = json.loads(content)
if isinstance(data, list):
return [json.dumps(item) for item in data]
return [content]
else:
return [content[i:i+10000] for i in range(0, len(content), 10000)]
except Exception as e:
logger.error(f"Error splitting content: {str(e)}")
return [content]
async def _process_content_chunk(self, chunk: str, prompt: str) -> Optional[str]:
"""Process content chunk with AI"""
try:
if not self.service or not self.service.base:
logger.error("Service or base interface not initialized")
return None
ai_prompt = f"""
Extract relevant information from this content based on the following prompt:
PROMPT: {prompt}
CONTENT:
{chunk}
Return ONLY the extracted information in a clear, concise format.
"""
response = await self.service.base.callAi([
{"role": "system", "content": "You are an expert at extracting relevant information from documents."},
{"role": "user", "content": ai_prompt}
])
return response.strip()
except Exception as e:
logger.error(f"Error processing content chunk: {str(e)}")
return None
def _merge_chunk_results(self, chunks: List[str]) -> str:
"""Merge processed content chunks"""
try:
chunks = [chunk for chunk in chunks if chunk and chunk.strip()]
return "\n\n".join(chunks)
except Exception as e:
logger.error(f"Error merging chunk results: {str(e)}")
return ""
async def save_document(self, document: Dict[str, Any], format: str = 'json') -> str:
    """
    Write a document to the output directory as "<id>.<format>".

    'json' dumps the whole document; any other format writes only the
    document's 'content' (text as UTF-8, everything else as raw bytes).

    Returns:
        Path of the written file as a string

    Raises:
        Exception: Any error while writing is logged and re-raised
    """
    try:
        filepath = self.output_dir / f"{document['id']}.{format}"

        if format == 'json':
            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(document, f, indent=2)
            return str(filepath)

        content = document.get('content', '')
        mode, encoding = ('w', 'utf-8') if isinstance(content, str) else ('wb', None)
        with open(filepath, mode, encoding=encoding) as f:
            f.write(content)
        return str(filepath)
    except Exception as e:
        logger.error(f"Error saving document: {str(e)}")
        raise
async def load_document(self, filepath: str) -> Dict[str, Any]:
"""Load document from file"""
try:
path = Path(filepath)
if not path.exists():
raise FileNotFoundError(f"Document not found: {filepath}")
format = path.suffix[1:].lower()
if format == 'json':
with open(path, 'r', encoding='utf-8') as f:
document = json.load(f)
else:
mime_type = mimetypes.guess_type(filepath)[0]
if mime_type and mime_type.startswith('text/'):
with open(path, 'r', encoding='utf-8') as f:
content = f.read()
else:
with open(path, 'rb') as f:
content = f.read()
document = {
'id': path.stem,
'content': content,
'format': format,
'mime_type': mime_type
}
document['metadata'] = {
'loadedAt': datetime.now(UTC).isoformat(),
'filepath': str(path),
'size': path.stat().st_size
}
return document
except Exception as e:
logger.error(f"Error loading document: {str(e)}")
raise
async def convert_document(self, document: Dict[str, Any], target_format: str) -> Dict[str, Any]:
    """
    Convert a document between the 'json' and 'text' formats.

    json -> text serializes the whole document into the new content.
    text -> json parses the content; content that is not valid JSON is kept
    as it is.

    Raises:
        ValueError: For any other format combination (logged and re-raised)
    """
    try:
        current_format = document.get('format', 'json')
        conversion = (current_format, target_format)

        if conversion == ('json', 'text'):
            return {
                'id': document['id'],
                'content': json.dumps(document, indent=2),
                'format': 'text',
                'mime_type': 'text/plain'
            }

        if conversion == ('text', 'json'):
            try:
                converted = json.loads(document['content'])
            except json.JSONDecodeError:
                # Not parseable: keep the original text as content
                converted = document['content']
            return {
                'id': document['id'],
                'content': converted,
                'format': 'json',
                'mime_type': 'application/json'
            }

        raise ValueError(f"Unsupported conversion: {current_format} to {target_format}")
    except Exception as e:
        logger.error(f"Error converting document: {str(e)}")
        raise
def _generate_document_id(self, document: Dict[str, Any]) -> str:
"""Generate unique document ID"""
if 'content' in document:
content = str(document['content'])
return hashlib.md5(content.encode()).hexdigest()
return f"doc_{int(datetime.now(UTC).timestamp())}"
async def cleanup(self) -> None:
    """Empty the temporary directory and the document cache; errors are logged, not raised."""
    try:
        # Same order as before: files first, then the in-memory cache
        for step in (self._clear_temp_directory, self.document_cache.clear):
            step()
    except Exception as e:
        logger.error(f"Error during cleanup: {str(e)}")
# Singleton factory for the document manager
def getDocumentManager():
    """Return the shared DocumentManager instance."""
    manager = DocumentManager.getInstance()
    return manager

View file

@ -0,0 +1,182 @@
from typing import Dict, Any, List, Optional
import logging
import json
from datetime import datetime, UTC
logger = logging.getLogger(__name__)
class AIPromptManager:
"""Manages AI prompts and response validation"""
def __init__(self):
self.prompt_templates = {}
self.response_schemas = {}
self._load_templates()
def _load_templates(self) -> None:
    """
    Load prompt templates and schemas.

    Replaces self.prompt_templates with the two built-in entries
    "task_analysis" and "result_analysis". Each entry holds a "template"
    (a str.format template; literal braces are written as {{ and }}) and a
    "schema" (a JSON-schema style dictionary describing the expected answer).
    """
    # Basic templates
    self.prompt_templates = {
        # Placeholders: {task}, {context}, {methods}
        "task_analysis": {
            "template": """Analyze the following task and determine required actions:
Task: {task}
Context: {context}
Available Methods: {methods}
Please provide:
1. Main objective
2. Required actions
3. Required data sources
4. Document processing requirements
5. Expected output format
Format your response as JSON:
{{
"objective": "string",
"actions": [
{{
"method": "string",
"action": "string",
"parameters": {{
"param1": "value1"
}}
}}
],
"dataSources": ["string"],
"documentRequirements": ["string"],
"outputFormat": "string"
}}
""",
            # Only "objective" and "actions" are mandatory in the answer
            "schema": {
                "type": "object",
                "required": ["objective", "actions"],
                "properties": {
                    "objective": {"type": "string"},
                    "actions": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "required": ["method", "action"],
                            "properties": {
                                "method": {"type": "string"},
                                "action": {"type": "string"},
                                "parameters": {"type": "object"}
                            }
                        }
                    },
                    "dataSources": {
                        "type": "array",
                        "items": {"type": "string"}
                    },
                    "documentRequirements": {
                        "type": "array",
                        "items": {"type": "string"}
                    },
                    "outputFormat": {"type": "string"}
                }
            }
        },
        # Placeholders: {results}, {history}
        "result_analysis": {
            "template": """Analyze the following task results and determine next steps:
Task Results: {results}
Workflow History: {history}
Please provide:
1. Task completion status
2. Next required actions
3. Required documents
4. Method recommendations
Format your response as JSON:
{{
"isComplete": boolean,
"nextActions": ["string"],
"requiredDocuments": ["string"],
"recommendedMethods": ["string"]
}}
""",
            # Only "isComplete" is mandatory in the answer
            "schema": {
                "type": "object",
                "required": ["isComplete"],
                "properties": {
                    "isComplete": {"type": "boolean"},
                    "nextActions": {
                        "type": "array",
                        "items": {"type": "string"}
                    },
                    "requiredDocuments": {
                        "type": "array",
                        "items": {"type": "string"}
                    },
                    "recommendedMethods": {
                        "type": "array",
                        "items": {"type": "string"}
                    }
                }
            }
        }
    }
def generate_prompt(self, context: Dict[str, Any], examples: List[Dict]) -> str:
    """
    Fill the template selected by context["type"] and append few-shot examples.

    Without a "type" the "task_analysis" template is used. Each example is
    rendered as "- <input> => <output>".

    Raises:
        ValueError: If no template exists for the requested type (logged and
            re-raised, like every other error)
    """
    try:
        prompt_type = context.get("type", "task_analysis")
        entry = self.prompt_templates.get(prompt_type)
        if not entry:
            raise ValueError(f"Unknown prompt type: {context.get('type')}")

        def as_json(key, fallback):
            return json.dumps(context.get(key, fallback), indent=2)

        # Every known placeholder is supplied; templates use the ones they need
        values = {
            "task": context.get("task", ""),
            "context": as_json("context", {}),
            "methods": as_json("methods", {}),
            "results": as_json("results", {}),
            "history": as_json("history", []),
        }
        parts = [entry["template"].format(**values)]

        if examples:
            parts.append("\nExamples:\n")
            parts.extend(f"- {ex['input']} => {ex['output']}\n" for ex in examples)
        return "".join(parts)
    except Exception as e:
        logger.error(f"Error generating prompt: {str(e)}")
        raise
def validate_response(self, response: str, schema: Dict) -> bool:
    """
    Check an AI response against a JSON schema.

    String responses are parsed as JSON first; anything else is validated
    as given.

    Returns:
        True if the response validates; False if it is not valid JSON or
        validation fails for any reason
    """
    try:
        candidate = response
        if isinstance(candidate, str):
            try:
                candidate = json.loads(candidate)
            except json.JSONDecodeError:
                return False

        # Imported here so the module loads without jsonschema installed
        import jsonschema
        jsonschema.validate(instance=candidate, schema=schema)
    except Exception as e:
        logger.error(f"Error validating response: {str(e)}")
        return False
    return True
def get_schema(self, prompt_type: str) -> Optional[Dict]:
    """Return the schema stored with the given prompt type, or None if the type is unknown."""
    entry = self.prompt_templates.get(prompt_type)
    if not entry:
        return None
    return entry.get("schema")
def add_template(self, name: str, template: str, schema: Dict) -> None:
    """Register a prompt template with its schema, replacing any entry of the same name."""
    entry = dict(template=template, schema=schema)
    self.prompt_templates[name] = entry
def remove_template(self, name: str) -> None:
    """Drop the named prompt template; unknown names are ignored."""
    templates = self.prompt_templates
    if name in templates:
        del templates[name]

View file

@ -0,0 +1,239 @@
from typing import Dict, Any, Optional, List
import logging
import json
import asyncio
from datetime import datetime, UTC
import uuid
from modules.workflow.managerChat import ChatManager
from modules.workflow.managerDocument import DocumentManager
from modules.interfaces.serviceChatModel import AgentTask, TaskStatus, ActionStatus
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
class WorkflowManager:
"""Workflow manager with improved task management and error recovery"""
def __init__(self):
    """Create the chat manager, attach the shared document manager and set up empty task bookkeeping."""
    self.chat_manager = ChatManager()
    # DocumentManager is a singleton whose constructor raises RuntimeError once
    # an instance exists, so it must be obtained through getInstance()
    self.document_manager = DocumentManager.getInstance()
    self.workflow = None
    self.context = {}
    # Tasks waiting to be handled by _process_task_queue
    self.task_queue = asyncio.Queue()
    self.active_tasks = {}
    self.task_history = []
async def initialize(self, workflow: Any, context: Dict[str, Any]) -> None:
    """
    Initialize workflow manager with workflow and context.

    Stores both, initializes the chat and document managers and starts the
    background coroutine that consumes the task queue.

    Args:
        workflow: Workflow object to operate on
        context: Context dictionary passed on to both managers
    """
    self.workflow = workflow
    self.context = context
    # Initialize managers
    await self.chat_manager.initialize(workflow, context)
    await self.document_manager.initialize(context)
    # Start task processor. The event loop only keeps a weak reference to
    # tasks, so hold a strong one to keep the processor from being
    # garbage-collected while it is still running.
    self._queue_processor = asyncio.create_task(self._process_task_queue())
async def process_workflow(self, user_input: Dict[str, Any]) -> Dict[str, Any]:
    """
    Turn user input into an initial task, queue it and wait for its outcome.

    Returns:
        On success: "status", "result" and "documents".
        Otherwise: "status" set to "error" plus "error" (and "feedback" when
        the task itself failed)
    """
    try:
        task = await self.chat_manager.create_initial_task(user_input)
        await self.task_queue.put(task)

        # Poll until the queue processor has finished the task either way
        while not (task.is_complete() or task.has_failed()):
            await asyncio.sleep(0.1)

        if task.status != TaskStatus.SUCCESS:
            return {
                "status": "error",
                "error": task.error,
                "feedback": task.thisTaskFeedback
            }
        return {
            "status": "success",
            "result": task.result,
            "documents": task.documentsOutput
        }
    except Exception as e:
        logger.error(f"Error processing workflow: {str(e)}")
        return {
            "status": "error",
            "error": str(e)
        }
async def _process_task_queue(self) -> None:
"""Process tasks in queue"""
while True:
try:
# Get task from queue
task = await self.task_queue.get()
# Process task
result = await self.chat_manager.process_task(task)
# Update task status
if result["status"] == "success":
task.status = TaskStatus.SUCCESS
task.result = result.get("result")
task.documentsOutput = result.get("documents", [])
else:
task.status = TaskStatus.FAILED
task.error = result.get("error")
# Add to history
self.task_history.append({
"id": task.id,
"status": task.status,
"startedAt": task.startedAt,
"finishedAt": datetime.now(UTC).isoformat(),
"error": task.error
})
# Check for next task
if not task.is_complete():
next_task = await self._define_next_task(task)
if next_task:
await self.task_queue.put(next_task)
# Mark task as done
self.task_queue.task_done()
except Exception as e:
logger.error(f"Error processing task queue: {str(e)}")
await asyncio.sleep(1) # Prevent tight loop on error
async def _define_next_task(self, current_task: AgentTask) -> Optional[AgentTask]:
"""Define next task based on current task results"""
try:
# Analyze current task
analysis = await self.chat_manager._analyze_task_results(current_task)
# Check if next task needed
if not analysis.get("isComplete", True):
# Create next task
next_task = await self.chat_manager.create_next_task(
current_task,
analysis.get("nextActions", []),
analysis.get("requiredDocuments", [])
)
# Add dependencies
next_task.dependencies = [current_task.id]
return next_task
return None
except Exception as e:
logger.error(f"Error defining next task: {str(e)}")
return None
async def handle_error(self, task: AgentTask, error: str) -> None:
"""Handle task error with recovery strategies"""
try:
# Log error
logger.error(f"Task {task.id} failed: {error}")
# Update task status
task.status = TaskStatus.FAILED
task.error = error
# Check for retryable errors
if self._is_retryable_error(error):
if task.retryCount < task.retryMax:
# Retry task
task.retryCount += 1
task.status = TaskStatus.RETRY
await self.task_queue.put(task)
return
# Check for rollback needed
if task.rollback_on_failure:
await self._rollback_task(task)
# Notify workflow
self.workflow.status = "error"
self.workflow.error = error
except Exception as e:
logger.error(f"Error handling task error: {str(e)}")
async def _rollback_task(self, task: AgentTask) -> None:
"""Rollback task actions"""
try:
for action in task.actionList:
if action.status == ActionStatus.SUCCESS:
# Get method
method = self.chat_manager.service.methods.get(action.method)
if method:
# Rollback action
await method.rollback(
action.action,
action.parameters,
task.get_auth_data(action.auth_source)
)
except Exception as e:
logger.error(f"Error rolling back task: {str(e)}")
def _is_retryable_error(self, error: str) -> bool:
"""Check if error is retryable"""
retryable_errors = [
"timeout",
"rate limit",
"temporary",
"connection",
"server error"
]
return any(err in error.lower() for err in retryable_errors)
async def cleanup(self) -> None:
"""Clean up workflow resources"""
try:
# Clean up managers
await self.chat_manager.cleanup()
await self.document_manager.cleanup()
# Clear task queue
while not self.task_queue.empty():
self.task_queue.get_nowait()
self.task_queue.task_done()
# Clear active tasks
self.active_tasks.clear()
except Exception as e:
logger.error(f"Error during cleanup: {str(e)}")
async def get_workflow_status(self, workflow_id: str) -> Dict[str, Any]:
"""Get current status of workflow"""
current_task = self.chat_manager.service.tasks.get('current')
previous_task = self.chat_manager.service.tasks.get('previous')
return {
'workflowId': workflow_id,
'currentTask': current_task.dict() if current_task else None,
'previousTask': previous_task.dict() if previous_task else None,
'status': self.chat_manager.workflow.status if self.chat_manager.workflow else None
}
async def stop_workflow(self, workflow_id: str) -> None:
"""Stop workflow execution"""
if self.chat_manager.workflow and self.chat_manager.workflow.id == workflow_id:
self.chat_manager.workflow.status = TaskStatus.STOPPED
self.chat_manager.workflow.updatedAt = datetime.now(UTC)
# Stop current task if any
current_task = self.chat_manager.service.tasks.get('current')
if current_task:
current_task.status = TaskStatus.STOPPED
current_task.updatedAt = datetime.now(UTC)

View file

@ -0,0 +1,604 @@
from typing import Dict, Any, List, Optional, Union, Tuple, TypedDict
import logging
import json
import os
import io
import base64
from datetime import datetime, UTC
from pathlib import Path
import mimetypes
import hashlib
import shutil
import re
import uuid
from modules.interfaces.serviceChatModel import (
DocumentContext,
DocumentExtraction,
DocumentMetadata,
DocumentContent,
ProcessedDocument,
ImageData
)
logger = logging.getLogger(__name__)
# Optional imports - only loaded when needed.
# Each flag starts False and is set to True by the matching
# DocumentProcessor._load_* method once its imports succeed.
pdfExtractorLoaded = False  # PyPDF2 and fitz
officeExtractorLoaded = False  # docx and openpyxl
imageProcessorLoaded = False  # PIL.Image
class FileProcessingError(Exception):
    """Raised when a file cannot be processed."""
class DocumentProcessor:
"""Processes documents with context awareness"""
def __init__(self):
self.supported_types = {
"text/plain": self._process_text,
"text/csv": self._process_csv,
"application/json": self._process_json,
"text/html": self._process_html,
"image/svg+xml": self._process_svg,
"application/pdf": self._process_pdf,
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": self._process_docx,
"application/msword": self._process_docx,
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": self._process_xlsx,
"application/vnd.ms-excel": self._process_xlsx,
"application/vnd.openxmlformats-officedocument.presentationml.presentation": self._process_pptx,
"application/vnd.ms-powerpoint": self._process_pptx
}
# Add image types
for img_type in ["image/jpeg", "image/png", "image/gif", "image/bmp", "image/tiff"]:
self.supported_types[img_type] = self._process_image
def _load_pdf_extractor(self):
"""Loads PDF extraction libraries when needed"""
global pdfExtractorLoaded
if not pdfExtractorLoaded:
try:
global PyPDF2, fitz
import PyPDF2
import fitz # PyMuPDF for more extensive PDF processing
pdfExtractorLoaded = True
logger.info("PDF extraction libraries successfully loaded")
except ImportError as e:
logger.warning(f"PDF extraction libraries could not be loaded: {e}")
def _load_office_extractor(self):
"""Loads Office document extraction libraries when needed"""
global officeExtractorLoaded
if not officeExtractorLoaded:
try:
global docx, openpyxl
import docx # python-docx for Word documents
import openpyxl # for Excel files
officeExtractorLoaded = True
logger.info("Office extraction libraries successfully loaded")
except ImportError as e:
logger.warning(f"Office extraction libraries could not be loaded: {e}")
def _load_image_processor(self):
"""Loads image processing libraries when needed"""
global imageProcessorLoaded
if not imageProcessorLoaded:
try:
global PIL, Image
from PIL import Image
imageProcessorLoaded = True
logger.info("Image processing libraries successfully loaded")
except ImportError as e:
logger.warning(f"Image processing libraries could not be loaded: {e}")
def process_with_context(self, doc: Dict[str, Any], context: DocumentContext) -> ProcessedDocument:
"""Process document with context"""
try:
# Get content type
content_type = doc.get("contentType", "text/plain")
if content_type == "application/octet-stream":
# Try to detect actual file type
content_type = self._detect_content_type(doc)
if content_type not in self.supported_types:
# Fallback to binary processing
return self._process_binary(doc, context)
# Process document
processor = self.supported_types[content_type]
extracted = processor(doc, context)
# Track extraction
self._track_extraction(doc, extracted, context)
# Create ProcessedDocument
return ProcessedDocument(
id=doc.get("id", str(uuid.uuid4())),
name=doc.get("name", "Unknown"),
contentType=content_type,
content=extracted,
context=context
)
except Exception as e:
logger.error(f"Error processing document: {str(e)}")
# Fallback to binary processing
return self._process_binary(doc, context)
def _detect_content_type(self, doc: Dict[str, Any]) -> str:
"""Detect content type from file content"""
try:
# Check file extension first
file_name = doc.get("name", "")
ext = os.path.splitext(file_name)[1].lower()
if ext:
# Map common extensions to MIME types
ext_to_mime = {
'.txt': 'text/plain',
'.md': 'text/markdown',
'.csv': 'text/csv',
'.json': 'application/json',
'.xml': 'application/xml',
'.js': 'application/javascript',
'.py': 'application/x-python',
'.svg': 'image/svg+xml',
'.jpg': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.png': 'image/png',
'.gif': 'image/gif',
'.pdf': 'application/pdf',
'.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'.doc': 'application/msword',
'.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'.xls': 'application/vnd.ms-excel',
'.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'.ppt': 'application/vnd.ms-powerpoint'
}
if ext in ext_to_mime:
return ext_to_mime[ext]
# Try to detect if it's text content
content = doc.get("content", "")
if isinstance(content, bytes):
try:
content.decode('utf-8')
return 'text/plain'
except UnicodeDecodeError:
pass
return 'application/octet-stream'
except Exception as e:
logger.error(f"Error detecting content type: {str(e)}")
return 'application/octet-stream'
def _process_text(self, doc: Dict[str, Any], context: DocumentContext) -> DocumentContent:
"""Process text document"""
content = doc.get("content", "")
if isinstance(content, bytes):
try:
content = content.decode('utf-8')
except UnicodeDecodeError:
# Try alternative encodings
for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
try:
content = content.decode(encoding)
break
except UnicodeDecodeError:
continue
sections = self._extract_sections(content)
return DocumentContent(
text=content,
metadata=DocumentMetadata(
type="text",
format="text",
size=len(content.encode('utf-8')),
sections=sections
)
)
def _process_csv(self, doc: Dict[str, Any], context: DocumentContext) -> DocumentContent:
"""Process CSV document"""
content = doc.get("content", "")
if isinstance(content, bytes):
content = content.decode('utf-8')
return DocumentContent(
text=content,
metadata=DocumentMetadata(
type="csv",
format="csv",
size=len(content.encode('utf-8')),
sections=[f"Row {i+1}" for i in range(len(content.splitlines()))]
)
)
def _process_json(self, doc: Dict[str, Any], context: DocumentContext) -> DocumentContent:
"""Process JSON document"""
content = doc.get("content", {})
if isinstance(content, str):
content = json.loads(content)
elif isinstance(content, bytes):
content = json.loads(content.decode('utf-8'))
structure = self._analyze_structure(content)
return DocumentContent(
data=content,
metadata=DocumentMetadata(
type="json",
format="json",
size=len(json.dumps(content).encode('utf-8')),
sections=list(content.keys()) if isinstance(content, dict) else []
)
)
def _process_html(self, doc: Dict[str, Any], context: DocumentContext) -> DocumentContent:
"""Process HTML document"""
content = doc.get("content", "")
if isinstance(content, bytes):
content = content.decode('utf-8')
return DocumentContent(
text=content,
metadata=DocumentMetadata(
type="html",
format="html",
size=len(content.encode('utf-8')),
sections=[
self._extract_title(content) or "Untitled",
*self._extract_links(content),
*self._extract_images(content)
]
)
)
def _process_svg(self, doc: Dict[str, Any], context: DocumentContext) -> DocumentContent:
"""Process SVG document"""
content = doc.get("content", "")
if isinstance(content, bytes):
content = content.decode('utf-8')
# Check if it's actually SVG
is_svg = "<svg" in content.lower()
return DocumentContent(
text=content if is_svg else None,
metadata=DocumentMetadata(
type="svg",
format="svg",
size=len(content.encode('utf-8')),
error=None if is_svg else "Invalid SVG content"
)
)
def _process_image(self, doc: Dict[str, Any], context: DocumentContext) -> DocumentContent:
"""Process image document"""
content = doc.get("content", b"")
if not isinstance(content, bytes):
try:
content = content.encode('utf-8')
except Exception as e:
logger.error(f"Error encoding image content: {str(e)}")
return DocumentContent(
metadata=DocumentMetadata(
type="image",
format="unknown",
size=0,
error=f"Invalid image content: {str(e)}"
)
)
metadata = DocumentMetadata(
type="image",
format=doc.get("contentType", "").split("/")[-1],
size=len(content)
)
try:
self._load_image_processor()
if imageProcessorLoaded:
with io.BytesIO(content) as img_stream:
img = Image.open(img_stream)
img.verify()
img_stream.seek(0)
img = Image.open(img_stream)
metadata.pages = 1
if hasattr(img, '_getexif') and callable(img._getexif):
exif = img._getexif()
if exif:
metadata.sections = [f"EXIF_{tag_id}" for tag_id in exif.keys()]
except Exception as e:
logger.warning(f"Error processing image: {str(e)}")
metadata.error = str(e)
try:
image_data = ImageData(
data=base64.b64encode(content).decode('utf-8'),
format=metadata.format,
page=None,
index=None
)
except Exception as e:
logger.error(f"Error creating image data: {str(e)}")
return DocumentContent(
metadata=DocumentMetadata(
type="image",
format=metadata.format,
size=len(content),
error=f"Error creating image data: {str(e)}"
)
)
return DocumentContent(
images=[image_data],
metadata=metadata
)
def _process_pdf(self, doc: Dict[str, Any], context: DocumentContext) -> DocumentContent:
"""Process PDF document"""
content = doc.get("content", b"")
if not isinstance(content, bytes):
content = content.encode('utf-8')
metadata = DocumentMetadata(
type="pdf",
format="pdf",
size=len(content)
)
text_content = ""
images: List[ImageData] = []
try:
self._load_pdf_extractor()
if pdfExtractorLoaded:
with io.BytesIO(content) as pdf_stream:
# Extract text with PyPDF2
pdf_reader = PyPDF2.PdfReader(pdf_stream)
metadata.pages = len(pdf_reader.pages)
# Extract text from all pages
for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num]
page_text = page.extract_text()
if page_text:
text_content += f"--- Page {page_num + 1} ---\n{page_text}\n\n"
# Extract images with PyMuPDF
pdf_stream.seek(0)
doc = fitz.open(stream=pdf_stream, filetype="pdf")
for page_num in range(len(doc)):
page = doc[page_num]
for img_index, img_info in enumerate(page.get_images(full=True)):
try:
xref = img_info[0]
base_image = doc.extract_image(xref)
if base_image:
image_bytes = base_image.get("image", b"")
image_ext = base_image.get("ext", "png")
if image_bytes:
image_data = ImageData(
data=base64.b64encode(image_bytes).decode('utf-8'),
format=image_ext,
page=page_num + 1,
index=img_index
)
images.append(image_data)
except Exception as img_e:
logger.warning(f"Error extracting image {img_index} on page {page_num + 1}: {str(img_e)}")
doc.close()
except Exception as e:
logger.error(f"Error processing PDF: {str(e)}")
metadata.error = str(e)
return DocumentContent(
text=text_content,
images=images,
metadata=metadata
)
def _process_docx(self, doc: Dict[str, Any], context: DocumentContext) -> DocumentContent:
"""Process Word document"""
content = doc.get("content", b"")
if not isinstance(content, bytes):
content = content.encode('utf-8')
metadata = DocumentMetadata(
type="docx",
format="docx",
size=len(content)
)
text_content = ""
try:
self._load_office_extractor()
if officeExtractorLoaded:
with io.BytesIO(content) as docx_stream:
doc = docx.Document(docx_stream)
# Extract text
full_text = []
for para in doc.paragraphs:
full_text.append(para.text)
# Extract tables
for table in doc.tables:
for row in table.rows:
row_text = []
for cell in row.cells:
row_text.append(cell.text)
full_text.append(" | ".join(row_text))
text_content = "\n\n".join(full_text)
metadata.pages = len(doc.paragraphs)
metadata.sections = [f"Paragraph {i+1}" for i in range(len(doc.paragraphs))]
except Exception as e:
logger.error(f"Error processing Word document: {str(e)}")
metadata.error = str(e)
return DocumentContent(
text=text_content,
metadata=metadata
)
def _process_xlsx(self, doc: Dict[str, Any], context: DocumentContext) -> DocumentContent:
"""Process Excel document"""
content = doc.get("content", b"")
if not isinstance(content, bytes):
content = content.encode('utf-8')
metadata = DocumentMetadata(
type="xlsx",
format="xlsx",
size=len(content)
)
sheets_data = []
try:
self._load_office_extractor()
if officeExtractorLoaded:
with io.BytesIO(content) as xlsx_stream:
workbook = openpyxl.load_workbook(xlsx_stream, data_only=True)
metadata.pages = len(workbook.sheetnames)
for sheet_name in workbook.sheetnames:
sheet = workbook[sheet_name]
csv_rows = []
for row in sheet.iter_rows():
csv_row = []
for cell in row:
value = cell.value
if value is None:
csv_row.append("")
else:
csv_row.append(str(value).replace('"', '""'))
csv_rows.append(','.join(f'"{cell}"' for cell in csv_row))
sheets_data.append({
"name": sheet_name,
"data": "\n".join(csv_rows)
})
metadata.sections = workbook.sheetnames
except Exception as e:
logger.error(f"Error processing Excel document: {str(e)}")
metadata.error = str(e)
return DocumentContent(
data={"sheets": sheets_data},
metadata=metadata
)
def _process_pptx(self, doc: Dict[str, Any], context: DocumentContext) -> DocumentContent:
"""Process PowerPoint document"""
return DocumentContent(
metadata=DocumentMetadata(
type="pptx",
format="pptx",
size=len(doc.get("content", b"")),
error="PowerPoint processing not implemented"
)
)
def _process_binary(self, doc: Dict[str, Any], context: DocumentContext) -> ProcessedDocument:
"""Process binary document"""
content = doc.get("content", b"")
if not isinstance(content, bytes):
content = content.encode('utf-8')
return ProcessedDocument(
id=doc.get("id", str(uuid.uuid4())),
name=doc.get("name", "Unknown"),
contentType="application/octet-stream",
content=DocumentContent(
data={"binary": base64.b64encode(content).decode('utf-8')},
metadata=DocumentMetadata(
type="binary",
format="binary",
size=len(content)
)
),
context=context
)
def _extract_sections(self, content: str) -> List[str]:
"""Extract sections from text content"""
sections = []
current_section = []
for line in content.split("\n"):
if line.startswith("#"):
if current_section:
sections.append("".join(current_section))
current_section = []
current_section.append(line)
if current_section:
sections.append("".join(current_section))
return sections
def _analyze_structure(self, content: Any) -> Dict[str, Any]:
"""Analyze structure of JSON content"""
if isinstance(content, dict):
return {
"type": "object",
"properties": {
k: self._analyze_structure(v)
for k, v in content.items()
}
}
elif isinstance(content, list):
return {
"type": "array",
"items": self._analyze_structure(content[0]) if content else {}
}
else:
return {
"type": type(content).__name__
}
def _extract_title(self, html: str) -> Optional[str]:
"""Extract title from HTML"""
match = re.search(r"<title>(.*?)</title>", html, re.IGNORECASE)
return match.group(1) if match else None
def _extract_links(self, html: str) -> List[str]:
"""Extract links from HTML"""
return re.findall(r'href=[\'"]?([^\'" >]+)', html)
def _extract_images(self, html: str) -> List[str]:
"""Extract images from HTML"""
return re.findall(r'src=[\'"]?([^\'" >]+)', html)
def _track_extraction(self, doc: Dict[str, Any], extracted: DocumentContent, context: DocumentContext) -> None:
"""Track document extraction"""
extraction = DocumentExtraction(
timestamp=datetime.now(UTC).isoformat(),
type=doc.get("contentType", "unknown"),
sections=extracted.metadata.sections or [],
metadata=extracted.metadata.dict()
)
context.extractionHistory.append(extraction)
def get_supported_types(self) -> List[str]:
"""Get list of supported content types"""
return list(self.supported_types.keys())
def add_processor(self, content_type: str, processor: callable) -> None:
"""Add new document processor"""
self.supported_types[content_type] = processor
def remove_processor(self, content_type: str) -> None:
"""Remove document processor"""
self.supported_types.pop(content_type, None)

View file

@ -0,0 +1,129 @@
import logging
from typing import Dict, Any, Optional
from datetime import datetime, UTC
import asyncio
from modules.methods import MethodBase, MethodResult
from modules.interfaces.serviceChatModel import AgentTask, AgentAction, AgentResult, Action, TaskStatus, ActionStatus
logger = logging.getLogger(__name__)
class ServiceContainer:
"""Service container with improved state management"""
def __init__(self):
self.state = {
'status': TaskStatus.PENDING,
'retryCount': 0,
'retryMax': 3,
'timeout': 300, # 5 minutes
'lastError': None,
'lastErrorTime': None
}
self.methods: Dict[str, MethodBase] = {}
self.tasks: Dict[str, Any] = {} # Will be populated with AgentTask instances
def register_method(self, method: MethodBase) -> None:
"""Register a method in the container"""
self.methods[method.name] = method
logger.info(f"Registered method: {method.name}")
async def execute_task(self, task: Any) -> None: # task: AgentTask
"""Execute task with improved error handling and timeout"""
try:
# Check for timeout
if (datetime.now(UTC) - datetime.fromisoformat(task.startedAt)).seconds > self.state['timeout']:
task.status = TaskStatus.TIMEOUT
return
# Execute actions
for action in task.actionList:
if not task.can_execute_action(action):
if not task.get_auth_data(action.auth_source):
action.status = ActionStatus.FAILED
task.error = f"Missing authentication for {action.auth_source}"
else:
action.status = ActionStatus.DEPENDENCY_FAILED
continue
try:
# Get method
method = self.methods.get(action.method)
if not method:
raise ValueError(f"Unknown method: {action.method}")
# Validate parameters
if not await method.validate_parameters(action.action, action.parameters):
raise ValueError(f"Invalid parameters for {action.method}:{action.action}")
# Get auth data if needed
auth_data = None
if action.auth_source and action.auth_source != "local":
auth_data = task.get_auth_data(action.auth_source)
if not auth_data:
raise ValueError(f"Missing authentication data for {action.auth_source}")
# Execute with timeout
result = await asyncio.wait_for(
method.execute(action.action, action.parameters, auth_data),
timeout=action.timeout or 60
)
if result.success:
action.status = ActionStatus.SUCCESS
else:
if self._should_retry(result.data.get('error')):
action.retryCount += 1
if action.retryCount > action.retryMax:
action.status = ActionStatus.FAILED
if action.rollback_on_failure:
await method.rollback(action.action, action.parameters, auth_data)
else:
action.status = ActionStatus.RETRY
else:
action.status = ActionStatus.FAILED
if action.rollback_on_failure:
await method.rollback(action.action, action.parameters, auth_data)
except asyncio.TimeoutError:
action.status = ActionStatus.TIMEOUT
except Exception as e:
action.status = ActionStatus.FAILED
if action.rollback_on_failure:
await method.rollback(action.action, action.parameters, auth_data)
# Update task status
if task.has_failed():
task.status = TaskStatus.FAILED
elif task.is_complete():
task.status = TaskStatus.SUCCESS
task.finishedAt = datetime.now(UTC).isoformat()
except Exception as e:
task.status = TaskStatus.FAILED
task.error = str(e)
def _should_retry(self, error: str) -> bool:
"""Determine if error is retryable"""
retryable_errors = [
"AI down",
"Document not found",
"Content extraction failed",
"Network error",
"Temporary failure"
]
return any(err in error for err in retryable_errors)
def get_method(self, name: str) -> Optional[MethodBase]:
"""Get a method by name"""
return self.methods.get(name)
def get_available_methods(self) -> Dict[str, Dict[str, Any]]:
"""Get catalog of available methods and their actions"""
return {
name: {
"description": method.description,
"actions": method.actions
}
for name, method in self.methods.items()
}

View file

@ -1,156 +0,0 @@
"""
Workflow Manager Module for state machine-based backend chat workflow.
Implements the state machine as defined in the documentation.
"""
import logging
from datetime import datetime, UTC, timedelta
import time
from modules.workflow.chatManager import getChatManager
from modules.interfaces.serviceChatModel import ( UserInputRequest, ChatWorkflow )
# Configure logger
logger = logging.getLogger(__name__)
class WorkflowStoppedException(Exception):
    """Signals that checkExitCriteria() found the workflow stopped or failed."""
class WorkflowManager:
"""Manages the execution of workflows and their associated agents."""
def __init__(self):
"""Initialize the workflow manager."""
self.chatManager = getChatManager()
def initialize(self, workflow: ChatWorkflow):
"""
Initialize the workflow manager with a workflow object.
Args:
workflow: ChatWorkflow object to initialize with
"""
self.chatManager.initialize(workflow)
return True
# Main function to start workflow process
async def workflowProcess(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> ChatWorkflow:
"""
Main processing function that implements the workflow state machine.
Handles the complete workflow process from user input to final response.
Args:
userInput: User input with prompt and optional file list
workflow: Current ChatWorkflow object
Returns:
Updated ChatWorkflow object with processing results
"""
startTime = time.time()
try:
# Initialize workflow
self.initialize(workflow)
# Check if workflow should exit
self.checkExitCriteria(workflow)
# Create initial handover
handover = self.chatManager.createInitialHandover(userInput)
# Process agents until completion or failure
while True:
self.checkExitCriteria(workflow)
# Define next handover
nextHandover = await self.chatManager.defineNextHandover(handover)
if not nextHandover:
break
# Process next agent
handover = await self.chatManager.processNextAgent(nextHandover)
# Check if we should continue
if handover.status in ["failed", "retry"]:
break
# Send final message
finalMessage = await self.chatManager.sendFinalMessage(handover)
workflow.messages.append(finalMessage)
# Update workflow stats
endTime = time.time()
workflow.stats.processingTime = endTime - startTime
# Update workflow status
workflow.status = "completed"
workflow.lastActivity = datetime.now(UTC).isoformat()
return workflow
except WorkflowStoppedException:
# Handle workflow stop
workflow.status = "stopped"
workflow.lastActivity = datetime.now(UTC).isoformat()
return workflow
except Exception as e:
# Handle workflow failure
logger.error(f"Workflow processing error: {str(e)}", exc_info=True)
workflow.status = "failed"
workflow.lastActivity = datetime.now(UTC).isoformat()
# Update processing time even on error
endTime = time.time()
workflow.stats.processingTime = endTime - startTime
logger.error(f"Workflow failed: {str(e)}", level="error", progress=100)
return workflow
# Workflow state machine functions
def checkExitCriteria(self, workflow: ChatWorkflow) -> None:
"""
Check if the workflow should exit based on the current state.
Raises WorkflowStoppedException if workflow should stop.
Args:
workflow: ChatWorkflow object to check
"""
if workflow.status in ["stopped", "failed"]:
logger.info(f"Workflow processing terminated due to status: {workflow.status}")
# Raise an exception to stop execution
raise WorkflowStoppedException(f"Workflow execution stopped due to status: {workflow.status}")
def workflowFinish(self, workflow: ChatWorkflow) -> ChatWorkflow:
"""
Finalizes a workflow and sets the status to 'completed'.
Args:
workflow: ChatWorkflow object
Returns:
Updated ChatWorkflow object
"""
# Prepare workflow update data
workflowUpdate = {
"status": "completed",
"lastActivity": datetime.now().isoformat(),
}
# Update the workflow object in memory
workflow.status = workflowUpdate["status"]
workflow.lastActivity = workflowUpdate["lastActivity"]
# Save workflow state to database
self.chatManager.updateWorkflow(workflow.id, workflowUpdate)
logger.info(f"Workflow completed successfully", level="info", progress=100)
return workflow
async def getWorkflowManager() -> WorkflowManager:
    """Return a freshly constructed workflow manager; no instance is cached."""
    return WorkflowManager()

File diff suppressed because it is too large Load diff

129
notes/arch_modelbased.html Normal file
View file

@ -0,0 +1,129 @@
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Management Summary: Methoden-basierte Chat-Architektur</title>
<style>
body {
font-family: Arial, sans-serif;
line-height: 1.6;
max-width: 800px;
margin: 0 auto;
padding: 20px;
color: #333;
}
h1 {
color: #2c3e50;
border-bottom: 2px solid #3498db;
padding-bottom: 10px;
}
h2 {
color: #2c3e50;
margin-top: 30px;
}
.example {
background-color: #f8f9fa;
border-left: 4px solid #3498db;
padding: 15px;
margin: 20px 0;
}
.old-arch, .new-arch {
margin: 15px 0;
padding: 15px;
border-radius: 5px;
}
.old-arch {
background-color: #fff3cd;
border: 1px solid #ffeeba;
}
.new-arch {
background-color: #d4edda;
border: 1px solid #c3e6cb;
}
.benefits {
background-color: #e8f4f8;
padding: 15px;
border-radius: 5px;
margin: 20px 0;
}
.benefits ul {
margin: 10px 0;
padding-left: 20px;
}
.benefits li {
margin: 5px 0;
}
</style>
</head>
<body>
<h1>Management Summary: Methoden-basierte Chat-Architektur</h1>
<p>Die Umstellung von einer Agenten-basierten auf eine Methoden-basierte Chat-Architektur stellt einen fundamentalen Paradigmenwechsel dar. Während die Mehrheit der KI-Chat-Systeme weiterhin auf Agenten-Architekturen setzt, ermöglicht unser methoden-basierter Ansatz eine präzisere Kontrolle und effizientere Integration.</p>
<p>Der methoden-basierte Ansatz definiert klare, selbstbeschreibende Operationen mit festgelegten Parametern und Ergebnissen. Im Gegensatz zu Agenten, die als Blackbox-Operationen fungieren, bieten Methoden eine transparente, validierbare und vorhersehbare Ausführung. Diese Struktur ermöglicht eine präzise Fehlerbehandlung und Retry-Logik auf Aktions-Ebene, anstatt auf Agenten-Ebene.</p>
<p>Die Integration mit Benutzerdaten erfolgt direkt über definierte Authentifizierungspfade, was die Sicherheit erhöht und die Komplexität reduziert. Jede Methode ist selbstbeschreibend und enthält ihre eigenen Validierungsregeln, was die Wartbarkeit verbessert und die Entwicklung neuer Funktionen beschleunigt.</p>
<p>Der methoden-basierte Ansatz reduziert die KI-Abhängigkeit bei der Ausführung von Operationen, während die KI weiterhin für die Planung und Koordination der Methoden eingesetzt wird. Diese Trennung von Planung und Ausführung führt zu zuverlässigeren Ergebnissen und besserer Nachvollziehbarkeit.</p>
<p>Die Architektur ermöglicht eine präzise Dokumentation und Validierung jeder Operation, was in einer regulierten Umgebung von besonderem Wert ist. Die klare Struktur erleichtert die Integration neuer Dienste und die Erweiterung bestehender Funktionalitäten.</p>
<h2>Praktisches Beispiel: Dokumentenverarbeitung und E-Mail-Versand</h2>
<div class="example">
<div class="old-arch">
<strong>Alte Agenten-basierte Architektur:</strong><br>
<pre>
Benutzer: "Suche nach Verträgen im SharePoint und sende mir eine Zusammenfassung per E-Mail"
Agent SharePoint:
- Sucht nach Verträgen
- Extrahiert Inhalte
- Speichert Ergebnisse
Agent Outlook:
- Liest Ergebnisse
- Erstellt E-Mail
- Sendet E-Mail</pre>
</div>
<div class="new-arch">
<strong>Neue Methoden-basierte Architektur:</strong><br>
<pre>
Benutzer: "Suche nach Verträgen im SharePoint und sende mir eine Zusammenfassung per E-Mail"
Methoden-Katalog:
1. SharePoint.searchDocuments
- Parameter: {query: "Verträge", site: "valueon"}
- Retry: 3x bei Netzwerkfehler
- Auth: MSFT
2. Document.extractContent
- Parameter: {documents: [...], sections: ["Zusammenfassung"]}
- Retry: 2x bei Extraktionsfehler
- Auth: LOCAL
3. Outlook.sendMail
- Parameter: {to: ["user@example.com"], subject: "Vertragszusammenfassung"}
- Retry: 1x bei SMTP-Fehler
- Auth: MSFT</pre>
</div>
<div class="benefits">
<strong>Vorteile im Beispiel:</strong>
<ul>
<li>Jede Operation ist klar definiert und validierbar</li>
<li>Retry-Logik ist spezifisch für jede Operation</li>
<li>Authentifizierung ist explizit definiert</li>
<li>Fehler können präzise zugeordnet werden</li>
<li>Operationen können unabhängig voneinander getestet werden</li>
<li>Neue Operationen können einfach hinzugefügt werden</li>
</ul>
</div>
</div>
<p>Die KI plant die Ausführung dieser Methoden, aber die eigentliche Ausführung erfolgt durch die definierten Methoden mit klaren Parametern und Ergebnissen. Dies führt zu einer zuverlässigeren und besser nachvollziehbaren Ausführung.</p>
</body>
</html>

View file

@ -1,14 +1,124 @@
....................... TASKS
Agents and Manager:
- To adapt prompts to match document handling, done by agents
- agents to use service object and to work stepwise:
1. to extract document content with prompts
2. to run ai prompt with integrated content-data in the prompt, including document reference (name, id)
3. to analyse success and to give back instruction to task manager
4. task manager to add a task based on agents result and feedback
- document extraction to have error handling for big documents. if document too large, then to get content in pieces - depending on document type
We need to adapt the agent's orchestration. At its center is the handover mechanism, which decides, based on the previous result and the history of results, what next step is needed to complete the user prompt. The mechanism shall ensure a stepwise procedure using the needed tools from self.service and shall use the existing model classes in @serviceChatModel.py
1. to remove object AgentHandover and to use AgentTask instead (to adapt in workflowManager and chatManager).
2. ChatMessage model to change:
- "success" attribute to be boolean
- adapt references to the object in the codebase
3. Orchestration logic:
- workflowManager.workflowProcess to keep, but to be adapted using AgentTask parameter "agentTask" instead of "handover"
- createInitialHandover --> rename to createInitialTask
- defineNextHandover --> rename to createNextTask
4. chatManager to adapt:
- to work with self.service object only
- functions "createInitialTask" and "createNextTask" only to be different in handling the result of the last task, but the preparation of the next task to be the same routine "defineNextTask" to deliver AgentTask object.
- All agentTask objects to store in self.service as self.service.tasks.history[]
- self.service.tasks.next: to be the next task reference --> initially None
- self.service.tasks.previous: to be the previous task reference --> createInitialTask() to set it to None; createNextTask to set self.service.tasks.previous = self.service.tasks.next
- function "defineNextTask" to:
- update self.service.state, error handling, update counters and stats, check if limits are reached
- analyse result with AI call and produce:
- message object to give feedback to the user in his language
- decision whether user input completed, or to retry with different approach, or to do next task step towards user input to complete --> to put into feedback from previous task
- create a new instance of AgentTask, to add it to self.service.tasks.history, to set reference in self.service.tasks.next
- HELP: HERE TOO COMPLEX: HOW TO DO IT using AI to have a generic approach to read document contents with dedicated prompts, then to handle any user request. E.g. "Search all sharepoint documents from valueon account and extract parts containing customer data into a summary excel file" or "find websites for product 'shampoo' and generate a marketing flyer for our product to show USP"
- to process actionMessages of tasks.next --> results to integrate in tasks.next object
- error handling and return agentTask object
6. Adapt self.service object:
HELP: How to structure and organize this object to be used in the context?
- self.service.state:
- roundsMax
- roundsCount
- self.service.tasks part to add and to use in the code
- self.service.context to add:
- userInput: UserInputMessage
- dataConnections: list of UserConnection
- methodList: list of MethodObject
7. Adapt AgentTask object:
- userInput: summary for ai prompt what finally to deliver to the user based on UserInputMessage
- dataList: list of user connections for AI prompt (in the format "authority":"externalUsername" from user's connections object UserConnection)
- methodList: list of methods for AI prompt
- chatHistory: summary from the chat messages in the workflow before the user input (message summary with file list per message, as existing workflow could be continued)
- taskHistory: summary of the messages with file list per message after user input message id (there could also be user inputs before this message in former chat rounds)
- previousTaskFeedback
- thisTaskFeedback
- status: One of pending, success, failed, retry
5. what to ensure for "promptTaskGeneration"
- to have clear ai prompt for the task to do and for the result format to deliver. the result shall
- feedback from previous task to include
- instruction on how to use methodList catalog
- HELP: How to do the prompt?
***************************
TO include...
- Dict: agents objects accessible by "name"
Core:
logAdd: Logging functionality
workflow: Direct reference to workflow object
user: User information containing:
id: User ID
name: User name
language: User's preferred language (defaults to 'en')
Function Components:
functions: Dictionary containing utility functions:
forEach: Lambda function for iterating over items
while: Lambda function for while loop operations
getFile: Function to get file information
Model Components:
model: Dictionary containing AI model operations:
callAiBasic: Basic AI call function
callAiComplex: Complex AI call function
callAiImage: Image AI call function
Document Operations:
document: Dictionary containing document-related functions:
extract: Extract content from documents
convertFileRefToFileId: Convert file references to file IDs
convertFileIdToFileRef: Convert file IDs to file references
convertDataFormat: Convert data formats
agentInputFilesCreate: Create agent input file lists
agentOutputFilesSave: Save agent output files
Data Operations:
connections: Connection data storage
msft: Microsoft service functions and metadata
google: Google service functions and metadata
Document Operations:
document: Dictionary for document operations (populated by agentManager)
Test paths:

View file

@ -0,0 +1,128 @@
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Management Summary: Methoden-basierte Chat-Architektur</title>
<style>
body {
font-family: Arial, sans-serif;
line-height: 1.6;
max-width: 800px;
margin: 0 auto;
padding: 20px;
color: #333;
}
h1 {
color: #2c3e50;
border-bottom: 2px solid #3498db;
padding-bottom: 10px;
}
h2 {
color: #2c3e50;
margin-top: 30px;
}
.example {
background-color: #f8f9fa;
border-left: 4px solid #3498db;
padding: 15px;
margin: 20px 0;
}
.old-arch, .new-arch {
margin: 15px 0;
padding: 15px;
border-radius: 5px;
}
.old-arch {
background-color: #fff3cd;
border: 1px solid #ffeeba;
}
.new-arch {
background-color: #d4edda;
border: 1px solid #c3e6cb;
}
.benefits {
background-color: #e8f4f8;
padding: 15px;
border-radius: 5px;
margin: 20px 0;
}
.benefits ul {
margin: 10px 0;
padding-left: 20px;
}
.benefits li {
margin: 5px 0;
}
</style>
</head>
<body>
<h1>Management Summary: Methoden-basierte Chat-Architektur</h1>
<p>Die Umstellung von einer Agenten-basierten auf eine Methoden-basierte Chat-Architektur stellt einen fundamentalen Paradigmenwechsel dar. Während die Mehrheit der KI-Chat-Systeme weiterhin auf Agenten-Architekturen setzt, ermöglicht unser methoden-basierter Ansatz eine präzisere Kontrolle und effizientere Integration.</p>
<p>Der methoden-basierte Ansatz definiert klare, selbstbeschreibende Operationen mit festgelegten Parametern und Ergebnissen. Im Gegensatz zu Agenten, die als Blackbox-Operationen fungieren, bieten Methoden eine transparente, validierbare und vorhersehbare Ausführung. Diese Struktur ermöglicht eine präzise Fehlerbehandlung und Retry-Logik auf Aktions-Ebene, anstatt auf Agenten-Ebene.</p>
<p>Die Integration mit Benutzerdaten erfolgt direkt über definierte Authentifizierungspfade, was die Sicherheit erhöht und die Komplexität reduziert. Jede Methode ist selbstbeschreibend und enthält ihre eigenen Validierungsregeln, was die Wartbarkeit verbessert und die Entwicklung neuer Funktionen beschleunigt.</p>
<p>Der methoden-basierte Ansatz reduziert die KI-Abhängigkeit bei der Ausführung von Operationen, während die KI weiterhin für die Planung und Koordination der Methoden eingesetzt wird. Diese Trennung von Planung und Ausführung führt zu zuverlässigeren Ergebnissen und besserer Nachvollziehbarkeit.</p>
<p>Die Architektur ermöglicht eine präzise Dokumentation und Validierung jeder Operation, was in einer regulierten Umgebung von besonderem Wert ist. Die klare Struktur erleichtert die Integration neuer Dienste und die Erweiterung bestehender Funktionalitäten.</p>
<h2>Praktisches Beispiel: Dokumentenverarbeitung und E-Mail-Versand</h2>
<div class="example">
<div class="old-arch">
<strong>Alte Agenten-basierte Architektur:</strong><br>
<pre>
Benutzer: "Suche nach Verträgen im SharePoint und sende mir eine Zusammenfassung per E-Mail"
Agent SharePoint:
- Sucht nach Verträgen
- Extrahiert Inhalte
- Speichert Ergebnisse
Agent Outlook:
- Liest Ergebnisse
- Erstellt E-Mail
- Sendet E-Mail</pre>
</div>
<div class="new-arch">
<strong>Neue Methoden-basierte Architektur:</strong><br>
<pre>
Benutzer: "Suche nach Verträgen im SharePoint und sende mir eine Zusammenfassung per E-Mail"
Methoden-Katalog:
1. SharePoint.searchDocuments
- Parameter: {query: "Verträge", site: "valueon"}
- Retry: 3x bei Netzwerkfehler
- Auth: MSFT
2. Document.extractContent
- Parameter: {documents: [...], sections: ["Zusammenfassung"]}
- Retry: 2x bei Extraktionsfehler
- Auth: LOCAL
3. Outlook.sendMail
- Parameter: {to: ["user@example.com"], subject: "Vertragszusammenfassung"}
- Retry: 1x bei SMTP-Fehler
- Auth: MSFT</pre>
</div>
<div class="benefits">
<strong>Vorteile im Beispiel:</strong>
<ul>
<li>Jede Operation ist klar definiert und validierbar</li>
<li>Retry-Logik ist spezifisch für jede Operation</li>
<li>Authentifizierung ist explizit definiert</li>
<li>Fehler können präzise zugeordnet werden</li>
<li>Operationen können unabhängig voneinander getestet werden</li>
<li>Neue Operationen können einfach hinzugefügt werden</li>
</ul>
</div>
</div>
<p>Die KI plant die Ausführung dieser Methoden, aber die eigentliche Ausführung erfolgt durch die definierten Methoden mit klaren Parametern und Ergebnissen. Dies führt zu einer zuverlässigeren und besser nachvollziehbaren Ausführung.</p>
</body>
</html>

File diff suppressed because it is too large Load diff

3
test/pytest.ini Normal file
View file

@ -0,0 +1,3 @@
[pytest]
asyncio_mode = strict
asyncio_default_fixture_loop_scope = function

71
test/test_chat.py Normal file
View file

@ -0,0 +1,71 @@
"""
Test module for chat workflow functionality.
Tests the workflow process with analysis tasks.
"""
import sys
import os
from pathlib import Path
from datetime import datetime, UTC
from unittest.mock import patch
# Add gateway directory to Python path
gateway_dir = Path(__file__).parent.parent
sys.path.append(str(gateway_dir))
sys.path.append(str(gateway_dir.parent))
from modules.workflow.workflowManager import WorkflowManager
from modules.interfaces.serviceChatModel import ChatWorkflow, UserInputRequest, ChatStat
from modules.workflow.chatManager import getChatManager
from modules.interfaces.serviceManagementModel import FileItem
def test_workflow_process():
    """Drive one user prompt with an attached file through the workflow manager.

    The test builds a ``ChatWorkflow`` in status ``"running"``, initializes the
    chat manager with it, and replaces ``chatManager.serviceManagement.getFile``
    with a mock that returns a prepared ``FileItem``. It then calls
    ``WorkflowManager.workflowProcess`` and checks the returned workflow.

    Checked on the result:
        * ``status`` is either ``"completed"`` or ``"failed"`` (both are
          accepted, so this verifies that a final status was set, not that
          the run succeeded),
        * at least one message was generated,
        * the last message has role ``"assistant"``.
    """
    # The same id is used for the mocked file and for the user's file list.
    testFileId = "550e8400-e29b-41d4-a716-446655440000"

    def currentTimestamp():
        # Timezone-aware "now" rendered as an ISO-8601 string.
        return datetime.now(UTC).isoformat()

    # Workflow manager under test
    manager = WorkflowManager()

    # Workflow fixture: first round, no messages or tasks yet
    testWorkflow = ChatWorkflow(
        id="test-workflow",
        mandateId="test-mandate",
        status="running",
        currentRound=1,
        lastActivity=currentTimestamp(),
        startedAt=currentTimestamp(),
        messages=[],
        stats=ChatStat(),
        tasks=[]
    )

    # Bind the chat manager to the workflow fixture
    chatManager = getChatManager()
    chatManager.initialize(testWorkflow)

    # File record handed out by the mocked getFile
    fakeFile = FileItem(
        id=testFileId,
        mandateId="test-mandate",
        filename="test_file.txt",
        mimeType="text/plain",
        fileHash="test_hash",
        fileSize=1024,
        creationDate=currentTimestamp()
    )

    # Every getFile call on the service management object returns fakeFile
    with patch.object(chatManager.serviceManagement, 'getFile', return_value=fakeFile):
        # User request referencing the mocked file by its id
        request = UserInputRequest(
            prompt="Test prompt",
            listFileId=[testFileId]  # UUID string
        )

        # NOTE(review): called synchronously; if workflowProcess is a coroutine
        # it must be awaited (pytest.ini sets asyncio_mode = strict) - confirm.
        result = manager.workflowProcess(request, testWorkflow)

        # The workflow must have reached one of the two accepted final states
        finalStates = ["completed", "failed"]
        assert result.status in finalStates, f"Unexpected workflow status: {result.status}"

        # There must be output, and it must end with the assistant's message
        assert len(result.messages) > 0, "No messages were generated"
        lastMessage = result.messages[-1]
        assert lastMessage.role == "assistant", "Last message should be from assistant"
# Allow running this test directly as a script, without the pytest runner.
if __name__ == "__main__":
    test_workflow_process()