adapted to model system

This commit is contained in:
ValueOn AG 2025-06-10 18:19:33 +02:00
parent 739f22785c
commit ea5f42d981
33 changed files with 3530 additions and 7690 deletions

View file

@ -32,6 +32,9 @@ Security_PASSWORD_REQUIRE_SPECIAL = True
Security_FAILED_LOGIN_LIMIT = 5
Security_LOCK_DURATION_MINUTES = 30
# Content Neutralization configuration
Content_Neutralization_ENABLED = False
# Agent Webcrawler configuration
Agent_Webcrawler_SERPAPI_ENGINE = google
Agent_Webcrawler_SERPAPI_APIKEY = 7304bd34bca767aa52dd3233297e30a9edc0abc57871f702b3f8238b9d3ee7bc

File diff suppressed because it is too large Load diff

View file

@ -1,380 +0,0 @@
"""
Coach agent for answering questions and generating structured content.
Provides direct AI-based responses using extracted data from documents.
"""
import logging
from typing import Dict, Any, List
import json
from datetime import datetime
import uuid
from modules.workflow.agentBase import AgentBase
from modules.interfaces.serviceChatModel import Task, ChatDocument, ChatContent
logger = logging.getLogger(__name__)
class AgentCoach(AgentBase):
"""AI-driven agent for answering questions and generating structured content from extracted data"""
def __init__(self):
    """Set up the coach agent's name, label, description and capability tags."""
    super().__init__()
    # Capability tags stored on the instance, kept in their original order
    capabilityTags = (
        "dataConversion",
        "questionAnswering",
        "contentGeneration",
        "simpleDataFormatting",
        "informationSynthesis",
        "directResponse",
        "imageInterpretation",
        "structuredOutput",
    )
    self.name = "coach"
    self.label = "Coach & Assistant"
    self.description = "Answers questions, converts and generates content directly from data without complex processing"
    self.capabilities = list(capabilityTags)
def setDependencies(self, serviceBase=None):
    """
    Hand the externally supplied service object to the agent.
    Args:
        serviceBase: Service container forwarded unchanged to setService (may be None)
    """
    injectedService = serviceBase
    self.setService(injectedService)
async def processTask(self, task: Task) -> Dict[str, Any]:
    """
    Answer a task by asking the AI service for one document per requested output.
    Args:
        task: Task object; its prompt, filesInput and filesOutput attributes are read
    Returns:
        Dictionary with "feedback" (str) and "documents" (list of generated documents).
        Failures are reported through "feedback" with an empty document list
        instead of being raised.
    """
    try:
        prompt = task.prompt
        # A task without attachments may carry None here; treat it as "no documents"
        # so the collector does not fail while iterating.
        inputDocuments = task.filesInput or []
        outputSpecs = task.filesOutput

        # Nothing can be generated without the AI service
        if not self.service or not self.service.base:
            return {
                "feedback": "The Coach agent requires an AI service to function.",
                "documents": []
            }

        # Flatten the input documents into one text block and let the AI
        # classify the request before any content is generated
        documentContext = self._collectExtractedData(inputDocuments)
        taskUnderstanding = await self._analyzeTask(prompt, documentContext)

        # Without explicit output specs, fall back to a single document whose
        # name and format come from the task analysis
        if not outputSpecs:
            defaultFormat = taskUnderstanding.get("recommendedFormat", "md")
            defaultTitle = taskUnderstanding.get("suggestedFilename", "response")
            outputSpecs = [{
                "label": f"{defaultTitle}.{defaultFormat}",
                "description": "Response to your request"
            }]

        documents = []
        for spec in outputSpecs:
            outputLabel = spec.get("label", "output.txt")
            outputDescription = spec.get("description", "")
            # The text after the last dot selects the output format
            outputFormat = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "txt"
            document = await self._generateDocument(
                prompt,
                documentContext,
                outputLabel,
                outputFormat,
                outputDescription,
                taskUnderstanding
            )
            documents.append(document)

        feedback = taskUnderstanding.get("feedback", "I've created content based on your request.")
        return {
            "feedback": feedback,
            "documents": documents
        }
    except Exception as e:
        logger.error(f"Error in coach processing: {str(e)}", exc_info=True)
        return {
            "feedback": f"Error while processing your request: {str(e)}",
            "documents": []
        }
def _collectExtractedData(self, documents: List[ChatDocument]) -> str:
"""
Collect extracted data from input documents.
Args:
documents: List of input documents
Returns:
Combined extracted data as text
"""
contextParts = []
for doc in documents:
docName = doc.name
if doc.ext:
docName = f"{docName}.{doc.ext}"
contextParts.append(f"\n\n--- {docName} ---\n")
# Process contents, focusing on dataExtracted field
for content in doc.contents:
if content.data:
contextParts.append(content.data)
return "\n".join(contextParts)
async def _analyzeTask(self, prompt: str, context: str) -> Dict:
"""
Use AI to analyze the task and develop an understanding of what's required.
Args:
prompt: The task prompt
context: Extracted document data
Returns:
Task understanding dictionary
"""
analysisPrompt = f"""
Analyze this request to determine the best approach for creating a response.
REQUEST: {prompt}
EXTRACTED DATA:
{context[:1500]}... (truncated if longer)
Create a task analysis in JSON format with the following structure:
{{
"requestType": "question|content|data|report|description",
"recommendedFormat": "md|txt|html|csv|json",
"suggestedFilename": "appropriate_filename_without_extension",
"contentFocus": "brief description of what to focus on",
"feedback": "brief explanation of how you'll approach this request",
"complexity": "simple|moderate|complex"
}}
Only return valid JSON. No preamble or explanations.
"""
try:
# Get task understanding from AI
response = await self.service.base.callAi([
{"role": "system", "content": "You are a task analysis expert. Respond with valid JSON only."},
{"role": "user", "content": analysisPrompt}
])
# Extract JSON from response
jsonStart = response.find('{')
jsonEnd = response.rfind('}') + 1
if jsonStart >= 0 and jsonEnd > jsonStart:
taskUnderstanding = json.loads(response[jsonStart:jsonEnd])
return taskUnderstanding
else:
# Fallback if JSON not found
return {
"requestType": "content",
"recommendedFormat": "md",
"suggestedFilename": "response",
"contentFocus": "Addressing the main request",
"feedback": "I've created content based on your request and the provided data.",
"complexity": "moderate"
}
except Exception as e:
logger.warning(f"Error analyzing task: {str(e)}")
return {
"requestType": "content",
"recommendedFormat": "md",
"suggestedFilename": "response",
"contentFocus": "Addressing the main request",
"feedback": "I've created content based on your request and the provided data.",
"complexity": "moderate"
}
async def _generateDocument(self, prompt: str, context: str, outputLabel: str,
                            outputFormat: str, description: str, taskUnderstanding: Dict) -> ChatDocument:
    """
    Generate one output document from the request and the extracted data.
    Args:
        prompt: The task prompt
        context: Extracted document data
        outputLabel: Output filename
        outputFormat: Output format (file extension)
        description: Output description
        taskUnderstanding: Task understanding from analysis
    Returns:
        ChatDocument holding the generated content, or an error document in
        the requested format when the AI call fails
    """
    contentType = self._getContentType(outputFormat)
    # The extension is taken from the LAST dot by the caller, so the name is
    # everything before that dot ("report.v2.md" -> "report.v2", not "report").
    baseName = outputLabel.rsplit('.', 1)[0]

    def buildDocument(contentName, data, summary, extraMetadata=None):
        # Single place that shapes the ChatDocument for both success and error.
        # The MIME type is recorded next to the format key.
        metadata = {"format": outputFormat, "contentType": contentType}
        metadata.update(extraMetadata or {})
        return ChatDocument(
            id=str(uuid.uuid4()),
            name=baseName,
            ext=outputFormat,
            data=data,
            contents=[
                ChatContent(
                    name=contentName,
                    data=data,
                    summary=summary,
                    metadata=metadata
                )
            ]
        )

    generationPrompt = f"""
Create a response to the following request in {outputFormat} format:
REQUEST: {prompt}
EXTRACTED DATA:
{context}
OUTPUT REQUIREMENTS:
- Filename: {outputLabel}
- Format: {outputFormat}
- Description: {description}
- Focus on: {taskUnderstanding.get("contentFocus", "Addressing the main request")}
Guidelines:
1. Create content that directly addresses the request
2. Use the extracted data to inform your response
3. Format the output appropriately for {outputFormat}
4. Be comprehensive but focused
5. Include appropriate formatting, structure, and organization
Only return the content. No explanations or additional text.
"""
    try:
        content = await self.service.base.callAi([
            {"role": "system", "content": f"You are a content generation expert. Create content in {outputFormat} format."},
            {"role": "user", "content": generationPrompt}
        ])
        # The model may wrap its answer in a fenced code block; unwrap it
        content = self._extractFromCodeBlocks(content, outputFormat)
        return buildDocument("main", content, description)
    except Exception as e:
        logger.error(f"Error generating document: {str(e)}")
        errorContent = self._createErrorContent(str(e), outputFormat)
        return buildDocument("error", errorContent, "Error generating content", {"error": str(e)})
def _getContentType(self, outputFormat: str) -> str:
"""
Get content type based on format.
Args:
outputFormat: Output format
Returns:
Content type
"""
contentTypeMap = {
"md": "text/markdown",
"markdown": "text/markdown",
"html": "text/html",
"txt": "text/plain",
"text": "text/plain",
"json": "application/json",
"csv": "text/csv",
"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
}
return contentTypeMap.get(outputFormat, "text/plain")
def _extractFromCodeBlocks(self, content: str, format: str) -> str:
"""
Extract content from code blocks if present.
Args:
content: Raw content
format: Expected format
Returns:
Extracted content
"""
# Check for code blocks
codeBlockStart = f"```{format}"
if codeBlockStart in content:
start = content.find(codeBlockStart) + len(codeBlockStart)
end = content.find("```", start)
if end > start:
return content[start:end].strip()
# Check for generic code blocks
if "```" in content:
start = content.find("```") + 3
# Skip format identifier if present
if content[start:].strip() and not content[start:start+1].isalnum():
start = content.find("\n", start) + 1
end = content.find("```", start)
if end > start:
return content[start:end].strip()
return content
def _createErrorContent(self, errorMessage: str, outputFormat: str) -> str:
"""
Create error content in the appropriate format.
Args:
errorMessage: Error message
outputFormat: Output format
Returns:
Formatted error content
"""
if outputFormat == "json":
return json.dumps({"error": errorMessage})
elif outputFormat == "csv":
return f"error\n{errorMessage}"
elif outputFormat in ["md", "markdown"]:
return f"# Error\n\n{errorMessage}"
elif outputFormat == "html":
return f"<html><body><h1>Error</h1><p>{errorMessage}</p></body></html>"
else:
return f"Error: {errorMessage}"
# Factory function for the Coach agent
def getAgentCoach():
    """Create a new Coach agent and return it."""
    coachAgent = AgentCoach()
    return coachAgent

File diff suppressed because it is too large Load diff

View file

@ -1,537 +0,0 @@
"""
Documentation agent for generating structured documentation.
Provides comprehensive documentation generation capabilities.
"""
import logging
from typing import Dict, Any, List, Optional
import json
import re
from datetime import datetime
import os
import hashlib
import base64
import uuid
import shutil
from pathlib import Path
import traceback
import sys
import importlib.util
import inspect
from pydantic import BaseModel
from modules.workflow.agentBase import AgentBase
from modules.interfaces.serviceChatModel import ChatContent
logger = logging.getLogger(__name__)
class AgentDocumentation(AgentBase):
"""AI-driven agent for creating documentation and structured content using multi-step generation"""
def __init__(self):
    """Set up the documentation agent's name, label, description and capability tags."""
    super().__init__()
    # Capability tags stored on the instance, kept in their original order
    capabilityTags = (
        "report_generation",
        "documentation",
        "content_structuring",
        "technical_writing",
        "knowledge_organization",
    )
    self.name = "documentation"
    self.label = "Documentation"
    self.description = "Creates structured documentation, reports, and content using AI with multi-step generation"
    self.capabilities = list(capabilityTags)
def setDependencies(self, serviceBase=None):
    """
    Hand the externally supplied service object to the agent.
    Args:
        serviceBase: Service container forwarded unchanged to setService (may be None)
    """
    injectedService = serviceBase
    self.setService(injectedService)
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
    """
    Generate one document per requested output using the multi-step AI workflow.
    Args:
        task: Task dictionary; the keys "prompt", "inputDocuments" and
            "outputSpecifications" are read
    Returns:
        Dictionary with "feedback" (str) and "documents" (list). Failures are
        reported through "feedback" with an empty document list instead of
        being raised.
    """
    try:
        # `or` also covers keys that are present but explicitly None, which
        # the .get() defaults alone would let through
        prompt = task.get("prompt") or ""
        inputDocuments = task.get("inputDocuments") or []
        outputSpecs = task.get("outputSpecifications") or []

        # Nothing can be generated without the AI service
        if not self.service or not self.service.base:
            return {
                "feedback": "The Documentation agent requires an AI service to function.",
                "documents": []
            }

        # Build the text context and let the AI plan the document structure
        documentContext = self._extractDocumentContext(inputDocuments)
        documentationPlan = await self._analyzeTask(prompt, documentContext, outputSpecs)
        logger.debug(f"Documentation plan: {documentationPlan}")

        # Without explicit output specs, produce one document named after the
        # planned title in the planned format
        if not outputSpecs:
            defaultFormat = documentationPlan.get("recommendedFormat", "markdown")
            defaultTitle = documentationPlan.get("title", "Documentation")
            safeTitle = self._sanitizeFilename(defaultTitle)
            outputSpecs = [
                {"label": f"{safeTitle}.{defaultFormat}", "description": "Comprehensive documentation"}
            ]

        documents = []
        for spec in outputSpecs:
            document = await self._createDocumentMultiStep(
                prompt,
                documentContext,
                spec.get("label", ""),
                spec.get("description", ""),
                documentationPlan
            )
            documents.append(document)

        feedback = documentationPlan.get("feedback", f"Created {len(documents)} documents based on your requirements.")
        return {
            "feedback": feedback,
            "documents": documents
        }
    except Exception as e:
        logger.error(f"Error in documentation generation: {str(e)}", exc_info=True)
        return {
            "feedback": f"Error during documentation generation: {str(e)}",
            "documents": []
        }
def _extractDocumentContext(self, documents: List[Dict[str, Any]]) -> str:
"""
Extract context from input documents, focusing on dataExtracted.
Args:
documents: List of document objects
Returns:
Extracted context as text
"""
contextParts = []
for doc in documents:
docName = doc.get("name", "unnamed")
if doc.get("ext"):
docName = f"{docName}.{doc.get('ext')}"
contextParts.append(f"\n\n--- {docName} ---\n")
# Process contents for dataExtracted
for content in doc.get("contents", []):
if content.get("dataExtracted"):
contextParts.append(content.get("dataExtracted", ""))
return "\n".join(contextParts)
def _sanitizeFilename(self, filename: str) -> str:
"""
Sanitize a filename by removing invalid characters.
Args:
filename: Filename to sanitize
Returns:
Sanitized filename
"""
# Replace invalid characters with underscores
invalidChars = r'<>:"/\|?*'
for char in invalidChars:
filename = filename.replace(char, '_')
# Trim filename if too long
if len(filename) > 100:
filename = filename[:97] + "..."
return filename
async def _analyzeTask(self, prompt: str, context: str, outputSpecs: List) -> Dict:
"""
Use AI to analyze the task and create a documentation plan.
Args:
prompt: The task prompt
context: Document context
outputSpecs: Output specifications
Returns:
Documentation plan dictionary
"""
analysisPrompt = f"""
Analyze this documentation task and create a detailed plan.
TASK: {prompt}
DOCUMENT CONTEXT SAMPLE:
{context[:1000]}... (truncated)
OUTPUT REQUIREMENTS:
{json.dumps(outputSpecs, indent=2)}
Create a detailed documentation plan in JSON format with the following structure:
{{
"title": "Document Title",
"documentType": "report|manual|guide|whitepaper|etc",
"audience": "technical|general|executive|etc",
"detailedStructure": [
{{
"title": "Chapter/Section Title",
"keyPoints": ["point1", "point2", ...],
"subsections": ["subsection1", "subsection2", ...],
"importance": "high|medium|low",
"estimatedLength": "short|medium|long"
}},
... more sections ...
],
"keyTopics": ["topic1", "topic2", ...],
"tone": "formal|conversational|instructional|etc",
"recommendedFormat": "markdown|html|text|etc",
"formattingRequirements": ["requirement1", "requirement2", ...],
"executiveSummary": "Brief description of what the document will cover",
"feedback": "Brief message explaining the documentation approach"
}}
Only return valid JSON. No preamble or explanations.
"""
try:
response = await self.service.base.callAi([
{"role": "system", "content": "You are a documentation expert. Respond with valid JSON only."},
{"role": "user", "content": analysisPrompt}
])
# Extract JSON from response
jsonStart = response.find('{')
jsonEnd = response.rfind('}') + 1
if jsonStart >= 0 and jsonEnd > jsonStart:
plan = json.loads(response[jsonStart:jsonEnd])
return plan
else:
# Fallback if JSON not found
return {
"title": "Documentation (DEFAULT)",
"documentType": "report",
"audience": "general",
"detailedStructure": [
{
"title": "Introduction",
"keyPoints": ["Purpose", "Scope"],
"subsections": [],
"importance": "high",
"estimatedLength": "short"
},
{
"title": "Main Content",
"keyPoints": ["Core Information"],
"subsections": ["Key Findings", "Analysis"],
"importance": "high",
"estimatedLength": "long"
},
{
"title": "Conclusion",
"keyPoints": ["Summary", "Next Steps"],
"subsections": [],
"importance": "medium",
"estimatedLength": "short"
}
],
"keyTopics": ["General Information"],
"tone": "formal",
"recommendedFormat": "markdown",
"formattingRequirements": ["Clear headings", "Professional formatting"],
"executiveSummary": "A comprehensive documentation covering the requested topics.",
"feedback": "Created documentation based on your requirements."
}
except Exception as e:
logger.warning(f"Error creating documentation plan: {str(e)}")
return {
"title": "Documentation",
"documentType": "report",
"audience": "general",
"detailedStructure": [
{
"title": "Introduction",
"keyPoints": ["Purpose", "Scope"],
"subsections": [],
"importance": "high",
"estimatedLength": "short"
},
{
"title": "Main Content",
"keyPoints": ["Core Information"],
"subsections": ["Key Findings", "Analysis"],
"importance": "high",
"estimatedLength": "long"
},
{
"title": "Conclusion",
"keyPoints": ["Summary", "Next Steps"],
"subsections": [],
"importance": "medium",
"estimatedLength": "short"
}
],
"keyTopics": ["General Information"],
"tone": "formal",
"recommendedFormat": "markdown",
"formattingRequirements": ["Clear headings", "Professional formatting"],
"executiveSummary": "A comprehensive documentation covering the requested topics.",
"feedback": "Created documentation based on your requirements."
}
async def _createDocumentMultiStep(self, prompt: str, context: str, outputLabel: str,
outputDescription: str, documentationPlan: Dict) -> ChatContent:
"""
Create a document using a multi-step approach with separate AI calls for each section.
Args:
prompt: Original task prompt
context: Document context
outputLabel: Output filename
outputDescription: Description of desired output
documentationPlan: Documentation plan from AI
Returns:
ChatContent object
"""
try:
# Determine format from filename
formatType = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "md"
# Map format to contentType
contentTypeMap = {
"md": "text/markdown",
"markdown": "text/markdown",
"html": "text/html",
"txt": "text/plain",
"text": "text/plain",
"json": "application/json",
"csv": "text/csv"
}
contentType = contentTypeMap.get(formatType, "text/plain")
# Get document information
title = documentationPlan.get("title", "Documentation")
documentType = documentationPlan.get("documentType", "document")
audience = documentationPlan.get("audience", "general")
tone = documentationPlan.get("tone", "formal")
keyTopics = documentationPlan.get("keyTopics", [])
formattingRequirements = documentationPlan.get("formattingRequirements", [])
# Get the detailed structure
detailedStructure = documentationPlan.get("detailedStructure", [])
# Step 1: Generate executive summary
summaryPrompt = f"""
Create an executive summary for a {documentType} titled "{title}".
DOCUMENT OVERVIEW:
- Type: {documentType}
- Audience: {audience}
- Key Topics: {', '.join(keyTopics)}
TASK CONTEXT: {prompt}
The executive summary should:
1. Provide a concise overview of the document's purpose
2. Highlight key points and findings
3. Be clear and engaging for the target audience
4. Set expectations for the document's content
Keep the summary brief but comprehensive.
"""
executiveSummary = await self.service.base.callAi([
{"role": "system", "content": f"You are a documentation expert creating an executive summary in {formatType} format."},
{"role": "user", "content": summaryPrompt}
], produceUserAnswer = True)
# Step 2: Generate introduction
introPrompt = f"""
Create an introduction for a {documentType} titled "{title}".
DOCUMENT OVERVIEW:
- Type: {documentType}
- Audience: {audience}
- Key Topics: {', '.join(keyTopics)}
TASK CONTEXT: {prompt}
The introduction should:
1. Set the context and purpose of the document
2. Outline the scope and objectives
3. Preview the main topics to be covered
4. Engage the reader's interest
Format the introduction according to {formatType} standards.
"""
introduction = await self.service.base.callAi([
{"role": "system", "content": f"You are a documentation expert creating an introduction in {formatType} format."},
{"role": "user", "content": introPrompt}
], produceUserAnswer = True)
# Step 3: Generate main sections
sections = []
for section in detailedStructure:
sectionTitle = section.get("title", "Section")
keyPoints = section.get("keyPoints", [])
subsections = section.get("subsections", [])
importance = section.get("importance", "medium")
estimatedLength = section.get("estimatedLength", "medium")
sectionPrompt = f"""
Create the {sectionTitle} section for a {documentType} titled "{title}".
SECTION DETAILS:
- Title: {sectionTitle}
- Key Points: {', '.join(keyPoints)}
- Subsections: {', '.join(subsections)}
- Importance: {importance}
- Estimated Length: {estimatedLength}
DOCUMENT CONTEXT:
- Type: {documentType}
- Audience: {audience}
- Key Topics: {', '.join(keyTopics)}
TASK CONTEXT: {prompt}
The section should:
1. Cover all key points thoroughly
2. Include relevant subsections
3. Maintain appropriate depth based on importance
4. Follow the document's tone and style
Format the section according to {formatType} standards.
"""
sectionContent = await self.service.base.callAi([
{"role": "system", "content": f"You are a documentation expert creating a section in {formatType} format."},
{"role": "user", "content": sectionPrompt}
], produceUserAnswer = True)
sections.append(sectionContent)
# Step 4: Generate conclusion
conclusionPrompt = f"""
Create the conclusion for a {documentType} titled "{title}".
DOCUMENT OVERVIEW:
- Type: {documentType}
- Audience: {audience}
- Key Topics: {', '.join(keyTopics)}
TASK CONTEXT: {prompt}
This conclusion should:
1. Summarize the key points covered in the document
2. Provide closure to the topics discussed
3. Include any relevant recommendations or next steps
4. Leave the reader with a clear understanding of the document's significance
The conclusion should be professional and impactful, formatted according to {formatType} standards.
"""
conclusion = await self.service.base.callAi([
{"role": "system", "content": f"You are a documentation expert creating a conclusion in {formatType} format."},
{"role": "user", "content": conclusionPrompt}
], produceUserAnswer = True)
# Step 5: Assemble the complete document
if formatType in ["md", "markdown"]:
# Markdown format
documentContent = f"# {title}\n\n"
if executiveSummary:
documentContent += f"## Executive Summary\n\n{executiveSummary}\n\n"
documentContent += f"{introduction}\n\n"
for i, sectionContent in enumerate(sections):
# Ensure section starts with heading if not already
sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
if not sectionContent.strip().startswith("#"):
documentContent += f"## {sectionTitle}\n\n"
documentContent += f"{sectionContent}\n\n"
documentContent += f"## Conclusion\n\n{conclusion}\n"
elif formatType == "html":
# HTML format
documentContent = f"<html>\n<head>\n<title>{title}</title>\n</head>\n<body>\n"
documentContent += f"<h1>{title}</h1>\n\n"
if executiveSummary:
documentContent += f"<h2>Executive Summary</h2>\n<div>{executiveSummary}</div>\n\n"
documentContent += f"<div>{introduction}</div>\n\n"
for i, sectionContent in enumerate(sections):
sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
documentContent += f"<h2>{sectionTitle}</h2>\n<div>{sectionContent}</div>\n\n"
documentContent += f"<h2>Conclusion</h2>\n<div>{conclusion}</div>\n"
documentContent += "</body>\n</html>"
else:
# Plain text format
documentContent = f"{title}\n{'=' * len(title)}\n\n"
if executiveSummary:
documentContent += f"EXECUTIVE SUMMARY\n{'-' * 17}\n\n{executiveSummary}\n\n"
documentContent += f"{introduction}\n\n"
for i, sectionContent in enumerate(sections):
sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
documentContent += f"{sectionTitle}\n{'-' * len(sectionTitle)}\n\n{sectionContent}\n\n"
documentContent += f"CONCLUSION\n{'-' * 10}\n\n{conclusion}\n"
# Create document object
return self.formatAgentDocumentOutput(outputLabel, documentContent, contentType)
except Exception as e:
logger.error(f"Error creating document: {str(e)}", exc_info=True)
# Create a simple error document
if formatType in ["md", "markdown"]:
content = f"# Error in Documentation\n\nThere was an error generating the documentation: {str(e)}"
elif formatType == "html":
content = f"<html><body><h1>Error in Documentation</h1><p>There was an error generating the documentation: {str(e)}</p></body></html>"
else:
content = f"Error in Documentation\n\nThere was an error generating the documentation: {str(e)}"
return self.formatAgentDocumentOutput(outputLabel, content, contentType)
# Factory function for the Documentation agent
def getAgentDocumentation():
    """Create a new Documentation agent and return it."""
    documentationAgent = AgentDocumentation()
    return documentationAgent

View file

@ -1,380 +0,0 @@
"""
Email Agent Module.
Handles email-related tasks using Microsoft Graph API.
"""
import logging
import json
from typing import Dict, Any, List, Optional, Tuple
import uuid
import os
from modules.workflow.agentBase import AgentBase
from modules.interfaces.serviceChatModel import Task, ChatDocument, ChatContent
logger = logging.getLogger(__name__)
class AgentEmail(AgentBase):
"""Agent for handling email-related tasks."""
def __init__(self):
    """Set up the email agent's name, label, description, capability tags and empty service slot."""
    super().__init__()
    # Capability tags stored on the instance, kept in their original order
    capabilityTags = (
        "email_composition",
        "email_draft_creation",
        "email_template_generation",
    )
    self.name = "email"
    self.label = "Email Agent"
    self.description = "Handles email composition and sending using Microsoft Graph API"
    self.capabilities = list(capabilityTags)
    # Filled in later by setDependencies
    self.serviceBase = None
def setDependencies(self, serviceBase=None):
    """
    Set external dependencies for the agent.
    Args:
        serviceBase: Service container; kept on self.serviceBase and also
            passed to setService
    """
    self.serviceBase = serviceBase
    # processTask reads self.service (base / msft), so register the service
    # the same way the Coach and Documentation agents do instead of only
    # keeping a private reference.
    self.setService(serviceBase)
async def processTask(self, task: Task) -> Dict[str, Any]:
    """
    Compose an email with the AI service and try to store it as a draft
    through the Microsoft connector.
    Args:
        task: Task object; its prompt, filesInput and filesOutput attributes are read
    Returns:
        Dictionary containing:
        - feedback: Text response explaining what was done
        - documents: List of created documents (one per output specification,
          or a single authentication-trigger document when no Microsoft
          token is available)
        Failures are reported through "feedback" with an empty document list.
    """
    try:
        prompt = task.prompt
        inputDocuments = task.filesInput
        # A task without requested outputs may carry None; iterate nothing then
        outputSpecs = task.filesOutput or []

        # Guard against a missing service object as well as a missing AI
        # backend, matching the other agents
        if not self.service or not self.service.base:
            return {
                "feedback": "The Email agent requires an AI service to function.",
                "documents": []
            }

        if not hasattr(self.service, 'msft'):
            return {
                "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.",
                "documents": []
            }

        # Without a token the user has to authenticate first; hand back a
        # document that lets the frontend start that flow
        token_data = self.service.msft.getMsftToken()
        if not token_data:
            auth_doc = self._createFrontendAuthTriggerDocument()
            return {
                "feedback": "Microsoft authentication required. Please authenticate to continue.",
                "documents": [auth_doc]
            }

        documentContents, attachments = self._processInputDocuments(inputDocuments)
        emailTemplate = await self._generateEmailTemplate(prompt, documentContents)
        htmlPreview = self._createHtmlPreview(emailTemplate)

        draft_result = self.service.msft.createDraftEmail(
            emailTemplate["recipient"],
            emailTemplate["subject"],
            emailTemplate["htmlBody"],
            attachments
        )

        documents = []
        for spec in outputSpecs:
            label = spec.get("label", "")
            if label.endswith(".html"):
                # The HTML output carries the actual email body, not the preview
                document = self.formatAgentDocumentOutput(label, emailTemplate["htmlBody"], "text/html")
            elif label.endswith(".json"):
                templateJson = json.dumps(emailTemplate, indent=2)
                document = self.formatAgentDocumentOutput(label, templateJson, "application/json")
            else:
                # Every other label gets the HTML preview
                document = self.formatAgentDocumentOutput(label, htmlPreview, "text/html")
            documents.append(document)

        if draft_result:
            feedback = f"Email draft created successfully for {emailTemplate.get('recipient')}. The subject is: '{emailTemplate['subject']}'"
            if attachments:
                feedback += f" with {len(attachments)} attachment(s)"
            feedback += ". You can open and edit it in your Outlook draft folder."
        else:
            feedback = "Email template created but could not save as draft. HTML preview and template are available as documents."
        return {
            "feedback": feedback,
            "documents": documents
        }
    except Exception as e:
        logger.error(f"Error in email agent: {str(e)}", exc_info=True)
        return {
            "feedback": f"Error processing email task: {str(e)}",
            "documents": []
        }
def _createFrontendAuthTriggerDocument(self) -> ChatDocument:
    """Build the HTML document whose button sends the browser to /api/auth/microsoft."""
    # The same markup is used as the document data and as its single content item
    authMarkup = """
<div>
<h2>Microsoft Authentication Required</h2>
<p>Please click the button below to authenticate with Microsoft:</p>
<button onclick="window.location.href='/api/auth/microsoft'">Authenticate with Microsoft</button>
</div>
"""
    documentId = str(uuid.uuid4())
    mainContent = ChatContent(
        name="main",
        data=authMarkup,
        summary="Microsoft authentication trigger page",
        metadata={
            "contentType": "text/html",
            "isText": True
        }
    )
    return ChatDocument(
        id=documentId,
        name="microsoft_auth",
        ext="html",
        data=authMarkup,
        contents=[mainContent]
    )
def _processInputDocuments(self, input_docs: List[ChatDocument]) -> Tuple[str, List[Dict[str, Any]]]:
"""
Process input documents to extract content and prepare attachments.
Args:
input_docs: List of input documents
Returns:
Tuple of (document content text, list of attachments)
"""
documentContents = []
attachments = []
for doc in input_docs:
docName = doc.name
if doc.ext:
docName = f"{docName}.{doc.ext}"
# Add document name to contents
documentContents.append(f"\n\n--- {docName} ---\n")
# Process document data directly
if doc.data:
# Add to attachments with proper metadata
attachments.append({
"name": docName,
"document": {
"data": doc.data,
"mimeType": doc.contents[0].metadata.get("contentType", "application/octet-stream") if doc.contents else "application/octet-stream",
"base64Encoded": doc.contents[0].metadata.get("base64Encoded", False) if doc.contents else False
}
})
documentContents.append(f"Document attached: {docName}")
else:
documentContents.append(f"Document referenced: {docName}")
return "\n".join(documentContents), attachments
def formatAgentDocumentOutput(self, filename: str, content: str, contentType: str) -> ChatDocument:
    """
    Wrap generated content in a ChatDocument for agent output.

    Args:
        filename: Output filename; split into document name and extension
        content: Document content
        contentType: MIME type of the content
    Returns:
        ChatDocument with a single "main" content entry holding the same content
    """
    stem, suffix = os.path.splitext(filename)
    # splitext keeps the leading dot; the document stores the extension without it
    extension = suffix[1:] if suffix.startswith('.') else suffix

    documentId = str(uuid.uuid4())
    mainContent = ChatContent(
        name="main",
        data=content,
        summary=f"Generated {filename}",
        metadata={"contentType": contentType}
    )
    return ChatDocument(
        id=documentId,
        name=stem,
        ext=extension,
        data=content,
        contents=[mainContent]
    )
async def _generateEmailTemplate(self, prompt: str, documentContents: str) -> Dict[str, Any]:
"""
Generate email template using AI.
Args:
prompt: The task prompt
documentContents: Extracted document content
Returns:
Email template dictionary with recipient, subject, body
"""
emailPrompt = f"""
Create an email based on the following request:
REQUEST: {prompt}
DOCUMENT CONTENTS:
{documentContents[:2000]}... (truncated if longer)
Generate an email template with:
1. A relevant recipient (use placeholder or derive from content if possible)
2. A concise but descriptive subject line
3. A professional HTML-formatted email body
4. Appropriate greeting and closing
Format your response as JSON with these fields:
- recipient: email address
- subject: subject line
- plainBody: plain text version
- htmlBody: HTML formatted version
Only return valid JSON. No preamble or explanations.
"""
try:
response = await self.service.base.callAi([
{"role": "system", "content": "You are an email template specialist. Create professional emails. Respond with valid JSON only."},
{"role": "user", "content": emailPrompt}
])
# Extract JSON from response
jsonStart = response.find('{')
jsonEnd = response.rfind('}') + 1
if jsonStart >= 0 and jsonEnd > jsonStart:
template = json.loads(response[jsonStart:jsonEnd])
return template
else:
# Fallback plan
logger.warning(f"Not able creating email template, generating fallback plan")
return {
"recipient": "recipient@example.com",
"subject": "Information Regarding Your Request",
"plainBody": f"This email is regarding your request: {prompt}",
"htmlBody": f"<html><body><p>This email is regarding your request: {prompt}</p></body></html>"
}
except Exception as e:
logger.warning(f"Error generating email template: {str(e)}")
return {
"recipient": "recipient@example.com",
"subject": "Information Regarding Your Request",
"plainBody": f"This email is regarding your request: {prompt}",
"htmlBody": f"<html><body><p>This email is regarding your request: {prompt}</p></body></html>"
}
def _createHtmlPreview(self, emailTemplate: Dict[str, Any]) -> str:
"""
Create an HTML preview of the email template.
Args:
emailTemplate: Email template dictionary
Returns:
HTML string for preview
"""
html = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Email Preview: {emailTemplate.get('subject', 'Email Template')}</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5; }}
.email-container {{ max-width: 600px; margin: 20px auto; background-color: white; border: 1px solid #ddd; border-radius: 5px; overflow: hidden; }}
.email-header {{ background-color: #f0f0f0; padding: 15px; border-bottom: 1px solid #ddd; }}
.email-content {{ padding: 20px; }}
.email-footer {{ background-color: #f0f0f0; padding: 15px; border-top: 1px solid #ddd; font-size: 12px; color: #666; }}
.field {{ margin-bottom: 10px; }}
.field-label {{ font-weight: bold; color: #555; }}
.email-body {{ margin-top: 20px; padding-top: 20px; border-top: 1px solid #eee; }}
</style>
</head>
<body>
<div class="email-container">
<div class="email-header">
<h2>Email Template Preview</h2>
</div>
<div class="email-content">
<div class="field">
<div class="field-label">To:</div>
<div>{emailTemplate.get('recipient', 'recipient@example.com')}</div>
</div>
<div class="field">
<div class="field-label">Subject:</div>
<div>{emailTemplate.get('subject', 'No Subject')}</div>
</div>
<div class="email-body">
{emailTemplate.get('htmlBody', '<p>No content</p>')}
</div>
</div>
<div class="email-footer">
<p>This is a preview of the email template. The actual email may appear differently in various email clients.</p>
</div>
</div>
</body>
</html>
"""
return html
def getAgentEmail() -> AgentEmail:
    """Factory function: build a fresh AgentEmail instance and hand it back."""
    agent = AgentEmail()
    return agent

View file

@ -1,348 +0,0 @@
"""
SharePoint Agent Module.
Handles SharePoint document search and data extraction using Microsoft Graph API.
"""
import logging
import json
from typing import Dict, Any, List, Optional
from modules.workflow.agentBase import AgentBase
logger = logging.getLogger(__name__)
class AgentSharepoint(AgentBase):
"""Agent for handling SharePoint document operations."""
def __init__(self):
    """Set the SharePoint agent's name, label, description and capability list."""
    super().__init__()
    capabilityNames = ["document_search", "content_extraction", "metadata_analysis", "document_processing"]
    self.name = "sharepoint"
    self.label = "SharePoint Agent"
    self.description = "Searches and extracts data from SharePoint documents using Microsoft Graph API"
    self.capabilities = capabilityNames
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
    """
    Run a SharePoint search task and package the hits as summary documents.

    Args:
        task: Task dictionary containing:
            - prompt: Instructions for the agent
            - inputDocuments: List of documents to process (read but not used here)
            - outputSpecifications: List of required output documents
    Returns:
        Dictionary containing:
            - feedback: Text response explaining what was done
            - documents: List of created documents
    """
    try:
        prompt = task.get("prompt", "")
        inputDocuments = task.get("inputDocuments", [])
        outputSpecs = task.get("outputSpecifications", [])

        # Preconditions: AI service, Microsoft connector, valid token
        if not self.service.base:
            return {
                "feedback": "The SharePoint agent requires an AI service to function.",
                "documents": []
            }
        if not hasattr(self.service, 'msft'):
            return {
                "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.",
                "documents": []
            }
        if not self.service.msft.getMsftToken():
            # Without a token the only output is the document that triggers sign-in
            return {
                "feedback": "Microsoft authentication required. Please authenticate to continue.",
                "documents": [self._createFrontendAuthTriggerDocument()]
            }

        searchQuery = await self._parseSearchQuery(prompt)
        searchResults = await self._searchSharePointDocuments(searchQuery)

        # One summary document per output specification; the label suffix picks the format
        documents = []
        for spec in outputSpecs:
            label = spec.get("label", "")
            description = spec.get("description", "")
            if label.endswith(".json"):
                buildSummary = self._createSearchSummaryJson
            elif label.endswith(".csv"):
                buildSummary = self._createSearchSummaryCsv
            else:
                buildSummary = self._createSearchSummaryText
            documents.append(buildSummary(searchResults, description))

        if searchResults:
            outcome = "The results have been saved as documents."
        else:
            outcome = "No matching documents were found."
        feedback = f"Found {len(searchResults)} documents matching your search criteria. " + outcome
        return {
            "feedback": feedback,
            "documents": documents
        }
    except Exception as e:
        logger.error(f"Error in SharePoint agent: {str(e)}")
        return {
            "feedback": f"Error processing SharePoint task: {str(e)}",
            "documents": []
        }
def _createFrontendAuthTriggerDocument(self) -> Dict[str, Any]:
"""Create a document that triggers Microsoft authentication in the frontend."""
return self.formatAgentDocumentOutput(
"microsoft_auth.html",
"""
<div>
<h2>Microsoft Authentication Required</h2>
<p>Please click the button below to authenticate with Microsoft:</p>
<button onclick="window.location.href='/api/auth/microsoft'">Authenticate with Microsoft</button>
</div>
""",
"text/html"
)
async def _parseSearchQuery(self, prompt: str) -> Dict[str, Any]:
"""
Parse the search query from the prompt using AI.
Args:
prompt: The task prompt
Returns:
Dictionary containing search parameters
"""
try:
# Use AI to parse the search query
response = await self.service.base.callAi([
{"role": "system", "content": "You are a SharePoint search query parser. Extract search parameters from the user's request."},
{"role": "user", "content": f"""
Parse the following SharePoint search request into structured parameters:
{prompt}
Return a JSON object with these fields:
- query: The main search query
- site: Optional SharePoint site name
- folder: Optional folder path
- fileTypes: List of file types to search for
- dateRange: Optional date range for filtering
- maxResults: Maximum number of results to return
Only return valid JSON. No preamble or explanations.
"""}
])
# Extract JSON from response
jsonStart = response.find('{')
jsonEnd = response.rfind('}') + 1
if jsonStart >= 0 and jsonEnd > jsonStart:
return json.loads(response[jsonStart:jsonEnd])
else:
# Fallback to simple query
return {
"query": prompt,
"maxResults": 10
}
except Exception as e:
logger.warning(f"Error parsing search query: {str(e)}")
return {
"query": prompt,
"maxResults": 10
}
async def _searchSharePointDocuments(self, searchParams: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Search SharePoint documents using Microsoft Graph API.
Args:
searchParams: Search parameters
Returns:
List of search results
"""
try:
# Get Microsoft token
token = self.service.msft.getMsftToken()
if not token:
return []
# Prepare search query
query = searchParams.get("query", "")
site = searchParams.get("site", "")
folder = searchParams.get("folder", "")
fileTypes = searchParams.get("fileTypes", [])
maxResults = searchParams.get("maxResults", 10)
# Build search URL
searchUrl = "https://graph.microsoft.com/v1.0/sites/root/drives"
if site:
searchUrl = f"https://graph.microsoft.com/v1.0/sites/{site}/drives"
# Get drives (document libraries)
response = self.service.msft.makeGraphRequest("GET", searchUrl)
if not response or "value" not in response:
return []
results = []
for drive in response["value"]:
# Search in each drive
driveId = drive["id"]
searchEndpoint = f"https://graph.microsoft.com/v1.0/drives/{driveId}/root/search(q='{query}')"
# Add file type filters if specified
if fileTypes:
typeFilter = " or ".join([f"fileType eq '{ft}'" for ft in fileTypes])
searchEndpoint += f"&filter={typeFilter}"
# Add folder filter if specified
if folder:
searchEndpoint += f"&filter=parentReference/path eq '/{folder}'"
# Add result limit
searchEndpoint += f"&top={maxResults}"
# Make the search request
searchResponse = self.service.msft.makeGraphRequest("GET", searchEndpoint)
if searchResponse and "value" in searchResponse:
for item in searchResponse["value"]:
# Get file content
fileContent = await self._getFileContent(driveId, item["id"])
results.append({
"name": item["name"],
"id": item["id"],
"driveId": driveId,
"webUrl": item["webUrl"],
"lastModified": item["lastModifiedDateTime"],
"size": item["size"],
"content": fileContent
})
return results
except Exception as e:
logger.error(f"Error searching SharePoint: {str(e)}")
return []
async def _getFileContent(self, driveId: str, fileId: str) -> str:
"""
Get file content from SharePoint.
Args:
driveId: Drive ID
fileId: File ID
Returns:
File content as string
"""
try:
# Get file content URL
contentUrl = f"https://graph.microsoft.com/v1.0/drives/{driveId}/items/{fileId}/content"
# Download file content
response = self.service.msft.makeGraphRequest("GET", contentUrl, raw=True)
if response:
return response.decode('utf-8')
return ""
except Exception as e:
logger.error(f"Error getting file content: {str(e)}")
return ""
def _createSearchSummaryJson(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
"""Create a JSON summary of search results."""
summary = {
"description": description,
"totalResults": len(results),
"results": []
}
for result in results:
summary["results"].append({
"name": result["name"],
"url": result["webUrl"],
"lastModified": result["lastModified"],
"size": result["size"]
})
return self.formatAgentDocumentOutput(
"sharepoint_search_results.json",
json.dumps(summary, indent=2),
"application/json"
)
def _createSearchSummaryCsv(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
"""Create a CSV summary of search results."""
csvLines = ["Name,URL,Last Modified,Size (bytes)"]
for result in results:
name = result["name"].replace('"', '""')
url = result["webUrl"].replace('"', '""')
lastModified = result["lastModified"].replace('"', '""')
size = str(result["size"])
csvLines.append(f'"{name}","{url}","{lastModified}",{size}')
return self.formatAgentDocumentOutput(
"sharepoint_search_results.csv",
"\n".join(csvLines),
"text/csv"
)
def _createSearchSummaryText(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
"""Create a text summary of search results."""
textLines = [
f"SharePoint Search Results",
f"Description: {description}",
f"Total Results: {len(results)}",
"\nResults:"
]
for result in results:
textLines.extend([
f"\nName: {result['name']}",
f"URL: {result['webUrl']}",
f"Last Modified: {result['lastModified']}",
f"Size: {result['size']} bytes"
])
return self.formatAgentDocumentOutput(
"sharepoint_search_results.txt",
"\n".join(textLines),
"text/plain"
)
def getAgentSharepoint() -> AgentSharepoint:
    """Factory function: build a fresh AgentSharepoint instance and hand it back."""
    agent = AgentSharepoint()
    return agent

View file

@ -1,814 +0,0 @@
"""
Web crawler agent for gathering and analyzing web content.
Provides web research and content extraction capabilities.
"""
import logging
import json
import re
import time
import os
from typing import Dict, Any, List
from urllib.parse import quote_plus, unquote
from bs4 import BeautifulSoup
import requests
import markdown
from modules.workflow.agentBase import AgentBase
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
class AgentWebcrawler(AgentBase):
"""AI-driven agent for web research and information retrieval"""
def __init__(self):
    """Set the agent's identity and load the SerpAPI crawling settings from APP_CONFIG."""
    super().__init__()
    self.name = "webcrawler"
    self.label = "Web Crawler"
    self.description = "Gathers and analyzes web content using AI with multi-step research"
    self.capabilities = [
        "web_research",
        "content_gathering",
        "data_extraction",
        "information_synthesis",
        "source_verification"
    ]

    def intSetting(key, default):
        # Numeric settings are read with a string default and converted to int
        return int(APP_CONFIG.get(key, default))

    # Web crawling configuration
    self.srcApikey = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_APIKEY","")
    self.srcEngine = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_ENGINE","google")
    self.srcCountry = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_COUNTRY","auto")
    self.maxUrl = intSetting("Agent_Webcrawler_SERPAPI_MAX_URLS", "5")
    self.maxSearchTerms = intSetting("Agent_Webcrawler_SERPAPI_MAX_SEARCH_KEYWORDS", "3")
    self.maxResults = intSetting("Agent_Webcrawler_SERPAPI_MAX_SEARCH_RESULTS", "5")
    self.timeout = intSetting("Agent_Webcrawler_SERPAPI_TIMEOUT", "30")
    self.userAgent = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")

    # A missing key is only reported here; _searchWeb returns no results without it
    if not self.srcApikey:
        logger.error("SerpAPI key not configured")
def setDependencies(self, serviceBase=None):
    """
    Hand the external service object to the agent.

    Args:
        serviceBase: Service object forwarded unchanged to setService (default None)
    """
    self.setService(serviceBase)
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
    """
    Plan the research, gather web material and turn it into the requested documents.

    Args:
        task: Task dictionary with prompt, inputDocuments, outputSpecifications
            and an optional context["workflow"] used for progress logging
    Returns:
        Dictionary with feedback and documents
    """
    try:
        prompt = task.get("prompt", "")
        inputDocuments = task.get("inputDocuments", [])
        outputSpecs = task.get("outputSpecifications", [])
        workflow = task.get("context", {}).get("workflow", {})

        def announce(message, progress):
            # Progress is only logged when the task carries a workflow
            if workflow:
                self.service.logAdd(workflow, message, level="info", progress=progress)

        if not (self.service and self.service.base):
            return {
                "feedback": "The Web Crawler agent requires an AI service to function.",
                "documents": []
            }

        announce("Creating research plan...", 35)
        researchPlan = await self._createResearchPlan(prompt)

        # The plan itself states whether web research is appropriate for this task
        if not researchPlan.get("requiresWebResearch", True):
            return {
                "feedback": "This task doesn't appear to require web research. Please try a different agent.",
                "documents": []
            }

        announce("Gathering research material...", 45)
        rawResults = await self._gatherResearchMaterial(researchPlan, workflow)

        announce("Creating output documents...", 55)
        documents = await self._createOutputDocuments(prompt, rawResults, outputSpecs, researchPlan)

        # The plan's own feedback wins; otherwise a generic sentence is used
        genericFeedback = f"I conducted web research on '{prompt[:50]}...' and gathered information from {len(rawResults)} relevant sources."
        return {
            "feedback": researchPlan.get("feedback", genericFeedback),
            "documents": documents
        }
    except Exception as e:
        logger.error(f"Error during web research: {str(e)}", exc_info=True)
        return {
            "feedback": f"Error during web research: {str(e)}",
            "documents": []
        }
async def _createResearchPlan(self, prompt: str) -> Dict[str, Any]:
"""
Use AI to create a detailed research plan.
Args:
prompt: The research query
Returns:
Research plan dictionary
"""
researchPrompt = f"""
Create a detailed web research plan for this task: "{prompt}"
Analyze the request carefully and create a structured plan in JSON format with the following elements:
{{
"requiresWebResearch": true/false, # Whether this genuinely requires web research
"researchQuestions": ["question1", "question2", ...], # 2-4 specific questions to answer
"searchTerms": ["term1", "term2", ...], # Up to {self.maxSearchTerms} effective search terms
"directUrls": ["url1", "url2", ...], # Any URLs directly mentioned in the request (up to {self.maxUrl})
"expectedSources": ["type1", "type2", ...], # Types of sources that would be most valuable
"contentFocus": "what specific content to extract or focus on",
"feedback": "explanation of how the research will be conducted"
}}
Respond with ONLY the JSON object, no additional text or explanations.
"""
try:
# Get research plan from AI
response = await self.service.base.callAi([
{"role": "system", "content": "You are a research expert. Respond with valid JSON only."},
{"role": "user", "content": researchPrompt}
])
# Extract JSON
jsonStart = response.find('{')
jsonEnd = response.rfind('}') + 1
if jsonStart >= 0 and jsonEnd > jsonStart:
plan = json.loads(response[jsonStart:jsonEnd])
# Ensure we have the expected fields with defaults if missing
if "searchTerms" not in plan:
plan["searchTerms"] = [prompt]
if "directUrls" not in plan:
plan["directUrls"] = []
if "researchQuestions" not in plan:
plan["researchQuestions"] = ["What information can be found about this topic?"]
return plan
else:
# Fallback plan
logger.warning(f"Not able creating research plan, generating fallback plan")
return {
"requiresWebResearch": True,
"researchQuestions": ["What information can be found about this topic?"],
"searchTerms": [prompt],
"directUrls": [],
"expectedSources": ["Web pages", "Articles"],
"contentFocus": "Relevant information about the topic",
"feedback": f"I'll conduct web research on '{prompt}' and gather relevant information."
}
except Exception as e:
logger.warning(f"Error creating research plan: {str(e)}")
# Simple fallback plan
return {
"requiresWebResearch": True,
"researchQuestions": ["What information can be found about this topic?"],
"searchTerms": [prompt],
"directUrls": [],
"expectedSources": ["Web pages", "Articles"],
"contentFocus": "Relevant information about the topic",
"feedback": f"I'll conduct web research on '{prompt}' and gather relevant information."
}
async def _gatherResearchMaterial(self, researchPlan: Dict[str, Any], workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Gather research material based on the research plan.
Args:
researchPlan: Research plan dictionary
workflow: Current workflow object
Returns:
List of research results
"""
allResults = []
# Process direct URLs
directUrls = researchPlan.get("directUrls", [])[:self.maxUrl]
for i, url in enumerate(directUrls):
progress = 45 + int((i / len(directUrls)) * 5) # Progress from 45% to 50%
self.service.logAdd(workflow, f"Processing direct URL {i+1}/{len(directUrls)}...", level="info", progress=progress)
logger.info(f"Processing direct URL: {url}")
try:
# Fetch and extract content
soup = self._readUrl(url)
if soup:
# Extract title and content
title = self._extractTitle(soup, url)
content = self._extractMainContent(soup)
# Add to results
allResults.append({
"title": title,
"url": url,
"sourceType": "directUrl",
"content": content,
"summary": "" # Will be filled later
})
except Exception as e:
logger.warning(f"Error processing URL {url}: {str(e)}")
# Process search terms
searchTerms = researchPlan.get("searchTerms", [])[:self.maxSearchTerms]
for i, term in enumerate(searchTerms):
progress = 50 + int((i / len(searchTerms)) * 5) # Progress from 50% to 55%
self.service.logAdd(workflow, f"Searching term {i+1}/{len(searchTerms)}...", level="info", progress=progress)
logger.info(f"Searching for: {term}")
try:
# Perform search
searchResults = self._searchWeb(term)
# Process each search result
for result in searchResults:
# Check if URL is already in results
if not any(r["url"] == result["url"] for r in allResults):
allResults.append({
"title": result["title"],
"url": result["url"],
"sourceType": "searchResult",
"content": result["data"],
"snippet": result["snippet"],
"summary": "" # Will be filled later
})
# Stop if we've reached the maximum results
if len(allResults) >= self.maxResults:
break
except Exception as e:
logger.warning(f"Error searching for {term}: {str(e)}")
# Stop if we've reached the maximum results
if len(allResults) >= self.maxResults:
break
# Create summaries for all results
allResults = await self._summarizeAllResults(allResults, researchPlan)
return allResults
async def _summarizeAllResults(self, results: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Create summaries for all research results.
Args:
results: List of research results
researchPlan: Research plan with questions and focus
Returns:
Results with added summaries
"""
for i, result in enumerate(results):
logger.info(f"Summarizing result {i+1}/{len(results)}: {result['title'][:30]}...")
try:
# Limit content length to avoid token issues
content = self._limitText(result.get("content", ""), maxChars=8000)
researchQuestions = researchPlan.get("researchQuestions", ["What relevant information does this page contain?"])
contentFocus = researchPlan.get("contentFocus", "Relevant information")
# Create summary using AI
summaryPrompt = f"""
Summarize this web page content based on these research questions:
{', '.join(researchQuestions)}
Focus on: {contentFocus}
Web page: {result['url']}
Title: {result['title']}
Content:
{content}
Create a concise summary that:
1. Directly answers the research questions if possible
2. Extracts the most relevant information from the page
3. Includes specific facts, figures, or quotes if available
4. Is around 2000 characters long
Only include information actually found in the content. No fabrications or assumptions.
"""
# Get summary from AI
summary = await self.service.base.callAi([
{"role": "system", "content": "You are a research expert. Respond with valid JSON only."},
{"role": "user", "content": summaryPrompt}
])
# Add summary to result
result["summary"] = summary.strip()
except Exception as e:
logger.warning(f"Error summarizing result {i+1}: {str(e)}")
result["summary"] = f"Error creating summary: {str(e)}"
return results
async def _createOutputDocuments(self, prompt: str, results: List[Dict[str, Any]],
outputSpecs: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Create output documents based on research results and specifications.
Args:
prompt: Original research prompt
results: List of research results
outputSpecs: Output specifications
researchPlan: Research plan
Returns:
List of output documents
"""
# If no output specs provided, create default output
if not outputSpecs:
outputSpecs = [{
"label": "webResearchResults.md",
"description": "Comprehensive web research results"
}]
# Generate documents
documents = []
# Process each output specification
for spec in outputSpecs:
outputLabel = spec.get("label", "")
outputDescription = spec.get("description", "")
# Determine format based on file extension
formatType = self._determineFormatType(outputLabel)
# Create appropriate document based on format
if formatType == "json":
# JSON output - structured data
document = await self._createJsonDocument(prompt, results, researchPlan, outputLabel)
elif formatType == "csv":
# CSV output - tabular data
document = await self._createCsvDocument(results, outputLabel)
else:
# Text-based output (markdown, html, text) - narrative report
document = await self._createNarrativeDocument(
prompt, results, researchPlan, formatType, outputLabel, outputDescription
)
documents.append(document)
return documents
async def _createNarrativeDocument(self, prompt: str, results: List[Dict[str, Any]],
researchPlan: Dict[str, Any], formatType: str,
outputLabel: str, outputDescription: str) -> Dict[str, Any]:
"""
Create a narrative document (markdown, html, text) from research results.
Args:
prompt: Original research prompt
results: Research results
researchPlan: Research plan
formatType: Output format (markdown, html, text)
outputLabel: Output filename
outputDescription: Output description
Returns:
Document object
"""
# Create content based on format
if formatType == "markdown":
contentType = "text/markdown"
templateFormat = "markdown"
elif formatType == "html":
contentType = "text/html"
templateFormat = "html"
else:
contentType = "text/plain"
templateFormat = "text"
# Prepare research context
researchQuestions = researchPlan.get("researchQuestions", [])
searchTerms = researchPlan.get("searchTerms", [])
# Create document structure based on results
sourcesSummary = []
for result in results:
sourcesSummary.append({
"title": result.get("title", "Untitled"),
"url": result.get("url", ""),
"summary": result.get("summary", ""),
"snippet": result.get("snippet", "")
})
# Truncate content for prompt
sourcesJson = json.dumps(sourcesSummary, indent=2)
if len(sourcesJson) > 10000:
# Logic to truncate each summary while preserving structure
for i in range(len(sourcesSummary)):
if len(sourcesJson) <= 10000:
break
# Gradually truncate summaries
sourcesSummary[i]["summary"] = sourcesSummary[i]["summary"][:500] + "..."
sourcesJson = json.dumps(sourcesSummary, indent=2)
# Create report prompt
reportPrompt = f"""
Create a comprehensive {formatType} research report based on the following web research:
TASK: {prompt}
RESEARCH QUESTIONS:
{', '.join(researchQuestions)}
SEARCH TERMS USED:
{', '.join(searchTerms)}
SOURCES AND FINDINGS:
{sourcesJson}
REPORT DETAILS:
- Format: {templateFormat}
- Filename: {outputLabel}
- Description: {outputDescription}
Create a well-structured report that:
1. Includes an executive summary of key findings
2. Addresses each research question directly
3. Integrates information from all relevant sources
4. Cites sources appropriately for each piece of information
5. Provides a comprehensive synthesis of the research
6. Is formatted professionally and appropriately for {templateFormat}
The report should be scholarly, accurate, and focused on the original research task.
"""
try:
# Generate report with AI
reportContent = await self.service.base.callAi([
{"role": "system", "content": "You are a research expert. Respond with valid JSON only."},
{"role": "user", "content": reportPrompt}
])
# Convert to HTML if needed
if formatType == "html" and not reportContent.lower().startswith("<html"):
# Check if it's markdown that needs conversion
if reportContent.startswith("#"):
reportContent = markdown.markdown(reportContent)
# Wrap in basic HTML structure if needed
if not reportContent.lower().startswith("<html"):
reportContent = f"<html><head><title>Web Research Results</title></head><body>{reportContent}</body></html>"
return self.formatAgentDocumentOutput(outputLabel, reportContent, contentType)
except Exception as e:
logger.error(f"Error creating narrative document: {str(e)}")
# Create error document
if formatType == "markdown":
content = f"# Web Research Error\n\nAn error occurred: {str(e)}"
elif formatType == "html":
content = f"<html><body><h1>Web Research Error</h1><p>An error occurred: {str(e)}</p></body></html>"
else:
content = f"WEB RESEARCH ERROR\n\nAn error occurred: {str(e)}"
return self.formatAgentDocumentOutput(outputLabel, content, contentType)
async def _createJsonDocument(self, prompt: str, results: List[Dict[str, Any]],
researchPlan: Dict[str, Any], outputLabel: str) -> Dict[str, Any]:
"""
Create a JSON document from research results.
Args:
prompt: Original research prompt
results: Research results
researchPlan: Research plan
outputLabel: Output filename
Returns:
Document object
"""
try:
# Create structured data
sourcesData = []
for result in results:
sourcesData.append({
"title": result.get("title", "Untitled"),
"url": result.get("url", ""),
"summary": result.get("summary", ""),
"snippet": result.get("snippet", ""),
"sourceType": result.get("sourceType", "")
})
# Create metadata
metadata = {
"query": prompt,
"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
"researchQuestions": researchPlan.get("researchQuestions", []),
"searchTerms": researchPlan.get("searchTerms", [])
}
# Compile complete report object
jsonContent = {
"metadata": metadata,
"summary": researchPlan.get("feedback", "Web research results"),
"sources": sourcesData
}
# Convert to JSON string
content = json.dumps(jsonContent, indent=2)
return self.formatAgentDocumentOutput(outputLabel, content, "application/json")
except Exception as e:
logger.error(f"Error creating JSON document: {str(e)}")
return self.formatAgentDocumentOutput(outputLabel, json.dumps({"error": str(e)}), "application/json")
async def _createCsvDocument(self, results: List[Dict[str, Any]], outputLabel: str) -> Dict[str, Any]:
"""
Create a CSV document from research results.
Args:
results: Research results
outputLabel: Output filename
Returns:
Document object
"""
try:
# Create CSV header
csvLines = ["Title,URL,Source Type,Snippet"]
# Add results
for result in results:
# Escape CSV fields
title = result.get("title", "").replace('"', '""')
url = result.get("url", "").replace('"', '""')
sourceType = result.get("sourceType", "").replace('"', '""')
snippet = result.get("snippet", "").replace('"', '""')
csvLines.append(f'"{title}","{url}","{sourceType}","{snippet}"')
# Combine into CSV content
content = "\n".join(csvLines)
return self.formatAgentDocumentOutput(outputLabel, content, "text/csv")
except Exception as e:
logger.error(f"Error creating CSV document: {str(e)}")
return self.formatAgentDocumentOutput(outputLabel, "Error,Error\nFailed to create CSV,{0}".format(str(e)), "text/csv")
def _determineFormatType(self, outputLabel: str) -> str:
"""
Determine the format type based on the filename.
Args:
outputLabel: Output filename
Returns:
Format type (markdown, html, text, json, csv)
"""
outputLabelLower = outputLabel.lower()
if outputLabelLower.endswith(".md"):
return "markdown"
elif outputLabelLower.endswith(".html"):
return "html"
elif outputLabelLower.endswith(".txt"):
return "text"
elif outputLabelLower.endswith(".json"):
return "json"
elif outputLabelLower.endswith(".csv"):
return "csv"
else:
# Default to markdown
return "markdown"
def _searchWeb(self, query: str) -> List[Dict[str, str]]:
"""
Conduct a web search using SerpAPI and return the results.
Args:
query: The search query
Returns:
List of search results
"""
if not self.srcApikey:
return []
# Get user language from serviceBase if available
userLanguage = "en" # Default language
if self.service.base.userLanguage:
userLanguage = self.service.base.userLanguage
try:
# Format the search request for SerpAPI
params = {
"engine": self.srcEngine,
"q": query,
"api_key": self.srcApikey,
"num": self.maxResults, # Number of results to return
"hl": userLanguage # Identified user language
}
# Make the API request
response = requests.get("https://serpapi.com/search", params=params, timeout=self.timeout)
response.raise_for_status()
# Parse JSON response
search_results = response.json()
# Extract organic results
results = []
if "organic_results" in search_results:
for result in search_results["organic_results"][:self.maxResults]:
# Extract title
title = result.get("title", "No title")
# Extract URL
url = result.get("link", "No URL")
# Extract snippet
snippet = result.get("snippet", "No description")
# Get actual page content
try:
targetPageSoup = self._readUrl(url)
content = self._extractMainContent(targetPageSoup)
except Exception as e:
logger.warning(f"Error extracting content from {url}: {str(e)}")
content = f"Error extracting content: {str(e)}"
results.append({
'title': title,
'url': url,
'snippet': snippet,
'data': content
})
# Limit number of results
if len(results) >= self.maxResults:
break
else:
logger.warning(f"No organic results found in SerpAPI response for: {query}")
return results
except Exception as e:
logger.error(f"Error searching with SerpAPI for {query}: {str(e)}")
return []
def _readUrl(self, url: str) -> BeautifulSoup:
"""
Read a URL and return a BeautifulSoup parser for the content.
Args:
url: The URL to read
Returns:
BeautifulSoup object with the content or None on errors
"""
if not url or not url.startswith(('http://', 'https://')):
return None
headers = {
'User-Agent': self.userAgent,
'Accept': 'text/html,application/xhtml+xml,application/xml',
'Accept-Language': 'en-US,en;q=0.9',
}
try:
# Initial request
response = requests.get(url, headers=headers, timeout=self.timeout)
# Handling for status 202
if response.status_code == 202:
# Retry with backoff
backoffTimes = [0.5, 1.0, 2.0, 5.0]
for waitTime in backoffTimes:
time.sleep(waitTime)
response = requests.get(url, headers=headers, timeout=self.timeout)
if response.status_code != 202:
break
# Raise for error status codes
response.raise_for_status()
# Parse HTML
return BeautifulSoup(response.text, 'html.parser')
except Exception as e:
logger.error(f"Error reading URL {url}: {str(e)}")
return None
def _extractTitle(self, soup: BeautifulSoup, url: str) -> str:
"""
Extract the title from a webpage.
Args:
soup: BeautifulSoup object of the webpage
url: URL of the webpage
Returns:
Extracted title
"""
if not soup:
return f"Error with {url}"
# Extract title from title tag
titleTag = soup.find('title')
title = titleTag.text.strip() if titleTag else "No title"
# Alternative: Also look for h1 tags if title tag is missing
if title == "No title":
h1Tag = soup.find('h1')
if h1Tag:
title = h1Tag.text.strip()
return title
def _extractMainContent(self, soup: BeautifulSoup, maxChars: int = 10000) -> str:
"""
Extract the main content from an HTML page.
Args:
soup: BeautifulSoup object of the webpage
maxChars: Maximum number of characters
Returns:
Extracted main content as a string
"""
if not soup:
return ""
# Try to find main content elements in priority order
mainContent = None
for selector in ['main', 'article', '#content', '.content', '#main', '.main']:
content = soup.select_one(selector)
if content:
mainContent = content
break
# If no main content found, use the body
if not mainContent:
mainContent = soup.find('body') or soup
# Remove script, style, nav, footer elements that don't contribute to main content
for element in mainContent.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'):
element.extract()
# Extract text content
textContent = mainContent.get_text(separator=' ', strip=True)
# Limit to maxChars
return textContent[:maxChars]
def _limitText(self, text: str, maxChars: int = 10000) -> str:
"""
Limit text to a maximum number of characters.
Args:
text: Input text
maxChars: Maximum number of characters
Returns:
Limited text
"""
if not text:
return ""
# If text is already under the limit, return unchanged
if len(text) <= maxChars:
return text
# Otherwise limit text to maxChars
return text[:maxChars] + "... [Content truncated due to length]"
# Factory function for the Webcrawler agent
def getAgentWebcrawler():
    """
    Build a Webcrawler agent.

    Returns:
        A newly constructed AgentWebcrawler; every call creates a separate
        instance
    """
    webcrawlerAgent = AgentWebcrawler()
    return webcrawlerAgent

View file

@ -12,7 +12,6 @@ from typing import Dict, Any, List, Optional, Union
import hashlib
import asyncio
from modules.shared.mimeUtils import isTextMimeType
from modules.interfaces.serviceChatAccess import ChatAccess
from modules.interfaces.serviceChatModel import (
ChatContent, ChatDocument, ChatStat, ChatMessage,
@ -20,6 +19,7 @@ from modules.interfaces.serviceChatModel import (
Task, TaskPlan, UserInputRequest
)
from modules.interfaces.serviceAppModel import User
from modules.workflow.managerDocument import DocumentManager
# DYNAMIC PART: Connectors to the Interface
from modules.connectors.connectorDbJson import DatabaseConnector
@ -46,6 +46,9 @@ class ChatInterface:
self.mandateId = currentUser.mandateId if currentUser else None
self.access = None # Will be set when user context is provided
# Initialize services
self._initializeServices()
# Initialize database
self._initializeDatabase()
@ -53,6 +56,10 @@ class ChatInterface:
if currentUser:
self.setUserContext(currentUser)
def _initializeServices(self):
    """
    Initialize service dependencies.

    Creates the DocumentManager for this interface and stores it as
    self.documentManager.
    """
    # NOTE(review): reads self.service, so that attribute has to exist before
    # this method is called - confirm it is assigned earlier in __init__
    documentManager = DocumentManager(self.service)
    self.documentManager = documentManager
def setUserContext(self, currentUser: User):
"""Sets the user context for the interface."""
if not currentUser:
@ -380,23 +387,9 @@ class ChatInterface:
messageData["id"] = f"msg_{uuid.uuid4()}"
logger.warning(f"Automatically generated ID for workflow message: {messageData['id']}")
# Ensure required fields are present
if "startedAt" not in messageData and "createdAt" not in messageData:
messageData["startedAt"] = self._getCurrentTimestamp()
if "createdAt" in messageData and "startedAt" not in messageData:
messageData["startedAt"] = messageData["createdAt"]
del messageData["createdAt"]
# Set status if not present
if "status" not in messageData:
messageData["status"] = "completed"
# Set sequence number if not present
if "sequenceNo" not in messageData:
# Get current messages to determine next sequence number
existingMessages = self.getWorkflowMessages(workflowId)
messageData["sequenceNo"] = len(existingMessages) + 1
messageData["status"] = "step" # Default status for intermediate messages
# Ensure role and agentName are present
if "role" not in messageData:
@ -427,10 +420,9 @@ class ChatInterface:
documents=[ChatDocument(**doc) for doc in createdMessage.get("documents", [])],
message=createdMessage.get("message"),
role=createdMessage.get("role", "assistant"),
status=createdMessage.get("status", "completed"),
sequenceNr=createdMessage.get("sequenceNo", 0),
startedAt=createdMessage.get("startedAt", self._getCurrentTimestamp()),
finishedAt=createdMessage.get("finishedAt"),
status=createdMessage.get("status", "step"),
sequenceNr=len(messageIds), # Set sequence number based on message position
publishedAt=createdMessage.get("publishedAt", self._getCurrentTimestamp()),
stats=ChatStat(**createdMessage.get("stats", {})) if createdMessage.get("stats") else None
)
except Exception as e:
@ -848,7 +840,6 @@ class ChatInterface:
async def workflowStart(self, userInput: UserInputRequest, workflowId: Optional[str] = None) -> ChatWorkflow:
"""
Starts a new workflow or continues an existing one.
Corresponds to State 1 in the state machine documentation.
Args:
userInput: The user input request containing workflow initialization data
@ -861,29 +852,40 @@ class ChatInterface:
# Get current timestamp
currentTime = self._getCurrentTimestamp()
# Process files if any
documents = []
if userInput.listFileId:
documents = await self._processFileIds(userInput.listFileId)
# Create initial message
initialMessage = ChatMessage(
id=str(uuid.uuid4()),
role="user",
content=userInput.prompt,
timestamp=currentTime,
documents=documents
)
if workflowId:
# Continue existing workflow
workflow = self.getWorkflow(workflowId)
if not workflow:
raise ValueError(f"Workflow {workflowId} not found")
# Update workflow status
workflow.status = "running"
workflow.lastActivity = currentTime
# Update in database
self.updateWorkflow(workflowId, {
"status": "running",
"lastActivity": currentTime
# Add message to workflow
self.createWorkflowMessage({
"workflowId": workflowId,
"messageId": initialMessage.id,
"role": initialMessage.role,
"content": initialMessage.content,
"timestamp": initialMessage.timestamp,
"documents": [doc.dict() for doc in initialMessage.documents]
})
# Add log entry
self.createWorkflowLog({
"workflowId": workflowId,
"message": "Workflow continued",
"type": "info",
"status": "running",
"progress": 0
# Update workflow
self.updateWorkflow(workflowId, {
"lastActivity": currentTime,
"currentRound": workflow.currentRound + 1
})
else:
@ -895,10 +897,10 @@ class ChatInterface:
"lastActivity": currentTime,
"currentRound": 1,
"mandateId": self.mandateId,
"messageIds": [],
"messageIds": [initialMessage.id],
"dataStats": {
"totalMessages": 0,
"totalDocuments": 0,
"totalMessages": 1,
"totalDocuments": len(documents),
"totalTokens": 0
}
}
@ -906,6 +908,16 @@ class ChatInterface:
# Create workflow
workflow = self.createWorkflow(workflowData)
# Add initial message
self.createWorkflowMessage({
"workflowId": workflow.id,
"messageId": initialMessage.id,
"role": initialMessage.role,
"content": initialMessage.content,
"timestamp": initialMessage.timestamp,
"documents": [doc.dict() for doc in initialMessage.documents]
})
# Add log entry
self.createWorkflowLog({
"workflowId": workflow.id,
@ -916,8 +928,8 @@ class ChatInterface:
})
# Start workflow processing
from modules.workflow.workflowManager import getWorkflowManager
workflowManager = await getWorkflowManager(self)
from modules.workflow.managerWorkflow import WorkflowManager
workflowManager = WorkflowManager(self)
asyncio.create_task(workflowManager.workflowProcess(userInput, workflow))
return workflow
@ -979,30 +991,22 @@ class ChatInterface:
"""
documents = []
for fileId in fileIds:
try:
# Get file content
fileContent = self.service.functions.getFileData(fileId)
if not fileContent:
continue
# Get file metadata
fileMetadata = self.service.functions.getFile(fileId)
if not fileMetadata:
continue
# Create ChatDocument
document = ChatDocument(
id=str(uuid.uuid4()),
fileId=fileId,
filename=fileMetadata.get("name", "Unknown"),
fileSize=fileMetadata.get("size", 0),
content=fileContent.decode('utf-8', errors='ignore'),
mimeType=fileMetadata.get("mimeType", "text/plain")
)
documents.append(document)
except Exception as e:
logger.error(f"Error processing file {fileId}: {str(e)}")
# Get file metadata
fileMetadata = self.service.functions.getFile(fileId)
if not fileMetadata:
logger.warning(f"File metadata not found for {fileId}")
continue
# Create ChatDocument
document = ChatDocument(
id=str(uuid.uuid4()),
fileId=fileId,
filename=fileMetadata.get("name", "Unknown"),
fileSize=fileMetadata.get("size", 0),
mimeType=fileMetadata.get("mimeType", "text/plain")
)
documents.append(document)
return documents

View file

@ -4,12 +4,38 @@ Chat model classes for the chat system.
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional, Union
from datetime import datetime
from datetime import datetime, UTC
import uuid
from enum import Enum
from modules.shared.attributeUtils import register_model_labels, ModelMixin
# ENUMS
class TaskStatus(str, Enum):
    """
    Task status enumeration.

    Members are also str instances, so each compares equal to its lowercase
    string value.
    """

    # Not finished yet
    PENDING = "pending"
    RUNNING = "running"

    # Finished states
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
    ROLLED_BACK = "rolled_back"
# Register labels for TaskStatus
# Each status gets its own label per language; ROLLED_BACK previously shared
# the French label "Annulé" with CANCELLED, making the two indistinguishable.
register_model_labels(
    "TaskStatus",
    {"en": "Task Status", "fr": "Statut de la tâche"},
    {
        "PENDING": {"en": "Pending", "fr": "En attente"},
        "RUNNING": {"en": "Running", "fr": "En cours"},
        "COMPLETED": {"en": "Completed", "fr": "Terminé"},
        "FAILED": {"en": "Failed", "fr": "Échec"},
        "CANCELLED": {"en": "Cancelled", "fr": "Annulé"},
        "ROLLED_BACK": {"en": "Rolled Back", "fr": "Restauré"}
    }
)
# USER MODELS
class UserInputRequest(BaseModel, ModelMixin):
@ -28,24 +54,49 @@ register_model_labels(
}
)
# WORKFLOW MODELS
# DOCUMENT MODELS
class ChatContent(BaseModel, ModelMixin):
"""Data model for chat content"""
sequenceNr: int = Field(description="Sequence number of the content")
name: str = Field(description="Name of the content")
data: str = Field(description="The actual content data")
mimeType: str = Field(description="MIME type of the content")
metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata")
# Register labels for ChatContent
class ContentMetadata(BaseModel, ModelMixin):
"""Metadata for content items"""
size: int = Field(description="Content size in bytes")
pages: Optional[int] = Field(None, description="Number of pages for multi-page content")
error: Optional[str] = Field(None, description="Processing error if any")
# Media-specific attributes
width: Optional[int] = Field(None, description="Width in pixels for images/videos")
height: Optional[int] = Field(None, description="Height in pixels for images/videos")
colorMode: Optional[str] = Field(None, description="Color mode (e.g., RGB, CMYK, grayscale)")
fps: Optional[float] = Field(None, description="Frames per second for videos")
durationSec: Optional[float] = Field(None, description="Duration in seconds for videos/audio")
# Register labels for ContentMetadata
register_model_labels(
"ChatContent",
{"en": "Chat Content", "fr": "Contenu de chat"},
"ContentMetadata",
{"en": "Content Metadata", "fr": "Métadonnées du contenu"},
{
"sequenceNr": {"en": "Sequence Number", "fr": "Numéro de séquence"},
"name": {"en": "Name", "fr": "Nom"},
"size": {"en": "Size", "fr": "Taille"},
"pages": {"en": "Pages", "fr": "Pages"},
"error": {"en": "Error", "fr": "Erreur"},
"width": {"en": "Width", "fr": "Largeur"},
"height": {"en": "Height", "fr": "Hauteur"},
"colorMode": {"en": "Color Mode", "fr": "Mode de couleur"},
"fps": {"en": "FPS", "fr": "IPS"},
"durationSec": {"en": "Duration", "fr": "Durée"}
}
)
class ContentItem(BaseModel, ModelMixin):
"""Individual content item from a document"""
label: str = Field(description="Content label (e.g., tab name, tag name)")
data: str = Field(description="Extracted text content")
metadata: ContentMetadata = Field(description="Content metadata")
# Register labels for ContentItem
register_model_labels(
"ContentItem",
{"en": "Content Item", "fr": "Élément de contenu"},
{
"label": {"en": "Label", "fr": "Étiquette"},
"data": {"en": "Data", "fr": "Données"},
"mimeType": {"en": "MIME Type", "fr": "Type MIME"},
"metadata": {"en": "Metadata", "fr": "Métadonnées"}
}
)
@ -57,7 +108,7 @@ class ChatDocument(BaseModel, ModelMixin):
filename: str = Field(description="Name of the file")
fileSize: int = Field(description="Size of the file")
mimeType: str = Field(description="MIME type of the file")
contents: List[ChatContent] = Field(default_factory=list, description="List of chat contents")
# Register labels for ChatDocument
register_model_labels(
"ChatDocument",
@ -67,11 +118,50 @@ register_model_labels(
"fileId": {"en": "File ID", "fr": "ID du fichier"},
"filename": {"en": "Filename", "fr": "Nom de fichier"},
"fileSize": {"en": "File Size", "fr": "Taille du fichier"},
"mimeType": {"en": "MIME Type", "fr": "Type MIME"}
}
)
class TaskDocument(BaseModel, ModelMixin):
"""Data model for a task document"""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
data: str = Field(description="Base64 encoded file data")
filename: str = Field(description="Name of the file")
fileSize: int = Field(description="Size of the file")
mimeType: str = Field(description="MIME type of the file")
# Register labels for TaskDocument
register_model_labels(
"TaskDocument",
{"en": "Task Document", "fr": "Document de tâche"},
{
"id": {"en": "ID", "fr": "ID"},
"filename": {"en": "Filename", "fr": "Nom de fichier"},
"fileSize": {"en": "File Size", "fr": "Taille du fichier"},
"mimeType": {"en": "MIME Type", "fr": "Type MIME"},
"data": {"en": "Data", "fr": "Données"}
}
)
class ExtractedContent(BaseModel, ModelMixin):
"""Data model for extracted content"""
objectId: str = Field(description="Reference to source document")
objectType: str = Field(description="Type of source object ('ChatDocument' or 'TaskDocument')")
contents: List[ContentItem] = Field(default_factory=list, description="List of content items")
# Register labels for ExtractedContent
register_model_labels(
"ExtractedContent",
{"en": "Extracted Content", "fr": "Contenu extrait"},
{
"objectId": {"en": "Object ID", "fr": "ID de l'objet"},
"objectType": {"en": "Object Type", "fr": "Type d'objet"},
"contents": {"en": "Contents", "fr": "Contenus"}
}
)
# WORKFLOW MODELS
class ChatStat(BaseModel, ModelMixin):
"""Data model for chat statistics"""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
@ -133,10 +223,9 @@ class ChatMessage(BaseModel, ModelMixin):
documents: List[ChatDocument] = Field(default_factory=list, description="Associated documents")
message: Optional[str] = Field(None, description="Message content")
role: str = Field(description="Role of the message sender")
status: str = Field(description="Status of the message")
sequenceNr: int = Field(description="Sequence number of the message")
startedAt: str = Field(description="When the message processing started")
finishedAt: Optional[str] = Field(None, description="When the message processing finished")
status: str = Field(description="Status of the message (first, step, last)")
sequenceNr: int = Field(description="Sequence number of the message (set automatically)")
publishedAt: str = Field(description="When the message was published")
stats: Optional[ChatStat] = Field(None, description="Statistics for this message")
success: Optional[bool] = Field(None, description="Whether the message processing was successful")
# Register labels for ChatMessage
@ -153,29 +242,92 @@ register_model_labels(
"role": {"en": "Role", "fr": "Rôle"},
"status": {"en": "Status", "fr": "Statut"},
"sequenceNr": {"en": "Sequence Number", "fr": "Numéro de séquence"},
"startedAt": {"en": "Started At", "fr": "Démarré le"},
"finishedAt": {"en": "Finished At", "fr": "Terminé le"},
"publishedAt": {"en": "Published At", "fr": "Publié le"},
"stats": {"en": "Statistics", "fr": "Statistiques"},
"success": {"en": "Success", "fr": "Succès"}
}
)
class AgentTask(BaseModel, ModelMixin):
"""Data model for a task"""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
workflowId: str = Field(description="Foreign key to workflow")
agentName: str = Field(description="Name of the agent assigned to this task")
status: str = Field(description="Current status of the task")
progress: float = Field(description="Task progress (0-100)")
prompt: str = Field(description="Prompt for the task")
userLanguage: str = Field(description="User's preferred language")
filesInput: List[str] = Field(default_factory=list, description="Input files")
filesOutput: List[str] = Field(default_factory=list, description="Output files")
result: Optional[ChatMessage] = Field(None, description="Task result message")
error: Optional[str] = Field(None, description="Error message if failed")
startedAt: str = Field(description="When the task started")
finishedAt: Optional[str] = Field(None, description="When the task finished")
performance: Optional[Dict[str, Any]] = Field(None, description="Performance metrics")
"""Model for agent tasks"""
id: str = Field(..., description="Unique task identifier")
workflowId: str = Field(..., description="Associated workflow ID")
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Current task status")
error: Optional[str] = Field(None, description="Error message if task failed")
startedAt: Optional[datetime] = Field(None, description="Task start timestamp")
finishedAt: Optional[datetime] = Field(None, description="Task completion timestamp")
actionList: List[Dict[str, Any]] = Field(default_factory=list, description="List of actions to execute")
documentsOutput: List[Dict[str, Any]] = Field(default_factory=list, description="Output documents")
retryCount: int = Field(default=0, description="Number of retry attempts")
retryMax: int = Field(default=3, description="Maximum number of retry attempts")
rollbackOnFailure: bool = Field(default=True, description="Whether to rollback on failure")
dependencies: List[str] = Field(default_factory=list, description="List of dependent task IDs")
thisTaskFeedback: Optional[Dict[str, Any]] = Field(None, description="Task feedback data")
def isCompleted(self) -> bool:
"""Check if task is completed"""
return self.status == TaskStatus.COMPLETED
def isFailed(self) -> bool:
"""Check if task has failed"""
return self.status == TaskStatus.FAILED
def canRetry(self) -> bool:
"""Check if task can be retried"""
return self.retryCount < self.retryMax
def start(self) -> None:
"""Start the task"""
self.status = TaskStatus.RUNNING
self.startedAt = datetime.now(UTC)
def complete(self) -> None:
"""Mark task as completed"""
self.status = TaskStatus.COMPLETED
self.finishedAt = datetime.now(UTC)
def fail(self, error: str) -> None:
"""Mark task as failed"""
self.status = TaskStatus.FAILED
self.error = error
self.finishedAt = datetime.now(UTC)
def cancel(self) -> None:
"""Cancel the task"""
self.status = TaskStatus.CANCELLED
self.finishedAt = datetime.now(UTC)
def rollback(self) -> None:
"""Mark task as rolled back"""
self.status = TaskStatus.ROLLED_BACK
self.finishedAt = datetime.now(UTC)
def incrementRetry(self) -> None:
"""Increment retry count"""
self.retryCount += 1
def addDependency(self, taskId: str) -> None:
"""Add a task dependency"""
if taskId not in self.dependencies:
self.dependencies.append(taskId)
def removeDependency(self, taskId: str) -> None:
"""Remove a task dependency"""
if taskId in self.dependencies:
self.dependencies.remove(taskId)
def addAction(self, action: Dict[str, Any]) -> None:
"""Add an action to the task"""
self.actionList.append(action)
def addDocumentOutput(self, document: Dict[str, Any]) -> None:
"""Add an output document"""
self.documentsOutput.append(document)
def setFeedback(self, feedback: Dict[str, Any]) -> None:
"""Set task feedback"""
self.thisTaskFeedback = feedback
# Register labels for AgentTask
register_model_labels(
"AgentTask",
@ -183,42 +335,21 @@ register_model_labels(
{
"id": {"en": "ID", "fr": "ID"},
"workflowId": {"en": "Workflow ID", "fr": "ID du flux de travail"},
"agentName": {"en": "Agent Name", "fr": "Nom de l'agent"},
"status": {"en": "Status", "fr": "Statut"},
"progress": {"en": "Progress", "fr": "Progression"},
"prompt": {"en": "Prompt", "fr": "Invite"},
"userLanguage": {"en": "User Language", "fr": "Langue de l'utilisateur"},
"filesInput": {"en": "Input Files", "fr": "Fichiers d'entrée"},
"filesOutput": {"en": "Output Files", "fr": "Fichiers de sortie"},
"result": {"en": "Result", "fr": "Résultat"},
"error": {"en": "Error", "fr": "Erreur"},
"startedAt": {"en": "Started At", "fr": "Démarré le"},
"finishedAt": {"en": "Finished At", "fr": "Terminé le"},
"performance": {"en": "Performance", "fr": "Performance"}
"actionList": {"en": "Action List", "fr": "Liste d'actions"},
"documentsOutput": {"en": "Output Documents", "fr": "Documents de sortie"},
"retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"},
"retryMax": {"en": "Max Retries", "fr": "Tentatives maximales"},
"rollbackOnFailure": {"en": "Rollback on Failure", "fr": "Annulation en cas d'échec"},
"dependencies": {"en": "Dependencies", "fr": "Dépendances"},
"thisTaskFeedback": {"en": "Task Feedback", "fr": "Retour sur la tâche"}
}
)
class Agent(BaseModel, ModelMixin):
"""Data model for an agent"""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
name: str = Field(description="Name of the agent")
description: str = Field(description="Description of the agent")
capabilities: List[str] = Field(default_factory=list, description="List of agent capabilities")
performance: Optional[Dict[str, Any]] = Field(None, description="Performance metrics")
# Register labels for Agent
register_model_labels(
"Agent",
{"en": "Agent", "fr": "Agent"},
{
"id": {"en": "ID", "fr": "ID"},
"name": {"en": "Name", "fr": "Nom"},
"description": {"en": "Description", "fr": "Description"},
"capabilities": {"en": "Capabilities", "fr": "Capacités"},
"performance": {"en": "Performance", "fr": "Performance"}
}
)
# WORKFLOW MODELS
# WORKFLOW MODEL
class ChatWorkflow(BaseModel, ModelMixin):
"""Data model for a chat workflow"""
@ -251,125 +382,3 @@ register_model_labels(
"tasks": {"en": "Tasks", "fr": "Tâches"}
}
)
# DOCUMENT MODELS
class DocumentExtraction(BaseModel, ModelMixin):
"""Data model for document extraction history"""
timestamp: str = Field(description="Timestamp of extraction")
type: str = Field(description="Type of document")
sections: List[str] = Field(default_factory=list, description="Extracted sections")
metadata: Dict[str, Any] = Field(default_factory=dict, description="Extraction metadata")
# Register labels for DocumentExtraction
register_model_labels(
"DocumentExtraction",
{"en": "Document Extraction", "fr": "Extraction de document"},
{
"timestamp": {"en": "Timestamp", "fr": "Horodatage"},
"type": {"en": "Type", "fr": "Type"},
"sections": {"en": "Sections", "fr": "Sections"},
"metadata": {"en": "Metadata", "fr": "Métadonnées"}
}
)
class DocumentContext(BaseModel, ModelMixin):
"""Data model for document context"""
id: str = Field(description="Document ID")
extractionHistory: List[DocumentExtraction] = Field(default_factory=list, description="History of extractions")
relevantSections: List[str] = Field(default_factory=list, description="Relevant sections")
processingStatus: Dict[str, str] = Field(default_factory=dict, description="Processing status")
# Register labels for DocumentContext
register_model_labels(
"DocumentContext",
{"en": "Document Context", "fr": "Contexte de document"},
{
"id": {"en": "ID", "fr": "ID"},
"extractionHistory": {"en": "Extraction History", "fr": "Historique d'extraction"},
"relevantSections": {"en": "Relevant Sections", "fr": "Sections pertinentes"},
"processingStatus": {"en": "Processing Status", "fr": "Statut de traitement"}
}
)
class DocumentMetadata(BaseModel, ModelMixin):
"""Data model for document metadata"""
type: str = Field(description="Document type")
format: str = Field(description="Document format")
size: int = Field(description="Document size in bytes")
pages: Optional[int] = Field(None, description="Number of pages")
sections: Optional[List[str]] = Field(None, description="Document sections")
error: Optional[str] = Field(None, description="Processing error if any")
# Register labels for DocumentMetadata
register_model_labels(
"DocumentMetadata",
{"en": "Document Metadata", "fr": "Métadonnées de document"},
{
"type": {"en": "Type", "fr": "Type"},
"format": {"en": "Format", "fr": "Format"},
"size": {"en": "Size", "fr": "Taille"},
"pages": {"en": "Pages", "fr": "Pages"},
"sections": {"en": "Sections", "fr": "Sections"},
"error": {"en": "Error", "fr": "Erreur"}
}
)
class ImageData(BaseModel, ModelMixin):
"""Data model for image data"""
data: str = Field(description="Base64 encoded image data")
format: str = Field(description="Image format")
page: Optional[int] = Field(None, description="Page number if from a multi-page document")
index: Optional[int] = Field(None, description="Image index in the document")
# Register labels for ImageData
register_model_labels(
"ImageData",
{"en": "Image Data", "fr": "Données d'image"},
{
"data": {"en": "Image Data", "fr": "Données d'image"},
"format": {"en": "Format", "fr": "Format"},
"page": {"en": "Page", "fr": "Page"},
"index": {"en": "Index", "fr": "Index"}
}
)
class DocumentContent(BaseModel, ModelMixin):
"""Data model for document content"""
text: Optional[str] = Field(None, description="Extracted text content")
data: Optional[Dict[str, Any]] = Field(None, description="Structured data content")
images: Optional[List[ImageData]] = Field(None, description="Extracted images")
metadata: DocumentMetadata = Field(description="Document metadata")
# Register labels for DocumentContent
register_model_labels(
"DocumentContent",
{"en": "Document Content", "fr": "Contenu de document"},
{
"text": {"en": "Text", "fr": "Texte"},
"data": {"en": "Data", "fr": "Données"},
"images": {"en": "Images", "fr": "Images"},
"metadata": {"en": "Metadata", "fr": "Métadonnées"}
}
)
class ProcessedDocument(BaseModel, ModelMixin):
"""Data model for processed document"""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Document ID")
name: str = Field(description="Document name")
contentType: str = Field(description="Content type")
content: DocumentContent = Field(description="Document content")
context: Optional[DocumentContext] = Field(None, description="Document context")
# Register labels for ProcessedDocument
register_model_labels(
"ProcessedDocument",
{"en": "Processed Document", "fr": "Document traité"},
{
"id": {"en": "ID", "fr": "ID"},
"name": {"en": "Name", "fr": "Nom"},
"contentType": {"en": "Content Type", "fr": "Type de contenu"},
"content": {"en": "Content", "fr": "Contenu"},
"context": {"en": "Context", "fr": "Contexte"}
}
)

View file

@ -11,7 +11,6 @@ from typing import Dict, Any, List, Optional, Union
import hashlib
from modules.shared.mimeUtils import isTextMimeType
from modules.interfaces.serviceManagementAccess import ManagementAccess
from modules.interfaces.serviceManagementModel import (
Prompt, FileItem, FileData

View file

@ -2,8 +2,12 @@ from enum import Enum
from typing import Dict, List, Optional, Any, Literal
from datetime import datetime, UTC
from pydantic import BaseModel, Field
import logging
logger = logging.getLogger(__name__)
class AuthSource(str, Enum):
"""Authentication source enumeration"""
LOCAL = "local"
MSFT = "msft"
GOOGLE = "google"
@ -23,52 +27,122 @@ class MethodResult(BaseModel):
data: Dict[str, Any]
metadata: Dict[str, Any] = Field(default_factory=dict)
validation: List[str] = Field(default_factory=list)
error: Optional[str] = Field(None, description="Error message if any")
class MethodBase:
"""Base class for all methods"""
def __init__(self):
def __init__(self, serviceContainer: Any):
"""Initialize method with service container"""
self.service = serviceContainer
self.name: str
self.description: str
self.auth_source: AuthSource = AuthSource.LOCAL # Default to local auth
self.authSource: AuthSource = AuthSource.LOCAL # Default to local auth
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
@property
def actions(self) -> Dict[str, Dict[str, Any]]:
"""Available actions and their parameters"""
raise NotImplementedError
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
"""Execute method action with authentication data"""
async def execute(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
    """
    Execute a method action after validating the action, its parameters
    and the authentication data.

    Args:
        action: The action to execute
        parameters: Action parameters
        authData: Authentication data

    Returns:
        MethodResult containing execution results. Every failure is
        reported through the result (success=False, error=<message>):
        an unsupported action, invalid parameters, failed authentication,
        or any exception raised while validating or running the action.
        Those errors are not raised to the caller.
    """
    try:
        # Validate action
        if action not in self.actions:
            message = f"Unsupported action: {action}"
            self.logger.warning(f"Error executing action {action}: {message}")
            return self._createResult(
                success=False,
                data={},
                error=message
            )

        # Validate parameters
        if not await self.validateParameters(action, parameters):
            return self._createResult(
                success=False,
                data={},
                error="Invalid parameters"
            )

        # Validate authentication
        if not self._validateAuth(authData):
            return self._createResult(
                success=False,
                data={},
                error="Authentication failed"
            )

        # Execute action
        return await self._executeAction(action, parameters, authData)

    except Exception as e:
        # Boundary handler: unexpected failures become a failed result.
        # logger.exception keeps the traceback so the cause stays debuggable.
        self.logger.exception(f"Error executing action {action}: {str(e)}")
        return self._createResult(
            success=False,
            data={},
            error=str(e)
        )
async def _executeAction(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
    """
    Hook that runs one concrete action; subclasses are expected to override it.

    The base implementation has no behavior of its own and always raises
    NotImplementedError.
    """
    raise NotImplementedError
async def validate_parameters(self, action: str, parameters: Dict[str, Any]) -> bool:
async def validateParameters(self, action: str, parameters: Dict[str, Any]) -> bool:
"""Validate action parameters"""
if action not in self.actions:
return False
try:
if action not in self.actions:
return False
actionDef = self.actions[action]
requiredParams = {k for k, v in actionDef['parameters'].items() if v['required']}
return all(param in parameters for param in requiredParams)
action_def = self.actions[action]
required_params = {k for k, v in action_def['parameters'].items() if v['required']}
return all(param in parameters for param in required_params)
except Exception as e:
self.logger.error(f"Error validating parameters: {str(e)}")
return False
async def rollback(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> None:
async def rollback(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> None:
    """
    Undo an action by delegating to the `_rollbackAction` hook.

    Any exception raised by the hook is logged on the instance logger and
    then re-raised unchanged to the caller.
    """
    try:
        await self._rollbackAction(action, parameters, authData)
    except Exception as exc:
        failure = f"Error rolling back action {action}: {str(exc)}"
        self.logger.error(failure)
        raise
async def _rollbackAction(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> None:
    """
    Hook for undoing one concrete action; subclasses may override it.

    The base implementation does nothing.
    """
    return None
def _validate_auth(self, auth_data: Optional[Dict[str, Any]] = None) -> bool:
def _validateAuth(self, authData: Optional[Dict[str, Any]] = None) -> bool:
"""Validate authentication data"""
if self.auth_source == AuthSource.LOCAL:
return True
return bool(auth_data and auth_data.get('source') == self.auth_source)
try:
if self.authSource == AuthSource.LOCAL:
return True
return bool(authData and authData.get('source') == self.authSource)
except Exception as e:
self.logger.error(f"Error validating auth: {str(e)}")
return False
def _create_result(self, success: bool, data: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None) -> MethodResult:
def _createResult(self, success: bool, data: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None, error: Optional[str] = None) -> MethodResult:
"""Create a method result"""
return MethodResult(
success=success,
data=data,
metadata=metadata or {},
validation=[]
validation=[],
error=error
)
def _add_validation_message(self, result: MethodResult, message: str) -> None:
def _addValidationMessage(self, result: MethodResult, message: str) -> None:
    """Append `message` to the `validation` list of `result`, in place."""
    messages = result.validation
    messages.append(message)

View file

@ -14,7 +14,7 @@ class MethodCoder(MethodBase):
super().__init__()
self.name = "coder"
self.description = "Handle code operations like analysis, generation, and refactoring"
self.auth_source = AuthSource.LOCAL # Code operations typically don't need auth
self.authSource = AuthSource.LOCAL # Code operations typically don't need auth
@property
def actions(self) -> Dict[str, Dict[str, Any]]:
@ -52,37 +52,37 @@ class MethodCoder(MethodBase):
}
}
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
async def execute(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
"""Execute coder method"""
try:
# Validate parameters
if not await self.validate_parameters(action, parameters):
return self._create_result(
if not await self.validateParameters(action, parameters):
return self._createResult(
success=False,
data={"error": f"Invalid parameters for {action}"}
)
# Execute action
if action == "analyze":
return await self._analyze_code(parameters)
return await self._analyzeCode(parameters)
elif action == "generate":
return await self._generate_code(parameters)
return await self._generateCode(parameters)
elif action == "refactor":
return await self._refactor_code(parameters)
return await self._refactorCode(parameters)
else:
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Unknown action: {action}"}
)
except Exception as e:
logger.error(f"Error executing coder {action}: {e}")
return self._create_result(
return self._createResult(
success=False,
data={"error": str(e)}
)
async def _analyze_code(self, parameters: Dict[str, Any]) -> MethodResult:
async def _analyzeCode(self, parameters: Dict[str, Any]) -> MethodResult:
"""Analyze code structure and quality"""
try:
code = parameters["code"]
@ -121,13 +121,13 @@ class MethodCoder(MethodBase):
if "complexity" in metrics:
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef):
body_lines = len(node.body)
if body_lines > 20: # Arbitrary threshold
bodyLines = len(node.body)
if bodyLines > 20: # Arbitrary threshold
analysis["issues"].append({
"type": "long_function",
"line": node.lineno,
"name": node.name,
"lines": body_lines
"lines": bodyLines
})
# Check for style issues
@ -149,18 +149,18 @@ class MethodCoder(MethodBase):
})
except SyntaxError as e:
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Syntax error: {str(e)}"}
)
else:
# TODO: Implement analysis for other languages
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Unsupported language: {language}"}
)
return self._create_result(
return self._createResult(
success=True,
data={
"language": language,
@ -169,12 +169,12 @@ class MethodCoder(MethodBase):
)
except Exception as e:
logger.error(f"Error analyzing code: {e}")
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Analysis failed: {str(e)}"}
)
async def _generate_code(self, parameters: Dict[str, Any]) -> MethodResult:
async def _generateCode(self, parameters: Dict[str, Any]) -> MethodResult:
"""Generate code based on requirements"""
try:
requirements = parameters["requirements"]
@ -185,8 +185,8 @@ class MethodCoder(MethodBase):
# This is a placeholder implementation
if language.lower() == "python":
# Generate a simple Python class based on requirements
class_name = re.sub(r'[^a-zA-Z0-9]', '', requirements.split()[0].title())
code = f"""class {class_name}:
className = re.sub(r'[^a-zA-Z0-9]', '', requirements.split()[0].title())
code = f"""class {className}:
\"\"\"
{requirements}
\"\"\"
@ -198,12 +198,12 @@ class MethodCoder(MethodBase):
pass
"""
else:
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Unsupported language: {language}"}
)
return self._create_result(
return self._createResult(
success=True,
data={
"language": language,
@ -212,12 +212,12 @@ class MethodCoder(MethodBase):
)
except Exception as e:
logger.error(f"Error generating code: {e}")
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Generation failed: {str(e)}"}
)
async def _refactor_code(self, parameters: Dict[str, Any]) -> MethodResult:
async def _refactorCode(self, parameters: Dict[str, Any]) -> MethodResult:
"""Refactor code for better quality"""
try:
code = parameters["code"]
@ -246,17 +246,17 @@ class MethodCoder(MethodBase):
pass
except SyntaxError as e:
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Syntax error: {str(e)}"}
)
else:
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Unsupported language: {language}"}
)
return self._create_result(
return self._createResult(
success=True,
data={
"language": language,
@ -266,7 +266,7 @@ class MethodCoder(MethodBase):
)
except Exception as e:
logger.error(f"Error refactoring code: {e}")
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Refactoring failed: {str(e)}"}
)

View file

@ -1,287 +1,215 @@
from typing import Dict, Any, Optional
import logging
import os
from pathlib import Path
import docx
import PyPDF2
import json
import yaml
import xml.etree.ElementTree as ET
from datetime import datetime, UTC
"""
Document processing method module.
Handles document operations using the document service.
"""
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime
from modules.interfaces.serviceChatModel import (
ChatDocument,
TaskDocument,
ExtractedContent,
ContentItem
)
from modules.workflow.managerDocument import DocumentManager
from modules.methods.methodBase import MethodBase
logger = logging.getLogger(__name__)
class MethodDocument(MethodBase):
"""Document method implementation for document operations"""
"""Document processing method implementation"""
def __init__(self):
super().__init__()
self.name = "document"
self.description = "Handle document operations like reading, writing, and converting documents"
self.auth_source = AuthSource.LOCAL # Document operations typically don't need auth
def __init__(self, serviceContainer):
"""Initialize the document method"""
super().__init__(serviceContainer)
self.documentManager = DocumentManager(serviceContainer)
async def process(self, action: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
"""
Process document operations
@property
def actions(self) -> Dict[str, Dict[str, Any]]:
"""Available actions and their parameters"""
return {
"read": {
"description": "Read document content",
"retryMax": 2,
"timeout": 30,
"parameters": {
"path": {"type": "string", "required": True},
"format": {"type": "string", "required": False},
"encoding": {"type": "string", "required": False},
"includeMetadata": {"type": "boolean", "required": False}
}
},
"write": {
"description": "Write content to document",
"retryMax": 2,
"timeout": 30,
"parameters": {
"path": {"type": "string", "required": True},
"content": {"type": "string", "required": True},
"format": {"type": "string", "required": False},
"encoding": {"type": "string", "required": False},
"template": {"type": "string", "required": False}
}
},
"convert": {
"description": "Convert document between formats",
"retryMax": 2,
"timeout": 60,
"parameters": {
"sourcePath": {"type": "string", "required": True},
"targetPath": {"type": "string", "required": True},
"sourceFormat": {"type": "string", "required": False},
"targetFormat": {"type": "string", "required": False},
"options": {"type": "object", "required": False}
}
}
}
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
"""Execute document method"""
try:
# Validate parameters
if not await self.validate_parameters(action, parameters):
return self._create_result(
success=False,
data={"error": f"Invalid parameters for {action}"}
)
Args:
action: The action to perform
parameters: Action parameters
# Execute action
if action == "read":
return await self._read_document(parameters)
elif action == "write":
return await self._write_document(parameters)
elif action == "convert":
return await self._convert_document(parameters)
Returns:
Dictionary containing the operation result
Raises:
ValueError: If action is not supported
"""
try:
if action == "extract":
return await self._extractContent(parameters)
elif action == "analyze":
return await self._analyzeDocument(parameters)
elif action == "summarize":
return await self._summarizeDocument(parameters)
else:
return self._create_result(
success=False,
data={"error": f"Unknown action: {action}"}
)
raise ValueError(f"Unsupported action: {action}")
except Exception as e:
logger.error(f"Error processing document action {action}: {str(e)}")
raise
async def _extractContent(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extract content from a stored document.

    Args:
        parameters: Dictionary containing:
            - documentId: ID of the document to process (required)
            - documentType: Type of document, 'ChatDocument' (default)
              or 'TaskDocument'; any other value is rejected

    Returns:
        On success: {"success": True, "content": <extracted content as a
        dict>, "metadata": <metadata from the document manager>}.
        On failure (missing documentId, unsupported documentType, document
        not found, or any exception during extraction):
        {"success": False, "error": <message>}. Failures are logged and
        not raised to the caller.
    """
    try:
        documentId = parameters.get("documentId")
        documentType = parameters.get("documentType", "ChatDocument")

        if not documentId:
            raise ValueError("documentId is required")

        # Loader / extractor pair for each supported document type.
        handlers = {
            "ChatDocument": (self._getChatDocument, self.documentManager.extractFromChatDocument),
            "TaskDocument": (self._getTaskDocument, self.documentManager.extractFromTaskDocument),
        }
        # Reject unknown types explicitly instead of silently treating them
        # as TaskDocument and reporting a misleading "not found" error.
        if documentType not in handlers:
            raise ValueError(f"Unsupported documentType: {documentType}")
        loadDocument, extractDocument = handlers[documentType]

        # Get document from database
        document = await loadDocument(documentId)
        if not document:
            raise ValueError(f"{documentType} {documentId} not found")
        extracted = await extractDocument(document)

        return {
            "success": True,
            "content": extracted.dict(),
            "metadata": await self.documentManager.getDocumentMetadata(document)
        }

    except Exception as e:
        logger.error(f"Error extracting content: {str(e)}")
        return {
            "success": False,
            "error": str(e)
        }
async def _analyzeDocument(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
"""
Analyze document content
Args:
parameters: Dictionary containing:
- documentId: ID of the document to analyze
- documentType: Type of document
- analysisType: Type of analysis to perform
Returns:
Dictionary containing analysis results
"""
try:
# Extract content first
contentResult = await self._extractContent(parameters)
if not contentResult["success"]:
return contentResult
# Perform analysis based on type
analysisType = parameters.get("analysisType", "basic")
content = ExtractedContent(**contentResult["content"])
if analysisType == "basic":
# Basic analysis: count items, calculate statistics
stats = {
"totalItems": len(content.contents),
"totalSize": sum(item.metadata.size for item in content.contents),
"itemTypes": {}
}
for item in content.contents:
itemType = item.label
if itemType not in stats["itemTypes"]:
stats["itemTypes"][itemType] = 0
stats["itemTypes"][itemType] += 1
return {
"success": True,
"analysis": stats
}
else:
raise ValueError(f"Unsupported analysis type: {analysisType}")
except Exception as e:
logger.error(f"Error executing document {action}: {e}")
return self._create_result(
success=False,
data={"error": str(e)}
)
async def _read_document(self, parameters: Dict[str, Any]) -> MethodResult:
"""Read document content"""
try:
path = Path(parameters["path"])
if not path.exists():
return self._create_result(
success=False,
data={"error": f"File not found: {path}"}
)
# Determine format if not specified
format = parameters.get("format")
if not format:
format = path.suffix[1:] if path.suffix else "txt"
# Read content based on format
content = ""
encoding = parameters.get("encoding", "utf-8")
include_metadata = parameters.get("includeMetadata", False)
if format.lower() in ["txt", "md"]:
with open(path, "r", encoding=encoding) as f:
content = f.read()
elif format.lower() == "docx":
doc = docx.Document(path)
content = "\n".join([paragraph.text for paragraph in doc.paragraphs])
elif format.lower() == "pdf":
with open(path, "rb") as f:
pdf = PyPDF2.PdfReader(f)
content = "\n".join([page.extract_text() for page in pdf.pages])
elif format.lower() == "json":
with open(path, "r", encoding=encoding) as f:
content = json.load(f)
elif format.lower() == "yaml":
with open(path, "r", encoding=encoding) as f:
content = yaml.safe_load(f)
elif format.lower() == "xml":
tree = ET.parse(path)
root = tree.getroot()
content = ET.tostring(root, encoding=encoding).decode(encoding)
else:
return self._create_result(
success=False,
data={"error": f"Unsupported format: {format}"}
)
result = {
"path": str(path),
"format": format,
"content": content
logger.error(f"Error analyzing document: {str(e)}")
return {
"success": False,
"error": str(e)
}
if include_metadata:
result["metadata"] = {
"size": path.stat().st_size,
"modified": datetime.fromtimestamp(path.stat().st_mtime, UTC).isoformat(),
"created": datetime.fromtimestamp(path.stat().st_ctime, UTC).isoformat()
}
return self._create_result(
success=True,
data=result
)
except Exception as e:
logger.error(f"Error reading document: {e}")
return self._create_result(
success=False,
data={"error": f"Read failed: {str(e)}"}
)
async def _write_document(self, parameters: Dict[str, Any]) -> MethodResult:
"""Write content to document"""
async def _summarizeDocument(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
"""
Generate document summary
Args:
parameters: Dictionary containing:
- documentId: ID of the document to summarize
- documentType: Type of document
- summaryType: Type of summary to generate
Returns:
Dictionary containing summary
"""
try:
path = Path(parameters["path"])
# Extract content first
contentResult = await self._extractContent(parameters)
if not contentResult["success"]:
return contentResult
# Create directory if it doesn't exist
path.parent.mkdir(parents=True, exist_ok=True)
# Generate summary based on type
summaryType = parameters.get("summaryType", "basic")
content = ExtractedContent(**contentResult["content"])
# Determine format if not specified
format = parameters.get("format")
if not format:
format = path.suffix[1:] if path.suffix else "txt"
# Write content based on format
encoding = parameters.get("encoding", "utf-8")
content = parameters["content"]
template = parameters.get("template")
if format.lower() in ["txt", "md"]:
with open(path, "w", encoding=encoding) as f:
f.write(content)
elif format.lower() == "docx":
if template:
doc = docx.Document(template)
else:
doc = docx.Document()
doc.add_paragraph(content)
doc.save(path)
elif format.lower() == "pdf":
# TODO: Implement PDF writing
return self._create_result(
success=False,
data={"error": "PDF writing not implemented yet"}
if summaryType == "basic":
# Basic summary: concatenate all text content
summary = "\n".join(
item.data for item in content.contents
if item.label == "main"
)
elif format.lower() == "json":
with open(path, "w", encoding=encoding) as f:
json.dump(content, f, indent=2)
elif format.lower() == "yaml":
with open(path, "w", encoding=encoding) as f:
yaml.dump(content, f)
elif format.lower() == "xml":
with open(path, "w", encoding=encoding) as f:
f.write(content)
return {
"success": True,
"summary": summary
}
else:
return self._create_result(
success=False,
data={"error": f"Unsupported format: {format}"}
)
return self._create_result(
success=True,
data={
"path": str(path),
"format": format,
"size": path.stat().st_size,
"modified": datetime.now(UTC).isoformat()
}
)
raise ValueError(f"Unsupported summary type: {summaryType}")
except Exception as e:
logger.error(f"Error writing document: {e}")
return self._create_result(
success=False,
data={"error": f"Write failed: {str(e)}"}
)
logger.error(f"Error summarizing document: {str(e)}")
return {
"success": False,
"error": str(e)
}
async def _convert_document(self, parameters: Dict[str, Any]) -> MethodResult:
"""Convert document between formats"""
async def _getChatDocument(self, documentId: str) -> Optional[ChatDocument]:
"""Get ChatDocument from database"""
try:
source_path = Path(parameters["sourcePath"])
target_path = Path(parameters["targetPath"])
if not source_path.exists():
return self._create_result(
success=False,
data={"error": f"Source file not found: {source_path}"}
)
# Determine formats if not specified
source_format = parameters.get("sourceFormat")
if not source_format:
source_format = source_path.suffix[1:] if source_path.suffix else "txt"
target_format = parameters.get("targetFormat")
if not target_format:
target_format = target_path.suffix[1:] if target_path.suffix else "txt"
# Read source content
source_content = await self._read_document({
"path": str(source_path),
"format": source_format
})
if not source_content.success:
return source_content
# Write target content
target_content = await self._write_document({
"path": str(target_path),
"content": source_content.data["content"],
"format": target_format
})
if not target_content.success:
return target_content
return self._create_result(
success=True,
data={
"sourcePath": str(source_path),
"targetPath": str(target_path),
"sourceFormat": source_format,
"targetFormat": target_format,
"size": target_path.stat().st_size,
"modified": datetime.now(UTC).isoformat()
}
)
documentData = self.service.db.getRecord("chatDocuments", documentId)
if documentData:
return ChatDocument(**documentData)
return None
except Exception as e:
logger.error(f"Error converting document: {e}")
return self._create_result(
success=False,
data={"error": f"Conversion failed: {str(e)}"}
)
logger.error(f"Error getting ChatDocument {documentId}: {str(e)}")
return None
async def _getTaskDocument(self, documentId: str) -> Optional[TaskDocument]:
    """
    Look up a record in "taskDocuments" and wrap it in a TaskDocument.

    Returns None when the lookup yields nothing, or when the lookup or the
    model construction raises; in the latter case the error is logged.
    """
    try:
        record = self.service.db.getRecord("taskDocuments", documentId)
        return TaskDocument(**record) if record else None
    except Exception as exc:
        logger.error(f"Error getting TaskDocument {documentId}: {str(exc)}")
        return None

View file

@ -15,7 +15,7 @@ class MethodOutlook(MethodBase):
super().__init__()
self.name = "outlook"
self.description = "Handle Outlook email operations like reading and sending emails"
self.auth_source = AuthSource.MICROSOFT
self.authSource = AuthSource.MICROSOFT
@property
def actions(self) -> Dict[str, Dict[str, Any]]:
@ -47,54 +47,54 @@ class MethodOutlook(MethodBase):
}
}
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
async def execute(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
"""Execute Outlook method"""
try:
# Validate parameters
if not await self.validate_parameters(action, parameters):
return self._create_result(
if not await self.validateParameters(action, parameters):
return self._createResult(
success=False,
data={"error": f"Invalid parameters for {action}"}
)
# Get UserConnection from auth_data
if not auth_data or "userConnection" not in auth_data:
return self._create_result(
if not authData or "userConnection" not in authData:
return self._createResult(
success=False,
data={"error": "UserConnection required for Outlook operations"}
)
user_connection: UserConnection = auth_data["userConnection"]
userConnection: UserConnection = authData["userConnection"]
# Execute action
if action == "readMails":
return await self._read_mails(parameters, user_connection)
return await self._readMails(parameters, userConnection)
elif action == "sendMail":
return await self._send_mail(parameters, user_connection)
return await self._sendMail(parameters, userConnection)
else:
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Unknown action: {action}"}
)
except Exception as e:
logger.error(f"Error executing Outlook {action}: {e}")
return self._create_result(
return self._createResult(
success=False,
data={"error": str(e)}
)
async def _read_mails(self, parameters: Dict[str, Any], user_connection: UserConnection) -> MethodResult:
async def _readMails(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
"""Read emails from Outlook"""
try:
folder = parameters.get("folder", "inbox")
query = parameters.get("query")
max_results = parameters.get("maxResults", 10)
include_attachments = parameters.get("includeAttachments", False)
maxResults = parameters.get("maxResults", 10)
includeAttachments = parameters.get("includeAttachments", False)
# Create Outlook account
account = Account(
credentials=(user_connection.authToken, user_connection.refreshToken),
credentials=(userConnection.authToken, userConnection.refreshToken),
protocol=MSGraphProtocol()
)
@ -102,18 +102,18 @@ class MethodOutlook(MethodBase):
mailbox = account.mailbox()
# Get folder
target_folder = mailbox.folder(folder_name=folder)
targetFolder = mailbox.folder(folder_name=folder)
# Get messages
if query:
messages = target_folder.get_messages(query=query, limit=max_results)
messages = targetFolder.get_messages(query=query, limit=maxResults)
else:
messages = target_folder.get_messages(limit=max_results)
messages = targetFolder.get_messages(limit=maxResults)
# Process messages
results = []
for message in messages:
msg_data = {
msgData = {
"id": message.object_id,
"subject": message.subject,
"from": message.sender.address,
@ -124,7 +124,7 @@ class MethodOutlook(MethodBase):
"hasAttachments": message.has_attachments
}
if include_attachments and message.has_attachments:
if includeAttachments and message.has_attachments:
attachments = []
for attachment in message.attachments:
attachments.append({
@ -132,11 +132,11 @@ class MethodOutlook(MethodBase):
"contentType": attachment.content_type,
"size": attachment.size
})
msg_data["attachments"] = attachments
msgData["attachments"] = attachments
results.append(msg_data)
results.append(msgData)
return self._create_result(
return self._createResult(
success=True,
data={
"folder": folder,
@ -146,24 +146,24 @@ class MethodOutlook(MethodBase):
)
except Exception as e:
logger.error(f"Error reading Outlook emails: {e}")
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Read failed: {str(e)}"}
)
async def _send_mail(self, parameters: Dict[str, Any], user_connection: UserConnection) -> MethodResult:
async def _sendMail(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
"""Send email through Outlook"""
try:
to_addresses = parameters["to"]
toAddresses = parameters["to"]
subject = parameters["subject"]
body = parameters["body"]
cc_addresses = parameters.get("cc", [])
bcc_addresses = parameters.get("bcc", [])
ccAddresses = parameters.get("cc", [])
bccAddresses = parameters.get("bcc", [])
attachments = parameters.get("attachments", [])
# Create Outlook account
account = Account(
credentials=(user_connection.authToken, user_connection.refreshToken),
credentials=(userConnection.authToken, userConnection.refreshToken),
protocol=MSGraphProtocol()
)
@ -172,32 +172,32 @@ class MethodOutlook(MethodBase):
# Create new message
message = mailbox.new_message()
message.to.add(to_addresses)
if cc_addresses:
message.cc.add(cc_addresses)
if bcc_addresses:
message.bcc.add(bcc_addresses)
message.to.add(toAddresses)
if ccAddresses:
message.cc.add(ccAddresses)
if bccAddresses:
message.bcc.add(bccAddresses)
message.subject = subject
message.body = body
# Add attachments
for attachment_path in attachments:
message.attachments.add(attachment_path)
for attachmentPath in attachments:
message.attachments.add(attachmentPath)
# Send message
message.send()
return self._create_result(
return self._createResult(
success=True,
data={
"to": to_addresses,
"to": toAddresses,
"subject": subject,
"sent": datetime.now(UTC).isoformat()
}
)
except Exception as e:
logger.error(f"Error sending Outlook email: {e}")
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Send failed: {str(e)}"}
)

View file

@ -4,6 +4,9 @@ import os
from pathlib import Path
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
from modules.models.userConnection import UserConnection
from modules.models.account import Account
from modules.protocols.msGraphProtocol import MSGraphProtocol
logger = logging.getLogger(__name__)
@ -14,7 +17,7 @@ class MethodPowerpoint(MethodBase):
super().__init__()
self.name = "powerpoint"
self.description = "Handle PowerPoint operations like reading, writing, and converting presentations"
self.auth_source = AuthSource.MICROSOFT # PowerPoint operations need Microsoft auth
self.authSource = AuthSource.MICROSOFT # PowerPoint operations need Microsoft auth
@property
def actions(self) -> Dict[str, Dict[str, Any]]:
@ -50,52 +53,85 @@ class MethodPowerpoint(MethodBase):
"sourceFormat": {"type": "string", "required": False},
"targetFormat": {"type": "string", "required": False}
}
},
"createPresentation": {
"description": "Create a new PowerPoint presentation",
"retryMax": 2,
"timeout": 60,
"parameters": {
"title": {"type": "string", "required": True},
"template": {"type": "string", "required": False}
}
},
"addSlide": {
"description": "Add a new slide to presentation",
"retryMax": 2,
"timeout": 60,
"parameters": {
"presentationId": {"type": "string", "required": True},
"layout": {"type": "string", "required": False},
"title": {"type": "string", "required": False}
}
},
"addContent": {
"description": "Add content to a slide",
"retryMax": 2,
"timeout": 60,
"parameters": {
"presentationId": {"type": "string", "required": True},
"slideId": {"type": "string", "required": True},
"contentType": {"type": "string", "required": True},
"content": {"type": "object", "required": True},
"position": {"type": "object", "required": False}
}
}
}
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
"""Execute powerpoint method"""
async def execute(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
"""Execute PowerPoint method"""
try:
# Validate parameters
if not await self.validate_parameters(action, parameters):
return self._create_result(
if not await self.validateParameters(action, parameters):
return self._createResult(
success=False,
data={"error": f"Invalid parameters for {action}"}
)
# Validate authentication
if not await self.validate_auth(auth_data):
return self._create_result(
# Get UserConnection from auth_data
if not authData or "userConnection" not in authData:
return self._createResult(
success=False,
data={"error": "Authentication required for PowerPoint operations"}
data={"error": "UserConnection required for PowerPoint operations"}
)
userConnection: UserConnection = authData["userConnection"]
# Execute action
if action == "read":
return await self._read_presentation(parameters, auth_data)
elif action == "write":
return await self._write_presentation(parameters, auth_data)
elif action == "convert":
return await self._convert_presentation(parameters, auth_data)
if action == "createPresentation":
return await self._createPresentation(parameters, userConnection)
elif action == "addSlide":
return await self._addSlide(parameters, userConnection)
elif action == "addContent":
return await self._addContent(parameters, userConnection)
else:
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Unknown action: {action}"}
)
except Exception as e:
logger.error(f"Error executing powerpoint {action}: {e}")
return self._create_result(
logger.error(f"Error executing PowerPoint {action}: {e}")
return self._createResult(
success=False,
data={"error": str(e)}
)
async def _read_presentation(self, parameters: Dict[str, Any], auth_data: Dict[str, Any]) -> MethodResult:
async def _read_presentation(self, parameters: Dict[str, Any], authData: Dict[str, Any]) -> MethodResult:
"""Read PowerPoint presentation content"""
try:
path = Path(parameters["path"])
if not path.exists():
return self._create_result(
return self._createResult(
success=False,
data={"error": f"File not found: {path}"}
)
@ -107,7 +143,7 @@ class MethodPowerpoint(MethodBase):
# TODO: Implement PowerPoint reading using Microsoft Graph API
# This is a placeholder implementation
return self._create_result(
return self._createResult(
success=True,
data={
"path": str(path),
@ -124,12 +160,12 @@ class MethodPowerpoint(MethodBase):
)
except Exception as e:
logger.error(f"Error reading presentation: {e}")
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Read failed: {str(e)}"}
)
async def _write_presentation(self, parameters: Dict[str, Any], auth_data: Dict[str, Any]) -> MethodResult:
async def _write_presentation(self, parameters: Dict[str, Any], authData: Dict[str, Any]) -> MethodResult:
"""Write content to PowerPoint presentation"""
try:
path = Path(parameters["path"])
@ -144,7 +180,7 @@ class MethodPowerpoint(MethodBase):
# TODO: Implement PowerPoint writing using Microsoft Graph API
# This is a placeholder implementation
return self._create_result(
return self._createResult(
success=True,
data={
"path": str(path),
@ -154,19 +190,19 @@ class MethodPowerpoint(MethodBase):
)
except Exception as e:
logger.error(f"Error writing presentation: {e}")
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Write failed: {str(e)}"}
)
async def _convert_presentation(self, parameters: Dict[str, Any], auth_data: Dict[str, Any]) -> MethodResult:
async def _convert_presentation(self, parameters: Dict[str, Any], authData: Dict[str, Any]) -> MethodResult:
"""Convert PowerPoint presentation between formats"""
try:
source_path = Path(parameters["sourcePath"])
target_path = Path(parameters["targetPath"])
if not source_path.exists():
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Source file not found: {source_path}"}
)
@ -182,7 +218,7 @@ class MethodPowerpoint(MethodBase):
# TODO: Implement PowerPoint conversion using Microsoft Graph API
# This is a placeholder implementation
return self._create_result(
return self._createResult(
success=True,
data={
"sourcePath": str(source_path),
@ -193,7 +229,148 @@ class MethodPowerpoint(MethodBase):
)
except Exception as e:
logger.error(f"Error converting presentation: {e}")
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Conversion failed: {str(e)}"}
)
async def _createPresentation(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
    """
    Create a PowerPoint file in the user's drive, blank or copied from a template.

    Args:
        parameters: Must contain "title"; the new file is named "<title>.pptx".
            An optional "template" is a drive path of a file to copy instead
            of creating an empty presentation.
        userConnection: Provides the authToken / refreshToken pair handed to Account.

    Returns:
        A result with success=True and the new item's id, name and webUrl, or
        success=False with an "error" message if any step raised.
    """
    try:
        title = parameters["title"]
        templatePath = parameters.get("template")

        # NOTE(review): confirm that Account accepts a token pair as `credentials`
        # and exposes drive(); neither can be verified from this file.
        tokens = (userConnection.authToken, userConnection.refreshToken)
        drive = Account(credentials=tokens, protocol=MSGraphProtocol()).drive()

        targetName = f"{title}.pptx"
        if not templatePath:
            # No template: create an empty presentation file.
            created = drive.create_file(
                name=targetName,
                content_type="application/vnd.openxmlformats-officedocument.presentationml.presentation"
            )
        else:
            # Template given: copy it under the new name.
            created = drive.get_item_by_path(templatePath).copy(targetName)

        payload = {
            "id": created.object_id,
            "name": created.name,
            "webUrl": created.web_url
        }
        return self._createResult(success=True, data=payload)
    except Exception as e:
        # Every failure (missing "title", remote errors, ...) is reported as a result.
        logger.error(f"Error creating PowerPoint presentation: {e}")
        return self._createResult(
            success=False,
            data={"error": f"Create failed: {str(e)}"}
        )
async def _addSlide(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
    """
    Append a slide to an existing presentation and optionally set its title.

    Args:
        parameters: Must contain "presentationId". Optional "layout"
            (defaults to "title") and "title" (left unset when falsy).
        userConnection: Provides the authToken / refreshToken pair handed to Account.

    Returns:
        A result with success=True and slideId / layout / title, or
        success=False with an "error" message if any step raised.
    """
    try:
        presentationId = parameters["presentationId"]
        layout = parameters.get("layout", "title")
        slideTitle = parameters.get("title")

        tokens = (userConnection.authToken, userConnection.refreshToken)
        drive = Account(credentials=tokens, protocol=MSGraphProtocol()).drive()

        # NOTE(review): add_slide() on a drive item is assumed to exist; confirm
        # against the library actually providing Account.
        newSlide = drive.get_item_by_id(presentationId).add_slide(layout=layout)
        if slideTitle:
            newSlide.title = slideTitle

        payload = {
            "slideId": newSlide.object_id,
            "layout": layout,
            "title": slideTitle
        }
        return self._createResult(success=True, data=payload)
    except Exception as e:
        logger.error(f"Error adding PowerPoint slide: {e}")
        return self._createResult(
            success=False,
            data={"error": f"Add slide failed: {str(e)}"}
        )
async def _addContent(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
    """
    Place a text box, picture or table on a slide.

    Args:
        parameters: Must contain "presentationId", "slideId", "contentType"
            ("text", "image" or "table") and "content". For "table" the
            content is indexed with "rows" and "cols". Optional "position"
            is a mapping with "x" and "y" (defaults to {"x": 0, "y": 0}).
        userConnection: Provides the authToken / refreshToken pair handed to Account.

    Returns:
        A result with success=True and shapeId / contentType / position, or
        success=False with an "error" message. An unsupported contentType is
        reported through the same error path.
    """
    try:
        presentationId = parameters["presentationId"]
        slideId = parameters["slideId"]
        kind = parameters["contentType"]
        payload = parameters["content"]
        position = parameters.get("position", {"x": 0, "y": 0})

        tokens = (userConnection.authToken, userConnection.refreshToken)
        drive = Account(credentials=tokens, protocol=MSGraphProtocol()).drive()

        targetSlide = drive.get_item_by_id(presentationId).get_slide(slideId)

        # The position keys are read inside each branch, so an unsupported type
        # is reported as such even when "position" is malformed.
        if kind == "table":
            shape = targetSlide.add_table(
                rows=payload["rows"],
                cols=payload["cols"],
                left=position["x"],
                top=position["y"]
            )
        elif kind == "image":
            shape = targetSlide.add_picture(
                image_path=payload,
                left=position["x"],
                top=position["y"]
            )
        elif kind == "text":
            shape = targetSlide.add_text_box(
                text=payload,
                left=position["x"],
                top=position["y"]
            )
        else:
            raise ValueError(f"Unsupported content type: {kind}")

        return self._createResult(
            success=True,
            data={
                "shapeId": shape.object_id,
                "contentType": kind,
                "position": position
            }
        )
    except Exception as e:
        logger.error(f"Error adding PowerPoint content: {e}")
        return self._createResult(
            success=False,
            data={"error": f"Add content failed: {str(e)}"}
        )

View file

@ -19,7 +19,7 @@ class MethodSharepoint(MethodBase):
super().__init__()
self.name = "sharepoint"
self.description = "Handle SharePoint document operations like search, read, and write"
self.auth_source = AuthSource.MICROSOFT
self.authSource = AuthSource.MICROSOFT
@property
def actions(self) -> Dict[str, Dict[str, Any]]:
@ -55,65 +55,104 @@ class MethodSharepoint(MethodBase):
"content": {"type": "string", "required": True},
"contentType": {"type": "string", "required": False}
}
},
"readList": {
"description": "Read items from SharePoint list",
"retryMax": 2,
"timeout": 30,
"parameters": {
"siteUrl": {"type": "string", "required": True},
"listName": {"type": "string", "required": True},
"query": {"type": "string", "required": False},
"fields": {"type": "array", "required": False}
}
},
"writeList": {
"description": "Write items to SharePoint list",
"retryMax": 2,
"timeout": 30,
"parameters": {
"siteUrl": {"type": "string", "required": True},
"listName": {"type": "string", "required": True},
"items": {"type": "array", "required": True}
}
},
"createList": {
"description": "Create a new SharePoint list",
"retryMax": 2,
"timeout": 30,
"parameters": {
"siteUrl": {"type": "string", "required": True},
"listName": {"type": "string", "required": True},
"description": {"type": "string", "required": False},
"template": {"type": "string", "required": False},
"fields": {"type": "array", "required": False}
}
}
}
async def execute(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
    """
    Validate a SharePoint request and dispatch it to the matching handler.

    Args:
        action: One of "search", "read", "write", "readList", "writeList"
            or "createList".
        parameters: Action parameters, checked with validateParameters().
        authData: Must contain a "userConnection" entry.

    Returns:
        The handler's result, or a result with success=False and an "error"
        message for invalid parameters, missing authentication, an unknown
        action, or any exception raised while dispatching.
    """
    try:
        # Validate parameters
        if not await self.validateParameters(action, parameters):
            return self._createResult(
                success=False,
                data={"error": f"Invalid parameters for {action}"}
            )

        # Every SharePoint action needs the caller's UserConnection
        if not authData or "userConnection" not in authData:
            return self._createResult(
                success=False,
                data={"error": "UserConnection required for SharePoint operations"}
            )
        userConnection: UserConnection = authData["userConnection"]

        # Execute action
        if action == "search":
            return await self._search_documents(parameters, userConnection)
        elif action == "read":
            return await self._read_document(parameters, userConnection)
        elif action == "write":
            return await self._write_document(parameters, userConnection)
        elif action == "readList":
            return await self._readList(parameters, userConnection)
        elif action == "writeList":
            return await self._writeList(parameters, userConnection)
        elif action == "createList":
            return await self._createList(parameters, userConnection)
        else:
            return self._createResult(
                success=False,
                data={"error": f"Unknown action: {action}"}
            )
    except Exception as e:
        logger.error(f"Error executing SharePoint {action}: {e}")
        return self._createResult(
            success=False,
            data={"error": str(e)}
        )
async def _search_documents(self, parameters: Dict[str, Any], user_connection: UserConnection) -> MethodResult:
async def _search_documents(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
"""Search SharePoint documents"""
try:
site_url = parameters["siteUrl"]
siteUrl = parameters["siteUrl"]
query = parameters["query"]
list_name = parameters.get("listName")
max_results = parameters.get("maxResults", 10)
listName = parameters.get("listName")
maxResults = parameters.get("maxResults", 10)
# Create SharePoint context
ctx = ClientContext(site_url).with_credentials(
UserCredential(user_connection.authToken, user_connection.refreshToken)
ctx = ClientContext(siteUrl).with_credentials(
UserCredential(userConnection.authToken, userConnection.refreshToken)
)
# Search in specific list or entire site
if list_name:
target_list = ctx.web.lists.get_by_title(list_name)
items = target_list.items.filter(f"Title eq '{query}'").top(max_results).get().execute_query()
if listName:
targetList = ctx.web.lists.get_by_title(listName)
items = targetList.items.filter(f"Title eq '{query}'").top(maxResults).get().execute_query()
results = [{
"title": item.properties["Title"],
"url": item.properties["FileRef"],
@ -128,9 +167,9 @@ class MethodSharepoint(MethodBase):
"url": result.properties["Path"],
"modified": result.properties["LastModifiedTime"],
"created": result.properties["Created"]
} for result in search_results[:max_results]]
} for result in search_results[:maxResults]]
return self._create_result(
return self._createResult(
success=True,
data={
"query": query,
@ -139,30 +178,30 @@ class MethodSharepoint(MethodBase):
)
except Exception as e:
logger.error(f"Error searching SharePoint documents: {e}")
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Search failed: {str(e)}"}
)
async def _read_document(self, parameters: Dict[str, Any], user_connection: UserConnection) -> MethodResult:
async def _read_document(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
"""Read SharePoint document content"""
try:
site_url = parameters["siteUrl"]
file_url = parameters["fileUrl"]
siteUrl = parameters["siteUrl"]
fileUrl = parameters["fileUrl"]
# Create SharePoint context
ctx = ClientContext(site_url).with_credentials(
UserCredential(user_connection.authToken, user_connection.refreshToken)
ctx = ClientContext(siteUrl).with_credentials(
UserCredential(userConnection.authToken, userConnection.refreshToken)
)
# Get file
file = ctx.web.get_file_by_server_relative_url(file_url)
file = ctx.web.get_file_by_server_relative_url(fileUrl)
file_content = file.read().execute_query()
return self._create_result(
return self._createResult(
success=True,
data={
"url": file_url,
"url": fileUrl,
"content": file_content.content.decode('utf-8'),
"modified": file.properties["TimeLastModified"],
"size": file.properties["Length"]
@ -170,48 +209,182 @@ class MethodSharepoint(MethodBase):
)
except Exception as e:
logger.error(f"Error reading SharePoint document: {e}")
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Read failed: {str(e)}"}
)
async def _write_document(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
    """
    Write text content to a SharePoint file, uploading it first if lookup fails.

    Args:
        parameters: Must contain "siteUrl", "fileUrl" (server-relative URL)
            and "content" (text, stored UTF-8 encoded).
        userConnection: Provides the authToken / refreshToken pair used as credentials.

    Returns:
        A result with success=True and url / modified (current UTC time in
        ISO format) / size (encoded byte length), or success=False with an
        "error" message if any step raised.
    """
    try:
        siteUrl = parameters["siteUrl"]
        fileUrl = parameters["fileUrl"]
        content = parameters["content"]
        # Encode once; the same bytes are uploaded, written and measured.
        payload = content.encode('utf-8')

        # Create SharePoint context
        ctx = ClientContext(siteUrl).with_credentials(
            UserCredential(userConnection.authToken, userConnection.refreshToken)
        )

        # Get or create file
        try:
            spFile = ctx.web.get_file_by_server_relative_url(fileUrl)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt / SystemExit
            # are no longer swallowed.
            # NOTE(review): if the lookup is lazy it may never raise here;
            # confirm that this fallback is actually reachable.
            folderUrl, _, fileName = fileUrl.rpartition("/")
            folder = ctx.web.get_folder_by_server_relative_url(folderUrl)
            spFile = folder.upload_file(fileName, payload).execute_query()

        # Update file content
        spFile.write(payload).execute_query()

        return self._createResult(
            success=True,
            data={
                "url": fileUrl,
                "modified": datetime.now(UTC).isoformat(),
                "size": len(payload)
            }
        )
    except Exception as e:
        logger.error(f"Error writing SharePoint document: {e}")
        return self._createResult(
            success=False,
            data={"error": f"Write failed: {str(e)}"}
        )
async def _readList(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
    """
    Fetch items from a SharePoint list, optionally filtered by a query.

    Args:
        parameters: Must contain "siteUrl" and "listName". Optional "query"
            (only passed on when truthy) and "fields" (defaults to ["*"]).
        userConnection: Provides the authToken / refreshToken pair handed to Account.

    Returns:
        A result with success=True and siteUrl / listName / items, or
        success=False with an "error" message if any step raised.
    """
    try:
        siteUrl = parameters["siteUrl"]
        listName = parameters["listName"]
        query = parameters.get("query")
        fields = parameters.get("fields", ["*"])

        tokens = (userConnection.authToken, userConnection.refreshToken)
        account = Account(credentials=tokens, protocol=MSGraphProtocol())

        # Resolve site, then list.
        spList = account.get_site(siteUrl).get_list(listName)

        # Forward `query` only when the caller supplied one.
        lookup = {"fields": fields}
        if query:
            lookup["query"] = query
        items = spList.get_items(**lookup)

        # NOTE(review): `items` is returned exactly as the library delivers it;
        # confirm it is serializable for downstream consumers.
        return self._createResult(
            success=True,
            data={
                "siteUrl": siteUrl,
                "listName": listName,
                "items": items
            }
        )
    except Exception as e:
        logger.error(f"Error reading SharePoint list: {e}")
        return self._createResult(
            success=False,
            data={"error": f"Read failed: {str(e)}"}
        )
async def _writeList(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
    """
    Add items to a SharePoint list, one call per item.

    Args:
        parameters: Must contain "siteUrl", "listName" and "items" (an
            iterable of item payloads passed to add_item()).
        userConnection: Provides the authToken / refreshToken pair handed to Account.

    Returns:
        A result with success=True and one {"id", "status": "success"} entry
        per added item, or success=False with an "error" message. The first
        failing item aborts the loop; items added before it are not reported.
    """
    try:
        siteUrl = parameters["siteUrl"]
        listName = parameters["listName"]
        newItems = parameters["items"]

        tokens = (userConnection.authToken, userConnection.refreshToken)
        account = Account(credentials=tokens, protocol=MSGraphProtocol())

        spList = account.get_site(siteUrl).get_list(listName)

        # Items are added sequentially, in the order given.
        outcomes = [
            {"id": spList.add_item(entry).id, "status": "success"}
            for entry in newItems
        ]

        return self._createResult(
            success=True,
            data={
                "siteUrl": siteUrl,
                "listName": listName,
                "results": outcomes
            }
        )
    except Exception as e:
        logger.error(f"Error writing to SharePoint list: {e}")
        return self._createResult(
            success=False,
            data={"error": f"Write failed: {str(e)}"}
        )
async def _createList(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
    """
    Create a SharePoint list and add the requested fields to it.

    Args:
        parameters: Must contain "siteUrl" and "listName". Optional
            "description", "template" (defaults to "generic") and "fields",
            a sequence of mappings with "name" and "type" plus optional
            "required" (default False) and "description".
        userConnection: Provides the authToken / refreshToken pair handed to Account.

    Returns:
        A result with success=True and siteUrl / listName / id / webUrl, or
        success=False with an "error" message if any step raised. A failure
        while adding fields is reported after the list was already created.
    """
    try:
        siteUrl = parameters["siteUrl"]
        listName = parameters["listName"]
        description = parameters.get("description")
        template = parameters.get("template", "generic")
        fieldSpecs = parameters.get("fields", [])

        tokens = (userConnection.authToken, userConnection.refreshToken)
        account = Account(credentials=tokens, protocol=MSGraphProtocol())
        site = account.get_site(siteUrl)

        createdList = site.create_list(
            name=listName,
            description=description,
            template=template
        )

        # Add the requested columns one by one.
        for spec in fieldSpecs:
            createdList.add_field(
                name=spec["name"],
                field_type=spec["type"],
                required=spec.get("required", False),
                description=spec.get("description")
            )

        summary = {
            "siteUrl": siteUrl,
            "listName": listName,
            "id": createdList.id,
            "webUrl": createdList.web_url
        }
        return self._createResult(success=True, data=summary)
    except Exception as e:
        logger.error(f"Error creating SharePoint list: {e}")
        return self._createResult(
            success=False,
            data={"error": f"Create failed: {str(e)}"}
        )

View file

@ -8,6 +8,7 @@ import re
from datetime import datetime, UTC
import requests
import time
import json
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
from modules.shared.configuration import APP_CONFIG
@ -74,70 +75,197 @@ class MethodWeb(MethodBase):
}
}
async def execute(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
    """
    Validate a web request and dispatch it to the matching handler.

    Args:
        action: One of "fetchUrl", "parseContent" or "extractData".
        parameters: Action parameters, checked with validateParameters().
        authData: Accepted for interface parity; not read by this method.

    Returns:
        The handler's result, or a result with success=False and an "error"
        message for invalid parameters, an unknown action, or any exception
        raised while dispatching.
    """
    # NOTE(review): _search_web is still defined in this class but is not
    # reachable from this dispatcher; confirm whether "search" should remain
    # an action.
    try:
        # Validate parameters
        if not await self.validateParameters(action, parameters):
            return self._createResult(
                success=False,
                data={"error": f"Invalid parameters for {action}"}
            )

        # Execute action
        if action == "fetchUrl":
            return await self._fetchUrl(parameters)
        elif action == "parseContent":
            return await self._parseContent(parameters)
        elif action == "extractData":
            return await self._extractData(parameters)
        else:
            return self._createResult(
                success=False,
                data={"error": f"Unknown action: {action}"}
            )
    except Exception as e:
        logger.error(f"Error executing web {action}: {e}")
        return self._createResult(
            success=False,
            data={"error": str(e)}
        )
async def _fetchUrl(self, parameters: Dict[str, Any]) -> MethodResult:
    """
    Perform one HTTP request and return status, headers and body text.

    Args:
        parameters: Must contain "url". Optional "method" (default "GET"),
            "headers" (default {}), "data" (request body) and "timeout"
            (total seconds for the request, default 30).

    Returns:
        A result with success=True and url / status / headers / content for
        any response that could be read (the HTTP status is not checked), or
        success=False with an "error" message if the request raised.
    """
    try:
        url = parameters["url"]
        method = parameters.get("method", "GET")
        headers = parameters.get("headers", {})
        data = parameters.get("data")
        timeout = parameters.get("timeout", 30)

        # aiohttp expects a ClientTimeout object; passing a bare number as
        # `timeout` is deprecated. `total` bounds the whole request.
        requestTimeout = aiohttp.ClientTimeout(total=timeout)

        # NOTE(review): the URL comes straight from the caller; if callers can
        # be untrusted, consider restricting schemes/hosts upstream.
        async with aiohttp.ClientSession() as session:
            async with session.request(
                method=method,
                url=url,
                headers=headers,
                data=data,
                timeout=requestTimeout
            ) as response:
                content = await response.text()
                return self._createResult(
                    success=True,
                    data={
                        "url": url,
                        "status": response.status,
                        "headers": dict(response.headers),
                        "content": content
                    }
                )
    except Exception as e:
        logger.error(f"Error fetching URL: {e}")
        return self._createResult(
            success=False,
            data={"error": f"Fetch failed: {str(e)}"}
        )
async def _parseContent(self, parameters: Dict[str, Any]) -> MethodResult:
    """
    Parse HTML or JSON text into a structured result.

    Args:
        parameters: Must contain "content" (the raw text). Optional
            "contentType" is "html" (default) or "json".

    Returns:
        For "html": type, title (None when the document has no <title>),
        text, links (href values of <a> tags that have one) and images (src
        values of <img> tags that have one). For "json": type and the decoded
        data. Any failure, including an unsupported contentType, yields
        success=False with an "error" message.
    """
    try:
        raw = parameters["content"]
        kind = parameters.get("contentType", "html")

        if kind == "json":
            decoded = json.loads(raw)
            return self._createResult(
                success=True,
                data={
                    "type": "json",
                    "data": decoded
                }
            )

        if kind != "html":
            raise ValueError(f"Unsupported content type: {kind}")

        soup = BeautifulSoup(raw, "html.parser")
        pageTitle = soup.title.string if soup.title else None
        hrefs = [anchor.get("href") for anchor in soup.find_all("a", href=True)]
        sources = [image.get("src") for image in soup.find_all("img", src=True)]
        return self._createResult(
            success=True,
            data={
                "type": "html",
                "title": pageTitle,
                "text": soup.get_text(),
                "links": hrefs,
                "images": sources
            }
        )
    except Exception as e:
        logger.error(f"Error parsing content: {e}")
        return self._createResult(
            success=False,
            data={"error": f"Parse failed: {str(e)}"}
        )
async def _extractData(self, parameters: Dict[str, Any]) -> MethodResult:
    """
    Pull named values out of HTML (CSS selectors) or JSON (dotted paths).

    Args:
        parameters: Must contain "content" (raw text) and "selectors", a
            mapping of result key to selector. Optional "contentType" is
            "html" (default) or "json". For "html" each selector is a CSS
            selector; for "json" it is a dotted path such as "items.0.name",
            where numeric parts index into lists.

    Returns:
        A result with success=True, the type and a "results" mapping. For
        HTML a selector matching exactly one element yields its stripped
        text, otherwise a list of stripped texts. For JSON a path that
        cannot be followed (missing key, non-numeric or out-of-range list
        index, scalar in the middle) yields None for that key. Any other
        failure, including an unsupported contentType, yields success=False
        with an "error" message.
    """
    try:
        content = parameters["content"]
        contentType = parameters.get("contentType", "html")
        selectors = parameters["selectors"]

        if contentType == "html":
            soup = BeautifulSoup(content, "html.parser")
            results = {}
            for key, selector in selectors.items():
                elements = soup.select(selector)
                if len(elements) == 1:
                    results[key] = elements[0].get_text().strip()
                else:
                    results[key] = [el.get_text().strip() for el in elements]
            return self._createResult(
                success=True,
                data={
                    "type": "html",
                    "results": results
                }
            )
        elif contentType == "json":
            data = json.loads(content)
            results = {}
            for key, path in selectors.items():
                value = data
                for part in path.split("."):
                    if isinstance(value, dict):
                        value = value.get(part)
                    elif (
                        isinstance(value, list)
                        # isdecimal() only admits characters int() can parse.
                        and part.isdecimal()
                        # An out-of-range index resolves to None, like a
                        # missing dict key, instead of failing every selector.
                        and int(part) < len(value)
                    ):
                        value = value[int(part)]
                    else:
                        value = None
                        break
                results[key] = value
            return self._createResult(
                success=True,
                data={
                    "type": "json",
                    "results": results
                }
            )
        else:
            raise ValueError(f"Unsupported content type: {contentType}")
    except Exception as e:
        logger.error(f"Error extracting data: {e}")
        return self._createResult(
            success=False,
            data={"error": f"Extract failed: {str(e)}"}
        )
async def _search_web(self, parameters: Dict[str, Any]) -> MethodResult:
    """
    Run a web search through the selected engine.

    Args:
        parameters: Must contain "query". Optional "maxResults" (default 10),
            "filters" (default {}) and "searchEngine" ("google" by default,
            or "bing"; compared case-insensitively).

    Returns:
        A result with success=True and query / engine / results, or
        success=False with an "error" message for an unsupported engine or
        any exception raised by the search.
    """
    try:
        query = parameters["query"]
        maxResults = parameters.get("maxResults", 10)
        filters = parameters.get("filters", {})
        searchEngine = parameters.get("searchEngine", "google")

        # Pick the engine-specific helper
        engineKey = searchEngine.lower()
        if engineKey == "google":
            # TODO: Implement Google Custom Search API integration
            results = await self._google_search(query, maxResults, filters)
        elif engineKey == "bing":
            # TODO: Implement Bing Web Search API integration
            results = await self._bing_search(query, maxResults, filters)
        else:
            return self._createResult(
                success=False,
                data={"error": f"Unsupported search engine: {searchEngine}"}
            )

        return self._createResult(
            success=True,
            data={
                "query": query,
                "engine": searchEngine,
                "results": results
            }
        )
    except Exception as e:
        logger.error(f"Error searching web: {e}")
        return self._createResult(
            success=False,
            data={"error": f"Search failed: {str(e)}"}
        )
@ -173,14 +301,14 @@ class MethodWeb(MethodBase):
try:
url = parameters["url"]
depth = parameters.get("depth", 1)
follow_links = parameters.get("followLinks", False)
include_images = parameters.get("includeImages", False)
respect_robots = parameters.get("respectRobots", True)
followLinks = parameters.get("followLinks", False)
includeImages = parameters.get("includeImages", False)
respectRobots = parameters.get("respectRobots", True)
# Check robots.txt if required
if respect_robots:
if respectRobots:
if not await self._check_robots_txt(url):
return self._create_result(
return self._createResult(
success=False,
data={"error": "Crawling not allowed by robots.txt"}
)
@ -198,114 +326,57 @@ class MethodWeb(MethodBase):
"title": soup.title.string if soup.title else None,
"description": self._get_meta_description(soup),
"links": [],
"images": [] if include_images else None,
"images": [] if includeImages else None,
"text": soup.get_text(strip=True),
"crawled": datetime.now(UTC).isoformat()
}
# Extract links if followLinks is True
if follow_links:
base_url = url
if followLinks:
baseUrl = url
for link in soup.find_all('a'):
href = link.get('href')
if href:
absolute_url = urljoin(base_url, href)
if self._is_valid_url(absolute_url):
absoluteUrl = urljoin(baseUrl, href)
if self._is_valid_url(absoluteUrl):
result["links"].append({
"url": absolute_url,
"url": absoluteUrl,
"text": link.get_text(strip=True)
})
# Extract images if includeImages is True
if include_images:
if includeImages:
for img in soup.find_all('img'):
src = img.get('src')
if src:
absolute_src = urljoin(url, src)
absoluteSrc = urljoin(url, src)
result["images"].append({
"url": absolute_src,
"url": absoluteSrc,
"alt": img.get('alt', ''),
"title": img.get('title', '')
})
return self._create_result(
return self._createResult(
success=True,
data=result
)
else:
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Failed to fetch URL: {response.status}"}
)
except Exception as e:
logger.error(f"Error crawling page: {e}")
return self._create_result(
return self._createResult(
success=False,
data={"error": f"Crawl failed: {str(e)}"}
)
async def _extract_content(self, parameters: Dict[str, Any]) -> MethodResult:
"""Extract content from web page"""
try:
url = parameters["url"]
selectors = parameters.get("selectors")
format = parameters.get("format", "text")
include_metadata = parameters.get("includeMetadata", False)
async with aiohttp.ClientSession() as session:
async with session.get(url) as response:
if response.status == 200:
html = await response.text()
soup = BeautifulSoup(html, 'html.parser')
# Extract content based on selectors
content = {}
if selectors:
for selector in selectors:
elements = soup.select(selector)
content[selector] = [elem.get_text() for elem in elements]
else:
# Default extraction
content = {
"title": soup.title.string if soup.title else None,
"text": soup.get_text(strip=True),
"links": [a.get('href') for a in soup.find_all('a')]
}
# Add metadata if requested
if include_metadata:
content["metadata"] = {
"url": url,
"crawled": datetime.now(UTC).isoformat(),
"language": self._detect_language(soup),
"wordCount": len(content["text"].split()),
"linksCount": len(content["links"])
}
return self._create_result(
success=True,
data={
"url": url,
"content": content
}
)
else:
return self._create_result(
success=False,
data={"error": f"Failed to fetch URL: {response.status}"}
)
except Exception as e:
logger.error(f"Error extracting content: {e}")
return self._create_result(
success=False,
data={"error": f"Extraction failed: {str(e)}"}
)
def _get_meta_description(self, soup: BeautifulSoup) -> Optional[str]:
    """
    Return the content of the page's <meta name="description"> tag.

    Args:
        soup: Parsed document to search.

    Returns:
        The tag's "content" attribute, or None when no such tag exists (or
        the tag has no "content" attribute).
    """
    metaDesc = soup.find('meta', attrs={'name': 'description'})
    return metaDesc.get('content') if metaDesc else None
def _is_valid_url(self, url: str) -> bool:
@ -319,31 +390,31 @@ class MethodWeb(MethodBase):
async def _check_robots_txt(self, url: str) -> bool:
"""Check if URL is allowed by robots.txt"""
try:
parsed_url = urlparse(url)
robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt"
parsedUrl = urlparse(url)
robotsUrl = f"{parsedUrl.scheme}://{parsedUrl.netloc}/robots.txt"
async with aiohttp.ClientSession() as session:
async with session.get(robots_url, headers={"User-Agent": self.userAgent}, timeout=self.timeout) as response:
async with session.get(robotsUrl, headers={"User-Agent": self.userAgent}, timeout=self.timeout) as response:
if response.status == 200:
robots_content = await response.text()
robotsContent = await response.text()
# Parse robots.txt content
user_agent = "*" # Default to all user agents
disallow_paths = []
userAgent = "*" # Default to all user agents
disallowPaths = []
for line in robots_content.splitlines():
for line in robotsContent.splitlines():
line = line.strip().lower()
if line.startswith("user-agent:"):
user_agent = line[11:].strip()
elif line.startswith("disallow:") and user_agent in ["*", self.userAgent.lower()]:
userAgent = line[11:].strip()
elif line.startswith("disallow:") and userAgent in ["*", self.userAgent.lower()]:
path = line[9:].strip()
if path:
disallow_paths.append(path)
disallowPaths.append(path)
# Check if URL path is disallowed
url_path = parsed_url.path
for disallow_path in disallow_paths:
if url_path.startswith(disallow_path):
urlPath = parsedUrl.path
for disallowPath in disallowPaths:
if urlPath.startswith(disallowPath):
return False
return True
@ -364,32 +435,32 @@ class MethodWeb(MethodBase):
return soup.html.get('lang')
# Try to get language from meta tag
meta_lang = soup.find('meta', attrs={'http-equiv': 'content-language'})
if meta_lang:
return meta_lang.get('content', 'en')
metaLang = soup.find('meta', attrs={'http-equiv': 'content-language'})
if metaLang:
return metaLang.get('content', 'en')
# Try to get language from meta charset
meta_charset = soup.find('meta', attrs={'charset': True})
if meta_charset:
charset = meta_charset.get('charset', '').lower()
metaCharset = soup.find('meta', attrs={'charset': True})
if metaCharset:
charset = metaCharset.get('charset', '').lower()
if 'utf-8' in charset:
return 'en' # Default to English for UTF-8
# Try to detect language from content
# This is a simple heuristic based on common words
text = soup.get_text().lower()
common_words = {
commonWords = {
'en': ['the', 'and', 'of', 'to', 'in', 'is', 'that', 'for', 'it', 'with'],
'es': ['el', 'la', 'los', 'las', 'de', 'y', 'en', 'que', 'por', 'con'],
'fr': ['le', 'la', 'les', 'de', 'et', 'en', 'que', 'pour', 'avec', 'dans'],
'de': ['der', 'die', 'das', 'und', 'in', 'den', 'von', 'zu', 'für', 'mit']
}
word_counts = {lang: sum(1 for word in words if f' {word} ' in f' {text} ')
for lang, words in common_words.items()}
wordCounts = {lang: sum(1 for word in words if f' {word} ' in f' {text} ')
for lang, words in commonWords.items()}
if word_counts:
return max(word_counts.items(), key=lambda x: x[1])[0]
if wordCounts:
return max(wordCounts.items(), key=lambda x: x[1])[0]
return 'en' # Default to English if no language detected

View file

@ -0,0 +1,368 @@
"""
DSGVO-konformer Daten-Neutralisierer für KI-Agentensysteme
Unterstützt TXT, JSON, CSV, Excel und Word-Dateien
Mehrsprachig: DE, EN, FR, IT
"""
import re
import json
import pandas as pd
import docx
from pathlib import Path
from typing import Dict, List, Tuple, Any, Union, Optional
from dataclasses import dataclass
import uuid
import logging
import traceback
import csv
from datetime import datetime
import xml.etree.ElementTree as ET
import os
import random
from io import StringIO
from patterns import Pattern, HeaderPatterns, DataPatterns, get_pattern_for_header, find_patterns_in_text, TextTablePatterns
import base64
# Configure logging
logger = logging.getLogger(__name__)
@dataclass
class TableData:
    """
    Tabular content extracted from a source document.

    Attributes:
        headers: Column names, one per column.
        rows: Data rows; each row is a list of cell values.
        source_type: Origin of the table: 'csv', 'json', 'xml' or 'text_table'.
    """
    headers: List[str]
    rows: List[List[str]]
    source_type: str
@dataclass
class PlainText:
    """
    A section of ordinary (non-tabular) text.

    Attributes:
        content: The text itself.
        source_type: Origin of the text: 'txt', 'docx' or 'text_plain'.
    """
    content: str
    source_type: str
@dataclass
class ProcessResult:
    """
    Outcome of processing one piece of content.

    Attributes:
        data: The processed content.
        mapping: String-to-string mapping produced during processing.
        replaced_fields: Names of the fields that were replaced.
        processed_info: Additional processing information.
    """
    data: Any
    mapping: Dict[str, str]
    replaced_fields: List[str]
    processed_info: Dict[str, Any]
class DataAnonymizer:
"""Hauptklasse für die Datenanonymisierung"""
def __init__(self):
    """Set up empty run state and load the header and data pattern tables."""
    # Per-instance state, filled while content is processed.
    self.mapping = {}             # original value -> placeholder
    self.replaced_fields = set()
    self.processing_info = []
    # Pattern tables provided by the `patterns` module.
    self.header_patterns = HeaderPatterns.patterns
    self.data_patterns = DataPatterns.patterns
def _normalize_whitespace(self, text: str) -> str:
    """
    Collapse every run of whitespace to a single space and trim both ends.

    Line breaks count as whitespace, so the result is always a single line.
    """
    collapsed = re.sub(r'\s+', ' ', text)
    # After the collapse no '\r' or '\n' is left, so these newline
    # conversions cannot change the value; kept to mirror the original steps.
    for line_ending in ('\r\n', '\r'):
        collapsed = collapsed.replace(line_ending, '\n')
    return collapsed.strip()
def _is_table_line(self, line: str) -> bool:
    """
    Tell whether a line looks like a two-part table row.

    A line qualifies when it is either "key: value" with exactly one colon,
    or two non-empty parts separated by exactly one tab.
    """
    key_value_row = r'^\s*[^:]+:\s*[^:]+$'
    tab_separated_row = r'^\s*[^\t]+\t[^\t]+$'
    return any(
        re.match(row_pattern, line) is not None
        for row_pattern in (key_value_row, tab_separated_row)
    )
def _extract_tables_from_text(self, content: str) -> Tuple[List[TableData], List[PlainText]]:
"""
Extract tables and plain text from content
Args:
content: Content to process
Returns:
Tuple of (list of tables, list of plain text sections)
"""
tables = []
plain_texts = []
# Process the entire content as plain text
plain_texts.append(PlainText(content=content, source_type='text_plain'))
return tables, plain_texts
def _anonymize_table(self, table: TableData) -> TableData:
"""Anonymize table data"""
try:
anonymized_table = TableData(
headers=table.headers.copy(),
rows=[row.copy() for row in table.rows],
source_type=table.source_type
)
for i, header in enumerate(anonymized_table.headers):
pattern = get_pattern_for_header(header, self.header_patterns)
if pattern:
for row in anonymized_table.rows:
if row[i] is not None:
original = str(row[i])
if original not in self.mapping:
self.mapping[original] = pattern.replacement_template.format(len(self.mapping) + 1)
row[i] = self.mapping[original]
return anonymized_table
except Exception as e:
logger.error(f"Error anonymizing table: {str(e)}")
logger.debug(traceback.format_exc())
raise
def _anonymize_plain_text(self, text: PlainText) -> PlainText:
"""Anonymize plain text content"""
try:
# Process the entire text at once instead of line by line
current_text = text.content
# Find all matches in the entire text
matches = find_patterns_in_text(current_text, self.data_patterns)
# Process matches in reverse order to avoid position shifting
for match in sorted(matches, key=lambda x: x[2], reverse=True):
pattern_name, matched_text, start, end = match
# Skip if the matched text is already a placeholder
if re.match(r'\[[A-Z_]+\d+\]', matched_text):
continue
# Find the pattern that matched
pattern = next((p for p in self.data_patterns if p.name == pattern_name), None)
if pattern:
# Use the pattern's replacement template
if matched_text not in self.mapping:
self.mapping[matched_text] = pattern.replacement_template.format(len(self.mapping) + 1)
replacement = self.mapping[matched_text]
if pattern_name == 'email':
print(f"DEBUG: Replacing email '{matched_text}' with '{replacement}'")
print(f"DEBUG: Text after replacement: {current_text[:start] + replacement + current_text[end:]}")
# Replace the matched text while preserving surrounding whitespace
current_text = current_text[:start] + replacement + current_text[end:]
return PlainText(content=current_text, source_type=text.source_type)
except Exception as e:
logger.error(f"Error anonymizing plain text: {str(e)}")
logger.debug(traceback.format_exc())
raise
def _anonymize_json_value(self, value: Any, key: str = None) -> Any:
"""
Recursively anonymize JSON values based on their keys and content
Args:
value: Value to anonymize
key: Key name (if part of a key-value pair)
Returns:
Anonymized value
"""
if isinstance(value, dict):
return {k: self._anonymize_json_value(v, k) for k, v in value.items()}
elif isinstance(value, list):
return [self._anonymize_json_value(item) for item in value]
elif isinstance(value, str):
# Check if this is a key we should process
if key:
pattern = get_pattern_for_header(key, self.header_patterns)
if pattern:
if value not in self.mapping:
self.mapping[value] = pattern.replacement_template.format(len(self.mapping) + 1)
return self.mapping[value]
# Check if the value itself matches any patterns
matches = find_patterns_in_text(value, self.data_patterns)
if matches:
# Use the first match's pattern
pattern_name = matches[0][0]
if value not in self.mapping:
self.mapping[value] = f"{pattern_name.upper()}_{len(self.mapping) + 1}"
return self.mapping[value]
return value
else:
return value
def _anonymize_xml_element(self, element: ET.Element, indent: str = '') -> str:
"""
Recursively process XML element and return formatted string
Args:
element: XML element to process
indent: Current indentation level
Returns:
Formatted XML string
"""
# Process attributes
processed_attrs = {}
for attr_name, attr_value in element.attrib.items():
# Check if attribute name matches any header patterns
pattern = get_pattern_for_header(attr_name, self.header_patterns)
if pattern:
if attr_value not in self.mapping:
self.mapping[attr_value] = pattern.replacement_template.format(len(self.mapping) + 1)
processed_attrs[attr_name] = self.mapping[attr_value]
else:
# Check if attribute value matches any data patterns
matches = find_patterns_in_text(attr_value, self.data_patterns)
if matches:
pattern_name = matches[0][0]
pattern = next((p for p in self.data_patterns if p.name == pattern_name), None)
if pattern:
if attr_value not in self.mapping:
self.mapping[attr_value] = pattern.replacement_template.format(len(self.mapping) + 1)
processed_attrs[attr_name] = self.mapping[attr_value]
else:
processed_attrs[attr_name] = attr_value
else:
processed_attrs[attr_name] = attr_value
attrs = ' '.join(f'{k}="{v}"' for k, v in processed_attrs.items())
attrs = f' {attrs}' if attrs else ''
# Process text content
text = element.text.strip() if element.text and element.text.strip() else ''
if text:
# Check if text matches any patterns
matches = find_patterns_in_text(text, self.data_patterns)
if matches:
pattern_name = matches[0][0]
pattern = next((p for p in self.data_patterns if p.name == pattern_name), None)
if pattern:
if text not in self.mapping:
self.mapping[text] = pattern.replacement_template.format(len(self.mapping) + 1)
text = self.mapping[text]
# Process child elements
children = []
for child in element:
child_str = self._anonymize_xml_element(child, indent + ' ')
children.append(child_str)
# Build element string
if not children and not text:
return f"{indent}<{element.tag}{attrs}/>"
elif not children:
return f"{indent}<{element.tag}{attrs}>{text}</{element.tag}>"
else:
result = [f"{indent}<{element.tag}{attrs}>"]
if text:
result.append(f"{indent} {text}")
result.extend(children)
result.append(f"{indent}</{element.tag}>")
return '\n'.join(result)
def process_content(self, content: str, content_type: str) -> ProcessResult:
"""
Process content and return anonymized data
Args:
content: Content to process
content_type: Type of content ('csv', 'json', 'xml', 'text')
Returns:
ProcessResult: Contains anonymized data, mapping, replaced fields and processing info
"""
try:
# Check if content is binary data
is_binary = False
try:
# Try to decode base64 if it's a string
try:
decoded = base64.b64decode(content)
# If it's not valid text, consider it binary
decoded.decode('utf-8')
except (base64.binascii.Error, UnicodeDecodeError):
is_binary = True
except Exception:
is_binary = True
if is_binary:
# TODO: Implement binary data neutralization
# This would require:
# 1. Detecting binary data types (images, audio, video, etc.)
# 2. Implementing specific neutralization for each type
# 3. Handling metadata and embedded content
# 4. Preserving binary integrity while removing sensitive data
return ProcessResult(content, self.mapping, [], {'type': 'binary', 'status': 'not_implemented'})
replaced_fields = []
processed_info = {}
if content_type in ['csv', 'json', 'xml']:
# Handle as table
if content_type == 'csv':
df = pd.read_csv(StringIO(content), encoding='utf-8')
table = TableData(
headers=df.columns.tolist(),
rows=df.values.tolist(),
source_type='csv'
)
processed_info['type'] = 'table'
processed_info['headers'] = table.headers
processed_info['row_count'] = len(table.rows)
elif content_type == 'json':
data = json.loads(content)
# Process JSON recursively
result = self._anonymize_json_value(data)
processed_info['type'] = 'json'
return ProcessResult(result, self.mapping, replaced_fields, processed_info)
else: # xml
root = ET.fromstring(content)
# Process XML recursively with proper formatting
result = self._anonymize_xml_element(root)
processed_info['type'] = 'xml'
return ProcessResult(result, self.mapping, replaced_fields, processed_info)
if not table.rows:
return ProcessResult(None, self.mapping, [], processed_info)
anonymized_table = self._anonymize_table(table)
# Track replaced fields
for i, header in enumerate(anonymized_table.headers):
for orig_row, anon_row in zip(table.rows, anonymized_table.rows):
if anon_row[i] != orig_row[i]:
replaced_fields.append(header)
# Convert back to original format
if content_type == 'csv':
result = pd.DataFrame(anonymized_table.rows, columns=anonymized_table.headers)
elif content_type == 'json':
if len(anonymized_table.headers) == 1 and anonymized_table.headers[0] == 'value':
result = anonymized_table.rows[0][0]
else:
result = dict(zip(anonymized_table.headers, anonymized_table.rows[0]))
else: # xml
result = ET.tostring(root, encoding='unicode')
return ProcessResult(result, self.mapping, replaced_fields, processed_info)
else:
# Handle as text
# First, identify what needs to be replaced using table detection
tables, plain_texts = self._extract_tables_from_text(content)
processed_info['type'] = 'text'
processed_info['tables'] = [{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]
# Process plain text sections
anonymized_texts = [self._anonymize_plain_text(text) for text in plain_texts]
# Combine all processed content
result = content
for text, anonymized_text in zip(plain_texts, anonymized_texts):
if text.content != anonymized_text.content:
result = result.replace(text.content, anonymized_text.content)
return ProcessResult(result, self.mapping, replaced_fields, processed_info)
except Exception as e:
logger.error(f"Error processing content: {str(e)}")
logger.debug(traceback.format_exc())
return ProcessResult(None, self.mapping, [], {'type': 'error', 'error': str(e)})

View file

@ -0,0 +1,402 @@
"""
Pattern definitions for data anonymization
Separates header patterns from data patterns
"""
from dataclasses import dataclass
from typing import List, Optional, Tuple
import re
@dataclass
class Pattern:
    """Base class for patterns: a named group of regexes sharing one placeholder format."""
    # Category name, e.g. "email" or "phone".
    name: str
    # Regular-expression source strings; a hit on any one counts as a match.
    patterns: List[str]
    # Format string with one "{}" slot for a number, e.g. "[EMAIL_{}]".
    replacement_template: str
class HeaderPatterns:
    """Patterns for identifying sensitive data in headers

    Each entry groups regexes for field *names* (column headers, JSON keys,
    XML attribute names) in English, German, French and Italian.
    get_pattern_for_header() walks this list in order and returns the first
    entry with a matching regex, so the order of the entries decides which
    category wins when a header matches several of them.
    """
    patterns = [
        # Name patterns
        Pattern(
            name="name",
            patterns=[
                # Simple variations
                r'\b(?:name|first[-_\s]*name|last[-_\s]*name|full[-_\s]*name)\b',
                r'\b(?:customer[-_\s]*name|client[-_\s]*name|user[-_\s]*name)\b',
                r'\b(?:given[-_\s]*name|family[-_\s]*name|surname)\b',
                # German variations
                r'\b(?:vorname|nachname|vollständiger[-_\s]*name|name)\b',
                r'\b(?:kunden[-_\s]*name|kunde[-_\s]*name|benutzer[-_\s]*name)\b',
                # French variations
                r'\b(?:prénom|nom|nom[-_\s]*complet)\b',
                r'\b(?:nom[-_\s]*du[-_\s]*client|nom[-_\s]*d\'utilisateur)\b',
                # Italian variations
                r'\b(?:nome|cognome|nome[-_\s]*completo)\b',
                r'\b(?:nome[-_\s]*cliente|nome[-_\s]*utente)\b',
                # Common variations
                r'\b(?:nom|name|nome|naam)\b'
            ],
            replacement_template="[NAME_{}]"
        ),
        # Email patterns
        Pattern(
            name="email",
            patterns=[
                # Simple variations - only labels
                r'\b(?:email|e[-_\s]*mail|mail)\s*:?\b',
                r'\b(?:contact[-_\s]*email|user[-_\s]*email|client[-_\s]*email)\s*:?\b',
                r'\b(?:customer[-_\s]*email|customer[-_\s]*mail|customer[-_\s]*e[-_\s]*mail)\s*:?\b',
                # German variations - only labels
                r'\b(?:e[-_\s]*mail|e[-_\s]*post|mail[-_\s]*adresse)\s*:?\b',
                r'\b(?:kontakt[-_\s]*email|benutzer[-_\s]*email|kunden[-_\s]*email)\s*:?\b',
                r'\b(?:kunden[-_\s]*mail|kunden[-_\s]*e[-_\s]*mail|kunden[-_\s]*e[-_\s]*post)\s*:?\b',
                # French variations - only labels
                r'\b(?:courriel|e[-_\s]*mail|adresse[-_\s]*e[-_\s]*mail)\s*:?\b',
                r'\b(?:courriel[-_\s]*de[-_\s]*contact|e[-_\s]*mail[-_\s]*client)\s*:?\b',
                r'\b(?:courriel[-_\s]*client|courriel[-_\s]*utilisateur|mail[-_\s]*client)\s*:?\b',
                # Italian variations - only labels
                r'\b(?:posta[-_\s]*elettronica|e[-_\s]*mail|indirizzo[-_\s]*e[-_\s]*mail)\s*:?\b',
                r'\b(?:email[-_\s]*cliente|email[-_\s]*utente)\s*:?\b',
                r'\b(?:mail[-_\s]*cliente|mail[-_\s]*utente|posta[-_\s]*cliente)\s*:?\b'
            ],
            replacement_template="[EMAIL_{}]"
        ),
        # Phone patterns
        Pattern(
            name="phone",
            patterns=[
                # Simple variations
                r'\b(?:phone|tel|telephone|mobile)\b',
                r'\b(?:contact[-_\s]*number|phone[-_\s]*number|tel[-_\s]*number)\b',
                # German variations
                r'\b(?:telefon|mobil|handy|telefon[-_\s]*nummer)\b',
                r'\b(?:kontakt[-_\s]*nummer|telefon[-_\s]*nummer|tel[-_\s]*nummer)\b',
                # French variations
                r'\b(?:téléphone|portable|mobile|numéro[-_\s]*de[-_\s]*téléphone)\b',
                r'\b(?:numéro[-_\s]*de[-_\s]*contact|tél[-_\s]*fixe|tél[-_\s]*mobile)\b',
                # Italian variations
                r'\b(?:telefono|cellulare|mobile|numero[-_\s]*di[-_\s]*telefono)\b',
                r'\b(?:numero[-_\s]*di[-_\s]*contatto|tel[-_\s]*fisso|tel[-_\s]*mobile)\b'
            ],
            replacement_template="[PHONE_{}]"
        ),
        # IBAN patterns
        # Bank-account, credit-card and payment headers all share the
        # "[IBAN_{}]" placeholder of this entry.
        Pattern(
            name="iban",
            patterns=[
                # Simple variations
                r'\b(?:iban|bank[-_\s]*account|account[-_\s]*number)\b',
                r'\b(?:bank[-_\s]*details|account[-_\s]*details|banking[-_\s]*info)\b',
                # German variations
                r'\b(?:iban|bank[-_\s]*konto|konto[-_\s]*nummer)\b',
                r'\b(?:bank[-_\s]*verbindung|konto[-_\s]*verbindung|bank[-_\s]*daten)\b',
                # French variations
                r'\b(?:iban|compte[-_\s]*bancaire|numéro[-_\s]*de[-_\s]*compte)\b',
                r'\b(?:coordonnées[-_\s]*bancaires|détails[-_\s]*bancaires)\b',
                # Credit card variations in French
                r'\b(?:carte[-_\s]*de[-_\s]*credit|carte[-_\s]*credit|numero[-_\s]*carte[-_\s]*credit)\b',
                r'\b(?:carte[-_\s]*bancaire|carte[-_\s]*de[-_\s]*paiement)\b',
                r'\b(?:carte[-_\s]*de[-_\s]*crédit|carte[-_\s]*crédit|numéro[-_\s]*carte[-_\s]*crédit)\b',
                r'\b(?:carte[-_\s]*de[-_\s]*débit|carte[-_\s]*débit|numéro[-_\s]*carte[-_\s]*débit)\b',
                # Italian variations
                r'\b(?:iban|conto[-_\s]*bancario|numero[-_\s]*di[-_\s]*conto)\b',
                r'\b(?:coordinate[-_\s]*bancarie|dettagli[-_\s]*bancari)\b',
                # Common variations
                r'\b(?:bankkonto|bank[-_\s]*konto|conto[-_\s]*di[-_\s]*banca)\b',
                # Credit card variations
                r'\b(?:credit[-_\s]*card|credit[-_\s]*card[-_\s]*number|credit[-_\s]*card[-_\s]*no)\b',
                r'\b(?:credit[-_\s]*card[-_\s]*nr|credit[-_\s]*card[-_\s]*num)\b',
                r'\b(?:credit[-_\s]*card[-_\s]*id|credit[-_\s]*card[-_\s]*code)\b',
                r'\b(?:credit[-_\s]*card[-_\s]*reference|credit[-_\s]*card[-_\s]*ref)\b',
                r'\b(?:credit[-_\s]*card[-_\s]*details|credit[-_\s]*card[-_\s]*info)\b',
                r'\b(?:credit[-_\s]*card[-_\s]*data|credit[-_\s]*card[-_\s]*account)\b',
                # Credit card variations in other languages
                r'\b(?:kredit[-_\s]*karte|kreditkarte|kredit[-_\s]*karten[-_\s]*nummer)\b',
                r'\b(?:carta[-_\s]*di[-_\s]*credito|carta[-_\s]*credito|numero[-_\s]*carta[-_\s]*credito)\b',
                # Payment variations
                r'\b(?:payment[-_\s]*details|payment[-_\s]*info|payment[-_\s]*data)\b',
                r'\b(?:zahlungs[-_\s]*details|zahlungs[-_\s]*informationen|zahlungs[-_\s]*daten)\b',
                r'\b(?:détails[-_\s]*de[-_\s]*paiement|informations[-_\s]*de[-_\s]*paiement)\b',
                r'\b(?:dettagli[-_\s]*di[-_\s]*pagamento|informazioni[-_\s]*di[-_\s]*pagamento)\b',
                # Common credit card abbreviations
                r'\b(?:cc[-_\s]*number|cc[-_\s]*no|cc[-_\s]*nr)\b',
                r'\b(?:cc[-_\s]*num|cc[-_\s]*id|cc[-_\s]*code)\b',
                r'\b(?:cc[-_\s]*ref|cc[-_\s]*details|cc[-_\s]*info)\b',
                r'\b(?:cc[-_\s]*data|cc[-_\s]*account)\b',
                # Simple credit card
                r'\b(?:credit[-_\s]*card|credit[-_\s]*card[-_\s]*number)\b',
                # Additional credit card variations
                r'\b(?:card[-_\s]*number|card[-_\s]*no|card[-_\s]*nr)\b',
                r'\b(?:card[-_\s]*num|card[-_\s]*id|card[-_\s]*code)\b',
                r'\b(?:card[-_\s]*ref|card[-_\s]*details|card[-_\s]*info)\b',
                r'\b(?:card[-_\s]*data|card[-_\s]*account)\b'
            ],
            replacement_template="[IBAN_{}]"
        ),
        # Address patterns
        # NOTE(review): "mail[-_\s]*adresse" appears in the email entry above,
        # which is checked first; the catch-all ".*adresse.*" below only sees
        # headers that no earlier entry claimed.
        Pattern(
            name="address",
            patterns=[
                # English variations
                r'\b(?:address|street[-_\s]*address|mailing[-_\s]*address)\b',
                r'\b(?:home[-_\s]*address|work[-_\s]*address|billing[-_\s]*address)\b',
                r'\b(?:.*address.*)\b',  # Match any text containing "address"
                # German variations
                r'\b(?:adresse|strassen[-_\s]*adresse|post[-_\s]*adresse)\b',
                r'\b(?:wohn[-_\s]*adresse|geschäfts[-_\s]*adresse|rechnungs[-_\s]*adresse)\b',
                r'\b(?:.*adresse.*)\b',  # Match any text containing "adresse"
                # French variations
                r'\b(?:adresse|adresse[-_\s]*postale|adresse[-_\s]*de[-_\s]*livraison)\b',
                r'\b(?:adresse[-_\s]*personnelle|adresse[-_\s]*professionnelle)\b',
                r'\b(?:.*adresse.*)\b',  # Match any text containing "adresse"
                # Italian variations
                r'\b(?:indirizzo|indirizzo[-_\s]*postale|indirizzo[-_\s]*di[-_\s]*consegna)\b',
                r'\b(?:indirizzo[-_\s]*personale|indirizzo[-_\s]*professionale)\b',
                r'\b(?:.*indirizzo.*)\b',  # Match any text containing "indirizzo"
                # Common variations
                r'\b(?:location|place|residence|domicile)\b',
                r'\b(?:standort|ort|wohnort|domizil)\b',
                r'\b(?:lieu|emplacement|résidence|domicile)\b',
                r'\b(?:luogo|posizione|residenza|domicilio)\b'
            ],
            replacement_template="[ADDRESS_{}]"
        ),
        # Date patterns
        # NOTE(review): the Italian alternative "data" also matches a generic
        # header called "data" - confirm this is intended.
        # NOTE(review): "geb\." is followed by \b, which needs a word
        # character right after the dot - presumably it never matches a
        # header that ends in "geb."; verify.
        Pattern(
            name="date",
            patterns=[
                # English variations
                r'\b(?:date|birth[-_\s]*date|date[-_\s]*of[-_\s]*birth)\b',
                r'\b(?:dob|birthday|anniversary)\b',
                # German variations
                r'\b(?:datum|geburt[-_\s]*datum|geboren[-_\s]*am)\b',
                r'\b(?:geburtstag|jubiläum|feier[-_\s]*tag)\b',
                r'\b(?:geboren|geb\.|geboren[-_\s]*am)\b',
                # French variations
                r'\b(?:date|date[-_\s]*de[-_\s]*naissance|né[-_\s]*le)\b',
                r'\b(?:anniversaire|date[-_\s]*anniversaire)\b',
                r'\b(?:né|née|né[-_\s]*le)\b',
                # Italian variations
                r'\b(?:data|data[-_\s]*di[-_\s]*nascita|nato[-_\s]*il)\b',
                r'\b(?:compleanno|anniversario)\b',
                r'\b(?:nato|nata|nato[-_\s]*il)\b',
                # Common variations
                r'\b(?:birth|born|geboren|né|nato)\b'
            ],
            replacement_template="[DATE_{}]"
        ),
        # SSN patterns
        # Several alternatives below are written twice inside one group
        # (e.g. "ahv[-_\s]*id|ahv[-_\s]*id"); the duplicate is redundant.
        Pattern(
            name="ssn",
            patterns=[
                # English variations
                r'\b(?:ssn|social[-_\s]*security[-_\s]*number|tax[-_\s]*id)\b',
                r'\b(?:tax[-_\s]*identification|national[-_\s]*id)\b',
                # German variations
                r'\b(?:ahv[-_\s]*nummer|sozial[-_\s]*versicherungs[-_\s]*nummer)\b',
                r'\b(?:steuer[-_\s]*nummer|steuer[-_\s]*id|svn)\b',
                r'\b(?:ahv[-_\s]*nr|ahv[-_\s]*no|ahv[-_\s]*num)\b',
                # French variations
                r'\b(?:numéro[-_\s]*avs|numéro[-_\s]*de[-_\s]*sécurité[-_\s]*sociale)\b',
                r'\b(?:numéro[-_\s]*fiscal|numéro[-_\s]*d\'identification)\b',
                # Italian variations
                r'\b(?:numero[-_\s]*avs|numero[-_\s]*di[-_\s]*sicurezza[-_\s]*sociale)\b',
                r'\b(?:numero[-_\s]*fiscale|codice[-_\s]*fiscale)\b',
                # Common variations
                r'\b(?:ahv|svn|nss|avs)\b',
                # Additional AHV variations
                r'\b(?:ahv_nummer|ahvnummer|ahv-nummer|ahv_number)\b',
                r'\b(?:ahv[-_\s]*nr|ahv[-_\s]*no|ahv[-_\s]*num)\b',
                r'\b(?:ahv[-_\s]*number|ahv[-_\s]*number)\b',
                r'\b(?:ahv[-_\s]*id|ahv[-_\s]*id)\b',
                r'\b(?:ahv[-_\s]*code|ahv[-_\s]*code)\b',
                r'\b(?:ahv[-_\s]*reference|ahv[-_\s]*reference)\b',
                r'\b(?:ahv[-_\s]*reference[-_\s]*number|ahv[-_\s]*reference[-_\s]*number)\b',
                r'\b(?:ahv[-_\s]*reference[-_\s]*no|ahv[-_\s]*reference[-_\s]*no)\b',
                r'\b(?:ahv[-_\s]*reference[-_\s]*nr|ahv[-_\s]*reference[-_\s]*nr)\b',
                r'\b(?:ahv[-_\s]*reference[-_\s]*num|ahv[-_\s]*reference[-_\s]*num)\b',
                r'\b(?:ahv[-_\s]*reference[-_\s]*id|ahv[-_\s]*reference[-_\s]*id)\b',
                r'\b(?:ahv[-_\s]*reference[-_\s]*code|ahv[-_\s]*reference[-_\s]*code)\b'
            ],
            replacement_template="[SSN_{}]"
        )
    ]
class DataPatterns:
    """Patterns for identifying sensitive data in content

    These regexes are run against values / free text (not field names).
    find_patterns_in_text() applies every regex with re.IGNORECASE, so
    character classes such as [A-Z] or [a-z] below are not case-sensitive
    when used through that function.
    """
    patterns = [
        # Name patterns
        Pattern(
            name="name",
            patterns=[
                # Person names with titles and academic degrees
                # A title is required; one to two further words follow the first name part.
                r'\b(?:Dr\.|Prof\.|PhD\.?|MD\.?|Herr|Frau|Mr\.|Mrs\.|Ms\.|Monsieur|Madame|Signore|Signora)\s+[A-Z][a-z]{2,}(?:\s+[A-Za-z]{2,}){1,2}\b'
            ],
            replacement_template="[NAME_{}]"
        ),
        # Email pattern for plain text
        Pattern(
            name="email",
            patterns=[
                # Basic email pattern
                r'[A-Za-z0-9._%+-]+@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)*'
            ],
            replacement_template="[EMAIL_{}]"
        ),
        # Phone patterns
        # NOTE(review): several of these formats can match the same number,
        # producing overlapping matches for one span.
        Pattern(
            name="phone",
            patterns=[
                # International format
                r'\+\d{1,3}[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}\b',
                # Swiss format
                r'\b(?:0\d{1,2}|0041\d{1,2})[-.\s]?\d{3}[-.\s]?\d{2}[-.\s]?\d{2}\b',
                # German format
                r'\b(?:0\d{1,4}|0049\d{1,4})[-.\s]?\d{3,}[-.\s]?\d{3,}\b',
                # French format
                r'\b(?:0\d{1,2}|0033\d{1,2})[-.\s]?\d{1,2}[-.\s]?\d{2}[-.\s]?\d{2}[-.\s]?\d{2}\b',
                # Italian format
                r'\b(?:0\d{1,3}|0039\d{1,3})[-.\s]?\d{3,}[-.\s]?\d{3,}\b',
                # Mobile numbers
                r'\b(?:07|00417|004917|00337|00397)\d{8,9}\b',
                # Emergency numbers
                # NOTE(review): matches these digit sequences wherever they
                # stand alone as a word, not only when used as phone numbers.
                r'\b(?:112|911|118|117|144|1414)\b'
            ],
            replacement_template="[PHONE_{}]"
        ),
        # IBAN patterns
        # NOTE(review): both regexes require 24 digits after the two-letter
        # country code (2 + 5x4 + 2) - confirm this against the real IBAN
        # lengths of the listed countries.
        Pattern(
            name="iban",
            patterns=[
                r'\b(?:CH|DE|FR|IT)\d{2}\s?(?:\d{4}\s?){5}\d{2}\b',
                r'\b(?:CH|DE|FR|IT)\d{2}(?:\d{4}){5}\d{2}\b'
            ],
            replacement_template="[IBAN_{}]"
        ),
        # Address patterns
        Pattern(
            name="address",
            patterns=[
                # Street name ending in a street-type suffix, followed by a house number.
                r'\b(?:[A-Za-zäöüßÄÖÜ]+(?:strasse|str\.|gasse|weg|platz|allee|boulevard|avenue|via|strada|rue|chemin|route))\s+\d{1,4}(?:[a-z])?\b',
                # Four digits followed by a word (postal code + town).
                # NOTE(review): also matches any other 4-digit number followed
                # by a word, e.g. a year - confirm this is acceptable.
                r'\b\d{4}\s+[A-Za-zäöüßÄÖÜ]+\b'
            ],
            replacement_template="[ADDRESS_{}]"
        ),
        # Date patterns
        # Only dates introduced by a birth/contract keyword plus "am"/"vom" are matched.
        Pattern(
            name="date",
            patterns=[
                # Specific date formats with context
                r'\b(?:geboren|birth|né|nato)\s+am\s+[0-9]{2}[./-][0-9]{2}[./-][0-9]{4}\b',  # Birth dates
                r'\b(?:geboren|birth|né|nato)\s+am\s+[0-9]{4}[./-][0-9]{2}[./-][0-9]{2}\b',  # Birth dates
                r'\b(?:vertrag|contract|contrat|contratto)\s+vom\s+[0-9]{2}[./-][0-9]{2}[./-][0-9]{4}\b',  # Contract dates
                r'\b(?:vertrag|contract|contrat|contratto)\s+vom\s+[0-9]{4}[./-][0-9]{2}[./-][0-9]{2}\b',  # Contract dates
                # Specific date formats with month names
                r'\b(?:geboren|birth|né|nato)\s+am\s+(?:jan|feb|mar|apr|mai|jun|jul|aug|sep|okt|nov|dez|januar|februar|märz|april|mai|juni|juli|august|september|oktober|november|dezember)[a-z]*\s+\d{4}\b',  # Birth dates with month
                r'\b(?:vertrag|contract|contrat|contratto)\s+vom\s+(?:jan|feb|mar|apr|mai|jun|jul|aug|sep|okt|nov|dez|januar|februar|märz|april|mai|juni|juli|august|september|oktober|november|dezember)[a-z]*\s+\d{4}\b'  # Contract dates with month
            ],
            replacement_template="[DATE_{}]"
        ),
        # SSN patterns
        Pattern(
            name="ssn",
            patterns=[
                r'\b(?:756|757|758|759)\.\d{4}\.\d{4}\.\d{2}\b',  # Swiss AHV
                r'\b(?:CHE|DE|FR|IT)-\d{3}\.\d{3}\.\d{3}\b',  # Company IDs
                r'\b\d{3}\.\d{3}\.\d{3}\b'  # Generic SSN format
            ],
            replacement_template="[SSN_{}]"
        )
    ]
class TextTablePatterns:
    """Recognize "key<separator>value" lines inside free text."""

    @staticmethod
    def get_patterns() -> List[Tuple[str, str]]:
        """Return (regex, separator) pairs in the order they are tried."""
        colon_loose = (r'^([^:]+):\s*(.+)$', ':')      # key: value, whitespace optional
        equals_loose = (r'^([^=]+)=\s*(.+)$', '=')     # key = value, whitespace optional
        equals_spaced = (r'^([^=]+)\s+=\s+(.+)$', '=')  # key = value, whitespace required
        colon_spaced = (r'^([^:]+)\s+:\s+(.+)$', ':')  # key : value, whitespace required
        return [colon_loose, equals_loose, equals_spaced, colon_spaced]

    @staticmethod
    def is_table_line(line: str) -> bool:
        """Tell whether the trimmed line matches at least one table pattern."""
        candidate = line.strip()
        for regex, _separator in TextTablePatterns.get_patterns():
            if re.match(regex, candidate):
                return True
        return False

    @staticmethod
    def extract_key_value(line: str) -> Optional[Tuple[str, str]]:
        """Split a table line into (key, value); None when no pattern matches.

        The first pattern that matches the trimmed line wins; key and value
        are returned without surrounding whitespace.
        """
        candidate = line.strip()
        for regex, _separator in TextTablePatterns.get_patterns():
            found = re.match(regex, candidate)
            if found is None:
                continue
            return found.group(1).strip(), found.group(2).strip()
        return None
def get_pattern_for_header(header: str, patterns: "List[Pattern]") -> "Optional[Pattern]":
    """
    Find matching pattern for a header

    The header is lower-cased and trimmed, then every regex of every pattern
    is searched case-insensitively; the first pattern (in list order) with a
    hit is returned. Non-string headers (for example integer column labels)
    are converted with ``str()`` instead of raising ``AttributeError``.

    Args:
        header: The header to check
        patterns: List of patterns to check against

    Returns:
        Optional[Pattern]: Matching pattern or None
    """
    if not header:
        return None
    normalized = str(header).lower().strip()
    for pattern in patterns:
        if any(re.search(expr, normalized, re.IGNORECASE) for expr in pattern.patterns):
            return pattern
    return None
def find_patterns_in_text(text: str, patterns: "List[Pattern]") -> List[tuple]:
    """
    Find all pattern matches in text

    Every regex of every pattern is applied case-insensitively. Matches of
    different regexes may overlap or cover the same span; no de-duplication
    is done here. Matched values are deliberately not printed or logged,
    because they are exactly the personal data that is to be neutralized.

    Args:
        text: Text to search
        patterns: List of patterns to check

    Returns:
        List[tuple]: List of (pattern_name, match, start, end), sorted by
        start position; empty when ``text`` is empty or None
    """
    if not text:
        return []
    matches = [
        (pattern.name, found.group(0), found.start(), found.end())
        for pattern in patterns
        for expr in pattern.patterns
        for found in re.finditer(expr, text, re.IGNORECASE)
    ]
    return sorted(matches, key=lambda x: x[2])  # Sort by start position

View file

@ -1,67 +0,0 @@
"""
Utility functions for MIME type handling and file format determination.
"""
def isTextMimeType(mimeType: str) -> bool:
    """
    Tell whether a MIME type denotes textual content that needs no base64 encoding.

    Args:
        mimeType: The MIME type to check

    Returns:
        True for every "text/..." type and for a fixed set of text-based
        application/image types, False otherwise
    """
    if mimeType.startswith("text/"):
        return True
    # Types outside "text/" whose payload is nevertheless plain text.
    textBasedTypes = (
        "application/json",
        "application/xml",
        "application/javascript",
        "application/x-python",
        "image/svg+xml",
    )
    return mimeType in textBasedTypes
def determineContentEncoding(fileName: str, content: object, mimeType: str = None) -> bool:
    """
    Determines if content should be base64 encoded based on file type and MIME type.

    Decision order:
      1. A text MIME type decides immediately: encode unless content is a str.
      2. A known binary file extension always means encode.
      3. Otherwise encode everything that is not a str (bytes included).

    Args:
        fileName: Name of the file including extension
        content: The content of the file
        mimeType: Optional MIME type of the content

    Returns:
        True if content should be base64 encoded, False otherwise
    """
    contentIsText = isinstance(content, str)

    # If MIME type is provided, use it for determination
    if mimeType and isTextMimeType(mimeType):
        return not contentIsText

    # Local import kept as in the original implementation
    import os
    extension = os.path.splitext(fileName)[1].lower().lstrip('.')

    # For binary formats, always base64 encode
    binaryExtensions = {'jpg', 'jpeg', 'png', 'gif', 'pdf', 'doc', 'docx', 'xls', 'xlsx', 'zip', 'rar'}
    if extension in binaryExtensions:
        return True

    # Text extensions with str content, bytes content and unknown types all
    # reduce to the same rule: only str content is passed through unencoded.
    return not contentIsText

View file

@ -0,0 +1,106 @@
"""
Document Manager Module for handling document operations and content extraction.
"""
import base64
import logging
from typing import List, Optional, Dict, Any, Union
from pathlib import Path
import uuid
from modules.interfaces.serviceChatModel import (
ChatDocument,
TaskDocument,
ExtractedContent,
ContentItem,
ContentMetadata
)
from modules.workflow.serviceContainer import ServiceContainer
from modules.workflow.processorDocument import DocumentProcessor
logger = logging.getLogger(__name__)
class DocumentManager:
    """Coordinates document conversion and content extraction via a DocumentProcessor."""

    def __init__(self, serviceContainer: ServiceContainer):
        """Keep the service container and create the document processor."""
        self.service = serviceContainer
        self._processor = DocumentProcessor()

    async def extractFromChatDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
        """
        Run content extraction on a ChatDocument.

        The chat document is first converted into a TaskDocument, then handed
        to the processor together with the prompt.

        Args:
            prompt: Prompt passed to the processor
            document: The ChatDocument to process

        Returns:
            ExtractedContent whose objectId / objectType point back to the
            original ChatDocument
        """
        convertedDocument = await self._convertToTaskDocument(document)
        result = await self._processor.processDocument(convertedDocument, prompt)

        # Reference the original chat document instead of the temporary task document
        result.objectId = document.id
        result.objectType = "ChatDocument"
        return result

    async def extractFromTaskDocument(self, prompt: str, document: TaskDocument) -> ExtractedContent:
        """
        Run content extraction directly on a TaskDocument.

        Args:
            prompt: Prompt passed to the processor
            document: The task document to extract content from

        Returns:
            ExtractedContent produced by the processor

        Raises:
            Exception: any processor error is logged and re-raised unchanged
        """
        try:
            extracted = await self._processor.processDocument(document, prompt)
        except Exception as e:
            logger.error(f"Error extracting from task document: {str(e)}")
            raise
        return extracted

    async def _convertToTaskDocument(self, chatDoc: ChatDocument) -> TaskDocument:
        """
        Build a TaskDocument from a ChatDocument.

        The file content is fetched through the service container and stored
        base64-encoded in the new document, which receives a fresh UUID.

        Args:
            chatDoc: The chat document to convert

        Returns:
            TaskDocument carrying filename, size, MIME type and encoded data

        Raises:
            ValueError: If no content is returned for the file
            Exception: any other error is logged and re-raised unchanged
        """
        try:
            rawContent = await self.service.functions.getFileData(chatDoc.fileId)
            if not rawContent:
                raise ValueError(f"Could not get content for file {chatDoc.fileId}")

            encodedContent = base64.b64encode(rawContent).decode('utf-8')
            documentId = str(uuid.uuid4())
            return TaskDocument(
                id=documentId,
                filename=chatDoc.filename,
                fileSize=chatDoc.fileSize,
                mimeType=chatDoc.mimeType,
                data=encodedContent
            )
        except Exception as e:
            logger.error(f"Error converting chat document to task document: {str(e)}")
            raise

View file

@ -2,15 +2,18 @@ import logging
import importlib
import pkgutil
import inspect
from typing import Dict, Any, Optional, List, Type
from typing import Dict, Any, Optional, List, Type, Callable, Awaitable
from datetime import datetime, UTC
import json
import asyncio
import base64
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
from modules.workflow.serviceContainer import ServiceContainer
from modules.interfaces.serviceChatModel import AgentTask, AgentAction, AgentResult, Action, TaskStatus
from modules.workflow.managerPrompt import AIPromptManager
from modules.interfaces.serviceChatModel import (
AgentTask, AgentAction, AgentResult, Action, TaskStatus, ChatWorkflow,
ChatMessage, ChatDocument, ChatStat, ExtractedContent, ContentItem
)
from modules.workflow.processorDocument import DocumentProcessor
from modules.shared.configuration import APP_CONFIG
@ -21,33 +24,34 @@ class ChatManager:
def __init__(self):
self.service = ServiceContainer()
self._discover_methods()
self.workflow = None
self.current_task = None
self.workflow_history = []
self._discoverMethods()
self.workflow: Optional[ChatWorkflow] = None
self.currentTask: Optional[AgentTask] = None
self.workflowHistory: List[ChatMessage] = []
self.documentProcessor = DocumentProcessor()
def _discover_methods(self):
def _discoverMethods(self):
"""Dynamically discover all method classes in modules.methods package"""
try:
# Import the methods package
methods_package = importlib.import_module('modules.methods')
methodsPackage = importlib.import_module('modules.methods')
# Discover all modules in the package
for _, name, is_pkg in pkgutil.iter_modules(methods_package.__path__):
if not is_pkg and name.startswith('method'):
for _, name, isPkg in pkgutil.iter_modules(methodsPackage.__path__):
if not isPkg and name.startswith('method'):
try:
# Import the module
module = importlib.import_module(f'modules.methods.{name}')
# Find all classes in the module that inherit from MethodBase
for item_name, item in inspect.getmembers(module):
for itemName, item in inspect.getmembers(module):
if (inspect.isclass(item) and
issubclass(item, MethodBase) and
item != MethodBase):
# Instantiate the method and add to service
method_instance = item()
self.service.methods[method_instance.name] = method_instance
logger.info(f"Discovered method: {method_instance.name}")
methodInstance = item()
self.service.methods[methodInstance.name] = methodInstance
logger.info(f"Discovered method: {methodInstance.name}")
except Exception as e:
logger.error(f"Error loading method module {name}: {str(e)}")
@ -55,37 +59,58 @@ class ChatManager:
except Exception as e:
logger.error(f"Error discovering methods: {str(e)}")
async def initialize(self, workflow: Any, context: Dict[str, Any]) -> None:
"""Initialize chat manager with workflow and context"""
async def initialize(self, workflow: ChatWorkflow) -> None:
"""Initialize chat manager with workflow"""
self.service.workflow = workflow
self.service.context = context
# Initialize AI model
self.service.model = {
'callAiBasic': self._call_ai_basic,
'callAiAdvanced': self._call_ai_advanced
'callAiBasic': self._callAiBasic,
'callAiAdvanced': self._callAiAdvanced
}
# Initialize document processor
self.service.document_processor.initialize(context)
self.service.documentProcessor.initialize()
async def create_initial_task(self, user_input: Dict[str, Any]) -> AgentTask:
def _generatePrompt(self, task: str, document: "ChatDocument", examples: Optional[List[Dict[str, str]]] = None) -> str:
    """Build a plain-text prompt from a task description and a document.

    Args:
        task: Task description, emitted on the "Task:" line.
        document: Object whose ``filename`` and ``mimeType`` attributes are
            written on the "Document:" line.
        examples: Optional few-shot examples. Each mapping may carry
            ``input`` and ``output`` keys; a missing key renders as empty.

    Returns:
        The assembled prompt, or an empty string if building it failed
        (the error is logged rather than raised).
    """
    try:
        # Collect the pieces and join once instead of growing a string with +=.
        parts = [f"Task: {task}\nDocument: {document.filename} ({document.mimeType})\n"]

        # None and an empty list both mean "no examples section".
        if examples:
            parts.append("\nExamples:\n")
            parts.extend(
                f"Input: {example.get('input', '')}\nOutput: {example.get('output', '')}\n\n"
                for example in examples
            )

        return "".join(parts)
    except Exception as e:
        # Best-effort helper: callers receive "" rather than an exception.
        logger.error(f"Error generating prompt: {str(e)}")
        return ""
async def createInitialTask(self, userInput: Dict[str, Any]) -> AgentTask:
"""Create initial task from user input"""
# Get available methods and their actions
method_catalog = self.service.get_available_methods()
methodCatalog = self.service.getAvailableMethods()
# Process user input with AI
processed_input = await self._process_user_input(user_input, method_catalog)
processedInput = await self._processUserInput(userInput, methodCatalog)
# Create actions from processed input
actions = await self._create_actions(processed_input['actions'])
actions = await self._createActions(processedInput['actions'])
# Create task
task = AgentTask(
id=f"task_{datetime.now(UTC).timestamp()}",
workflowId=self.workflow.id,
userInput=processed_input['objective'],
dataList=user_input.get('connections', []),
userInput=processedInput['objective'],
dataList=userInput.get('connections', []),
actionList=actions,
status=TaskStatus.PENDING,
createdAt=datetime.now(UTC),
@ -96,15 +121,15 @@ class ChatManager:
self.service.tasks['current'] = task
return task
async def execute_current_task(self) -> None:
async def executeCurrentTask(self) -> None:
"""Execute current task"""
task = self.service.tasks.get('current')
if not task:
raise ValueError("No current task to execute")
await self.service.execute_task(task)
await self.service.executeTask(task)
async def define_next_task(self) -> Optional[AgentTask]:
async def defineNextTask(self) -> Optional[AgentTask]:
"""Define next task based on current task results"""
current_task = self.service.tasks.get('current')
if not current_task:
@ -112,7 +137,7 @@ class ChatManager:
try:
# Analyze task results
analysis = await self._analyze_task_results(current_task)
analysis = await self._analyzeTaskResults(current_task)
# If workflow is complete, update task status
if analysis['isComplete']:
@ -122,7 +147,7 @@ class ChatManager:
# If more actions needed, create next task
if not analysis['isComplete']:
next_task = self._create_next_task(current_task, analysis)
next_task = self._createNextTask(current_task, analysis)
self.service.tasks['previous'] = current_task
self.service.tasks['current'] = next_task
return next_task
@ -133,15 +158,15 @@ class ChatManager:
current_task.updatedAt = datetime.now(UTC)
return None
async def _process_user_input(self, user_input: Dict[str, Any], method_catalog: Dict[str, Any]) -> Dict[str, Any]:
async def _processUserInput(self, userInput: Dict[str, Any], methodCatalog: Dict[str, Any]) -> Dict[str, Any]:
"""Process user input with AI to extract objectives and actions"""
# Create prompt with available methods and actions
prompt = f"""Given the following user input and available methods/actions, extract the objective and required actions:
User Input: {user_input.get('message', '')}
User Input: {userInput.get('message', '')}
Available Methods and Actions:
{json.dumps(method_catalog, indent=2)}
{json.dumps(methodCatalog, indent=2)}
Please provide a JSON response with:
1. objective: The main goal or task to accomplish
@ -164,22 +189,22 @@ Example format:
"""
# Call AI service
response = await self.service.model['callAiBasic'](prompt)
response = await self._callAiBasic(prompt)
return json.loads(response)
async def _create_actions(self, actions_data: List[Dict[str, Any]]) -> List[AgentAction]:
async def _createActions(self, actionsData: List[Dict[str, Any]]) -> List[AgentAction]:
"""Create action objects from processed input"""
actions = []
for action_data in actions_data:
method = self.service.get_method(action_data['method'])
for actionData in actionsData:
method = self.service.getMethod(actionData['method'])
if not method:
continue
action = AgentAction(
id=f"action_{datetime.now(UTC).timestamp()}",
method=action_data['method'],
action=action_data['action'],
parameters=action_data.get('parameters', {}),
method=actionData['method'],
action=actionData['action'],
parameters=actionData.get('parameters', {}),
status=TaskStatus.PENDING,
createdAt=datetime.now(UTC),
updatedAt=datetime.now(UTC)
@ -188,7 +213,7 @@ Example format:
return actions
async def _summarize_workflow(self) -> str:
async def _summarizeWorkflow(self) -> str:
"""Summarize workflow history"""
if not self.workflow.messages:
return ""
@ -203,12 +228,12 @@ Example format:
4. Any issues or blockers
"""
return await self.service.model['callAiBasic'](prompt)
return await self._callAiBasic(prompt)
async def _analyze_task_results(self, task: AgentTask) -> Dict[str, Any]:
async def _analyzeTaskResults(self, task: AgentTask) -> Dict[str, Any]:
"""Analyze task results to determine next steps"""
# Get workflow summary
summary = await self._summarize_workflow()
summary = await self._summarizeWorkflow()
# Create prompt for analysis
prompt = f"""Analyze the following task results and workflow history to determine next steps:
@ -240,10 +265,10 @@ Example format:
}}
"""
response = await self.service.model['callAiBasic'](prompt)
response = await self._callAiBasic(prompt)
return json.loads(response)
def _create_next_task(self, current_task: AgentTask, analysis: Dict[str, Any]) -> AgentTask:
def _createNextTask(self, current_task: AgentTask, analysis: Dict[str, Any]) -> AgentTask:
"""Create next task based on analysis"""
# Create actions for next task
actions = []
@ -271,20 +296,20 @@ Example format:
updatedAt=datetime.now(UTC)
)
async def process_task(self, task: Any) -> Dict[str, Any]:
async def processTask(self, task: AgentTask) -> Dict[str, Any]:
"""Process a task with improved error handling and AI integration"""
try:
# Execute task
await self.service.execute_task(task)
await self.service.executeTask(task)
# Process results
if task.status == 'success':
if task.status == TaskStatus.COMPLETED:
# Generate feedback using AI
feedback = await self._process_task_results(task)
feedback = await self._processTaskResults(task)
task.thisTaskFeedback = feedback
# Create output documents
documents = await self._create_output_documents(task)
documents = await self._createOutputDocuments(task)
task.documentsOutput = documents
return {
@ -307,89 +332,168 @@ Example format:
"feedback": f"Error processing task: {str(e)}"
}
async def _process_task_results(self, task: Any) -> str:
"""Process task results and generate feedback using AI"""
def _generateDocumentPrompt(self, task: str) -> str:
    """Return the instruction text that asks the AI to emit output documents for a task.

    Args:
        task: Task description, inserted on the "Task:" line.

    Returns:
        A newline-terminated prompt describing the TaskDocument structure,
        the encoding rules, and the expected JSON-array reply.
    """
    promptLines = (
        "Generate output documents for the following task:",
        f"Task: {task}",
        "For each document you need to generate, provide a TaskDocument object with the following structure:",
        "{",
        '"filename": "string", # Filename with extension',
        '"mimeType": "string", # MIME type of the file',
        '"data": "string", # File content as text or base64',
        '"base64Encoded": boolean # True if data is base64 encoded',
        "}",
        "Rules:",
        "1. For text files (txt, json, xml, etc.), provide content directly in the data field",
        "2. For binary files (images, videos, etc.), encode content in base64 and set base64Encoded to true",
        "3. Use appropriate MIME types (e.g., text/plain, image/jpeg, application/pdf)",
        "4. Include file extensions in filenames",
        "Return a JSON array of TaskDocument objects.",
    )
    # Every line, including the last one, ends with a newline.
    return "\n".join(promptLines) + "\n"
async def _processTaskResults(self, task: AgentTask) -> str:
"""Process task results and generate feedback"""
try:
# Create context for AI
context = {
"task": "Process task results",
"document": {"name": "Task Results", "type": "json"}
}
# Generate document prompt
docPrompt = self._generateDocumentPrompt(task.userInput)
# Generate prompt
prompt = self.service.prompt_manager.generate_prompt(
context,
[
{"input": "Task results", "output": "Generate summary"}
]
)
# Get AI response for document generation
docResponse = await self._callAiBasic(docPrompt)
# Call AI
response = await self.service.model['callAiBasic'](
f"""Process task results and generate feedback:
Task Input: {task.userInput}
Method Results: {task.result}
Generated Documents: {task.documentsOutput}
# Parse response into TaskDocument objects
try:
taskDocs = json.loads(docResponse)
task.documentsOutput = taskDocs
except json.JSONDecodeError as e:
logger.error(f"Error parsing document response: {str(e)}")
return f"Error processing results: {str(e)}"
# Generate feedback
feedback = await self._callAiBasic(
f"""Generate feedback for the completed task:
Task: {task.userInput}
Generated Documents: {len(task.documentsOutput)} files
{prompt}
Please provide:
1. Summary of completed actions
2. Generated document descriptions
3. Next steps or completion status
Format your response as JSON:
{{
"summary": "string",
"documents": ["string"],
"nextSteps": ["string"]
}}
Provide a concise summary of what was accomplished.
"""
)
# Parse and validate response
try:
result = json.loads(response)
return result.get("summary", "Task completed successfully")
except json.JSONDecodeError:
return response.strip()
return feedback
except Exception as e:
logger.error(f"Error processing task results: {str(e)}")
return f"Error processing results: {str(e)}"
async def _create_output_documents(self, task: Any) -> List[Dict[str, Any]]:
async def _createOutputDocuments(self, task: AgentTask) -> List[ChatDocument]:
"""Create output documents from task results"""
try:
documents = []
fileIds = []
# Process each document
for doc in task.documentsOutput:
processed = self.service.document_processor.process_with_context(
doc,
{
"id": doc.get("id", ""),
"extractionHistory": doc.get("extractionHistory", []),
"relevantSections": doc.get("relevantSections", []),
"processingStatus": doc.get("processingStatus", {})
}
# Process each TaskDocument from AI output
for taskDoc in task.documentsOutput:
# Store file in database
fileItem = self.service.functions.createFile(
name=taskDoc.filename,
mimeType=taskDoc.mimeType
)
if processed:
documents.append(processed)
# Store file content
if taskDoc.base64Encoded:
# Decode base64 content
content = base64.b64decode(taskDoc.data)
else:
# Use text content directly
content = taskDoc.data.encode('utf-8')
# Store file data
self.service.functions.createFileData(fileItem.id, content)
fileIds.append(fileItem.id)
return documents
# Convert all files to ChatDocuments in one call
if fileIds:
return await self.service.chat.processFileIds(fileIds)
return []
except Exception as e:
logger.error(f"Error creating output documents: {str(e)}")
return []
async def _call_ai_basic(self, prompt: str) -> str:
    """Return a fixed placeholder reply; the prompt is not used yet."""
    # TODO: replace this stub with an actual AI call
    placeholderReply = "AI response placeholder"
    return placeholderReply
async def _callAiBasic(self, prompt: str) -> str:
    """Send a prompt to the base AI interface behind a fixed system message.

    Args:
        prompt: Text passed as the user message.

    Returns:
        Whatever ``self.service.base.callAi`` returns for the two-message
        conversation.

    Raises:
        ValueError: If the service or its base interface is missing.
        Exception: Any error from the AI call is logged and re-raised.
    """
    try:
        service = self.service
        baseInterface = service.base if service else None
        if not baseInterface:
            raise ValueError("Service or base interface not initialized")

        conversation = [
            {"role": "system", "content": "You are an AI assistant that helps process user requests."},
            {"role": "user", "content": prompt},
        ]
        return await baseInterface.callAi(conversation)
    except Exception as e:
        logger.error(f"Error calling AI service: {str(e)}")
        raise
async def _call_ai_advanced(self, prompt: str, context: Dict[str, Any]) -> str:
async def _callAiAdvanced(self, prompt: str, context: Dict[str, Any]) -> str:
"""Call advanced AI model with context"""
# TODO: Implement actual AI call
return "AI response placeholder"
return "AI response placeholder"
async def generateWorkflowFeedback(self, workflow: ChatWorkflow) -> str:
"""
Generates a final feedback message for the workflow in the user's language.
Args:
workflow: The completed workflow to generate feedback for
Returns:
str: The generated feedback message
"""
try:
# Get workflow summary
workflowSummary = {
"status": workflow.status,
"totalMessages": len(workflow.messages),
"totalDocuments": sum(len(msg.documents) for msg in workflow.messages),
"duration": (datetime.now(UTC) - datetime.fromisoformat(workflow.startedAt)).total_seconds()
}
# Get user language from workflow mandate
userLanguage = workflow.mandateId.split('_')[0] if workflow.mandateId else 'en'
# Prepare messages for AI context
messages = [
{
"role": "system",
"content": f"You are an AI assistant providing a summary of a completed workflow. "
f"Please respond in '{userLanguage}' language. "
f"Summarize the workflow's activities, outcomes, and any important points. "
f"Be concise but informative. Use a professional but friendly tone."
},
{
"role": "user",
"content": f"Please provide a summary of this workflow:\n"
f"Status: {workflowSummary['status']}\n"
f"Total Messages: {workflowSummary['totalMessages']}\n"
f"Total Documents: {workflowSummary['totalDocuments']}\n"
f"Duration: {workflowSummary['duration']:.1f} seconds"
}
]
# Add relevant workflow messages for context
for msg in workflow.messages:
if msg.role == "user" or msg.status in ["first", "last"]:
messages.append({
"role": msg.role,
"content": msg.message
})
# Generate feedback using AI
feedback = await self.service.aiService.callApi(messages, temperature=0.7)
return feedback
except Exception as e:
logger.error(f"Error generating workflow feedback: {str(e)}")
return "Workflow completed successfully."

View file

@ -1,478 +1,106 @@
from typing import Dict, Any, Optional, List
import logging
import json
import os
from datetime import datetime, UTC
from pathlib import Path
import mimetypes
import hashlib
import shutil
import uuid
import base64
"""
Document Manager Module for handling document operations and content extraction.
"""
import base64
import logging
from typing import List, Optional, Dict, Any, Union
from pathlib import Path
import uuid
from modules.interfaces.serviceChatModel import (
ChatDocument,
TaskDocument,
ExtractedContent,
ContentItem,
ContentMetadata
)
from modules.workflow.serviceContainer import ServiceContainer
from modules.workflow.processorDocument import DocumentProcessor
from modules.shared.configuration import APP_CONFIG
from modules.interfaces.serviceChatModel import ChatDocument, ChatContent
logger = logging.getLogger(__name__)
class DocumentManager:
"""Document manager with enhanced operations and file handling"""
"""Manager for document operations and content extraction"""
_instance = None
def __init__(self, serviceContainer: ServiceContainer):
self.service = serviceContainer
self._processor = DocumentProcessor()
@classmethod
def getInstance(cls):
"""Return a singleton instance of the document manager."""
if cls._instance is None:
cls._instance = cls()
return cls._instance
def __init__(self):
"""Initialize document manager"""
if DocumentManager._instance is not None:
raise RuntimeError("Singleton instance already exists - use getInstance()")
async def extractFromChatDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
"""
Extract content from a ChatDocument with AI processing.
Args:
prompt: Prompt for AI content extraction
document: The ChatDocument to process
self.processor = DocumentProcessor()
self.document_cache = {}
self.temp_dir = Path(APP_CONFIG.get('temp_dir', 'temp'))
self.output_dir = Path(APP_CONFIG.get('output_dir', 'output'))
self.service = None
Returns:
ExtractedContent containing the processed content
"""
# Convert ChatDocument to TaskDocument
taskDoc = await self._convertToTaskDocument(document)
async def initialize(self, context: Dict[str, Any], service=None) -> None:
"""Initialize document manager with context and service"""
# Initialize processor
self.processor.initialize(context)
# Process document using processor
extractedContent = await self._processor.processDocument(taskDoc, prompt)
# Initialize service container
if service:
# Validate required interfaces
required_interfaces = ['base', 'msft', 'google']
missing_interfaces = []
for interface in required_interfaces:
if not hasattr(service, interface):
missing_interfaces.append(interface)
# Update the objectId and objectType to reference the original ChatDocument
extractedContent.objectId = document.id
extractedContent.objectType = "ChatDocument"
return extractedContent
async def extractFromTaskDocument(self, prompt: str, document: TaskDocument) -> ExtractedContent:
"""
Extract content directly from a task document.
Args:
prompt: The prompt to use for content extraction
document: The task document to extract content from
if missing_interfaces:
logger.warning(f"Service container missing required interfaces: {', '.join(missing_interfaces)}")
return False
self.service = service
# Create directories if they don't exist
self.temp_dir.mkdir(parents=True, exist_ok=True)
self.output_dir.mkdir(parents=True, exist_ok=True)
# Clear temporary directory
self._clear_temp_directory()
def _clear_temp_directory(self) -> None:
"""Clear temporary directory"""
Returns:
ExtractedContent containing the processed content
Raises:
ValueError: If document is invalid
IOError: If file cannot be read
"""
try:
if self.temp_dir.exists():
shutil.rmtree(self.temp_dir)
self.temp_dir.mkdir(parents=True)
return await self._processor.processDocument(document, prompt)
except Exception as e:
logger.error(f"Error clearing temp directory: {str(e)}")
logger.error(f"Error extracting from task document: {str(e)}")
raise
async def process_document(self, document: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
"""Process a document with context"""
try:
# Generate document ID if not present
if 'id' not in document:
document['id'] = self._generate_document_id(document)
async def _convertToTaskDocument(self, chatDoc: ChatDocument) -> TaskDocument:
"""
Convert a ChatDocument to a TaskDocument.
Args:
chatDoc: The chat document to convert
# Process document content
processed = await self.processor.process_with_context(document, context)
Returns:
TaskDocument containing the converted data
# Add metadata
processed['metadata'] = {
'processedAt': datetime.now(UTC).isoformat(),
'processor': 'DocumentManager',
'version': '1.0'
}
# Cache document
self.document_cache[document['id']] = processed
return processed
except Exception as e:
logger.error(f"Error processing document: {str(e)}")
return {
'id': document.get('id', ''),
'error': str(e),
'status': 'error'
}
async def extract_content(self, file_id: str) -> Optional[ChatDocument]:
"""Extract content from a file"""
Raises:
ValueError: If document is invalid
IOError: If file cannot be read
"""
try:
# Get file content
file_content = await self.get_file_content(file_id)
if not file_content:
return None
# Get file metadata
file_metadata = await self.get_file_metadata(file_id)
if not file_metadata:
return None
fileContent = await self.service.functions.getFileData(chatDoc.fileId)
if not fileContent:
raise ValueError(f"Could not get content for file {chatDoc.fileId}")
# Create ChatDocument
return ChatDocument(
# Convert to base64
base64Data = base64.b64encode(fileContent).decode('utf-8')
return TaskDocument(
id=str(uuid.uuid4()),
fileId=file_id,
filename=file_metadata.get("name", "Unknown"),
fileSize=file_metadata.get("size", 0),
content=file_content.decode('utf-8', errors='ignore'),
mimeType=file_metadata.get("mimeType", "text/plain")
filename=chatDoc.filename,
fileSize=chatDoc.fileSize,
mimeType=chatDoc.mimeType,
data=base64Data
)
except Exception as e:
logger.error(f"Error extracting content from file {file_id}: {str(e)}")
return None
async def get_file_content(self, file_id: str) -> Optional[bytes]:
"""Get file content"""
try:
if not self.service or not self.service.functions:
logger.error("Service or functions not initialized")
return None
return self.service.functions.getFileData(file_id)
except Exception as e:
logger.error(f"Error getting file content for {file_id}: {str(e)}")
return None
async def get_file_metadata(self, file_id: str) -> Optional[Dict[str, Any]]:
"""Get file metadata"""
try:
if not self.service or not self.service.functions:
logger.error("Service or functions not initialized")
return None
return self.service.functions.getFile(file_id)
except Exception as e:
logger.error(f"Error getting file metadata for {file_id}: {str(e)}")
return None
async def save_file(self, filename: str, content: bytes, mime_type: str) -> Optional[int]:
"""Save a new file"""
try:
if not self.service or not self.service.base:
logger.error("Service or base interface not initialized")
return None
return await self.service.base.saveFile(filename, content, mime_type)
except Exception as e:
logger.error(f"Error saving file {filename}: {str(e)}")
return None
async def delete_file(self, file_id: str) -> bool:
"""Delete a file"""
try:
if not self.service or not self.service.functions:
logger.error("Service or functions not initialized")
return False
return self.service.functions.deleteFile(file_id)
except Exception as e:
logger.error(f"Error deleting file {file_id}: {str(e)}")
return False
def convert_file_ref_to_id(self, ref: str) -> Optional[int]:
"""Convert file reference to ID"""
try:
if isinstance(ref, str) and ';' in ref:
return int(ref.split(';')[1])
return int(ref)
except Exception as e:
logger.error(f"Error converting file reference to ID: {str(e)}")
return None
def convert_file_id_to_ref(self, file_id: str) -> Optional[str]:
"""Convert file ID to reference"""
try:
if not self.service or not self.service.functions:
logger.error("Service or functions not initialized")
return None
file = self.service.functions.getFile(file_id)
if not file:
return None
return f"{file.filename};{file_id}"
except Exception as e:
logger.error(f"Error converting file ID to reference: {str(e)}")
return None
async def convert_data_format(self, data: Any, format: str) -> Any:
"""Convert data between formats"""
try:
if format == 'json':
if isinstance(data, str):
return json.loads(data)
return json.dumps(data)
elif format == 'base64':
if isinstance(data, str):
return base64.b64encode(data.encode('utf-8')).decode('utf-8')
return base64.b64encode(data).decode('utf-8')
return data
except Exception as e:
logger.error(f"Error converting data format: {str(e)}")
return data
async def create_agent_input_file_list(self, files: List[str]) -> List[Dict[str, Any]]:
"""Create list of input files for agent processing"""
try:
input_files = []
for file in files:
file_id = await self.convert_file_ref_to_id(file)
if file_id:
file_data = await self.get_file_metadata(file_id)
if file_data:
content = await self.get_file_content(file_id)
input_files.append({
'id': file_id,
'name': file_data['name'],
'mimeType': file_data['mimeType'],
'content': content
})
return input_files
except Exception as e:
logger.error(f"Error creating agent input file list: {str(e)}")
return []
async def save_agent_output_files(self, files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Save output files from agent processing"""
try:
saved_files = []
for file in files:
file_meta = await self.save_file(
filename=file['name'],
content=file['content'],
mimeType=file.get('mimeType', 'application/octet-stream')
)
if file_meta:
saved_files.append({
'id': file_meta,
'name': file['name'],
'mimeType': file.get('mimeType', 'application/octet-stream')
})
return saved_files
except Exception as e:
logger.error(f"Error saving agent output files: {str(e)}")
return []
async def content_with_prompt(self, document: Dict[str, Any], prompt: str) -> Optional[Dict[str, Any]]:
"""Extract content using AI with specific prompt"""
try:
# Get document content
chat_doc = await self.extract_content(document.get('id'))
if not chat_doc:
return None
# Prepare content
content = chat_doc.content
mime_type = chat_doc.mimeType
# Process large files in chunks
if len(content) > 100000:
chunks = self._split_content_into_chunks(content, mime_type)
extracted_chunks = []
for chunk in chunks:
chunk_result = await self._process_content_chunk(chunk, prompt)
if chunk_result:
extracted_chunks.append(chunk_result)
return {
"content": self._merge_chunk_results(extracted_chunks),
"metadata": {
"original_size": len(content),
"chunks_processed": len(chunks),
"mime_type": mime_type
}
}
else:
result = await self._process_content_chunk(content, prompt)
return {
"content": result,
"metadata": {
"original_size": len(content),
"chunks_processed": 1,
"mime_type": mime_type
}
}
except Exception as e:
logger.error(f"Error in content_with_prompt: {str(e)}")
return None
def _split_content_into_chunks(self, content: str, mime_type: str) -> List[str]:
"""Split content into manageable chunks"""
try:
if mime_type.startswith('text/'):
return [chunk.strip() for chunk in content.split('\n\n') if chunk.strip()]
elif mime_type == 'application/json':
data = json.loads(content)
if isinstance(data, list):
return [json.dumps(item) for item in data]
return [content]
else:
return [content[i:i+10000] for i in range(0, len(content), 10000)]
except Exception as e:
logger.error(f"Error splitting content: {str(e)}")
return [content]
async def _process_content_chunk(self, chunk: str, prompt: str) -> Optional[str]:
"""Process content chunk with AI"""
try:
if not self.service or not self.service.base:
logger.error("Service or base interface not initialized")
return None
ai_prompt = f"""
Extract relevant information from this content based on the following prompt:
PROMPT: {prompt}
CONTENT:
{chunk}
Return ONLY the extracted information in a clear, concise format.
"""
response = await self.service.base.callAi([
{"role": "system", "content": "You are an expert at extracting relevant information from documents."},
{"role": "user", "content": ai_prompt}
])
return response.strip()
except Exception as e:
logger.error(f"Error processing content chunk: {str(e)}")
return None
def _merge_chunk_results(self, chunks: List[str]) -> str:
"""Merge processed content chunks"""
try:
chunks = [chunk for chunk in chunks if chunk and chunk.strip()]
return "\n\n".join(chunks)
except Exception as e:
logger.error(f"Error merging chunk results: {str(e)}")
return ""
async def save_document(self, document: Dict[str, Any], format: str = 'json') -> str:
"""Save document to output directory"""
try:
filename = f"{document['id']}.{format}"
filepath = self.output_dir / filename
if format == 'json':
with open(filepath, 'w', encoding='utf-8') as f:
json.dump(document, f, indent=2)
else:
content = document.get('content', '')
if isinstance(content, str):
with open(filepath, 'w', encoding='utf-8') as f:
f.write(content)
else:
with open(filepath, 'wb') as f:
f.write(content)
return str(filepath)
except Exception as e:
logger.error(f"Error saving document: {str(e)}")
logger.error(f"Error converting chat document to task document: {str(e)}")
raise
async def load_document(self, filepath: str) -> Dict[str, Any]:
"""Load document from file"""
try:
path = Path(filepath)
if not path.exists():
raise FileNotFoundError(f"Document not found: {filepath}")
format = path.suffix[1:].lower()
if format == 'json':
with open(path, 'r', encoding='utf-8') as f:
document = json.load(f)
else:
mime_type = mimetypes.guess_type(filepath)[0]
if mime_type and mime_type.startswith('text/'):
with open(path, 'r', encoding='utf-8') as f:
content = f.read()
else:
with open(path, 'rb') as f:
content = f.read()
document = {
'id': path.stem,
'content': content,
'format': format,
'mime_type': mime_type
}
document['metadata'] = {
'loadedAt': datetime.now(UTC).isoformat(),
'filepath': str(path),
'size': path.stat().st_size
}
return document
except Exception as e:
logger.error(f"Error loading document: {str(e)}")
raise
async def convert_document(self, document: Dict[str, Any], target_format: str) -> Dict[str, Any]:
"""Convert document to target format"""
try:
current_format = document.get('format', 'json')
if current_format == 'json' and target_format == 'text':
content = json.dumps(document, indent=2)
return {
'id': document['id'],
'content': content,
'format': 'text',
'mime_type': 'text/plain'
}
elif current_format == 'text' and target_format == 'json':
try:
content = json.loads(document['content'])
return {
'id': document['id'],
'content': content,
'format': 'json',
'mime_type': 'application/json'
}
except json.JSONDecodeError:
return {
'id': document['id'],
'content': document['content'],
'format': 'json',
'mime_type': 'application/json'
}
else:
raise ValueError(f"Unsupported conversion: {current_format} to {target_format}")
except Exception as e:
logger.error(f"Error converting document: {str(e)}")
raise
def _generate_document_id(self, document: Dict[str, Any]) -> str:
"""Generate unique document ID"""
if 'content' in document:
content = str(document['content'])
return hashlib.md5(content.encode()).hexdigest()
return f"doc_{int(datetime.now(UTC).timestamp())}"
async def cleanup(self) -> None:
"""Clean up temporary files and cache"""
try:
self._clear_temp_directory()
self.document_cache.clear()
except Exception as e:
logger.error(f"Error during cleanup: {str(e)}")
# Singleton factory for the document manager
def getDocumentManager():
return DocumentManager.getInstance()

View file

@ -1,182 +0,0 @@
from typing import Dict, Any, List, Optional
import logging
import json
from datetime import datetime, UTC
logger = logging.getLogger(__name__)
class AIPromptManager:
"""Manages AI prompts and response validation"""
def __init__(self):
self.prompt_templates = {}
self.response_schemas = {}
self._load_templates()
def _load_templates(self) -> None:
"""Load prompt templates and schemas"""
# Basic templates
self.prompt_templates = {
"task_analysis": {
"template": """Analyze the following task and determine required actions:
Task: {task}
Context: {context}
Available Methods: {methods}
Please provide:
1. Main objective
2. Required actions
3. Required data sources
4. Document processing requirements
5. Expected output format
Format your response as JSON:
{{
"objective": "string",
"actions": [
{{
"method": "string",
"action": "string",
"parameters": {{
"param1": "value1"
}}
}}
],
"dataSources": ["string"],
"documentRequirements": ["string"],
"outputFormat": "string"
}}
""",
"schema": {
"type": "object",
"required": ["objective", "actions"],
"properties": {
"objective": {"type": "string"},
"actions": {
"type": "array",
"items": {
"type": "object",
"required": ["method", "action"],
"properties": {
"method": {"type": "string"},
"action": {"type": "string"},
"parameters": {"type": "object"}
}
}
},
"dataSources": {
"type": "array",
"items": {"type": "string"}
},
"documentRequirements": {
"type": "array",
"items": {"type": "string"}
},
"outputFormat": {"type": "string"}
}
}
},
"result_analysis": {
"template": """Analyze the following task results and determine next steps:
Task Results: {results}
Workflow History: {history}
Please provide:
1. Task completion status
2. Next required actions
3. Required documents
4. Method recommendations
Format your response as JSON:
{{
"isComplete": boolean,
"nextActions": ["string"],
"requiredDocuments": ["string"],
"recommendedMethods": ["string"]
}}
""",
"schema": {
"type": "object",
"required": ["isComplete"],
"properties": {
"isComplete": {"type": "boolean"},
"nextActions": {
"type": "array",
"items": {"type": "string"}
},
"requiredDocuments": {
"type": "array",
"items": {"type": "string"}
},
"recommendedMethods": {
"type": "array",
"items": {"type": "string"}
}
}
}
}
}
def generate_prompt(self, context: Dict[str, Any], examples: List[Dict]) -> str:
"""Generate a context-aware prompt with few-shot examples"""
try:
# Get template
template = self.prompt_templates.get(context.get("type", "task_analysis"))
if not template:
raise ValueError(f"Unknown prompt type: {context.get('type')}")
# Format prompt
prompt = template["template"].format(
task=context.get("task", ""),
context=json.dumps(context.get("context", {}), indent=2),
methods=json.dumps(context.get("methods", {}), indent=2),
results=json.dumps(context.get("results", {}), indent=2),
history=json.dumps(context.get("history", []), indent=2)
)
# Add examples if provided
if examples:
prompt += "\nExamples:\n"
for ex in examples:
prompt += f"- {ex['input']} => {ex['output']}\n"
return prompt
except Exception as e:
logger.error(f"Error generating prompt: {str(e)}")
raise
def validate_response(self, response: str, schema: Dict) -> bool:
    """Check an AI response against a JSON schema.

    Args:
        response: The response to check. A string is parsed as JSON first;
            any other value is validated as-is.
        schema: JSON schema handed to ``jsonschema.validate``.

    Returns:
        True when the response validates. False when the string is not
        valid JSON, or when validation (or anything else) raises; in the
        latter case the error is logged.
    """
    try:
        payload = response
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except json.JSONDecodeError:
                # Not JSON at all: reject without logging
                return False

        # Imported lazily, exactly where it is needed
        import jsonschema
        jsonschema.validate(instance=payload, schema=schema)
    except Exception as e:
        logger.error(f"Error validating response: {str(e)}")
        return False
    return True
def get_schema(self, prompt_type: str) -> Optional[Dict]:
    """Return the schema stored with a prompt template.

    Yields None when no (truthy) template entry is registered under
    ``prompt_type`` or when the entry carries no ``"schema"`` key.
    """
    entry = self.prompt_templates.get(prompt_type)
    if not entry:
        return None
    return entry.get("schema")
def add_template(self, name: str, template: str, schema: Dict) -> None:
    """Register a prompt template under ``name``, replacing any existing entry.

    The entry is stored as a dict with the keys ``"template"`` and ``"schema"``.
    """
    entry = dict(template=template, schema=schema)
    self.prompt_templates[name] = entry
def remove_template(self, name: str) -> None:
    """Drop the template registered under ``name``; unknown names are ignored."""
    registry = self.prompt_templates
    registry.pop(name, None)

View file

@ -1,239 +1,147 @@
from typing import Dict, Any, Optional, List
from typing import Dict, Any
import logging
import json
import asyncio
from datetime import datetime, UTC
import uuid
from modules.interfaces.serviceChatModel import (
AgentTask, AgentResult, TaskStatus, ChatMessage,
UserInputRequest, ChatWorkflow, ChatDocument
)
from modules.interfaces.serviceChatClass import ChatInterface
from modules.workflow.managerChat import ChatManager
from modules.workflow.managerDocument import DocumentManager
from modules.interfaces.serviceChatModel import AgentTask, TaskStatus, ActionStatus
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
class WorkflowStoppedException(Exception):
"""Exception raised when workflow is stopped by user"""
pass
class WorkflowManager:
"""Workflow manager with improved task management and error recovery"""
"""Manages workflow execution lifecycle"""
def __init__(self):
self.chat_manager = ChatManager()
self.document_manager = DocumentManager()
def __init__(self, chatInterface: ChatInterface):
self.workflow = None
self.context = {}
self.task_queue = asyncio.Queue()
self.active_tasks = {}
self.task_history = []
async def initialize(self, workflow: Any, context: Dict[str, Any]) -> None:
"""Initialize workflow manager with workflow and context"""
self.workflow = workflow
self.context = context
# Initialize managers
await self.chat_manager.initialize(workflow, context)
await self.document_manager.initialize(context)
# Start task processor
asyncio.create_task(self._process_task_queue())
async def process_workflow(self, user_input: Dict[str, Any]) -> Dict[str, Any]:
"""Process workflow with user input"""
self.isRunning = False
self.chatInterface = chatInterface
self.chatManager = ChatManager()
def _checkWorkflowStopped(self, workflow: ChatWorkflow) -> None:
if workflow.status == "stopped":
logger.info(f"Workflow {workflow.id} stopped by user")
raise WorkflowStoppedException("User stopped workflow")
async def workflowProcess(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> None:
"""Main workflow execution process"""
try:
self.workflow = workflow
self.isRunning = True
# Process documents from userInput using ChatInterface's method
documents = []
if userInput.listFileId:
documents = await self.chatInterface.processFileIds(userInput.listFileId)
# Create initial ChatMessage from userInput
initialMessage = ChatMessage(
id=str(uuid.uuid4()),
workflowId=workflow.id,
role="user",
message=userInput.prompt,
status="first", # First message in workflow
documents=documents
)
# Add message to workflow
await self.chatInterface.createWorkflowMessage(initialMessage.dict())
# Create initial task
task = await self.chat_manager.create_initial_task(user_input)
task = await self.chatInterface.createInitialTask(workflow, initialMessage)
if not task:
logger.error("Failed to create initial task")
workflow.status = "error"
workflow.error = "Failed to create initial task"
return
# Add to queue
await self.task_queue.put(task)
# Wait for completion
while not task.is_complete() and not task.has_failed():
await asyncio.sleep(0.1)
# Process results
if task.status == TaskStatus.SUCCESS:
return {
"status": "success",
"result": task.result,
"documents": task.documentsOutput
}
else:
return {
"status": "error",
"error": task.error,
"feedback": task.thisTaskFeedback
}
except Exception as e:
logger.error(f"Error processing workflow: {str(e)}")
return {
"status": "error",
"error": str(e)
}
async def _process_task_queue(self) -> None:
"""Process tasks in queue"""
while True:
try:
# Get task from queue
task = await self.task_queue.get()
# Process task
result = await self.chat_manager.process_task(task)
# Update task status
if result["status"] == "success":
task.status = TaskStatus.SUCCESS
task.result = result.get("result")
task.documentsOutput = result.get("documents", [])
else:
task.status = TaskStatus.FAILED
task.error = result.get("error")
# Add to history
self.task_history.append({
"id": task.id,
"status": task.status,
"startedAt": task.startedAt,
"finishedAt": datetime.now(UTC).isoformat(),
"error": task.error
})
# Check for next task
if not task.is_complete():
next_task = await self._define_next_task(task)
if next_task:
await self.task_queue.put(next_task)
# Mark task as done
self.task_queue.task_done()
except Exception as e:
logger.error(f"Error processing task queue: {str(e)}")
await asyncio.sleep(1) # Prevent tight loop on error
async def _define_next_task(self, current_task: AgentTask) -> Optional[AgentTask]:
"""Define next task based on current task results"""
try:
# Analyze current task
analysis = await self.chat_manager._analyze_task_results(current_task)
# Check if next task needed
if not analysis.get("isComplete", True):
# Create next task
next_task = await self.chat_manager.create_next_task(
current_task,
analysis.get("nextActions", []),
analysis.get("requiredDocuments", [])
# Main workflow loop
while self.isRunning and workflow.status == "running":
self._checkWorkflowStopped(workflow)
# Execute task
result = AgentResult(
id=task.id,
status=TaskStatus.PENDING,
createdAt=datetime.now(UTC),
updatedAt=datetime.now(UTC)
)
# Add dependencies
next_task.dependencies = [current_task.id]
return next_task
return None
except Exception as e:
logger.error(f"Error defining next task: {str(e)}")
return None
async def handle_error(self, task: AgentTask, error: str) -> None:
"""Handle task error with recovery strategies"""
try:
# Log error
logger.error(f"Task {task.id} failed: {error}")
# Update task status
task.status = TaskStatus.FAILED
task.error = error
# Check for retryable errors
if self._is_retryable_error(error):
if task.retryCount < task.retryMax:
# Retry task
task.retryCount += 1
task.status = TaskStatus.RETRY
await self.task_queue.put(task)
return
# Check for rollback needed
if task.rollback_on_failure:
await self._rollback_task(task)
# Notify workflow
self.workflow.status = "error"
self.workflow.error = error
except Exception as e:
logger.error(f"Error handling task error: {str(e)}")
async def _rollback_task(self, task: AgentTask) -> None:
"""Rollback task actions"""
try:
for action in task.actionList:
if action.status == ActionStatus.SUCCESS:
# Get method
method = self.chat_manager.service.methods.get(action.method)
if method:
# Rollback action
await method.rollback(
action.action,
action.parameters,
task.get_auth_data(action.auth_source)
)
except Exception as e:
logger.error(f"Error rolling back task: {str(e)}")
def _is_retryable_error(self, error: str) -> bool:
"""Check if error is retryable"""
retryable_errors = [
"timeout",
"rate limit",
"temporary",
"connection",
"server error"
]
return any(err in error.lower() for err in retryable_errors)
async def cleanup(self) -> None:
"""Clean up workflow resources"""
try:
# Clean up managers
await self.chat_manager.cleanup()
await self.document_manager.cleanup()
# Clear task queue
while not self.task_queue.empty():
self.task_queue.get_nowait()
self.task_queue.task_done()
# Clear active tasks
self.active_tasks.clear()
except Exception as e:
logger.error(f"Error during cleanup: {str(e)}")
# Execute each action
for action in task.actionList:
async def get_workflow_status(self, workflow_id: str) -> Dict[str, Any]:
"""Get current status of workflow"""
current_task = self.chat_manager.service.tasks.get('current')
previous_task = self.chat_manager.service.tasks.get('previous')
return {
'workflowId': workflow_id,
'currentTask': current_task.dict() if current_task else None,
'previousTask': previous_task.dict() if previous_task else None,
'status': self.chat_manager.workflow.status if self.chat_manager.workflow else None
}
async def stop_workflow(self, workflow_id: str) -> None:
"""Stop workflow execution"""
if self.chat_manager.workflow and self.chat_manager.workflow.id == workflow_id:
self.chat_manager.workflow.status = TaskStatus.STOPPED
self.chat_manager.workflow.updatedAt = datetime.now(UTC)
self._checkWorkflowStopped(workflow)
try:
# Execute action
actionResult = await action.execute()
# Update action status
action.status = TaskStatus.COMPLETED if actionResult.success else TaskStatus.FAILED
action.result = actionResult
# Check for failure
if not actionResult.success:
result.status = TaskStatus.FAILED
result.error = actionResult.error
break
except Exception as e:
logger.error(f"Action error: {str(e)}")
action.status = TaskStatus.FAILED
result.status = TaskStatus.FAILED
result.error = str(e)
break
# Update result status
if result.status != TaskStatus.FAILED:
result.status = TaskStatus.COMPLETED
result.updatedAt = datetime.now(UTC)
self._checkWorkflowStopped(workflow)
# Update workflow with result
await self.chatInterface.addTaskResult(workflow, result)
# Get next task
task = await self.chatInterface.getNextTask(workflow)
if not task:
break
# Check if should continue
if not await self.chatInterface.shouldContinue(workflow):
break
# Stop current task if any
current_task = self.chat_manager.service.tasks.get('current')
if current_task:
current_task.status = TaskStatus.STOPPED
current_task.updatedAt = datetime.now(UTC)
# Generate final feedback message using ChatManager
finalFeedback = await self.chatManager.generateWorkflowFeedback(workflow)
# Create final message with "last" status
self._checkWorkflowStopped(workflow)
finalMessage = ChatMessage(
id=str(uuid.uuid4()),
workflowId=workflow.id,
role="assistant",
message=finalFeedback,
status="last" # Last message in workflow
)
await self.chatInterface.createWorkflowMessage(finalMessage.dict())
# Complete workflow
if workflow.status != "failed":
workflow.status = "completed"
workflow.lastActivity = datetime.now(UTC).isoformat()
except Exception as e:
logger.error(f"Workflow error: {str(e)}")
if self.workflow:
self.workflow.status = "error"
self.workflow.error = str(e)

File diff suppressed because it is too large Load diff

View file

@ -1,17 +1,26 @@
import logging
from typing import Dict, Any, Optional
from typing import Dict, Any, List, Optional
from datetime import datetime, UTC
import json
import asyncio
from modules.shared.configuration import APP_CONFIG
from modules.methods import MethodBase, MethodResult
from modules.interfaces.serviceChatModel import AgentTask, AgentAction, AgentResult, Action, TaskStatus, ActionStatus
from modules.interfaces.serviceManagementClass import ServiceManagement
logger = logging.getLogger(__name__)
class ServiceContainer:
"""Service container with improved state management"""
"""Service container for dependency injection and service management."""
def __init__(self):
self.methods = {}
self.context = {}
self.workflow = None
self.model = {}
self.functions = {}
self.documentProcessor = None
self.state = {
'status': TaskStatus.PENDING,
'retryCount': 0,
@ -20,15 +29,236 @@ class ServiceContainer:
'lastError': None,
'lastErrorTime': None
}
self.methods: Dict[str, MethodBase] = {}
self.tasks: Dict[str, Any] = {} # Will be populated with AgentTask instances
def register_method(self, method: MethodBase) -> None:
"""Register a method in the container"""
self.methods[method.name] = method
logger.info(f"Registered method: {method.name}")
# Initialize service management
self.serviceManagement = ServiceManagement()
async def execute_task(self, task: Any) -> None: # task: AgentTask
# Initialize file-related functions
self.functions = {
'getFileData': self.serviceManagement.getFileData,
'saveFileData': self.serviceManagement.saveFileData,
'getFileMetadata': self.serviceManagement.getFileMetadata,
'saveFileMetadata': self.serviceManagement.saveFileMetadata,
'deleteFile': self.serviceManagement.deleteFile,
'getFile': self.serviceManagement.getFile,
'getMimeType': self.serviceManagement.getMimeType,
'calculateFileHash': self.serviceManagement.calculateFileHash,
'checkForDuplicateFile': self.serviceManagement.checkForDuplicateFile
}
def initialize(self) -> None:
    """Initialization hook; currently performs no work."""
    return None
def registerMethod(self, methodName: str, methodInstance: Any) -> None:
    """Store ``methodInstance`` under ``methodName``, overwriting any previous entry."""
    registry = self.methods
    registry[methodName] = methodInstance
def getMethod(self, methodName: str) -> Optional[Any]:
    """Look up a registered method; returns None when the name is unknown."""
    registry = self.methods
    return registry.get(methodName)
def removeMethod(self, methodName: str) -> None:
    """Unregister ``methodName``; a name that is not registered is ignored."""
    registry = self.methods
    registry.pop(methodName, None)
def hasMethod(self, methodName: str) -> bool:
    """Report whether ``methodName`` is currently registered."""
    registry = self.methods
    return methodName in registry
def listMethods(self) -> List[str]:
    """Return the names of all registered methods as a new list."""
    return [*self.methods.keys()]
def getMethodInfo(self, methodName: str) -> Dict[str, Any]:
    """Assemble a descriptive record for a registered method.

    Returns an empty dict when ``getMethod`` yields nothing truthy for
    ``methodName``. Otherwise the record holds ``"name"`` plus one entry
    per facet below, each obtained from the matching ``getMethod<Facet>``
    accessor of this object, in this order.
    """
    if not self.getMethod(methodName):
        return {}

    facets = (
        "description", "version", "author", "license", "dependencies",
        "tags", "examples", "documentation", "source", "tests",
        "benchmarks", "metrics", "logs", "history", "usage",
        "errors", "warnings",
    )
    info: Dict[str, Any] = {"name": methodName}
    for facet in facets:
        # Resolve each accessor lazily so the calls happen one after another
        accessor = getattr(self, "getMethod" + facet.capitalize())
        info[facet] = accessor(methodName)
    return info
def getMethodSchema(self, methodName: str) -> Optional[Dict[str, Any]]:
    """Return the ``schema`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.schema

def getMethodParameters(self, methodName: str) -> Optional[Dict[str, Any]]:
    """Return the ``parameters`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.parameters

def getMethodReturnType(self, methodName: str) -> Optional[str]:
    """Return the ``returnType`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.returnType

def getMethodDescription(self, methodName: str) -> Optional[str]:
    """Return the ``description`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.description

def getMethodVersion(self, methodName: str) -> Optional[str]:
    """Return the ``version`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.version

def getMethodAuthor(self, methodName: str) -> Optional[str]:
    """Return the ``author`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.author

def getMethodLicense(self, methodName: str) -> Optional[str]:
    """Return the ``license`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.license

def getMethodDependencies(self, methodName: str) -> Optional[List[str]]:
    """Return the ``dependencies`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.dependencies

def getMethodTags(self, methodName: str) -> Optional[List[str]]:
    """Return the ``tags`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.tags

def getMethodExamples(self, methodName: str) -> Optional[List[Dict[str, Any]]]:
    """Return the ``examples`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.examples

def getMethodDocumentation(self, methodName: str) -> Optional[str]:
    """Return the ``documentation`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.documentation

def getMethodSource(self, methodName: str) -> Optional[str]:
    """Return the ``source`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.source

def getMethodTests(self, methodName: str) -> Optional[List[Dict[str, Any]]]:
    """Return the ``tests`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.tests

def getMethodBenchmarks(self, methodName: str) -> Optional[List[Dict[str, Any]]]:
    """Return the ``benchmarks`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.benchmarks

def getMethodMetrics(self, methodName: str) -> Optional[Dict[str, Any]]:
    """Return the ``metrics`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.metrics

def getMethodLogs(self, methodName: str) -> Optional[List[Dict[str, Any]]]:
    """Return the ``logs`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.logs

def getMethodHistory(self, methodName: str) -> Optional[List[Dict[str, Any]]]:
    """Return the ``history`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.history

def getMethodUsage(self, methodName: str) -> Optional[Dict[str, Any]]:
    """Return the ``usage`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.usage

def getMethodErrors(self, methodName: str) -> Optional[List[Dict[str, Any]]]:
    """Return the ``errors`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.errors

def getMethodWarnings(self, methodName: str) -> Optional[List[Dict[str, Any]]]:
    """Return the ``warnings`` attribute of the registered method, or None if not found."""
    found = self.getMethod(methodName)
    if not found:
        return None
    return found.warnings
def executeTask(self, task: Any) -> None:
    """Run every action of ``task`` through its registered method, in order.

    Actions whose method name resolves to nothing truthy are skipped.
    Any exception is logged and then re-raised to the caller.
    """
    try:
        for step in task.actionList:
            handler = self.getMethod(step.method)
            if not handler:
                # No registered method for this action: skip it
                continue
            handler.executeAction(step.action, step.parameters)
    except Exception as e:
        logger.error(f"Error executing task: {str(e)}")
        raise
def getFileData(self, fileId: str) -> bytes:
    """Fetch the raw bytes of a file through the registered ``getFileData`` function.

    Args:
        fileId: Identifier of the file to read.

    Returns:
        Whatever the registered function returns for ``fileId``; ``b""``
        when no such function is registered or when the call raises (the
        error is logged in that case).
    """
    try:
        registry = self.functions
        # self.functions is a dict, so the previous hasattr() check never
        # matched and the storage function was never reached. Look the
        # entry up by key; attribute lookup is kept for non-dict registries.
        if isinstance(registry, dict):
            handler = registry.get('getFileData')
        else:
            handler = getattr(registry, 'getFileData', None)
        if handler is None:
            return b""
        return handler(fileId)
    except Exception as e:
        logger.error(f"Error getting file data: {str(e)}")
        return b""
def saveFileData(self, fileId: str, data: bytes) -> bool:
    """Persist file bytes through the registered ``saveFileData`` function.

    Args:
        fileId: Identifier of the file to write.
        data: Raw bytes to store.

    Returns:
        Whatever the registered function returns; False when no such
        function is registered or when the call raises (the error is
        logged in that case).
    """
    try:
        registry = self.functions
        # self.functions is a dict, so the previous hasattr() check never
        # matched and nothing was ever saved. Look the entry up by key;
        # attribute lookup is kept for non-dict registries.
        if isinstance(registry, dict):
            handler = registry.get('saveFileData')
        else:
            handler = getattr(registry, 'saveFileData', None)
        if handler is None:
            return False
        return handler(fileId, data)
    except Exception as e:
        logger.error(f"Error saving file data: {str(e)}")
        return False
def getFileMetadata(self, fileId: str) -> Dict[str, Any]:
    """Fetch a file's metadata through the registered ``getFileMetadata`` function.

    Args:
        fileId: Identifier of the file.

    Returns:
        Whatever the registered function returns for ``fileId``; an empty
        dict when no such function is registered or when the call raises
        (the error is logged in that case).
    """
    try:
        registry = self.functions
        # self.functions is a dict, so the previous hasattr() check never
        # matched and the result was always {}. Look the entry up by key;
        # attribute lookup is kept for non-dict registries.
        if isinstance(registry, dict):
            handler = registry.get('getFileMetadata')
        else:
            handler = getattr(registry, 'getFileMetadata', None)
        if handler is None:
            return {}
        return handler(fileId)
    except Exception as e:
        logger.error(f"Error getting file metadata: {str(e)}")
        return {}
def saveFileMetadata(self, fileId: str, metadata: Dict[str, Any]) -> bool:
    """Persist a file's metadata through the registered ``saveFileMetadata`` function.

    Args:
        fileId: Identifier of the file.
        metadata: Metadata mapping to store.

    Returns:
        Whatever the registered function returns; False when no such
        function is registered or when the call raises (the error is
        logged in that case).
    """
    try:
        registry = self.functions
        # self.functions is a dict, so the previous hasattr() check never
        # matched and nothing was ever saved. Look the entry up by key;
        # attribute lookup is kept for non-dict registries.
        if isinstance(registry, dict):
            handler = registry.get('saveFileMetadata')
        else:
            handler = getattr(registry, 'saveFileMetadata', None)
        if handler is None:
            return False
        return handler(fileId, metadata)
    except Exception as e:
        logger.error(f"Error saving file metadata: {str(e)}")
        return False
async def executeTaskImproved(self, task: Any) -> None: # task: AgentTask
"""Execute task with improved error handling and timeout"""
try:
# Check for timeout
@ -38,64 +268,64 @@ class ServiceContainer:
# Execute actions
for action in task.actionList:
if not task.can_execute_action(action):
if not task.get_auth_data(action.auth_source):
if not task.canExecuteAction(action):
if not task.getAuthData(action.authSource):
action.status = ActionStatus.FAILED
task.error = f"Missing authentication for {action.auth_source}"
task.error = f"Missing authentication for {action.authSource}"
else:
action.status = ActionStatus.DEPENDENCY_FAILED
continue
try:
# Get method
method = self.methods.get(action.method)
method = self.getMethod(action.method)
if not method:
raise ValueError(f"Unknown method: {action.method}")
# Validate parameters
if not await method.validate_parameters(action.action, action.parameters):
if not await method.validateParameters(action.action, action.parameters):
raise ValueError(f"Invalid parameters for {action.method}:{action.action}")
# Get auth data if needed
auth_data = None
if action.auth_source and action.auth_source != "local":
auth_data = task.get_auth_data(action.auth_source)
if not auth_data:
raise ValueError(f"Missing authentication data for {action.auth_source}")
authData = None
if action.authSource and action.authSource != "local":
authData = task.getAuthData(action.authSource)
if not authData:
raise ValueError(f"Missing authentication data for {action.authSource}")
# Execute with timeout
result = await asyncio.wait_for(
method.execute(action.action, action.parameters, auth_data),
method.execute(action.action, action.parameters, authData),
timeout=action.timeout or 60
)
if result.success:
action.status = ActionStatus.SUCCESS
else:
if self._should_retry(result.data.get('error')):
if self._shouldRetry(result.data.get('error')):
action.retryCount += 1
if action.retryCount > action.retryMax:
action.status = ActionStatus.FAILED
if action.rollback_on_failure:
await method.rollback(action.action, action.parameters, auth_data)
if action.rollbackOnFailure:
await method.rollback(action.action, action.parameters, authData)
else:
action.status = ActionStatus.RETRY
else:
action.status = ActionStatus.FAILED
if action.rollback_on_failure:
await method.rollback(action.action, action.parameters, auth_data)
if action.rollbackOnFailure:
await method.rollback(action.action, action.parameters, authData)
except asyncio.TimeoutError:
action.status = ActionStatus.TIMEOUT
except Exception as e:
action.status = ActionStatus.FAILED
if action.rollback_on_failure:
await method.rollback(action.action, action.parameters, auth_data)
if action.rollbackOnFailure:
await method.rollback(action.action, action.parameters, authData)
# Update task status
if task.has_failed():
if task.hasFailed():
task.status = TaskStatus.FAILED
elif task.is_complete():
elif task.isComplete():
task.status = TaskStatus.SUCCESS
task.finishedAt = datetime.now(UTC).isoformat()
@ -103,22 +333,18 @@ class ServiceContainer:
task.status = TaskStatus.FAILED
task.error = str(e)
def _should_retry(self, error: str) -> bool:
def _shouldRetry(self, error: str) -> bool:
"""Determine if error is retryable"""
retryable_errors = [
retryableErrors = [
"AI down",
"Document not found",
"Content extraction failed",
"Network error",
"Temporary failure"
]
return any(err in error for err in retryable_errors)
return any(err in error for err in retryableErrors)
def get_method(self, name: str) -> Optional[MethodBase]:
"""Get a method by name"""
return self.methods.get(name)
def get_available_methods(self) -> Dict[str, Dict[str, Any]]:
def getAvailableMethodsCatalog(self) -> Dict[str, Dict[str, Any]]:
"""Get catalog of available methods and their actions"""
return {
name: {

View file

@ -1,122 +1,14 @@
....................... TASKS
Clean
We need to adapt the agent's orchestration. At its center is the handover mechanism: based on the result of the previous task and the history of results, it decides which step to take next in order to complete the user prompt. The mechanism shall ensure a stepwise procedure that uses the needed tools from self.service and the existing model classes in @serviceChatModel.py
1. to remove object AgentHandover and to use AgentTask instead (to adapt in workflowManager and chatManager).
2. ChatMessage model to change:
- "success" attribute to be boolean
- adapt references to the object in the codebase
3. Orchestration logic:
- workflowManager.workflowProcess to keep, but to be adapted using AgentTask parameter "agentTask" instead of "handover"
- createInitialHandover --> rename to createInitialTask
- defineNextHandover --> rename to createNextTask
4. chatManager to adapt:
- to work with self.service object only
- functions "createInitialTask" and "createNextTask" only to be different in handling the result of the last task, but the preparation of the next task to be the same routine "defineNextTask" to deliver AgentTask object.
- All agentTask objects to store in self.service as self.service.tasks.history[]
- self.service.tasks.next: to be the next task reference --> initially None
- self.service.tasks.previous: to be the previous task reference --> createInitialTask() to set it to None; createNextTask to set self.service.tasks.previous = self.service.tasks.next
- function "defineNextTask" to:
- update self.service.state, error handling, update counters and stats, check if limits are reached
- analyse result with AI call and produce:
- message object to give feedback to the user in his language
- decision whether user input completed, or to retry with different approach, or to do next task step towards user input to complete --> to put into feedback from previous task
- create a new instance of AgentTask, to add it to self.service.tasks.history, to set reference in self.service.tasks.next
- HELP: HERE TOO COMPLEX: How to use AI for a generic approach that reads document contents with dedicated prompts and can then handle any user request? E.g. "Search all sharepoint documents from valueon account and extract parts containing customer data into a summary excel file" or "find websites for product 'shampoo' and generate a marketing flyer for our product to show usp"
- to process actionMessages of tasks.next --> results to integrate in tasks.next object
- error handling and return agentTask object
6. Adapt self.service object:
HELP: How to structure and organize this object to be used in the context?
- self.service.state:
- roundsMax
- roundsCount
- self.service.tasks part to add and to use in the code
- self.service.context to add:
- userInput: UserInputMessage
- dataConnections: list of UserConnection
- methodList: list of MethodObject
7. Adapt AgentTask object:
- userInput: summary for ai prompt what finally to deliver to the user based on UserInputMessage
- dataList: list of user connections for AI prompt (in the format "authority":"externalUsername" from user's connections object UserConnection)
- methodList: list of methods for AI prompt
- chatHistory: summary from the chat messages in the workflow before the user input (message summary with file list per message, as existing workflow could be continued)
- taskHistory: summary of the messages with file list per message after user input message id (there could also be user inputs before this message in former chat rounds)
- previousTaskFeedback
- thisTaskFeedback
- status: One of pending, success, failed, retry
5. what to ensure for "promptTaskGeneration"
- to have clear ai prompt for the task to do and for the result format to deliver. the result shall
- feedback from previous task to include
- instruction on how to use methodList catalog
- HELP: How to do the prompt?
***************************
TO include...
- Dict: agents objects accessible by "name"
Core:
logAdd: Logging functionality
workflow: Direct reference to workflow object
user: User information containing:
id: User ID
name: User name
language: User's preferred language (defaults to 'en')
Function Components:
functions: Dictionary containing utility functions:
forEach: Lambda function for iterating over items
while: Lambda function for while loop operations
getFile: Function to get file information
Model Components:
model: Dictionary containing AI model operations:
callAiBasic: Basic AI call function
callAiComplex: Complex AI call function
callAiImage: Image AI call function
Document Operations:
document: Dictionary containing document-related functions:
extract: Extract content from documents
convertFileRefToFileId: Convert file references to file IDs
convertFileIdToFileRef: Convert file IDs to file references
convertDataFormat: Convert data formats
agentInputFilesCreate: Create agent input file lists
agentOutputFilesSave: Save agent output files
Data Operations:
connections: Connection data storage
msft: Microsoft service functions and metadata
google: Google service functions and metadata
Document Operations:
document: Dictionary for document operations (populated by agentManager)
- Clean up ServiceContainer so that it contains only functions that are actually used (no spare ones)!
- all the definitions used in serviceChatModel?
- all AI calls to route over AI-Module (AI basic, ai special, ai...)
********************

187
notes/data_specification.md Normal file
View file

@ -0,0 +1,187 @@
# Document Management Refactoring Specification
## Overview
This specification outlines the refactoring of document management in the system, focusing on proper model separation, centralized content extraction, and future-proof neutralization integration.
## Model Structure
### Base Document Models
```python
# Descriptive metadata attached to one extracted content item.
# `size` is declared without a default; every other field defaults to None.
class ContentMetadata(BaseModel, ModelMixin):
"""Metadata for content items"""
size: int = Field(description="Content size in bytes")
pages: Optional[int] = Field(None, description="Number of pages for multi-page content")
error: Optional[str] = Field(None, description="Processing error if any")
# Media-specific attributes
width: Optional[int] = Field(None, description="Width in pixels for images/videos")
height: Optional[int] = Field(None, description="Height in pixels for images/videos")
colorMode: Optional[str] = Field(None, description="Color mode (e.g., RGB, CMYK, grayscale)")
fps: Optional[float] = Field(None, description="Frames per second for videos")
durationSec: Optional[float] = Field(None, description="Duration in seconds for videos/audio")
# One labelled piece of text extracted from a document, together with its
# ContentMetadata. All three fields are declared without defaults.
class ContentItem(BaseModel, ModelMixin):
"""Individual content item from a document"""
label: str = Field(description="Content label (e.g., tab name, tag name)")
data: str = Field(description="Text content")
metadata: ContentMetadata = Field(description="Content metadata")
# Document reference attached to a chat message: identifies a file by `fileId`
# and carries its name, size and MIME type, but no file content.
# NOTE(review): `fileId` presumably points at a file in the file store - confirm.
class ChatDocument(BaseModel, ModelMixin):
# Generated as a random UUID4 string when not supplied
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
fileId: str
filename: str
# NOTE(review): presumably in bytes, like ContentMetadata.size - confirm
fileSize: int
mimeType: str
# Document as handed to a task: same descriptive fields as ChatDocument
# (minus `fileId`), plus the file content itself in `data`.
class TaskDocument(BaseModel, ModelMixin):
# Generated as a random UUID4 string when not supplied
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
filename: str
# NOTE(review): presumably in bytes, like ContentMetadata.size - confirm
fileSize: int
mimeType: str
data: str # Base64 encoded file data
# Result of a content extraction: the extracted items plus a back-reference
# (id and type name) to the document they were extracted from.
class ExtractedContent(BaseModel, ModelMixin):
objectId: str # Reference to source document
objectType: str = Field(description="Type of source object ('ChatDocument' or 'TaskDocument')")
# Extracted content items
contents: List[ContentItem]
```
## Service Layer Structure
### Document Service
```python
class DocumentService:
    """Central entry point for extracting content from documents.

    Both public extract methods funnel into ``getDocumentContent``, which
    extracts the raw content, optionally neutralizes it, and processes it
    with AI using the caller's prompt.
    """

    def __init__(self, service_container):
        """Keep a reference to the service container; neutralization starts disabled."""
        self.service = service_container
        self.neutralizer_enabled = False  # Flag for neutralization feature

    async def extractFromChatDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
        """
        Extract content from a ChatDocument by converting it to TaskDocument first.

        The returned ExtractedContent references the ChatDocument that was
        passed in, not the intermediate TaskDocument.
        """
        # Convert ChatDocument to TaskDocument
        task_doc = await self._convertToTaskDocument(document)
        extracted = await self.getDocumentContent(task_doc, prompt)
        # getDocumentContent labels its result with the TaskDocument it was
        # given. The source object here is the ChatDocument, so point the
        # result back at it as the ExtractedContent contract requires.
        extracted.objectId = document.id
        extracted.objectType = "ChatDocument"
        return extracted

    async def extractFromTaskDocument(self, prompt: str, document: TaskDocument) -> ExtractedContent:
        """
        Extract content directly from a TaskDocument.
        """
        return await self.getDocumentContent(document, prompt)

    async def getDocumentContent(self, document: TaskDocument, prompt: str) -> ExtractedContent:
        """
        Helper function for centralized content extraction.
        Handles the actual content extraction and optional neutralization.

        The result is labelled with ``document.id`` and object type
        "TaskDocument".
        """
        # Extract content based on mimeType
        content = await self._extractRawContent(document)

        # Apply neutralization if enabled (imported lazily, only when needed)
        if self.neutralizer_enabled:
            from modules.neutralizer import neutralizer
            content = await neutralizer.process_content(content)

        # Process content with AI using prompt
        processed_content = await self._processWithAI(content, prompt)

        return ExtractedContent(
            objectId=document.id,
            objectType="TaskDocument",
            contents=processed_content
        )
```
## Implementation Steps
1. **Model Cleanup**
- Create new model classes in `serviceChatModel.py`
- Remove deprecated models:
- DocumentExtraction
- DocumentContext
- ProcessedDocument
- ChatContent (replaced by ContentItem)
- Update ChatDocument to remove contents attribute
- Convert all snake_case to camelCase in manager*.py and method*.py
2. **Service Implementation**
- Create new `DocumentService` class in `serviceDocument.py`
- Implement the three main methods:
- extractFromChatDocument
- extractFromTaskDocument
- getDocumentContent (helper function)
- Add neutralization integration with feature flag
3. **UserInput Processing**
- Update `UserInputRequest` processing to use `ChatMessage`
- Implement `processFileIds` in `serviceChatClass`
- Update all references to use new model structure
4. **Method Module Updates**
- Update all method*.py modules to use new service layer
- Remove direct file access
- Implement proper error handling and logging
5. **Testing and Validation**
- Create unit tests for new models and services
- Test document processing with various file types
- Validate content extraction and neutralization
- Test error handling and edge cases
## Files to be Removed/Modified
### To be Removed
1. `DocumentExtraction` class from serviceChatModel.py
2. `DocumentContext` class from serviceChatModel.py
3. `ProcessedDocument` class from serviceChatModel.py
4. `ChatContent` class from serviceChatModel.py
5. Direct file access methods from method*.py modules
### To be Modified
1. `serviceChatModel.py`
- Add new model classes
- Remove deprecated classes
- Update existing classes
2. `managerDocument.py`
- Move core functionality to DocumentService
- Update to use new model structure
- Remove redundant methods
3. `method*.py` modules
- Update to use DocumentService
- Remove direct file access
- Update error handling
4. `serviceChatClass.py`
- Implement processFileIds
- Update document handling
## Neutralization Integration
The neutralization feature is integrated into the `getDocumentContent` method with a feature flag. When enabled, it will process content through the neutralizer before sending it to AI processing.
```python
# In getDocumentContent method
if self.neutralizer_enabled:
from modules.neutralizer import neutralizer
content = await neutralizer.process_content(content)
```
This allows for easy enabling/disabling of the feature and future expansion of neutralization capabilities.
## Migration Strategy
1. Create new models and services
2. Implement new functionality alongside existing code
3. Gradually migrate method modules to use new services
4. Remove deprecated code once migration is complete
5. Enable neutralization feature when ready
## Testing Requirements
1. Unit tests for all new model classes
2. Integration tests for DocumentService
3. Tests for content extraction with various file types
4. Tests for neutralization integration
5. Performance tests for large file handling
6. Error handling and edge case tests

View file

@ -1,643 +0,0 @@
# Agent Chat System Handbook
# Einführung in das Agent Chat System Handbuch
## Zweck und Umfang des Dokuments
Willkommen zum "Agent Chat System Handbook". Dieses Handbuch dient als umfassende Anleitung für die Implementierung und Verwaltung eines Agent Chat Systems unter Verwendung von FastAPI. Es richtet sich an technische Fachkräfte, die für die Einrichtung, Verwaltung und Optimierung von Chat-Systemen verantwortlich sind. Ziel ist es, Ihnen die notwendigen Kenntnisse und Werkzeuge an die Hand zu geben, um ein effizientes und sicheres Chat-System zu entwickeln und zu betreiben.
## Kontext und Hintergrundinformationen
In der heutigen digitalen Welt sind Chat-Systeme ein wesentlicher Bestandteil der Kundenkommunikation und des Supports. Mit der zunehmenden Integration von Künstlicher Intelligenz (KI) in diese Systeme wird es immer wichtiger, robuste und skalierbare Lösungen zu entwickeln. FastAPI bietet eine moderne und leistungsstarke Plattform zur Erstellung von Web-APIs, die sich ideal für die Entwicklung eines solchen Systems eignet. Dieses Handbuch basiert auf den bereitgestellten FastAPI-Anwendungsdateien und bietet eine detaillierte Anleitung zur Implementierung eines Agent Chat Systems.
## Inhalt des Dokuments
Im "Agent Chat System Handbook" finden Sie detaillierte Informationen zu folgenden Themen:
- **FastAPI Setup**: Schritt-für-Schritt-Anleitung zur Einrichtung der FastAPI-Umgebung.
- **Benutzerverwaltung**: Methoden zur Verwaltung von Benutzerkonten und -rollen.
- **KI-Integration**: Implementierung von KI-Funktionen zur Verbesserung der Chat-Interaktionen.
- **Authentifizierung**: Sicherstellung der Sicherheit und Integrität des Systems durch robuste Authentifizierungsmechanismen.
- **Mandatsverwaltung**: Verwaltung von Benutzerrechten und -mandaten innerhalb des Systems.
- **Attributverwaltung**: Umgang mit benutzerdefinierten Attributen und deren Verwaltung.
- **Prompt-Management**: Erstellung und Verwaltung von Eingabeaufforderungen für die KI-Interaktion.
- **Dateioperationen**: Verwaltung und Verarbeitung von Dateien innerhalb des Systems.
- **Workflow-Management**: Optimierung und Automatisierung von Arbeitsabläufen im Chat-System.
## Ton und Zielgruppe
Dieses Handbuch ist in einem formellen und technischen Ton verfasst, um den Anforderungen einer professionellen Leserschaft gerecht zu werden. Es richtet sich an Entwickler, Systemadministratoren und technische Projektleiter, die mit der Implementierung und Verwaltung von Chat-Systemen betraut sind. Wir empfehlen, dass die Leser über grundlegende Kenntnisse in FastAPI und Web-API-Entwicklung verfügen, um den maximalen Nutzen aus diesem Handbuch zu ziehen.
Wir hoffen, dass dieses Handbuch Ihnen als wertvolle Ressource dient und Sie bei der erfolgreichen Implementierung Ihres Agent Chat Systems unterstützt.
# Einführung
## Zweck des Handbuchs
Das "Agent Chat System Handbook" dient als umfassende Anleitung zur Implementierung und Nutzung des Agenten-Chat-Systems, das auf der FastAPI-Plattform basiert. Dieses Handbuch richtet sich an technische Anwender, die eine detaillierte Anleitung zur Einrichtung, Verwaltung und Optimierung des Systems benötigen. Es bietet eine strukturierte Übersicht über die verschiedenen Komponenten und Funktionen des Systems, um eine effiziente Nutzung und Anpassung zu gewährleisten. Ziel ist es, den Anwendern ein tiefes Verständnis der Systemarchitektur und der zugrunde liegenden Prozesse zu vermitteln, um eine reibungslose Integration und Verwaltung zu ermöglichen.
## Systemübersicht
Das Agenten-Chat-System ist eine leistungsstarke Plattform, die entwickelt wurde, um die Kommunikation zwischen Agenten und Nutzern zu optimieren. Es nutzt die FastAPI-Technologie, um eine schnelle und skalierbare Lösung zu bieten. Die Hauptkomponenten des Systems umfassen:
- **Anwendungssetup**: Die FastAPI-Anwendung wird mit spezifischen Konfigurationen für Logging, CORS (Cross-Origin Resource Sharing) und Authentifizierung eingerichtet. Diese Konfigurationen sind entscheidend für die Sicherheit und Leistung der Anwendung.
- **Benutzerverwaltung**: Ein robustes Modul zur Verwaltung von Benutzerkonten, das die Erstellung, Aktualisierung und Löschung von Benutzerprofilen ermöglicht. Es stellt sicher, dass nur autorisierte Benutzer Zugriff auf das System haben.
- **Mandatsverwaltung**: Diese Komponente ermöglicht die Verwaltung von Mandaten, die den Zugriff und die Berechtigungen innerhalb des Systems regeln. Sie ist essenziell für die Einhaltung von Sicherheitsrichtlinien.
- **Attributverwaltung**: Ein flexibles System zur Verwaltung von Attributen, die zur Personalisierung und Anpassung der Benutzererfahrung verwendet werden können.
- **Prompt-Management**: Diese Funktion ermöglicht die Verwaltung von Eingabeaufforderungen, die zur Interaktion mit den Nutzern verwendet werden. Sie ist entscheidend für die Anpassung der Kommunikation an spezifische Anforderungen.
- **Dateioperationen**: Ein Modul zur effizienten Handhabung von Dateivorgängen, das das Hochladen, Herunterladen und Verwalten von Dateien innerhalb des Systems unterstützt.
- **Workflow-Management**: Diese Komponente ermöglicht die Definition und Verwaltung von Arbeitsabläufen, um die Effizienz und Konsistenz der Prozesse zu gewährleisten.
- **KI-Integration**: Das System bietet eine nahtlose Integration von KI-Technologien, um die Interaktion und Entscheidungsfindung zu verbessern.
- **Authentifizierung**: Ein sicheres Authentifizierungssystem, das sicherstellt, dass nur berechtigte Benutzer Zugriff auf die Anwendung haben.
Dieses Handbuch wird detaillierte Anleitungen und Beispiele für jede dieser Komponenten bieten, um eine umfassende Unterstützung bei der Implementierung und Verwaltung des Agenten-Chat-Systems zu gewährleisten.
# Application Setup
In diesem Abschnitt des "Agent Chat System Handbook" wird die Einrichtung der Anwendung detailliert beschrieben. Diese Anleitung richtet sich an technische Benutzer und bietet eine umfassende Übersicht über die Initialisierung der FastAPI-Anwendung, die Konfiguration von statischen Dateien und die allgemeinen Endpunkte.
## FastAPI Initialization
Die Initialisierung der FastAPI-Anwendung ist der erste Schritt zur Einrichtung des Agent Chat Systems. Hierbei werden grundlegende Parameter und Konfigurationen festgelegt, die für den Betrieb der Anwendung erforderlich sind.
### Schritte zur Initialisierung:
1. **Anwendungserstellung**:
- Die FastAPI-Anwendung wird mit einem Titel und einer Beschreibung initialisiert. Diese Informationen sind nützlich für die Dokumentation und API-Dokumentationsseiten.
- Beispiel:
```python
from fastapi import FastAPI
app = FastAPI(
title="Agent Chat System",
description="Ein System zur Verwaltung von Agenten-Chats"
)
```
2. **Lebenszyklus-Management**:
- Die Anwendung verwendet einen Lebenszyklus-Manager, um Ereignisse beim Start und Herunterfahren der Anwendung zu verwalten. Dies ist entscheidend für die ordnungsgemäße Ressourcenverwaltung.
- Beispiel:
```python
@app.on_event("startup")
async def startup_event():
    # Initialisierungslogik hier
    pass

@app.on_event("shutdown")
async def shutdown_event():
    # Bereinigungslogik hier
    pass
```
3. **CORS-Konfiguration**:
- Die Cross-Origin Resource Sharing (CORS) Einstellungen werden konfiguriert, um den Zugriff von verschiedenen Ursprüngen zu ermöglichen, was besonders wichtig für Webanwendungen ist, die auf verschiedenen Domains gehostet werden.
## Static Files Setup
Die Konfiguration von statischen Dateien ermöglicht es der Anwendung, Ressourcen wie Bilder, CSS-Dateien und JavaScript-Dateien bereitzustellen, die für die Benutzeroberfläche benötigt werden.
### Schritte zur Konfiguration:
1. **Verzeichnis für statische Dateien**:
- Ein Verzeichnis wird definiert, in dem alle statischen Dateien gespeichert werden. Dieses Verzeichnis wird in der Regel relativ zum Projektverzeichnis angegeben.
- Beispiel:
```python
from fastapi.staticfiles import StaticFiles
app.mount("/static", StaticFiles(directory="static"), name="static")
```
2. **Zugriff auf statische Dateien**:
- Die Anwendung stellt sicher, dass die statischen Dateien über einen bestimmten URL-Pfad zugänglich sind, was die Bereitstellung und den Zugriff auf diese Ressourcen erleichtert.
## Endpoints Overview
Die Endpunkte der Anwendung sind die Schnittstellen, über die externe Systeme und Benutzer mit der Anwendung interagieren können. Eine klare Übersicht über die verfügbaren Endpunkte ist entscheidend für die Integration und Nutzung der Anwendung.
### Allgemeine Endpunkte:
1. **Benutzerverwaltung**:
- Endpunkte zur Erstellung, Aktualisierung und Löschung von Benutzern.
- Beispiel:
```python
@app.post("/users/")
async def create_user(user: User):
    # Logik zur Benutzererstellung
    pass
```
2. **Mandatsverwaltung**:
- Endpunkte zur Verwaltung von Mandaten, einschließlich der Zuweisung und Verwaltung von Berechtigungen.
3. **Attributverwaltung**:
- Endpunkte zur Verwaltung von Attributen, die für die Anpassung und Personalisierung der Agenten-Chats verwendet werden.
4. **Prompt-Management**:
- Endpunkte zur Verwaltung von Eingabeaufforderungen, die für die Interaktion mit Benutzern verwendet werden.
Diese detaillierte Anleitung zur Einrichtung der Anwendung stellt sicher, dass technische Benutzer die FastAPI-Anwendung korrekt initialisieren und konfigurieren können, um eine reibungslose Funktionalität des Agent Chat Systems zu gewährleisten.
# Logging
In diesem Abschnitt des "Agent Chat System Handbook" wird die Konfiguration und Einrichtung des Loggings im Rahmen der FastAPI-Anwendung beschrieben. Eine ordnungsgemäße Protokollierung ist entscheidend für die Überwachung und Fehlerbehebung der Anwendung. Dieser Abschnitt ist in zwei Hauptunterabschnitte unterteilt: Initialisierung und Handler.
## Initialisierung
Die Initialisierung des Loggings ist ein wesentlicher Schritt, um sicherzustellen, dass alle Ereignisse innerhalb der Anwendung korrekt erfasst werden. Die Konfiguration des Loggings erfolgt in der Regel zu Beginn der Anwendung, um sicherzustellen, dass alle nachfolgenden Prozesse und Ereignisse protokolliert werden.
### Beispiel für die Logging-Initialisierung
```python
import logging
def initialize_logging():
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler("app.log"),
logging.StreamHandler()
]
)
```
In diesem Beispiel wird das Logging mit einem Basislevel von `INFO` konfiguriert. Die Formatierung der Log-Nachrichten umfasst das Datum und die Uhrzeit, den Namen des Loggers, die Log-Stufe und die eigentliche Nachricht. Zwei Handler werden eingerichtet: ein `FileHandler`, der die Logs in eine Datei schreibt, und ein `StreamHandler`, der die Logs auf der Konsole ausgibt.
## Handler
Handler sind ein wesentlicher Bestandteil des Loggings, da sie bestimmen, wohin die Log-Nachrichten gesendet werden. In der FastAPI-Anwendung können verschiedene Arten von Handlern eingerichtet werden, um die Protokollierung flexibel und anpassbar zu gestalten.
### Einrichtung von Handlern
1. **FileHandler**: Dieser Handler schreibt Log-Nachrichten in eine Datei. Er ist nützlich für die langfristige Speicherung von Logs und die spätere Analyse.
```python
file_handler = logging.FileHandler('app.log')
file_handler.setLevel(logging.INFO)
```
2. **StreamHandler**: Dieser Handler gibt Log-Nachrichten auf der Konsole aus. Er ist besonders nützlich für die Echtzeitüberwachung während der Entwicklung und des Debuggings.
```python
stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.DEBUG)
```
3. **Custom Handler**: Bei Bedarf können benutzerdefinierte Handler erstellt werden, um spezielle Anforderungen zu erfüllen, wie z.B. das Senden von Logs an externe Systeme oder Dienste.
### Beispiel für die Handler-Konfiguration
```python
logger = logging.getLogger('agent_chat_system')
logger.setLevel(logging.DEBUG)
# Hinzufügen der Handler zum Logger
logger.addHandler(file_handler)
logger.addHandler(stream_handler)
```
In diesem Beispiel wird ein Logger mit dem Namen `agent_chat_system` erstellt und auf das Level `DEBUG` gesetzt. Die zuvor definierten `FileHandler` und `StreamHandler` werden dem Logger hinzugefügt, um die Log-Nachrichten sowohl in eine Datei als auch auf der Konsole auszugeben.
Durch die sorgfältige Konfiguration von Logging und Handlern kann die FastAPI-Anwendung effektiv überwacht und gewartet werden, was zu einer verbesserten Stabilität und Fehlerbehebung führt.
# Benutzerverwaltung
In diesem Abschnitt des "Agent Chat System Handbook" wird die Benutzerverwaltung detailliert beschrieben. Die Benutzerverwaltung ist ein zentraler Bestandteil des Systems, da sie die Zuweisung von Rollen und Berechtigungen sowie die Authentifizierungsmechanismen umfasst. Diese Aspekte sind entscheidend für die Sicherheit und Effizienz des Systems.
## Rollen und Berechtigungen
### Benutzerrollen
Benutzerrollen definieren die verschiedenen Zugriffsebenen und Verantwortlichkeiten innerhalb des Agent Chat Systems. Jede Rolle hat spezifische Berechtigungen, die den Zugriff auf bestimmte Funktionen und Daten steuern. Die Hauptrollen sind:
- **Administrator**: Hat umfassende Berechtigungen, einschließlich der Verwaltung von Benutzern, Rollen und Systemeinstellungen.
- **Agent**: Kann auf die Chat-Funktionalitäten zugreifen und mit Kunden interagieren.
- **Supervisor**: Überwacht die Aktivitäten der Agenten und hat Zugriff auf Berichte und Analysen.
### Berechtigungen
Berechtigungen sind spezifische Rechte, die einer Rolle zugewiesen werden. Sie bestimmen, welche Aktionen ein Benutzer innerhalb des Systems ausführen kann. Beispiele für Berechtigungen sind:
- Zugriff auf das Dashboard
- Verwaltung von Benutzerkonten
- Einsicht in Berichte und Statistiken
- Konfiguration von Systemeinstellungen
Die Zuweisung von Rollen und Berechtigungen erfolgt über die Administrationsoberfläche des Systems, wo Administratoren die Möglichkeit haben, Benutzerkonten zu erstellen und zu verwalten.
## Authentifizierung
### Authentifizierungsmechanismen
Die Authentifizierung ist ein kritischer Sicherheitsaspekt des Agent Chat Systems. Sie stellt sicher, dass nur autorisierte Benutzer Zugriff auf das System erhalten. Die gängigen Authentifizierungsmechanismen umfassen:
- **Passwortbasierte Authentifizierung**: Benutzer melden sich mit einem Benutzernamen und einem Passwort an. Es wird empfohlen, starke Passwörter zu verwenden und regelmäßige Passwortänderungen durchzuführen.
- **Zwei-Faktor-Authentifizierung (2FA)**: Erhöht die Sicherheit, indem ein zusätzlicher Verifizierungsschritt hinzugefügt wird, z.B. ein einmaliger Code, der an das Mobiltelefon des Benutzers gesendet wird.
- **OAuth 2.0**: Ermöglicht die Authentifizierung über Drittanbieter, wie Google oder Facebook, was den Anmeldeprozess für Benutzer vereinfacht und die Sicherheit erhöht.
### Implementierung in FastAPI
Die FastAPI-Anwendung implementiert diese Authentifizierungsmechanismen durch die Integration von Sicherheitsprotokollen und Middleware. Die Konfiguration erfolgt in der Datei `app.py`, in der die Authentifizierungslogik definiert ist. Hier ein Beispiel für die Implementierung der passwortbasierten Authentifizierung:
```python
from fastapi import FastAPI, Depends
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
app = FastAPI()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
@app.post("/token")
async def login(form_data: OAuth2PasswordRequestForm = Depends()):
# Authentifizierungslogik hier
return {"access_token": "token", "token_type": "bearer"}
```
Diese Struktur ermöglicht eine flexible und sichere Verwaltung der Benutzerzugriffe und gewährleistet, dass das System den aktuellen Sicherheitsstandards entspricht.
Durch die sorgfältige Verwaltung von Rollen, Berechtigungen und Authentifizierungsmechanismen wird sichergestellt, dass das Agent Chat System sowohl sicher als auch effizient betrieben werden kann.
# Mandate Management
In diesem Abschnitt des "Agent Chat System Handbook" wird das Mandatsmanagement detailliert beschrieben. Das Mandatsmanagement ist ein wesentlicher Bestandteil des Systems, der die Erstellung und Verwaltung von Mandaten umfasst. Diese Funktionen sind entscheidend für die Organisation und den Betrieb des Agenten-Chat-Systems.
## Mandate Creation
### Erstellung von Mandaten
Die Erstellung von Mandaten ist der erste Schritt im Mandatsmanagement. Ein Mandat definiert die spezifischen Aufgaben und Verantwortlichkeiten, die einem Agenten oder einer Gruppe von Agenten zugewiesen werden. Die Erstellung eines Mandats erfolgt in mehreren Schritten:
1. **Initialisierung**: Beginnen Sie mit der Definition der grundlegenden Parameter des Mandats, einschließlich des Titels, der Beschreibung und der beteiligten Agenten.
2. **Zuweisung von Aufgaben**: Weisen Sie spezifische Aufgaben oder Ziele zu, die im Rahmen des Mandats erreicht werden sollen. Diese Aufgaben sollten klar definiert und messbar sein.
3. **Festlegung von Fristen**: Bestimmen Sie die zeitlichen Rahmenbedingungen für das Mandat, einschließlich Start- und Enddatum sowie Meilensteine.
4. **Ressourcenzuweisung**: Stellen Sie sicher, dass die notwendigen Ressourcen, wie z.B. technische Tools oder Daten, den Agenten zur Verfügung stehen.
5. **Genehmigung**: Das Mandat muss von einer autorisierten Person oder einem Gremium genehmigt werden, bevor es aktiv wird.
## Mandate Lifecycle
### Lebenszyklus eines Mandats
Der Lebenszyklus eines Mandats umfasst mehrere Phasen, die sicherstellen, dass das Mandat effektiv verwaltet und abgeschlossen wird:
1. **Initiierung**: Nach der Erstellung wird das Mandat offiziell gestartet. Alle beteiligten Parteien werden informiert und die notwendigen Ressourcen bereitgestellt.
2. **Durchführung**: In dieser Phase arbeiten die Agenten an den zugewiesenen Aufgaben. Fortschritte werden regelmäßig überwacht und dokumentiert.
3. **Überwachung und Anpassung**: Der Fortschritt des Mandats wird kontinuierlich überwacht. Bei Bedarf werden Anpassungen vorgenommen, um sicherzustellen, dass die Ziele erreicht werden.
4. **Abschluss**: Nach Erreichen der Ziele oder dem Ende der Laufzeit wird das Mandat abgeschlossen. Eine abschließende Bewertung wird durchgeführt, um den Erfolg zu messen und Erkenntnisse für zukünftige Mandate zu gewinnen.
5. **Archivierung**: Alle relevanten Dokumente und Berichte werden archiviert, um eine Nachverfolgbarkeit und Referenz für zukünftige Projekte zu gewährleisten.
## Managing Mandates
### Verwaltung von Mandaten
Die Verwaltung von Mandaten erfordert eine kontinuierliche Überwachung und Anpassung, um sicherzustellen, dass die gesetzten Ziele erreicht werden. Zu den wichtigsten Verwaltungsaufgaben gehören:
- **Statusüberprüfung**: Regelmäßige Überprüfung des Status und Fortschritts des Mandats.
- **Kommunikation**: Sicherstellen, dass alle Beteiligten über den Fortschritt und etwaige Änderungen informiert sind.
- **Risikomanagement**: Identifizierung und Management potenzieller Risiken, die den Erfolg des Mandats gefährden könnten.
- **Berichterstattung**: Erstellung regelmäßiger Berichte, um den Fortschritt zu dokumentieren und Transparenz zu gewährleisten.
Durch die sorgfältige Erstellung und Verwaltung von Mandaten wird sichergestellt, dass das Agenten-Chat-System effizient und effektiv arbeitet, um die gesteckten Ziele zu erreichen.
# Attribute Handling
In diesem Abschnitt des "Agent Chat System Handbook" wird die Handhabung von Attributen im System detailliert beschrieben. Attribute sind wesentliche Komponenten, die zur Verwaltung und Organisation von Daten innerhalb des Systems verwendet werden. Dieser Abschnitt behandelt die verschiedenen Typen von Attributen und die Operationen, die auf ihnen ausgeführt werden können.
## Typen von Attributen
Attribute im Agent Chat System sind in verschiedene Kategorien unterteilt, die jeweils spezifische Funktionen und Anwendungsbereiche haben. Die wichtigsten Attributtypen sind:
### 1. Systemattribute
Systemattribute sind vordefinierte Attribute, die für die grundlegende Funktionalität des Systems erforderlich sind. Sie werden automatisch vom System verwaltet und können nicht vom Benutzer geändert werden. Beispiele für Systemattribute sind Benutzer-ID, Erstellungsdatum und Änderungsdatum.
### 2. Benutzerdefinierte Attribute
Benutzerdefinierte Attribute werden von den Benutzern erstellt, um spezifische Anforderungen zu erfüllen. Diese Attribute bieten Flexibilität und Anpassungsfähigkeit, indem sie es den Benutzern ermöglichen, zusätzliche Informationen zu speichern, die nicht durch Systemattribute abgedeckt sind. Ein Beispiel könnte ein Attribut für die bevorzugte Sprache eines Benutzers sein.
### 3. Temporäre Attribute
Temporäre Attribute werden für kurzfristige Zwecke erstellt und haben eine begrenzte Lebensdauer. Sie werden häufig in Sitzungen oder für bestimmte Operationen verwendet, bei denen die Daten nicht dauerhaft gespeichert werden müssen. Ein Beispiel wäre ein Attribut, das während einer Chat-Sitzung verwendet wird, um den aktuellen Status eines Gesprächs zu verfolgen.
## Operationen auf Attributen
Die Verwaltung von Attributen umfasst eine Vielzahl von Operationen, die es ermöglichen, Attribute zu erstellen, zu ändern, zu löschen und zu analysieren. Die wichtigsten Operationen sind:
### 1. Erstellung von Attributen
Die Erstellung von Attributen erfolgt entweder automatisch durch das System (für Systemattribute) oder manuell durch den Benutzer (für benutzerdefinierte Attribute). Bei der Erstellung eines Attributs müssen der Attributtyp, der Name und der Datentyp spezifiziert werden.
### 2. Aktualisierung von Attributen
Attribute können aktualisiert werden, um Änderungen in den Daten widerzuspiegeln. Dies umfasst das Ändern von Attributwerten oder das Aktualisieren von Attributmetadaten. Beispielsweise kann ein Benutzer das Attribut "Telefonnummer" aktualisieren, um eine neue Nummer zu speichern.
### 3. Löschung von Attributen
Nicht mehr benötigte Attribute können gelöscht werden. Bei der Löschung von Attributen ist Vorsicht geboten, da dies irreversible Änderungen an den gespeicherten Daten zur Folge haben kann. Systemattribute können in der Regel nicht gelöscht werden, um die Integrität des Systems zu gewährleisten.
### 4. Abfrage von Attributen
Das System ermöglicht die Abfrage von Attributen, um Informationen zu extrahieren und Berichte zu erstellen. Dies ist besonders nützlich für die Analyse von Daten und die Generierung von Einblicken. Beispielsweise kann ein Administrator eine Abfrage durchführen, um alle Benutzer mit einem bestimmten Attributwert zu identifizieren.
### 5. Validierung von Attributen
Die Validierung von Attributen stellt sicher, dass die eingegebenen Daten den festgelegten Kriterien entsprechen. Dies umfasst die Überprüfung von Datentypen, Wertebereichen und anderen Einschränkungen. Eine korrekte Validierung ist entscheidend, um Datenintegrität und -konsistenz zu gewährleisten.
Durch das Verständnis der verschiedenen Attributtypen und der auf ihnen ausführbaren Operationen können Benutzer das Agent Chat System effektiver nutzen und an ihre spezifischen Bedürfnisse anpassen.
# Prompt Management
In diesem Abschnitt des "Agent Chat System Handbook" wird die Verwaltung von Prompts behandelt. Prompts sind wesentliche Bestandteile des Agent Chat Systems, da sie die Interaktion zwischen Benutzern und dem System steuern. Dieser Abschnitt bietet eine detaillierte Anleitung zur Erstellung und Nutzung von Prompts.
## Erstellen von Prompts
Die Erstellung von Prompts ist ein zentraler Bestandteil der Systemkonfiguration und ermöglicht es, spezifische Anfragen oder Anweisungen für die Interaktion mit dem System zu definieren.
### Schritte zur Erstellung von Prompts
1. **Identifikation des Bedarfs**: Bestimmen Sie den spezifischen Bedarf oder das Szenario, für das ein Prompt erforderlich ist. Dies könnte eine häufig gestellte Frage oder eine spezifische Anweisung sein, die regelmäßig benötigt wird.
2. **Definition des Inhalts**: Formulieren Sie den Inhalt des Prompts klar und präzise. Der Inhalt sollte direkt und verständlich sein, um Missverständnisse zu vermeiden.
3. **Formatierung**: Achten Sie darauf, dass der Prompt in einem konsistenten Format erstellt wird, das mit den anderen Systemkomponenten kompatibel ist. Nutzen Sie Markdown oder andere unterstützte Formate, um die Lesbarkeit zu verbessern.
4. **Implementierung im System**: Integrieren Sie den erstellten Prompt in das System. Dies kann durch die Anpassung der entsprechenden Konfigurationsdateien oder durch die Nutzung der API-Schnittstellen erfolgen.
### Beispiel
```json
{
"prompt_id": "faq_shipping",
"content": "Wie lange dauert der Versand?",
"response": "Der Versand dauert in der Regel 3-5 Werktage."
}
```
## Nutzung von Prompts
Die Nutzung von Prompts ist entscheidend, um eine effiziente und konsistente Kommunikation innerhalb des Agent Chat Systems sicherzustellen.
### Schritte zur Nutzung von Prompts
1. **Abrufen von Prompts**: Verwenden Sie die API-Endpunkte, um verfügbare Prompts abzurufen. Dies ermöglicht es Agenten, schnell auf vorgefertigte Antworten zuzugreifen.
2. **Anpassung an den Kontext**: Stellen Sie sicher, dass der ausgewählte Prompt dem aktuellen Kontext der Benutzeranfrage entspricht. Passen Sie den Inhalt gegebenenfalls an, um spezifische Details oder Variationen zu berücksichtigen.
3. **Feedback und Optimierung**: Sammeln Sie regelmäßig Feedback zur Effektivität der Prompts und optimieren Sie diese basierend auf den Rückmeldungen. Dies kann durch die Analyse von Benutzerinteraktionen und die Anpassung der Inhalte erfolgen.
### Beispiel
Ein Agent erhält eine Anfrage zu den Versandzeiten. Anstatt die Antwort manuell zu formulieren, ruft der Agent den entsprechenden Prompt ab und liefert eine konsistente und schnelle Antwort.
```json
{
"user_query": "Wann kommt meine Bestellung an?",
"prompt_used": "faq_shipping",
"response": "Der Versand dauert in der Regel 3-5 Werktage."
}
```
Durch die strukturierte Verwaltung und Nutzung von Prompts wird die Effizienz des Agent Chat Systems erheblich gesteigert, was zu einer verbesserten Benutzererfahrung führt.
# File Operations
In diesem Abschnitt des "Agent Chat System Handbook" werden die wesentlichen Aspekte der Dateiverwaltung und -speicherung im Rahmen des Agent Chat Systems behandelt. Diese Informationen sind entscheidend für die technische Verwaltung und den Betrieb des Systems. Der Abschnitt ist in zwei Hauptunterabschnitte unterteilt: "Handling Files" und "Storing Files".
## Handling Files
Die Handhabung von Dateien ist ein zentraler Bestandteil des Agent Chat Systems, da es die Interaktion mit verschiedenen Dateitypen ermöglicht, die für die Funktionalität des Systems erforderlich sind.
### Dateiverwaltung
- **Öffnen und Schließen von Dateien**: Dateien sollten mit geeigneten Methoden geöffnet und geschlossen werden, um Datenverlust oder -beschädigung zu vermeiden. In Python wird dies häufig mit dem `with`-Statement erreicht, das sicherstellt, dass Dateien nach der Verwendung ordnungsgemäß geschlossen werden.
```python
with open('datei.txt', 'r') as file:
inhalt = file.read()
```
- **Lesen und Schreiben von Dateien**: Das System unterstützt sowohl das Lesen als auch das Schreiben von Dateien. Es ist wichtig, die korrekten Modi (`'r'` für Lesen, `'w'` für Schreiben, `'a'` für Anhängen) zu verwenden, um die Integrität der Daten zu gewährleisten.
- **Fehlerbehandlung**: Beim Umgang mit Dateien können verschiedene Fehler auftreten, wie z.B. `FileNotFoundError` oder `IOError`. Eine robuste Fehlerbehandlung ist notwendig, um das System vor unerwarteten Abstürzen zu schützen.
```python
try:
with open('datei.txt', 'r') as file:
inhalt = file.read()
except FileNotFoundError:
print("Die Datei wurde nicht gefunden.")
```
## Storing Files
Die Speicherung von Dateien ist ein weiterer kritischer Aspekt, der sicherstellt, dass Daten sicher und effizient abgelegt werden.
### Dateispeicherung
- **Verzeichnisstruktur**: Eine gut organisierte Verzeichnisstruktur ist entscheidend für die effiziente Speicherung und den schnellen Zugriff auf Dateien. Es wird empfohlen, Dateien in thematisch geordneten Unterverzeichnissen zu speichern.
- **Datenbankintegration**: In einigen Fällen kann es notwendig sein, Dateien in einer Datenbank zu speichern, insbesondere wenn Metadaten oder eine hohe Zugriffsgeschwindigkeit erforderlich sind. Das System kann Datenbanklösungen wie PostgreSQL oder MongoDB integrieren, um Dateien und ihre Metadaten zu verwalten.
- **Sicherheitsaspekte**: Bei der Speicherung von Dateien müssen Sicherheitsaspekte berücksichtigt werden, insbesondere wenn es sich um sensible Daten handelt. Dies umfasst die Verschlüsselung von Dateien und die Implementierung von Zugriffsberechtigungen.
- **Backup und Wiederherstellung**: Regelmäßige Backups sind unerlässlich, um Datenverlust zu vermeiden. Das System sollte über Mechanismen zur automatisierten Sicherung und Wiederherstellung von Dateien verfügen.
Durch die Beachtung dieser Richtlinien und Praktiken wird sichergestellt, dass das Agent Chat System Dateien effizient und sicher handhabt und speichert, was zu einem reibungslosen Betrieb und einer hohen Zuverlässigkeit des Systems beiträgt.
# Workflow Management
In diesem Abschnitt des "Agent Chat System Handbook" wird das Workflow-Management detailliert beschrieben. Das Ziel ist es, den technischen Benutzern ein umfassendes Verständnis für die Erstellung und Ausführung von Workflows innerhalb des Agent Chat Systems zu vermitteln.
## Inhaltsverzeichnis
1. [Erstellung von Workflows](#erstellung-von-workflows)
2. [Ausführung von Workflows](#ausführung-von-workflows)
## Erstellung von Workflows
Die Erstellung von Workflows ist ein zentraler Bestandteil des Workflow-Managements im Agent Chat System. Ein Workflow definiert eine Abfolge von Schritten, die automatisiert oder manuell ausgeführt werden können, um spezifische Aufgaben oder Prozesse zu steuern.
### Schritte zur Erstellung eines Workflows
1. **Identifikation der Anforderungen**: Bestimmen Sie die spezifischen Anforderungen und Ziele des Workflows. Dies könnte die Automatisierung von Kundenanfragen oder die Verwaltung von Support-Tickets umfassen.
2. **Definition der Schritte**: Listen Sie die einzelnen Schritte auf, die zum Erreichen der Workflow-Ziele erforderlich sind. Jeder Schritt sollte klar definiert und in einer logischen Reihenfolge angeordnet sein.
3. **Konfiguration der Aktionen**: Weisen Sie jedem Schritt spezifische Aktionen zu. Diese Aktionen könnten API-Aufrufe, Datenbankabfragen oder Benachrichtigungen umfassen.
4. **Erstellung von Bedingungen**: Definieren Sie Bedingungen, die den Fluss des Workflows steuern. Bedingungen können auf Ereignissen, Datenwerten oder Benutzerinteraktionen basieren.
5. **Testen des Workflows**: Vor der Implementierung sollte der Workflow in einer Testumgebung ausgeführt werden, um sicherzustellen, dass alle Schritte korrekt funktionieren.
6. **Dokumentation**: Dokumentieren Sie den Workflow umfassend, einschließlich der Ziele, Schritte, Bedingungen und erwarteten Ergebnisse.
### Beispiel
```yaml
- name: "Kundenanfrage-Workflow"
steps:
- step: "Anfrage erhalten"
action: "API-Aufruf"
- step: "Anfrage analysieren"
action: "AI-Analyse"
- step: "Antwort generieren"
action: "Textgenerierung"
- step: "Antwort senden"
action: "Benachrichtigung"
conditions:
- if: "Anfrage enthält 'dringend'"
then: "Priorität hochsetzen"
```
## Ausführung von Workflows
Die Ausführung von Workflows ist der Prozess, bei dem die definierten Schritte eines Workflows in der Praxis umgesetzt werden. Dies kann manuell durch einen Benutzer oder automatisch durch das System erfolgen.
### Schritte zur Ausführung eines Workflows
1. **Initiierung**: Der Workflow wird entweder durch ein Ereignis, eine Benutzeraktion oder einen Zeitplan initiiert.
2. **Verarbeitung der Schritte**: Jeder Schritt des Workflows wird in der festgelegten Reihenfolge ausgeführt. Das System überwacht den Fortschritt und stellt sicher, dass alle Bedingungen erfüllt sind, bevor zum nächsten Schritt übergegangen wird.
3. **Überwachung und Protokollierung**: Während der Ausführung werden alle Aktionen und Ergebnisse protokolliert. Dies ermöglicht eine spätere Analyse und Fehlerbehebung.
4. **Fehlerbehandlung**: Bei Auftreten eines Fehlers wird der Workflow entweder pausiert oder abgebrochen, je nach Konfiguration. Fehlerprotokolle werden erstellt, um die Ursache zu identifizieren und zu beheben.
5. **Abschluss**: Nach erfolgreicher Ausführung aller Schritte wird der Workflow abgeschlossen und eine Zusammenfassung der Ergebnisse erstellt.
### Beispiel
```json
{
"workflow_id": "12345",
"status": "in_progress",
"current_step": "Anfrage analysieren",
"logs": [
{"timestamp": "2023-10-01T10:00:00Z", "message": "Anfrage erhalten"},
{"timestamp": "2023-10-01T10:01:00Z", "message": "Anfrage analysieren gestartet"}
]
}
```
Durch die sorgfältige Erstellung und Ausführung von Workflows können Unternehmen die Effizienz und Genauigkeit ihrer Prozesse erheblich verbessern. Das Agent Chat System bietet die Flexibilität und Kontrolle, die erforderlich sind, um komplexe Workflows effektiv zu verwalten.
# AI Integration
In diesem Abschnitt wird die Integration von Künstlicher Intelligenz (KI) in das Agent Chat System detailliert beschrieben. Der Fokus liegt auf den verwendeten KI-Modellen und den Integrationspunkten innerhalb des Systems. Diese Informationen sind entscheidend für das Verständnis der technischen Architektur und der Funktionsweise der KI-Komponenten.
## AI Models
### Verwendete KI-Modelle
Das Agent Chat System nutzt fortschrittliche KI-Modelle, um die Interaktion zwischen Agenten und Nutzern zu optimieren. Diese Modelle sind darauf ausgelegt, natürliche Sprachverarbeitung (NLP) zu unterstützen und kontextbezogene Antworten zu generieren. Die wichtigsten Modelle umfassen:
- **GPT-3**: Ein leistungsstarkes Sprachmodell, das für die Generierung von menschenähnlichen Texten verwendet wird. Es ist in der Lage, komplexe Anfragen zu verstehen und relevante Antworten zu liefern.
- **BERT**: Ein Modell, das für Aufgaben des Sprachverstehens eingesetzt wird, insbesondere bei der Analyse von Benutzeranfragen und der Extraktion von Schlüsselinformationen.
- **Custom Sentiment Analysis Model**: Ein speziell entwickeltes Modell zur Analyse der Stimmung in Benutzeranfragen, um die Reaktionen der Agenten entsprechend anzupassen.
Diese Modelle werden kontinuierlich aktualisiert und optimiert, um die Effizienz und Genauigkeit der Interaktionen zu verbessern.
## Integration
### Integrationspunkte
Die Integration der KI-Modelle erfolgt an mehreren strategischen Punkten innerhalb des Agent Chat Systems. Diese Integrationspunkte sind entscheidend für die nahtlose Funktionalität und umfassen:
- **Anfrageverarbeitung**: Bei der Eingabe einer Benutzeranfrage wird diese zunächst durch das NLP-Modul geleitet, das die Anfrage analysiert und an das entsprechende KI-Modell weiterleitet.
- **Antwortgenerierung**: Die generierten Antworten werden durch das GPT-3-Modell erstellt und anschließend durch das Sentiment Analysis Model überprüft, um sicherzustellen, dass die Antwort dem emotionalen Kontext des Benutzers entspricht.
- **Datenanalyse**: Die gesammelten Daten aus den Interaktionen werden durch BERT analysiert, um Muster und Trends zu identifizieren, die zur Verbesserung der Systemleistung beitragen können.
- **Feedback-Schleife**: Eine kontinuierliche Feedback-Schleife ermöglicht es, die Modelle basierend auf Benutzerinteraktionen und Agenten-Feedback zu verfeinern und anzupassen.
### Technische Implementierung
Die Implementierung der KI-Integration erfolgt über spezialisierte APIs, die in die FastAPI-Anwendung eingebettet sind. Diese APIs ermöglichen eine effiziente Kommunikation zwischen den verschiedenen Modulen und den KI-Modellen. Die Integration ist so gestaltet, dass sie skalierbar und erweiterbar ist, um zukünftige Anforderungen und technologische Fortschritte zu berücksichtigen.
Durch die sorgfältige Auswahl und Integration dieser KI-Modelle wird sichergestellt, dass das Agent Chat System nicht nur effizient, sondern auch flexibel und anpassungsfähig bleibt, um den sich ständig ändernden Anforderungen der Benutzer gerecht zu werden.
# Authentication
In diesem Abschnitt des "Agent Chat System Handbook" wird das Authentifizierungssystem detailliert beschrieben. Die Authentifizierung ist ein kritischer Bestandteil des Systems, der sicherstellt, dass nur autorisierte Benutzer Zugriff auf die Anwendung und ihre Funktionen haben. Wir werden die verschiedenen Authentifizierungsmethoden sowie die Sicherheitsmaßnahmen, die implementiert wurden, um die Integrität und Vertraulichkeit der Benutzerdaten zu gewährleisten, untersuchen.
## Methoden
### Authentifizierungsmethoden
Das Agent Chat System unterstützt mehrere Authentifizierungsmethoden, um Flexibilität und Sicherheit zu bieten. Die wichtigsten Methoden sind:
1. **Token-basierte Authentifizierung**:
   - **Beschreibung**: Diese Methode verwendet JSON Web Tokens (JWT), um Benutzer zu authentifizieren. Nach erfolgreicher Anmeldung erhält der Benutzer ein Token, das bei jeder Anfrage an den Server gesendet wird.
   - **Vorteile**: Erhöhte Sicherheit durch zeitlich begrenzte Token und die Möglichkeit, Token zu widerrufen.
   - **Implementierung**: Der Token wird im Header der HTTP-Anfrage übermittelt und vom Server validiert.
2. **OAuth 2.0**:
   - **Beschreibung**: OAuth 2.0 ist ein weit verbreitetes Protokoll, das es Benutzern ermöglicht, sich mit ihren bestehenden Konten von Drittanbietern (z.B. Google, Facebook) anzumelden.
   - **Vorteile**: Benutzerfreundlichkeit und erhöhte Sicherheit, da keine Passwörter direkt im System gespeichert werden müssen.
   - **Implementierung**: Die Anwendung leitet den Benutzer zur Authentifizierungsseite des Drittanbieters weiter und erhält nach erfolgreicher Authentifizierung ein Zugriffstoken.
3. **Zwei-Faktor-Authentifizierung (2FA)**:
   - **Beschreibung**: Diese Methode fügt eine zusätzliche Sicherheitsebene hinzu, indem sie einen zweiten Authentifizierungsfaktor erfordert, z.B. einen SMS-Code oder eine Authentifizierungs-App.
   - **Vorteile**: Erhöhte Sicherheit durch die Kombination von etwas, das der Benutzer kennt (Passwort) und etwas, das der Benutzer hat (zweiter Faktor).
   - **Implementierung**: Nach der Eingabe des Passworts wird der Benutzer aufgefordert, den zweiten Faktor einzugeben, bevor der Zugriff gewährt wird.
## Sicherheit
### Sicherheitsmaßnahmen
Um die Sicherheit der Authentifizierung im Agent Chat System zu gewährleisten, wurden mehrere Maßnahmen implementiert:
1. **Datenverschlüsselung**:
   - Alle sensiblen Daten, einschließlich Passwörter und Token, werden mit starken Verschlüsselungsalgorithmen gespeichert und übertragen. Dies schützt die Daten vor unbefugtem Zugriff und Manipulation.
2. **Sichere Passwortspeicherung**:
   - Passwörter werden nicht im Klartext gespeichert. Stattdessen werden sie mit einem sicheren Hashing-Algorithmus (z.B. bcrypt) gehasht, bevor sie in der Datenbank gespeichert werden.
3. **Regelmäßige Sicherheitsüberprüfungen**:
   - Das System wird regelmäßig auf Sicherheitslücken überprüft, und es werden Patches und Updates angewendet, um bekannte Schwachstellen zu beheben.
4. **Sitzungsverwaltung**:
   - Sitzungen werden überwacht und bei Inaktivität automatisch abgemeldet, um das Risiko von Sitzungsentführungen zu minimieren.
5. **Protokollierung und Überwachung**:
   - Alle Authentifizierungsversuche und sicherheitsrelevanten Ereignisse werden protokolliert und überwacht, um verdächtige Aktivitäten frühzeitig zu erkennen und darauf zu reagieren.
Durch die Implementierung dieser Methoden und Sicherheitsmaßnahmen stellt das Agent Chat System sicher, dass die Authentifizierung sowohl benutzerfreundlich als auch sicher ist, und schützt die Integrität und Vertraulichkeit der Benutzerdaten effektiv.
# Conclusion
## Fazit
In diesem Handbuch zum "Agent Chat System" haben wir die wesentlichen Komponenten und Prozesse detailliert beschrieben, die für den erfolgreichen Einsatz und die Verwaltung eines Chat-Agenten-Systems erforderlich sind. Die behandelten Themen umfassen die Einrichtung der FastAPI-Anwendung, das Benutzer- und Mandatsmanagement, die Attributverwaltung, das Prompt-Management, Dateioperationen, das Workflow-Management, die Integration von Künstlicher Intelligenz sowie die Authentifizierung.
### Zusammenfassung der Hauptpunkte
1. **FastAPI Setup**: Wir haben die Schritte zur Einrichtung und Konfiguration der FastAPI-Anwendung erläutert, um eine stabile Grundlage für das Agentensystem zu schaffen.
2. **Benutzerverwaltung**: Die Verwaltung von Benutzern und deren Rollen ist entscheidend für die Sicherheit und Effizienz des Systems. Wir haben die Methoden zur Erstellung, Aktualisierung und Löschung von Benutzerkonten behandelt.
3. **AI-Integration**: Die Integration von KI-Technologien ermöglicht es dem System, intelligentere und kontextbezogene Antworten zu generieren. Wir haben die Implementierung und Optimierung dieser Funktionalität beschrieben.
4. **Authentifizierung**: Sicherheit ist ein zentrales Element jeder Anwendung. Wir haben die Authentifizierungsmechanismen und deren Implementierung im System detailliert dargestellt.
### Empfehlungen und nächste Schritte
- **Regelmäßige Updates**: Stellen Sie sicher, dass alle Systemkomponenten regelmäßig aktualisiert werden, um Sicherheitslücken zu schließen und die Leistung zu optimieren.
- **Erweiterung der AI-Funktionalitäten**: Erwägen Sie die Implementierung fortschrittlicherer KI-Modelle, um die Interaktionsqualität weiter zu verbessern.
- **Benutzerfeedback einholen**: Nutzen Sie das Feedback der Benutzer, um kontinuierlich Verbesserungen am System vorzunehmen.
### Bedeutung des Dokuments
Dieses Handbuch dient als umfassende Ressource für technische Fachleute, die für die Implementierung und Wartung des Agent Chat Systems verantwortlich sind. Es bietet nicht nur eine detaillierte Anleitung zur Einrichtung und Verwaltung des Systems, sondern auch wertvolle Einblicke in die Optimierung der Benutzererfahrung und der Systemleistung. Mit diesem Wissen sind Sie bestens gerüstet, um ein effizientes und sicheres Chat-Agenten-System zu betreiben.
Wir hoffen, dass dieses Handbuch Ihnen als wertvolle Referenz dient und Sie bei der erfolgreichen Implementierung und Verwaltung Ihres Agent Chat Systems unterstützt.

View file

@ -175,9 +175,9 @@ class ServiceContainer:
}
self.methods: Dict[str, MethodBase] = {}
self.tasks: Dict[str, AgentTask] = {}
self.prompt_manager = AIPromptManager()
self.task_state_manager = TaskStateManager()
self.document_processor = DocumentProcessor()
self.promptManager = AIPromptManager()
self.taskStateManager = TaskStateManager()
self.documentProcessor = DocumentProcessor()
async def execute_task(self, task: AgentTask) -> None:
"""Execute task with improved error handling and timeout"""
@ -257,7 +257,7 @@ class ServiceContainer:
class AIPromptManager:
"""Manages AI prompts and response validation"""
def generate_prompt(self, context: Dict[str, Any], examples: List[Dict]) -> str:
def generatePrompt(self, context: Dict[str, Any], examples: List[Dict]) -> str:
"""Generate a context-aware prompt with few-shot examples"""
prompt = (
f"Task: {context['task']}\n"
@ -269,7 +269,7 @@ class AIPromptManager:
prompt += "Extract the most relevant information for the task above."
return prompt
def validate_response(self, response: str, schema: Dict) -> bool:
def validateResponse(self, response: str, schema: Dict) -> bool:
"""Validate AI response against a schema"""
import jsonschema
try:
@ -282,20 +282,20 @@ class TaskStateManager:
"""Manages task state and retry tracking"""
def __init__(self):
self.task_states = {}
self.taskStates = {}
def track_state(self, task: AgentTask):
def trackState(self, task: AgentTask):
"""Track task state"""
self.task_states[task.id] = {
self.taskStates[task.id] = {
"status": task.status,
"retryState": getattr(task, "retryState", {}),
"history": getattr(task, "history", [])
}
def can_retry(self, task: AgentTask, method: str) -> bool:
def canRetry(self, task: AgentTask, method: str) -> bool:
"""Check if task can be retried"""
retry_state = self.task_states[task.id].get("retryState", {})
return retry_state.get(method, 0) < getattr(task, "retryMax", 3)
retryState = self.taskStates[task.id].get("retryState", {})
return retryState.get(method, 0) < getattr(task, "retryMax", 3)
class DocumentContext(BaseModel):
"""Model for document context"""
@ -748,7 +748,7 @@ async def _processUserInput(self, input: str, documents: List[str]) -> str:
examples = [
{"input": "Search documents", "output": "Extract relevant information"}
]
prompt = self.service.prompt_manager.generate_prompt(context, examples)
prompt = self.service.promptManager.generatePrompt(context, examples)
return await self.service.model['callAiBasic'](
f"""Analyze user request and documents:
@ -787,7 +787,7 @@ async def _analyzeTaskResults(self, task: AgentTask) -> str:
examples = [
{"input": "Task completed", "output": "Generate next steps"}
]
prompt = self.service.prompt_manager.generate_prompt(context, examples)
prompt = self.service.promptManager.generatePrompt(context, examples)
return await self.service.model['callAiBasic'](
f"""Analyze task results and determine next steps:
@ -826,7 +826,7 @@ async def _processTaskResults(self, task: AgentTask) -> str:
examples = [
{"input": "Task results", "output": "Generate summary"}
]
prompt = self.service.prompt_manager.generate_prompt(context, examples)
prompt = self.service.promptManager.generatePrompt(context, examples)
return await self.service.model['callAiBasic'](
f"""Process task results and generate feedback: