adapted to model system
This commit is contained in:
parent
739f22785c
commit
ea5f42d981
33 changed files with 3530 additions and 7690 deletions
|
|
@ -32,6 +32,9 @@ Security_PASSWORD_REQUIRE_SPECIAL = True
|
||||||
Security_FAILED_LOGIN_LIMIT = 5
|
Security_FAILED_LOGIN_LIMIT = 5
|
||||||
Security_LOCK_DURATION_MINUTES = 30
|
Security_LOCK_DURATION_MINUTES = 30
|
||||||
|
|
||||||
|
# Content Neutralization configuration
|
||||||
|
Content_Neutralization_ENABLED = False
|
||||||
|
|
||||||
# Agent Webcrawler configuration
|
# Agent Webcrawler configuration
|
||||||
Agent_Webcrawler_SERPAPI_ENGINE = google
|
Agent_Webcrawler_SERPAPI_ENGINE = google
|
||||||
Agent_Webcrawler_SERPAPI_APIKEY = 7304bd34bca767aa52dd3233297e30a9edc0abc57871f702b3f8238b9d3ee7bc
|
Agent_Webcrawler_SERPAPI_APIKEY = 7304bd34bca767aa52dd3233297e30a9edc0abc57871f702b3f8238b9d3ee7bc
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,380 +0,0 @@
|
||||||
"""
|
|
||||||
Coach agent for answering questions and generating structured content.
|
|
||||||
Provides direct AI-based responses using extracted data from documents.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Dict, Any, List
|
|
||||||
import json
|
|
||||||
from datetime import datetime
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
from modules.workflow.agentBase import AgentBase
|
|
||||||
from modules.interfaces.serviceChatModel import Task, ChatDocument, ChatContent
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class AgentCoach(AgentBase):
|
|
||||||
"""AI-driven agent for answering questions and generating structured content from extracted data"""
|
|
||||||
|
|
||||||
def __init__(self):
    """
    Set up the coach agent's identity and the capabilities it advertises.

    Runs the parent initializer first, then assigns name, label,
    description and the capability list on the instance.
    """
    super().__init__()

    # Identity shown for this agent
    self.name = "coach"
    self.label = "Coach & Assistant"
    self.description = (
        "Answers questions, converts and generates content directly "
        "from data without complex processing"
    )

    # Capability identifiers, kept in their original order
    advertised = (
        "dataConversion",
        "questionAnswering",
        "contentGeneration",
        "simpleDataFormatting",
        "informationSynthesis",
        "directResponse",
        "imageInterpretation",
        "structuredOutput",
    )
    self.capabilities = list(advertised)
|
|
||||||
|
|
||||||
def setDependencies(self, serviceBase=None):
    """
    Hand the external service over to the agent.

    Args:
        serviceBase: Service object passed on to setService (None when omitted)
    """
    # Pure pass-through; setService receives the value unchanged
    self.setService(serviceBase)
|
|
||||||
|
|
||||||
async def processTask(self, task: Task) -> Dict[str, Any]:
    """
    Answer a task by generating one document per requested output via the AI service.

    Args:
        task: Task object; its prompt, filesInput and filesOutput attributes are read

    Returns:
        Dictionary with a "feedback" message and the list of generated "documents"
    """
    try:
        # Pieces of the task that drive the generation
        userPrompt = task.prompt
        sourceDocuments = task.filesInput
        requestedOutputs = task.filesOutput

        # Without an AI backend there is nothing to generate
        serviceReady = bool(self.service) and bool(self.service.base)
        if not serviceReady:
            return {
                "feedback": "The Coach agent requires an AI service to function.",
                "documents": []
            }

        # Gather the document text, then let the AI classify the request
        extractedText = self._collectExtractedData(sourceDocuments)
        understanding = await self._analyzeTask(userPrompt, extractedText)

        # No explicit outputs requested: derive a single one from the analysis
        if not requestedOutputs:
            fallbackFormat = understanding.get("recommendedFormat", "md")
            fallbackTitle = understanding.get("suggestedFilename", "response")
            requestedOutputs = [{
                "label": f"{fallbackTitle}.{fallbackFormat}",
                "description": "Response to your request"
            }]

        generated = []
        for outputSpec in requestedOutputs:
            label = outputSpec.get("label", "output.txt")
            summary = outputSpec.get("description", "")

            # The file extension decides the output format
            if '.' in label:
                fileFormat = label.split('.')[-1].lower()
            else:
                fileFormat = "txt"

            generated.append(await self._generateDocument(
                userPrompt,
                extractedText,
                label,
                fileFormat,
                summary,
                understanding
            ))

        return {
            "feedback": understanding.get("feedback", "I've created content based on your request."),
            "documents": generated
        }

    except Exception as e:
        logger.error(f"Error in coach processing: {str(e)}", exc_info=True)
        return {
            "feedback": f"Error while processing your request: {str(e)}",
            "documents": []
        }
|
|
||||||
|
|
||||||
def _collectExtractedData(self, documents: List[ChatDocument]) -> str:
|
|
||||||
"""
|
|
||||||
Collect extracted data from input documents.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
documents: List of input documents
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Combined extracted data as text
|
|
||||||
"""
|
|
||||||
contextParts = []
|
|
||||||
|
|
||||||
for doc in documents:
|
|
||||||
docName = doc.name
|
|
||||||
if doc.ext:
|
|
||||||
docName = f"{docName}.{doc.ext}"
|
|
||||||
|
|
||||||
contextParts.append(f"\n\n--- {docName} ---\n")
|
|
||||||
|
|
||||||
# Process contents, focusing on dataExtracted field
|
|
||||||
for content in doc.contents:
|
|
||||||
if content.data:
|
|
||||||
contextParts.append(content.data)
|
|
||||||
|
|
||||||
return "\n".join(contextParts)
|
|
||||||
|
|
||||||
async def _analyzeTask(self, prompt: str, context: str) -> Dict:
|
|
||||||
"""
|
|
||||||
Use AI to analyze the task and develop an understanding of what's required.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
prompt: The task prompt
|
|
||||||
context: Extracted document data
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Task understanding dictionary
|
|
||||||
"""
|
|
||||||
analysisPrompt = f"""
|
|
||||||
Analyze this request to determine the best approach for creating a response.
|
|
||||||
|
|
||||||
REQUEST: {prompt}
|
|
||||||
|
|
||||||
EXTRACTED DATA:
|
|
||||||
{context[:1500]}... (truncated if longer)
|
|
||||||
|
|
||||||
Create a task analysis in JSON format with the following structure:
|
|
||||||
{{
|
|
||||||
"requestType": "question|content|data|report|description",
|
|
||||||
"recommendedFormat": "md|txt|html|csv|json",
|
|
||||||
"suggestedFilename": "appropriate_filename_without_extension",
|
|
||||||
"contentFocus": "brief description of what to focus on",
|
|
||||||
"feedback": "brief explanation of how you'll approach this request",
|
|
||||||
"complexity": "simple|moderate|complex"
|
|
||||||
}}
|
|
||||||
|
|
||||||
Only return valid JSON. No preamble or explanations.
|
|
||||||
"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Get task understanding from AI
|
|
||||||
response = await self.service.base.callAi([
|
|
||||||
{"role": "system", "content": "You are a task analysis expert. Respond with valid JSON only."},
|
|
||||||
{"role": "user", "content": analysisPrompt}
|
|
||||||
])
|
|
||||||
|
|
||||||
# Extract JSON from response
|
|
||||||
jsonStart = response.find('{')
|
|
||||||
jsonEnd = response.rfind('}') + 1
|
|
||||||
|
|
||||||
if jsonStart >= 0 and jsonEnd > jsonStart:
|
|
||||||
taskUnderstanding = json.loads(response[jsonStart:jsonEnd])
|
|
||||||
return taskUnderstanding
|
|
||||||
else:
|
|
||||||
# Fallback if JSON not found
|
|
||||||
return {
|
|
||||||
"requestType": "content",
|
|
||||||
"recommendedFormat": "md",
|
|
||||||
"suggestedFilename": "response",
|
|
||||||
"contentFocus": "Addressing the main request",
|
|
||||||
"feedback": "I've created content based on your request and the provided data.",
|
|
||||||
"complexity": "moderate"
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Error analyzing task: {str(e)}")
|
|
||||||
return {
|
|
||||||
"requestType": "content",
|
|
||||||
"recommendedFormat": "md",
|
|
||||||
"suggestedFilename": "response",
|
|
||||||
"contentFocus": "Addressing the main request",
|
|
||||||
"feedback": "I've created content based on your request and the provided data.",
|
|
||||||
"complexity": "moderate"
|
|
||||||
}
|
|
||||||
|
|
||||||
async def _generateDocument(self, prompt: str, context: str, outputLabel: str,
                            outputFormat: str, description: str, taskUnderstanding: Dict) -> ChatDocument:
    """
    Generate a document based on the request and extracted data.

    The AI service is asked for content in the requested format; if the call
    or the post-processing fails, a document carrying a formatted error
    message is returned instead of raising.

    Args:
        prompt: The task prompt
        context: Extracted document data
        outputLabel: Output filename
        outputFormat: Output format (file extension)
        description: Output description
        taskUnderstanding: Task understanding from analysis ("contentFocus" is read)

    Returns:
        ChatDocument object with a single ChatContent entry ("main" or "error")
    """
    # Strip only the final extension so that dotted labels such as
    # "report.v2.md" keep "report.v2" as their name
    documentName = outputLabel.rsplit('.', 1)[0] if '.' in outputLabel else outputLabel

    def buildDocument(contentName, data, summary, metadata):
        """Wrap text in a ChatDocument that holds one ChatContent entry."""
        return ChatDocument(
            id=str(uuid.uuid4()),
            name=documentName,
            ext=outputFormat,
            data=data,
            contents=[
                ChatContent(
                    name=contentName,
                    data=data,
                    summary=summary,
                    metadata=metadata
                )
            ]
        )

    # Build prompt based on output format
    generationPrompt = f"""
Create a response to the following request in {outputFormat} format:

REQUEST: {prompt}

EXTRACTED DATA:
{context}

OUTPUT REQUIREMENTS:
- Filename: {outputLabel}
- Format: {outputFormat}
- Description: {description}
- Focus on: {taskUnderstanding.get("contentFocus", "Addressing the main request")}

Guidelines:
1. Create content that directly addresses the request
2. Use the extracted data to inform your response
3. Format the output appropriately for {outputFormat}
4. Be comprehensive but focused
5. Include appropriate formatting, structure, and organization

Only return the content. No explanations or additional text.
"""

    try:
        # Get content from AI
        content = await self.service.base.callAi([
            {"role": "system", "content": f"You are a content generation expert. Create content in {outputFormat} format."},
            {"role": "user", "content": generationPrompt}
        ])

        # Replies are often wrapped in a fenced code block; keep only the payload
        content = self._extractFromCodeBlocks(content, outputFormat)

        return buildDocument("main", content, description, {"format": outputFormat})

    except Exception as e:
        logger.error(f"Error generating document: {str(e)}")
        errorContent = self._createErrorContent(str(e), outputFormat)
        return buildDocument(
            "error",
            errorContent,
            "Error generating content",
            {"format": outputFormat, "error": str(e)}
        )
|
|
||||||
|
|
||||||
def _getContentType(self, outputFormat: str) -> str:
|
|
||||||
"""
|
|
||||||
Get content type based on format.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
outputFormat: Output format
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Content type
|
|
||||||
"""
|
|
||||||
contentTypeMap = {
|
|
||||||
"md": "text/markdown",
|
|
||||||
"markdown": "text/markdown",
|
|
||||||
"html": "text/html",
|
|
||||||
"txt": "text/plain",
|
|
||||||
"text": "text/plain",
|
|
||||||
"json": "application/json",
|
|
||||||
"csv": "text/csv",
|
|
||||||
"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
||||||
}
|
|
||||||
|
|
||||||
return contentTypeMap.get(outputFormat, "text/plain")
|
|
||||||
|
|
||||||
def _extractFromCodeBlocks(self, content: str, format: str) -> str:
|
|
||||||
"""
|
|
||||||
Extract content from code blocks if present.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
content: Raw content
|
|
||||||
format: Expected format
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Extracted content
|
|
||||||
"""
|
|
||||||
# Check for code blocks
|
|
||||||
codeBlockStart = f"```{format}"
|
|
||||||
if codeBlockStart in content:
|
|
||||||
start = content.find(codeBlockStart) + len(codeBlockStart)
|
|
||||||
end = content.find("```", start)
|
|
||||||
if end > start:
|
|
||||||
return content[start:end].strip()
|
|
||||||
|
|
||||||
# Check for generic code blocks
|
|
||||||
if "```" in content:
|
|
||||||
start = content.find("```") + 3
|
|
||||||
# Skip format identifier if present
|
|
||||||
if content[start:].strip() and not content[start:start+1].isalnum():
|
|
||||||
start = content.find("\n", start) + 1
|
|
||||||
end = content.find("```", start)
|
|
||||||
if end > start:
|
|
||||||
return content[start:end].strip()
|
|
||||||
|
|
||||||
return content
|
|
||||||
|
|
||||||
def _createErrorContent(self, errorMessage: str, outputFormat: str) -> str:
|
|
||||||
"""
|
|
||||||
Create error content in the appropriate format.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
errorMessage: Error message
|
|
||||||
outputFormat: Output format
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Formatted error content
|
|
||||||
"""
|
|
||||||
if outputFormat == "json":
|
|
||||||
return json.dumps({"error": errorMessage})
|
|
||||||
elif outputFormat == "csv":
|
|
||||||
return f"error\n{errorMessage}"
|
|
||||||
elif outputFormat in ["md", "markdown"]:
|
|
||||||
return f"# Error\n\n{errorMessage}"
|
|
||||||
elif outputFormat == "html":
|
|
||||||
return f"<html><body><h1>Error</h1><p>{errorMessage}</p></body></html>"
|
|
||||||
else:
|
|
||||||
return f"Error: {errorMessage}"
|
|
||||||
|
|
||||||
|
|
||||||
# Factory function for the Coach agent
|
|
||||||
def getAgentCoach():
    """Create a new Coach agent and return it."""
    coachAgent = AgentCoach()
    return coachAgent
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,537 +0,0 @@
|
||||||
"""
|
|
||||||
Documentation agent for generating structured documentation.
|
|
||||||
Provides comprehensive documentation generation capabilities.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Dict, Any, List, Optional
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
from datetime import datetime
|
|
||||||
import os
|
|
||||||
import hashlib
|
|
||||||
import base64
|
|
||||||
import uuid
|
|
||||||
import shutil
|
|
||||||
from pathlib import Path
|
|
||||||
import traceback
|
|
||||||
import sys
|
|
||||||
import importlib.util
|
|
||||||
import inspect
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
from modules.workflow.agentBase import AgentBase
|
|
||||||
from modules.interfaces.serviceChatModel import ChatContent
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class AgentDocumentation(AgentBase):
|
|
||||||
"""AI-driven agent for creating documentation and structured content using multi-step generation"""
|
|
||||||
|
|
||||||
def __init__(self):
    """
    Set up the documentation agent's identity and the capabilities it advertises.

    Runs the parent initializer first, then assigns name, label,
    description and the capability list on the instance.
    """
    super().__init__()

    # Identity shown for this agent
    self.name = "documentation"
    self.label = "Documentation"
    self.description = (
        "Creates structured documentation, reports, and content "
        "using AI with multi-step generation"
    )

    # Capability identifiers, kept in their original order
    advertised = (
        "report_generation",
        "documentation",
        "content_structuring",
        "technical_writing",
        "knowledge_organization",
    )
    self.capabilities = list(advertised)
|
|
||||||
|
|
||||||
def setDependencies(self, serviceBase=None):
    """
    Hand the external service over to the agent.

    Args:
        serviceBase: Service object passed on to setService (None when omitted)
    """
    # Pure pass-through; setService receives the value unchanged
    self.setService(serviceBase)
|
|
||||||
|
|
||||||
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
    """
    Produce the documents a task asks for, one multi-step generation per output.

    Args:
        task: Task dictionary; the keys "prompt", "inputDocuments" and
            "outputSpecifications" are read

    Returns:
        Dictionary with a "feedback" message and the list of generated "documents"
    """
    try:
        # Pieces of the task that drive the generation
        userPrompt = task.get("prompt", "")
        sourceDocuments = task.get("inputDocuments", [])
        requestedOutputs = task.get("outputSpecifications", [])

        # Without an AI backend there is nothing to generate
        serviceReady = bool(self.service) and bool(self.service.base)
        if not serviceReady:
            return {
                "feedback": "The Documentation agent requires an AI service to function.",
                "documents": []
            }

        # Gather the document text, then let the AI draft a plan
        contextText = self._extractDocumentContext(sourceDocuments)
        plan = await self._analyzeTask(userPrompt, contextText, requestedOutputs)
        logger.debug(f"Documentation plan: {plan}")

        # No explicit outputs requested: derive a single one from the plan
        if not requestedOutputs:
            fallbackFormat = plan.get("recommendedFormat", "markdown")
            fallbackTitle = self._sanitizeFilename(plan.get("title", "Documentation"))
            requestedOutputs = [
                {"label": f"{fallbackTitle}.{fallbackFormat}", "description": "Comprehensive documentation"}
            ]

        # Outputs are generated strictly one after another
        generated = [
            await self._createDocumentMultiStep(
                userPrompt,
                contextText,
                outputSpec.get("label", ""),
                outputSpec.get("description", ""),
                plan
            )
            for outputSpec in requestedOutputs
        ]

        return {
            "feedback": plan.get("feedback", f"Created {len(generated)} documents based on your requirements."),
            "documents": generated
        }

    except Exception as e:
        logger.error(f"Error in documentation generation: {str(e)}", exc_info=True)
        return {
            "feedback": f"Error during documentation generation: {str(e)}",
            "documents": []
        }
|
|
||||||
|
|
||||||
def _extractDocumentContext(self, documents: List[Dict[str, Any]]) -> str:
|
|
||||||
"""
|
|
||||||
Extract context from input documents, focusing on dataExtracted.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
documents: List of document objects
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Extracted context as text
|
|
||||||
"""
|
|
||||||
contextParts = []
|
|
||||||
|
|
||||||
for doc in documents:
|
|
||||||
docName = doc.get("name", "unnamed")
|
|
||||||
if doc.get("ext"):
|
|
||||||
docName = f"{docName}.{doc.get('ext')}"
|
|
||||||
|
|
||||||
contextParts.append(f"\n\n--- {docName} ---\n")
|
|
||||||
|
|
||||||
# Process contents for dataExtracted
|
|
||||||
for content in doc.get("contents", []):
|
|
||||||
if content.get("dataExtracted"):
|
|
||||||
contextParts.append(content.get("dataExtracted", ""))
|
|
||||||
|
|
||||||
return "\n".join(contextParts)
|
|
||||||
|
|
||||||
def _sanitizeFilename(self, filename: str) -> str:
|
|
||||||
"""
|
|
||||||
Sanitize a filename by removing invalid characters.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
filename: Filename to sanitize
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Sanitized filename
|
|
||||||
"""
|
|
||||||
# Replace invalid characters with underscores
|
|
||||||
invalidChars = r'<>:"/\|?*'
|
|
||||||
for char in invalidChars:
|
|
||||||
filename = filename.replace(char, '_')
|
|
||||||
|
|
||||||
# Trim filename if too long
|
|
||||||
if len(filename) > 100:
|
|
||||||
filename = filename[:97] + "..."
|
|
||||||
|
|
||||||
return filename
|
|
||||||
|
|
||||||
async def _analyzeTask(self, prompt: str, context: str, outputSpecs: List) -> Dict:
|
|
||||||
"""
|
|
||||||
Use AI to analyze the task and create a documentation plan.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
prompt: The task prompt
|
|
||||||
context: Document context
|
|
||||||
outputSpecs: Output specifications
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Documentation plan dictionary
|
|
||||||
"""
|
|
||||||
analysisPrompt = f"""
|
|
||||||
Analyze this documentation task and create a detailed plan.
|
|
||||||
|
|
||||||
TASK: {prompt}
|
|
||||||
|
|
||||||
DOCUMENT CONTEXT SAMPLE:
|
|
||||||
{context[:1000]}... (truncated)
|
|
||||||
|
|
||||||
OUTPUT REQUIREMENTS:
|
|
||||||
{json.dumps(outputSpecs, indent=2)}
|
|
||||||
|
|
||||||
Create a detailed documentation plan in JSON format with the following structure:
|
|
||||||
{{
|
|
||||||
"title": "Document Title",
|
|
||||||
"documentType": "report|manual|guide|whitepaper|etc",
|
|
||||||
"audience": "technical|general|executive|etc",
|
|
||||||
"detailedStructure": [
|
|
||||||
{{
|
|
||||||
"title": "Chapter/Section Title",
|
|
||||||
"keyPoints": ["point1", "point2", ...],
|
|
||||||
"subsections": ["subsection1", "subsection2", ...],
|
|
||||||
"importance": "high|medium|low",
|
|
||||||
"estimatedLength": "short|medium|long"
|
|
||||||
}},
|
|
||||||
... more sections ...
|
|
||||||
],
|
|
||||||
"keyTopics": ["topic1", "topic2", ...],
|
|
||||||
"tone": "formal|conversational|instructional|etc",
|
|
||||||
"recommendedFormat": "markdown|html|text|etc",
|
|
||||||
"formattingRequirements": ["requirement1", "requirement2", ...],
|
|
||||||
"executiveSummary": "Brief description of what the document will cover",
|
|
||||||
"feedback": "Brief message explaining the documentation approach"
|
|
||||||
}}
|
|
||||||
|
|
||||||
Only return valid JSON. No preamble or explanations.
|
|
||||||
"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = await self.service.base.callAi([
|
|
||||||
{"role": "system", "content": "You are a documentation expert. Respond with valid JSON only."},
|
|
||||||
{"role": "user", "content": analysisPrompt}
|
|
||||||
])
|
|
||||||
|
|
||||||
# Extract JSON from response
|
|
||||||
jsonStart = response.find('{')
|
|
||||||
jsonEnd = response.rfind('}') + 1
|
|
||||||
|
|
||||||
if jsonStart >= 0 and jsonEnd > jsonStart:
|
|
||||||
plan = json.loads(response[jsonStart:jsonEnd])
|
|
||||||
return plan
|
|
||||||
else:
|
|
||||||
# Fallback if JSON not found
|
|
||||||
return {
|
|
||||||
"title": "Documentation (DEFAULT)",
|
|
||||||
"documentType": "report",
|
|
||||||
"audience": "general",
|
|
||||||
"detailedStructure": [
|
|
||||||
{
|
|
||||||
"title": "Introduction",
|
|
||||||
"keyPoints": ["Purpose", "Scope"],
|
|
||||||
"subsections": [],
|
|
||||||
"importance": "high",
|
|
||||||
"estimatedLength": "short"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "Main Content",
|
|
||||||
"keyPoints": ["Core Information"],
|
|
||||||
"subsections": ["Key Findings", "Analysis"],
|
|
||||||
"importance": "high",
|
|
||||||
"estimatedLength": "long"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "Conclusion",
|
|
||||||
"keyPoints": ["Summary", "Next Steps"],
|
|
||||||
"subsections": [],
|
|
||||||
"importance": "medium",
|
|
||||||
"estimatedLength": "short"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"keyTopics": ["General Information"],
|
|
||||||
"tone": "formal",
|
|
||||||
"recommendedFormat": "markdown",
|
|
||||||
"formattingRequirements": ["Clear headings", "Professional formatting"],
|
|
||||||
"executiveSummary": "A comprehensive documentation covering the requested topics.",
|
|
||||||
"feedback": "Created documentation based on your requirements."
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Error creating documentation plan: {str(e)}")
|
|
||||||
return {
|
|
||||||
"title": "Documentation",
|
|
||||||
"documentType": "report",
|
|
||||||
"audience": "general",
|
|
||||||
"detailedStructure": [
|
|
||||||
{
|
|
||||||
"title": "Introduction",
|
|
||||||
"keyPoints": ["Purpose", "Scope"],
|
|
||||||
"subsections": [],
|
|
||||||
"importance": "high",
|
|
||||||
"estimatedLength": "short"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "Main Content",
|
|
||||||
"keyPoints": ["Core Information"],
|
|
||||||
"subsections": ["Key Findings", "Analysis"],
|
|
||||||
"importance": "high",
|
|
||||||
"estimatedLength": "long"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "Conclusion",
|
|
||||||
"keyPoints": ["Summary", "Next Steps"],
|
|
||||||
"subsections": [],
|
|
||||||
"importance": "medium",
|
|
||||||
"estimatedLength": "short"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"keyTopics": ["General Information"],
|
|
||||||
"tone": "formal",
|
|
||||||
"recommendedFormat": "markdown",
|
|
||||||
"formattingRequirements": ["Clear headings", "Professional formatting"],
|
|
||||||
"executiveSummary": "A comprehensive documentation covering the requested topics.",
|
|
||||||
"feedback": "Created documentation based on your requirements."
|
|
||||||
}
|
|
||||||
|
|
||||||
async def _createDocumentMultiStep(self, prompt: str, context: str, outputLabel: str,
|
|
||||||
outputDescription: str, documentationPlan: Dict) -> ChatContent:
|
|
||||||
"""
|
|
||||||
Create a document using a multi-step approach with separate AI calls for each section.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
prompt: Original task prompt
|
|
||||||
context: Document context
|
|
||||||
outputLabel: Output filename
|
|
||||||
outputDescription: Description of desired output
|
|
||||||
documentationPlan: Documentation plan from AI
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
ChatContent object
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Determine format from filename
|
|
||||||
formatType = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "md"
|
|
||||||
|
|
||||||
# Map format to contentType
|
|
||||||
contentTypeMap = {
|
|
||||||
"md": "text/markdown",
|
|
||||||
"markdown": "text/markdown",
|
|
||||||
"html": "text/html",
|
|
||||||
"txt": "text/plain",
|
|
||||||
"text": "text/plain",
|
|
||||||
"json": "application/json",
|
|
||||||
"csv": "text/csv"
|
|
||||||
}
|
|
||||||
|
|
||||||
contentType = contentTypeMap.get(formatType, "text/plain")
|
|
||||||
|
|
||||||
# Get document information
|
|
||||||
title = documentationPlan.get("title", "Documentation")
|
|
||||||
documentType = documentationPlan.get("documentType", "document")
|
|
||||||
audience = documentationPlan.get("audience", "general")
|
|
||||||
tone = documentationPlan.get("tone", "formal")
|
|
||||||
keyTopics = documentationPlan.get("keyTopics", [])
|
|
||||||
formattingRequirements = documentationPlan.get("formattingRequirements", [])
|
|
||||||
|
|
||||||
# Get the detailed structure
|
|
||||||
detailedStructure = documentationPlan.get("detailedStructure", [])
|
|
||||||
|
|
||||||
# Step 1: Generate executive summary
|
|
||||||
summaryPrompt = f"""
|
|
||||||
Create an executive summary for a {documentType} titled "{title}".
|
|
||||||
|
|
||||||
DOCUMENT OVERVIEW:
|
|
||||||
- Type: {documentType}
|
|
||||||
- Audience: {audience}
|
|
||||||
- Key Topics: {', '.join(keyTopics)}
|
|
||||||
|
|
||||||
TASK CONTEXT: {prompt}
|
|
||||||
|
|
||||||
The executive summary should:
|
|
||||||
1. Provide a concise overview of the document's purpose
|
|
||||||
2. Highlight key points and findings
|
|
||||||
3. Be clear and engaging for the target audience
|
|
||||||
4. Set expectations for the document's content
|
|
||||||
|
|
||||||
Keep the summary brief but comprehensive.
|
|
||||||
"""
|
|
||||||
|
|
||||||
executiveSummary = await self.service.base.callAi([
|
|
||||||
{"role": "system", "content": f"You are a documentation expert creating an executive summary in {formatType} format."},
|
|
||||||
{"role": "user", "content": summaryPrompt}
|
|
||||||
], produceUserAnswer = True)
|
|
||||||
|
|
||||||
# Step 2: Generate introduction
|
|
||||||
introPrompt = f"""
|
|
||||||
Create an introduction for a {documentType} titled "{title}".
|
|
||||||
|
|
||||||
DOCUMENT OVERVIEW:
|
|
||||||
- Type: {documentType}
|
|
||||||
- Audience: {audience}
|
|
||||||
- Key Topics: {', '.join(keyTopics)}
|
|
||||||
|
|
||||||
TASK CONTEXT: {prompt}
|
|
||||||
|
|
||||||
The introduction should:
|
|
||||||
1. Set the context and purpose of the document
|
|
||||||
2. Outline the scope and objectives
|
|
||||||
3. Preview the main topics to be covered
|
|
||||||
4. Engage the reader's interest
|
|
||||||
|
|
||||||
Format the introduction according to {formatType} standards.
|
|
||||||
"""
|
|
||||||
|
|
||||||
introduction = await self.service.base.callAi([
|
|
||||||
{"role": "system", "content": f"You are a documentation expert creating an introduction in {formatType} format."},
|
|
||||||
{"role": "user", "content": introPrompt}
|
|
||||||
], produceUserAnswer = True)
|
|
||||||
|
|
||||||
# Step 3: Generate main sections
|
|
||||||
sections = []
|
|
||||||
for section in detailedStructure:
|
|
||||||
sectionTitle = section.get("title", "Section")
|
|
||||||
keyPoints = section.get("keyPoints", [])
|
|
||||||
subsections = section.get("subsections", [])
|
|
||||||
importance = section.get("importance", "medium")
|
|
||||||
estimatedLength = section.get("estimatedLength", "medium")
|
|
||||||
|
|
||||||
sectionPrompt = f"""
|
|
||||||
Create the {sectionTitle} section for a {documentType} titled "{title}".
|
|
||||||
|
|
||||||
SECTION DETAILS:
|
|
||||||
- Title: {sectionTitle}
|
|
||||||
- Key Points: {', '.join(keyPoints)}
|
|
||||||
- Subsections: {', '.join(subsections)}
|
|
||||||
- Importance: {importance}
|
|
||||||
- Estimated Length: {estimatedLength}
|
|
||||||
|
|
||||||
DOCUMENT CONTEXT:
|
|
||||||
- Type: {documentType}
|
|
||||||
- Audience: {audience}
|
|
||||||
- Key Topics: {', '.join(keyTopics)}
|
|
||||||
|
|
||||||
TASK CONTEXT: {prompt}
|
|
||||||
|
|
||||||
The section should:
|
|
||||||
1. Cover all key points thoroughly
|
|
||||||
2. Include relevant subsections
|
|
||||||
3. Maintain appropriate depth based on importance
|
|
||||||
4. Follow the document's tone and style
|
|
||||||
|
|
||||||
Format the section according to {formatType} standards.
|
|
||||||
"""
|
|
||||||
|
|
||||||
sectionContent = await self.service.base.callAi([
|
|
||||||
{"role": "system", "content": f"You are a documentation expert creating a section in {formatType} format."},
|
|
||||||
{"role": "user", "content": sectionPrompt}
|
|
||||||
], produceUserAnswer = True)
|
|
||||||
|
|
||||||
sections.append(sectionContent)
|
|
||||||
|
|
||||||
# Step 4: Generate conclusion
|
|
||||||
conclusionPrompt = f"""
|
|
||||||
Create the conclusion for a {documentType} titled "{title}".
|
|
||||||
|
|
||||||
DOCUMENT OVERVIEW:
|
|
||||||
- Type: {documentType}
|
|
||||||
- Audience: {audience}
|
|
||||||
- Key Topics: {', '.join(keyTopics)}
|
|
||||||
|
|
||||||
TASK CONTEXT: {prompt}
|
|
||||||
|
|
||||||
This conclusion should:
|
|
||||||
1. Summarize the key points covered in the document
|
|
||||||
2. Provide closure to the topics discussed
|
|
||||||
3. Include any relevant recommendations or next steps
|
|
||||||
4. Leave the reader with a clear understanding of the document's significance
|
|
||||||
|
|
||||||
The conclusion should be professional and impactful, formatted according to {formatType} standards.
|
|
||||||
"""
|
|
||||||
|
|
||||||
conclusion = await self.service.base.callAi([
|
|
||||||
{"role": "system", "content": f"You are a documentation expert creating a conclusion in {formatType} format."},
|
|
||||||
{"role": "user", "content": conclusionPrompt}
|
|
||||||
], produceUserAnswer = True)
|
|
||||||
|
|
||||||
# Step 5: Assemble the complete document
|
|
||||||
if formatType in ["md", "markdown"]:
|
|
||||||
# Markdown format
|
|
||||||
documentContent = f"# {title}\n\n"
|
|
||||||
|
|
||||||
if executiveSummary:
|
|
||||||
documentContent += f"## Executive Summary\n\n{executiveSummary}\n\n"
|
|
||||||
|
|
||||||
documentContent += f"{introduction}\n\n"
|
|
||||||
|
|
||||||
for i, sectionContent in enumerate(sections):
|
|
||||||
# Ensure section starts with heading if not already
|
|
||||||
sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
|
|
||||||
if not sectionContent.strip().startswith("#"):
|
|
||||||
documentContent += f"## {sectionTitle}\n\n"
|
|
||||||
documentContent += f"{sectionContent}\n\n"
|
|
||||||
|
|
||||||
documentContent += f"## Conclusion\n\n{conclusion}\n"
|
|
||||||
|
|
||||||
elif formatType == "html":
|
|
||||||
# HTML format
|
|
||||||
documentContent = f"<html>\n<head>\n<title>{title}</title>\n</head>\n<body>\n"
|
|
||||||
documentContent += f"<h1>{title}</h1>\n\n"
|
|
||||||
|
|
||||||
if executiveSummary:
|
|
||||||
documentContent += f"<h2>Executive Summary</h2>\n<div>{executiveSummary}</div>\n\n"
|
|
||||||
|
|
||||||
documentContent += f"<div>{introduction}</div>\n\n"
|
|
||||||
|
|
||||||
for i, sectionContent in enumerate(sections):
|
|
||||||
sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
|
|
||||||
documentContent += f"<h2>{sectionTitle}</h2>\n<div>{sectionContent}</div>\n\n"
|
|
||||||
|
|
||||||
documentContent += f"<h2>Conclusion</h2>\n<div>{conclusion}</div>\n"
|
|
||||||
documentContent += "</body>\n</html>"
|
|
||||||
|
|
||||||
else:
|
|
||||||
# Plain text format
|
|
||||||
documentContent = f"{title}\n{'=' * len(title)}\n\n"
|
|
||||||
|
|
||||||
if executiveSummary:
|
|
||||||
documentContent += f"EXECUTIVE SUMMARY\n{'-' * 17}\n\n{executiveSummary}\n\n"
|
|
||||||
|
|
||||||
documentContent += f"{introduction}\n\n"
|
|
||||||
|
|
||||||
for i, sectionContent in enumerate(sections):
|
|
||||||
sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
|
|
||||||
documentContent += f"{sectionTitle}\n{'-' * len(sectionTitle)}\n\n{sectionContent}\n\n"
|
|
||||||
|
|
||||||
documentContent += f"CONCLUSION\n{'-' * 10}\n\n{conclusion}\n"
|
|
||||||
|
|
||||||
# Create document object
|
|
||||||
return self.formatAgentDocumentOutput(outputLabel, documentContent, contentType)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error creating document: {str(e)}", exc_info=True)
|
|
||||||
|
|
||||||
# Create a simple error document
|
|
||||||
if formatType in ["md", "markdown"]:
|
|
||||||
content = f"# Error in Documentation\n\nThere was an error generating the documentation: {str(e)}"
|
|
||||||
elif formatType == "html":
|
|
||||||
content = f"<html><body><h1>Error in Documentation</h1><p>There was an error generating the documentation: {str(e)}</p></body></html>"
|
|
||||||
else:
|
|
||||||
content = f"Error in Documentation\n\nThere was an error generating the documentation: {str(e)}"
|
|
||||||
|
|
||||||
return self.formatAgentDocumentOutput(outputLabel, content, contentType)
|
|
||||||
|
|
||||||
|
|
||||||
# Factory function for the Documentation agent
|
|
||||||
def getAgentDocumentation():
    """Build and return a new AgentDocumentation instance.

    Every call constructs a separate object; nothing is cached here.
    """
    documentationAgent = AgentDocumentation()
    return documentationAgent
|
|
||||||
|
|
@ -1,380 +0,0 @@
|
||||||
"""
|
|
||||||
Email Agent Module.
|
|
||||||
Handles email-related tasks using Microsoft Graph API.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import json
|
|
||||||
from typing import Dict, Any, List, Optional, Tuple
|
|
||||||
import uuid
|
|
||||||
import os
|
|
||||||
|
|
||||||
from modules.workflow.agentBase import AgentBase
|
|
||||||
from modules.interfaces.serviceChatModel import Task, ChatDocument, ChatContent
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class AgentEmail(AgentBase):
|
|
||||||
"""Agent for handling email-related tasks."""
|
|
||||||
|
|
||||||
def __init__(self):
    """Set up the email agent's identity, capability tags and dependency slot."""
    super().__init__()

    # Identity fields describing this agent.
    self.name = "email"
    self.label = "Email Agent"
    self.description = "Handles email composition and sending using Microsoft Graph API"

    # Capability tags advertised by this agent.
    emailCapabilities = [
        "email_composition",
        "email_draft_creation",
        "email_template_generation",
    ]
    self.capabilities = emailCapabilities

    # Starts empty; setDependencies() assigns the actual value.
    self.serviceBase = None
|
|
||||||
|
|
||||||
def setDependencies(self, serviceBase=None):
    """Store the externally supplied service base on the agent.

    Args:
        serviceBase: Dependency object kept on the instance; defaults to None.
    """
    setattr(self, "serviceBase", serviceBase)
|
|
||||||
|
|
||||||
async def processTask(self, task: Task) -> Dict[str, Any]:
    """
    Process an email-related task.

    Generates an email template through the AI service, asks the Microsoft
    connector to store it as a draft and builds one output document per
    requested output specification.

    Args:
        task: Task object; this method reads:
            - prompt: Instructions for the agent
            - filesInput: Documents to describe in the prompt and attach
            - filesOutput: Output specifications (dicts with a "label" key)

    Returns:
        Dictionary containing:
            - feedback: Text response explaining what was done
            - documents: List of created documents
    """
    try:
        prompt = task.prompt
        # Either list may be unset on the task; treat None as "nothing to do".
        inputDocuments = task.filesInput or []
        outputSpecs = task.filesOutput or []

        # Check AI service
        if not self.service.base:
            return {
                "feedback": "The Email agent requires an AI service to function.",
                "documents": []
            }

        # Check if Microsoft connector is available
        if not hasattr(self.service, 'msft'):
            return {
                "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.",
                "documents": []
            }

        # Only the presence of a token is checked here; its value is not used.
        if not self.service.msft.getMsftToken():
            auth_doc = self._createFrontendAuthTriggerDocument()
            return {
                "feedback": "Microsoft authentication required. Please authenticate to continue.",
                "documents": [auth_doc]
            }

        # Extract document data from input
        documentContents, attachments = self._processInputDocuments(inputDocuments)

        # Generate email subject and body using AI
        emailTemplate = await self._generateEmailTemplate(prompt, documentContents)

        # The template is parsed from AI-generated JSON, so any field may be
        # absent. Use the same placeholder defaults as _createHtmlPreview
        # instead of failing the whole task with a KeyError.
        recipient = emailTemplate.get("recipient", "recipient@example.com")
        subject = emailTemplate.get("subject", "No Subject")
        htmlBody = emailTemplate.get("htmlBody", "<p>No content</p>")

        # Create HTML preview of the email
        htmlPreview = self._createHtmlPreview(emailTemplate)

        # Attempt to create a draft email through the Microsoft connector
        draft_result = self.service.msft.createDraftEmail(
            recipient,
            subject,
            htmlBody,
            attachments
        )

        # One output document per specification, chosen by label suffix.
        documents = []
        for spec in outputSpecs:
            label = spec.get("label", "")

            if label.endswith(".html"):
                # The actual HTML body, not the preview page
                payload, mimeType = htmlBody, "text/html"
            elif label.endswith(".json"):
                payload, mimeType = json.dumps(emailTemplate, indent=2), "application/json"
            else:
                # Default to the preview for every other label
                payload, mimeType = htmlPreview, "text/html"

            documents.append(self.formatAgentDocumentOutput(label, payload, mimeType))

        # Prepare feedback message
        if draft_result:
            feedback = f"Email draft created successfully for {recipient}. The subject is: '{subject}'"
            if attachments:
                feedback += f" with {len(attachments)} attachment(s)"
            feedback += ". You can open and edit it in your Outlook draft folder."
        else:
            feedback = "Email template created but could not save as draft. HTML preview and template are available as documents."

        return {
            "feedback": feedback,
            "documents": documents
        }

    except Exception as e:
        # Top-level boundary of the agent: log with traceback, report as feedback.
        logger.error(f"Error in email agent: {str(e)}", exc_info=True)
        return {
            "feedback": f"Error processing email task: {str(e)}",
            "documents": []
        }
|
|
||||||
|
|
||||||
def _createFrontendAuthTriggerDocument(self) -> ChatDocument:
    """Build the HTML document that asks the frontend to start Microsoft sign-in.

    The same markup is stored as the document data and as the data of its
    single "main" content entry.
    """
    authMarkup = """
        <div>
        <h2>Microsoft Authentication Required</h2>
        <p>Please click the button below to authenticate with Microsoft:</p>
        <button onclick="window.location.href='/api/auth/microsoft'">Authenticate with Microsoft</button>
        </div>
        """

    mainEntry = ChatContent(
        name="main",
        data=authMarkup,
        summary="Microsoft authentication trigger page",
        metadata={
            "contentType": "text/html",
            "isText": True
        }
    )

    return ChatDocument(
        id=str(uuid.uuid4()),
        name="microsoft_auth",
        ext="html",
        data=authMarkup,
        contents=[mainEntry]
    )
|
|
||||||
|
|
||||||
def _processInputDocuments(self, input_docs: List[ChatDocument]) -> Tuple[str, List[Dict[str, Any]]]:
    """
    Process input documents to extract content and prepare attachments.

    Every document contributes a header line to the text. Documents carrying
    data also become attachments; documents without data are only mentioned.

    Args:
        input_docs: List of input documents; None or an empty list yields
            an empty text and no attachments

    Returns:
        Tuple of (document content text, list of attachments)
    """
    documentContents = []
    attachments = []

    for doc in input_docs or []:
        docName = f"{doc.name}.{doc.ext}" if doc.ext else doc.name

        # Add document name to contents
        documentContents.append(f"\n\n--- {docName} ---\n")

        if not doc.data:
            documentContents.append(f"Document referenced: {docName}")
            continue

        # Metadata of the first content entry describes the payload; fall
        # back to an empty mapping when there is no entry or no metadata.
        metadata = (doc.contents[0].metadata or {}) if doc.contents else {}

        attachments.append({
            "name": docName,
            "document": {
                "data": doc.data,
                "mimeType": metadata.get("contentType", "application/octet-stream"),
                "base64Encoded": metadata.get("base64Encoded", False)
            }
        })
        documentContents.append(f"Document attached: {docName}")

    return "\n".join(documentContents), attachments
|
|
||||||
|
|
||||||
def formatAgentDocumentOutput(self, filename: str, content: str, contentType: str) -> ChatDocument:
    """
    Wrap generated content in a ChatDocument.

    Args:
        filename: Output filename; split into base name and extension
        content: Document content, used for the document itself and for
            its single "main" content entry
        contentType: MIME type stored in the content entry's metadata

    Returns:
        ChatDocument object with a freshly generated UUID as id
    """
    baseName, suffix = os.path.splitext(filename)
    # splitext keeps the leading dot on the extension; the document stores it without.
    extension = suffix[1:] if suffix.startswith('.') else suffix

    mainEntry = ChatContent(
        name="main",
        data=content,
        summary=f"Generated {filename}",
        metadata={"contentType": contentType}
    )

    return ChatDocument(
        id=str(uuid.uuid4()),
        name=baseName,
        ext=extension,
        data=content,
        contents=[mainEntry]
    )
|
|
||||||
|
|
||||||
async def _generateEmailTemplate(self, prompt: str, documentContents: str) -> Dict[str, Any]:
    """
    Generate email template using AI.

    The AI answer is searched for the outermost ``{ ... }`` span, which is
    parsed as JSON. If no such span exists, or anything raises, a generic
    placeholder template built from the prompt is returned instead.

    Args:
        prompt: The task prompt
        documentContents: Extracted document content; only the first
            2000 characters are sent to the AI

    Returns:
        Email template dictionary. The fallback contains recipient, subject,
        plainBody and htmlBody; an AI-provided template contains whatever
        fields the AI returned.
    """
    import html  # function-scope import: only the fallback path needs it

    def fallbackTemplate() -> Dict[str, Any]:
        """Placeholder template used whenever no usable AI answer exists."""
        # The prompt is free text from the task; escape it before embedding
        # it in markup so it cannot inject tags into the email body.
        safePrompt = html.escape(prompt)
        return {
            "recipient": "recipient@example.com",
            "subject": "Information Regarding Your Request",
            "plainBody": f"This email is regarding your request: {prompt}",
            "htmlBody": f"<html><body><p>This email is regarding your request: {safePrompt}</p></body></html>"
        }

    emailPrompt = f"""
        Create an email based on the following request:

        REQUEST: {prompt}

        DOCUMENT CONTENTS:
        {documentContents[:2000]}... (truncated if longer)

        Generate an email template with:
        1. A relevant recipient (use placeholder or derive from content if possible)
        2. A concise but descriptive subject line
        3. A professional HTML-formatted email body
        4. Appropriate greeting and closing

        Format your response as JSON with these fields:
        - recipient: email address
        - subject: subject line
        - plainBody: plain text version
        - htmlBody: HTML formatted version

        Only return valid JSON. No preamble or explanations.
        """

    try:
        response = await self.service.base.callAi([
            {"role": "system", "content": "You are an email template specialist. Create professional emails. Respond with valid JSON only."},
            {"role": "user", "content": emailPrompt}
        ])

        # Extract JSON from response: first '{' through last '}'
        jsonStart = response.find('{')
        jsonEnd = response.rfind('}') + 1

        if jsonStart >= 0 and jsonEnd > jsonStart:
            return json.loads(response[jsonStart:jsonEnd])

        logger.warning("Not able creating email template, generating fallback plan")
        return fallbackTemplate()

    except Exception as e:
        # Best effort: any failure (AI call, JSON parsing) yields the fallback.
        logger.warning(f"Error generating email template: {str(e)}")
        return fallbackTemplate()
|
|
||||||
|
|
||||||
def _createHtmlPreview(self, emailTemplate: Dict[str, Any]) -> str:
    """
    Create an HTML preview of the email template.

    Recipient and subject are HTML-escaped before insertion. The htmlBody
    field is inserted verbatim, since the template asks for it to be
    HTML-formatted already.

    Args:
        emailTemplate: Email template dictionary; reads "recipient",
            "subject" and "htmlBody", each with a placeholder default

    Returns:
        HTML string for preview
    """
    from html import escape  # function-scope import keeps this block self-contained

    lookup = emailTemplate.get
    # str() guards against non-string values coming out of parsed JSON.
    titleSubject = escape(str(lookup('subject', 'Email Template')))
    fieldSubject = escape(str(lookup('subject', 'No Subject')))
    recipient = escape(str(lookup('recipient', 'recipient@example.com')))
    bodyMarkup = lookup('htmlBody', '<p>No content</p>')

    preview = f"""
        <!DOCTYPE html>
        <html>
        <head>
        <meta charset="UTF-8">
        <title>Email Preview: {titleSubject}</title>
        <style>
        body {{ font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5; }}
        .email-container {{ max-width: 600px; margin: 20px auto; background-color: white; border: 1px solid #ddd; border-radius: 5px; overflow: hidden; }}
        .email-header {{ background-color: #f0f0f0; padding: 15px; border-bottom: 1px solid #ddd; }}
        .email-content {{ padding: 20px; }}
        .email-footer {{ background-color: #f0f0f0; padding: 15px; border-top: 1px solid #ddd; font-size: 12px; color: #666; }}
        .field {{ margin-bottom: 10px; }}
        .field-label {{ font-weight: bold; color: #555; }}
        .email-body {{ margin-top: 20px; padding-top: 20px; border-top: 1px solid #eee; }}
        </style>
        </head>
        <body>
        <div class="email-container">
        <div class="email-header">
        <h2>Email Template Preview</h2>
        </div>
        <div class="email-content">
        <div class="field">
        <div class="field-label">To:</div>
        <div>{recipient}</div>
        </div>
        <div class="field">
        <div class="field-label">Subject:</div>
        <div>{fieldSubject}</div>
        </div>
        <div class="email-body">
        {bodyMarkup}
        </div>
        </div>
        <div class="email-footer">
        <p>This is a preview of the email template. The actual email may appear differently in various email clients.</p>
        </div>
        </div>
        </body>
        </html>
        """
    return preview
|
|
||||||
|
|
||||||
def getAgentEmail() -> AgentEmail:
    """Create a fresh AgentEmail and hand it back to the caller."""
    emailAgent = AgentEmail()
    return emailAgent
|
|
||||||
|
|
@ -1,348 +0,0 @@
|
||||||
"""
|
|
||||||
SharePoint Agent Module.
|
|
||||||
Handles SharePoint document search and data extraction using Microsoft Graph API.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import json
|
|
||||||
from typing import Dict, Any, List, Optional
|
|
||||||
from modules.workflow.agentBase import AgentBase
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class AgentSharepoint(AgentBase):
|
|
||||||
"""Agent for handling SharePoint document operations."""
|
|
||||||
|
|
||||||
def __init__(self):
    """Set up the SharePoint agent's identity and capability tags."""
    super().__init__()

    # Identity fields describing this agent.
    self.name = "sharepoint"
    self.label = "SharePoint Agent"
    self.description = "Searches and extracts data from SharePoint documents using Microsoft Graph API"

    # Capability tags advertised by this agent.
    sharepointCapabilities = [
        "document_search",
        "content_extraction",
        "metadata_analysis",
        "document_processing",
    ]
    self.capabilities = sharepointCapabilities
|
|
||||||
|
|
||||||
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
    """
    Process a SharePoint-related task.

    Parses a search query from the prompt, runs the search and produces one
    summary document per output specification (JSON, CSV or plain text,
    chosen by the label's suffix).

    Args:
        task: Either a mapping with the keys "prompt" and
            "outputSpecifications", or an object exposing the attributes
            "prompt" and "filesOutput" (the names the email agent reads
            from its Task object). Each output specification is a dict
            with "label" and "description".

    Returns:
        Dictionary containing:
            - feedback: Text response explaining what was done
            - documents: List of created documents
    """
    def readTaskField(dictKey: str, attrName: str, default: Any) -> Any:
        """Read a task field from a mapping key or an object attribute."""
        if isinstance(task, dict):
            value = task.get(dictKey, default)
        else:
            value = getattr(task, attrName, default)
        return default if value is None else value

    try:
        # Extract task information
        prompt = readTaskField("prompt", "prompt", "")
        outputSpecs = readTaskField("outputSpecifications", "filesOutput", [])

        # Check AI service
        if not self.service.base:
            return {
                "feedback": "The SharePoint agent requires an AI service to function.",
                "documents": []
            }

        # Check if Microsoft connector is available
        if not hasattr(self.service, 'msft'):
            return {
                "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.",
                "documents": []
            }

        # Only the presence of a token is checked here; its value is not used.
        if not self.service.msft.getMsftToken():
            auth_doc = self._createFrontendAuthTriggerDocument()
            return {
                "feedback": "Microsoft authentication required. Please authenticate to continue.",
                "documents": [auth_doc]
            }

        # Parse the search query from the prompt, then run the search
        searchQuery = await self._parseSearchQuery(prompt)
        searchResults = await self._searchSharePointDocuments(searchQuery)

        # One summary document per specification, chosen by label suffix.
        documents = []
        for spec in outputSpecs:
            label = spec.get("label", "")
            description = spec.get("description", "")

            if label.endswith(".json"):
                summaryDoc = self._createSearchSummaryJson(searchResults, description)
            elif label.endswith(".csv"):
                summaryDoc = self._createSearchSummaryCsv(searchResults, description)
            else:
                summaryDoc = self._createSearchSummaryText(searchResults, description)
            documents.append(summaryDoc)

        # Prepare feedback message
        feedback = f"Found {len(searchResults)} documents matching your search criteria. "
        if searchResults:
            feedback += "The results have been saved as documents."
        else:
            feedback += "No matching documents were found."

        return {
            "feedback": feedback,
            "documents": documents
        }

    except Exception as e:
        # Top-level boundary of the agent: log with traceback, report as feedback.
        logger.error(f"Error in SharePoint agent: {str(e)}", exc_info=True)
        return {
            "feedback": f"Error processing SharePoint task: {str(e)}",
            "documents": []
        }
|
|
||||||
|
|
||||||
def _createFrontendAuthTriggerDocument(self) -> Dict[str, Any]:
    """Build the HTML document that asks the frontend to start Microsoft sign-in.

    The markup is passed to formatAgentDocumentOutput under the filename
    "microsoft_auth.html" with content type "text/html".
    """
    authMarkup = """
        <div>
        <h2>Microsoft Authentication Required</h2>
        <p>Please click the button below to authenticate with Microsoft:</p>
        <button onclick="window.location.href='/api/auth/microsoft'">Authenticate with Microsoft</button>
        </div>
        """
    return self.formatAgentDocumentOutput("microsoft_auth.html", authMarkup, "text/html")
|
|
||||||
|
|
||||||
async def _parseSearchQuery(self, prompt: str) -> Dict[str, Any]:
    """
    Turn the task prompt into structured search parameters via the AI service.

    The AI answer is searched for the outermost ``{ ... }`` span, which is
    parsed as JSON. If no such span exists, or anything raises, the prompt
    itself is used as the query with a limit of 10 results.

    Args:
        prompt: The task prompt

    Returns:
        Dictionary containing search parameters
    """
    try:
        systemMessage = {
            "role": "system",
            "content": "You are a SharePoint search query parser. Extract search parameters from the user's request."
        }
        userMessage = {"role": "user", "content": f"""
            Parse the following SharePoint search request into structured parameters:

            {prompt}

            Return a JSON object with these fields:
            - query: The main search query
            - site: Optional SharePoint site name
            - folder: Optional folder path
            - fileTypes: List of file types to search for
            - dateRange: Optional date range for filtering
            - maxResults: Maximum number of results to return

            Only return valid JSON. No preamble or explanations.
            """}

        aiAnswer = await self.service.base.callAi([systemMessage, userMessage])

        # Locate the JSON object: first '{' through last '}'
        firstBrace = aiAnswer.find('{')
        afterLastBrace = aiAnswer.rfind('}') + 1
        containsJson = firstBrace >= 0 and afterLastBrace > firstBrace

        if not containsJson:
            # Fallback to simple query
            return {
                "query": prompt,
                "maxResults": 10
            }

        return json.loads(aiAnswer[firstBrace:afterLastBrace])

    except Exception as e:
        logger.warning(f"Error parsing search query: {str(e)}")
        return {
            "query": prompt,
            "maxResults": 10
        }
|
|
||||||
|
|
||||||
async def _searchSharePointDocuments(self, searchParams: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Search SharePoint documents using Microsoft Graph API.

    Lists the drives of the requested site (or the root site), runs a drive
    search in each of them and fetches the content of every hit that passes
    the file-type and folder filters.

    Args:
        searchParams: Search parameters. Keys read here: "query", "site",
            "folder", "fileTypes" and "maxResults" (applied per drive).

    Returns:
        List of result dicts with the keys name, id, driveId, webUrl,
        lastModified, size and content. Empty when no token is available,
        the drive listing is unusable, or any error occurs.
    """
    from urllib.parse import quote  # function-scope import keeps this block self-contained

    try:
        # Only the presence of a token is checked here; its value is not used.
        if not self.service.msft.getMsftToken():
            return []

        # Parameters come from AI-generated JSON, so tolerate null values.
        query = searchParams.get("query") or ""
        site = searchParams.get("site") or ""
        folder = (searchParams.get("folder") or "").strip("/")
        fileTypes = [str(ft).lower().lstrip(".") for ft in (searchParams.get("fileTypes") or [])]
        try:
            maxResults = int(searchParams.get("maxResults") or 10)
        except (TypeError, ValueError):
            maxResults = 10

        # Build drive listing URL
        drivesUrl = "https://graph.microsoft.com/v1.0/sites/root/drives"
        if site:
            drivesUrl = f"https://graph.microsoft.com/v1.0/sites/{site}/drives"

        # Get drives (document libraries)
        response = self.service.msft.makeGraphRequest("GET", drivesUrl)
        if not response or "value" not in response:
            return []

        # A single quote inside an OData string literal is escaped by doubling
        # it; the result is then percent-encoded for use inside the URL.
        encodedQuery = quote(query.replace("'", "''"), safe="")

        results = []
        for drive in response["value"]:
            driveId = drive["id"]
            # The query string must start with '?' and use the '$'-prefixed
            # OData option name. Filters are applied below on the client.
            searchEndpoint = (
                f"https://graph.microsoft.com/v1.0/drives/{driveId}"
                f"/root/search(q='{encodedQuery}')?$top={maxResults}"
            )

            searchResponse = self.service.msft.makeGraphRequest("GET", searchEndpoint)
            if not searchResponse or "value" not in searchResponse:
                continue

            for item in searchResponse["value"]:
                itemId = item.get("id")
                if not itemId:
                    continue

                name = item.get("name") or ""
                if fileTypes:
                    extension = name.rpartition(".")[2].lower() if "." in name else ""
                    if extension not in fileTypes:
                        continue

                if folder:
                    # NOTE(review): assumes parentReference.path looks like
                    # "/drives/{id}/root:/Folder/Sub" — confirm against live
                    # responses. Items that expose no path are kept.
                    parentPath = (item.get("parentReference") or {}).get("path") or ""
                    relativePath = parentPath.partition("root:")[2].strip("/")
                    inFolder = relativePath == folder or relativePath.startswith(folder + "/")
                    if parentPath and not inFolder:
                        continue

                # Get file content
                fileContent = await self._getFileContent(driveId, itemId)

                # .get() with defaults: one incomplete item must not discard
                # every other result through the outer except.
                results.append({
                    "name": name,
                    "id": itemId,
                    "driveId": driveId,
                    "webUrl": item.get("webUrl", ""),
                    "lastModified": item.get("lastModifiedDateTime", ""),
                    "size": item.get("size", 0),
                    "content": fileContent
                })

        return results

    except Exception as e:
        logger.error(f"Error searching SharePoint: {str(e)}")
        return []
|
|
||||||
|
|
||||||
async def _getFileContent(self, driveId: str, fileId: str) -> str:
    """
    Download one drive item and return it as UTF-8 decoded text.

    Args:
        driveId: Drive ID
        fileId: File ID

    Returns:
        File content as string; an empty string when the request returns
        nothing or any error occurs (the error is logged)
    """
    try:
        rawContent = self.service.msft.makeGraphRequest(
            "GET",
            f"https://graph.microsoft.com/v1.0/drives/{driveId}/items/{fileId}/content",
            raw=True
        )
        return rawContent.decode('utf-8') if rawContent else ""

    except Exception as e:
        logger.error(f"Error getting file content: {str(e)}")
        return ""
|
|
||||||
|
|
||||||
def _createSearchSummaryJson(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
    """Render the search results as an indented JSON document.

    Each result is reduced to name, url, lastModified and size. The output
    is always named "sharepoint_search_results.json".
    """
    entries = [
        {
            "name": hit["name"],
            "url": hit["webUrl"],
            "lastModified": hit["lastModified"],
            "size": hit["size"]
        }
        for hit in results
    ]
    summary = {
        "description": description,
        "totalResults": len(results),
        "results": entries
    }
    serialized = json.dumps(summary, indent=2)
    return self.formatAgentDocumentOutput(
        "sharepoint_search_results.json",
        serialized,
        "application/json"
    )
|
|
||||||
|
|
||||||
def _createSearchSummaryCsv(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
    """Render the search results as CSV text with a header row.

    Name, URL and last-modified values are wrapped in double quotes with
    embedded quotes doubled; the size is written unquoted. The description
    argument is not used. The output is always named
    "sharepoint_search_results.csv".
    """
    def quoted(text):
        # Double embedded quotes, then wrap the field in quotes.
        return '"' + text.replace('"', '""') + '"'

    rows = ["Name,URL,Last Modified,Size (bytes)"]
    rows.extend(
        ",".join([
            quoted(hit["name"]),
            quoted(hit["webUrl"]),
            quoted(hit["lastModified"]),
            str(hit["size"]),
        ])
        for hit in results
    )

    return self.formatAgentDocumentOutput(
        "sharepoint_search_results.csv",
        "\n".join(rows),
        "text/csv"
    )
|
|
||||||
|
|
||||||
def _createSearchSummaryText(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
    """Render the search results as a plain-text report.

    A four-line header is followed by one four-line entry per result. The
    output is always named "sharepoint_search_results.txt".
    """
    headerLines = [
        "SharePoint Search Results",
        f"Description: {description}",
        f"Total Results: {len(results)}",
        "\nResults:"
    ]
    entryLines = [
        line
        for hit in results
        for line in (
            f"\nName: {hit['name']}",
            f"URL: {hit['webUrl']}",
            f"Last Modified: {hit['lastModified']}",
            f"Size: {hit['size']} bytes",
        )
    ]
    report = "\n".join(headerLines + entryLines)

    return self.formatAgentDocumentOutput(
        "sharepoint_search_results.txt",
        report,
        "text/plain"
    )
|
|
||||||
|
|
||||||
def getAgentSharepoint() -> AgentSharepoint:
    """Create a fresh AgentSharepoint and hand it back to the caller."""
    sharepointAgent = AgentSharepoint()
    return sharepointAgent
|
|
||||||
|
|
@ -1,814 +0,0 @@
|
||||||
"""
|
|
||||||
Web crawler agent for gathering and analyzing web content.
|
|
||||||
Provides web research and content extraction capabilities.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
import os
|
|
||||||
from typing import Dict, Any, List
|
|
||||||
from urllib.parse import quote_plus, unquote
|
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
import requests
|
|
||||||
import markdown
|
|
||||||
|
|
||||||
from modules.workflow.agentBase import AgentBase
|
|
||||||
from modules.shared.configuration import APP_CONFIG
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class AgentWebcrawler(AgentBase):
|
|
||||||
"""AI-driven agent for web research and information retrieval"""
|
|
||||||
|
|
||||||
def __init__(self):
    """Set up agent identity, advertised capabilities and SerpAPI settings read from APP_CONFIG."""
    super().__init__()

    # Identity
    self.name = "webcrawler"
    self.label = "Web Crawler"
    self.description = "Gathers and analyzes web content using AI with multi-step research"
    self.capabilities = [
        "web_research",
        "content_gathering",
        "data_extraction",
        "information_synthesis",
        "source_verification",
    ]

    # Web crawling configuration (each value falls back to the default given here)
    cfg = APP_CONFIG.get
    self.srcApikey = cfg("Agent_Webcrawler_SERPAPI_APIKEY", "")
    self.srcEngine = cfg("Agent_Webcrawler_SERPAPI_ENGINE", "google")
    self.srcCountry = cfg("Agent_Webcrawler_SERPAPI_COUNTRY", "auto")
    self.maxUrl = int(cfg("Agent_Webcrawler_SERPAPI_MAX_URLS", "5"))
    self.maxSearchTerms = int(cfg("Agent_Webcrawler_SERPAPI_MAX_SEARCH_KEYWORDS", "3"))
    self.maxResults = int(cfg("Agent_Webcrawler_SERPAPI_MAX_SEARCH_RESULTS", "5"))
    self.timeout = int(cfg("Agent_Webcrawler_SERPAPI_TIMEOUT", "30"))
    self.userAgent = cfg(
        "Agent_Webcrawler_SERPAPI_USER_AGENT",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    )

    # _searchWeb returns no results without a key, so make the misconfiguration visible
    if not self.srcApikey:
        logger.error("SerpAPI key not configured")
|
|
||||||
|
|
||||||
def setDependencies(self, serviceBase=None):
    """
    Attach external dependencies to the agent.

    Args:
        serviceBase: Service object forwarded unchanged to self.setService
    """
    self.setService(serviceBase)
    return None
|
|
||||||
|
|
||||||
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
    """
    Run the full web-research pipeline for one task: plan, gather, format.

    Args:
        task: Task dictionary with prompt, outputSpecifications and an optional
            context.workflow used for progress logging

    Returns:
        Dictionary with "feedback" (text) and "documents" (list); on any error the
        feedback carries the error message and the document list is empty
    """
    try:
        prompt = task.get("prompt", "")
        outputSpecs = task.get("outputSpecifications", [])
        workflow = task.get("context", {}).get("workflow", {})

        # Nothing works without the AI service
        if not (self.service and self.service.base):
            return {
                "feedback": "The Web Crawler agent requires an AI service to function.",
                "documents": []
            }

        def announce(message: str, progress: int) -> None:
            # Progress is only reported when the task belongs to a workflow
            if workflow:
                self.service.logAdd(workflow, message, level="info", progress=progress)

        # Step 1: research plan
        announce("Creating research plan...", 35)
        researchPlan = await self._createResearchPlan(prompt)

        # The planner may decide this is not a web research task at all
        if not researchPlan.get("requiresWebResearch", True):
            return {
                "feedback": "This task doesn't appear to require web research. Please try a different agent.",
                "documents": []
            }

        # Step 2: raw material
        announce("Gathering research material...", 45)
        rawResults = await self._gatherResearchMaterial(researchPlan, workflow)

        # Step 3: output documents
        announce("Creating output documents...", 55)
        documents = await self._createOutputDocuments(prompt, rawResults, outputSpecs, researchPlan)

        # Prefer the planner's own feedback, otherwise a generic sentence
        genericFeedback = f"I conducted web research on '{prompt[:50]}...' and gathered information from {len(rawResults)} relevant sources."
        return {
            "feedback": researchPlan.get("feedback", genericFeedback),
            "documents": documents
        }

    except Exception as e:
        logger.error(f"Error during web research: {str(e)}", exc_info=True)
        return {
            "feedback": f"Error during web research: {str(e)}",
            "documents": []
        }
|
|
||||||
|
|
||||||
async def _createResearchPlan(self, prompt: str) -> Dict[str, Any]:
    """
    Use AI to create a detailed research plan.

    Args:
        prompt: The research query

    Returns:
        Research plan dictionary. "searchTerms", "directUrls" and
        "researchQuestions" are always lists. If the AI call fails or its answer
        contains no parseable JSON object, a generic fallback plan is returned.
    """
    defaultQuestions = ["What information can be found about this topic?"]

    def fallbackPlan() -> Dict[str, Any]:
        # Generic plan used whenever the AI answer cannot be used
        return {
            "requiresWebResearch": True,
            "researchQuestions": list(defaultQuestions),
            "searchTerms": [prompt],
            "directUrls": [],
            "expectedSources": ["Web pages", "Articles"],
            "contentFocus": "Relevant information about the topic",
            "feedback": f"I'll conduct web research on '{prompt}' and gather relevant information."
        }

    def asList(value: Any, default: List[Any]) -> List[Any]:
        # These fields are sliced by the research step; a bare string would be
        # sliced character by character and None would raise, so normalize here.
        if isinstance(value, str):
            return [value] if value.strip() else list(default)
        if isinstance(value, (list, tuple)):
            return list(value)
        return list(default)

    researchPrompt = f"""
    Create a detailed web research plan for this task: "{prompt}"

    Analyze the request carefully and create a structured plan in JSON format with the following elements:
    {{
        "requiresWebResearch": true/false, # Whether this genuinely requires web research
        "researchQuestions": ["question1", "question2", ...], # 2-4 specific questions to answer
        "searchTerms": ["term1", "term2", ...], # Up to {self.maxSearchTerms} effective search terms
        "directUrls": ["url1", "url2", ...], # Any URLs directly mentioned in the request (up to {self.maxUrl})
        "expectedSources": ["type1", "type2", ...], # Types of sources that would be most valuable
        "contentFocus": "what specific content to extract or focus on",
        "feedback": "explanation of how the research will be conducted"
    }}

    Respond with ONLY the JSON object, no additional text or explanations.
    """

    try:
        # Get research plan from AI
        response = await self.service.base.callAi([
            {"role": "system", "content": "You are a research expert. Respond with valid JSON only."},
            {"role": "user", "content": researchPrompt}
        ])

        # Extract the outermost {...} span; the model may wrap it in extra text
        jsonStart = response.find('{')
        jsonEnd = response.rfind('}') + 1

        if jsonStart < 0 or jsonEnd <= jsonStart:
            logger.warning("Not able creating research plan, generating fallback plan")
            return fallbackPlan()

        plan = json.loads(response[jsonStart:jsonEnd])

        # Ensure the expected fields exist and are lists
        plan["searchTerms"] = asList(plan.get("searchTerms"), [prompt])
        plan["directUrls"] = asList(plan.get("directUrls"), [])
        plan["researchQuestions"] = asList(plan.get("researchQuestions"), defaultQuestions)

        return plan

    except Exception as e:
        logger.warning(f"Error creating research plan: {str(e)}")
        return fallbackPlan()
|
|
||||||
|
|
||||||
async def _gatherResearchMaterial(self, researchPlan: Dict[str, Any], workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Gather research material based on the research plan.

    Direct URLs from the plan are fetched first, then each search term is
    searched until self.maxResults sources have been collected. Every URL is
    kept at most once. Finally all collected sources are summarized.

    Args:
        researchPlan: Research plan dictionary ("directUrls", "searchTerms", ...)
        workflow: Current workflow object; progress is only logged when it is non-empty

    Returns:
        List of research results (title, url, sourceType, content, summary and,
        for search hits, snippet)
    """
    allResults = []
    seenUrls = set()  # O(1) duplicate check instead of rescanning allResults

    def report(message: str, progress: int) -> None:
        # processTask only logs when a workflow is present; do the same here
        if workflow:
            self.service.logAdd(workflow, message, level="info", progress=progress)

    # Process direct URLs
    directUrls = (researchPlan.get("directUrls") or [])[:self.maxUrl]
    for i, url in enumerate(directUrls):
        progress = 45 + int((i / len(directUrls)) * 5)  # Progress from 45% to 50%
        report(f"Processing direct URL {i+1}/{len(directUrls)}...", progress)
        logger.info(f"Processing direct URL: {url}")
        try:
            if url in seenUrls:
                continue  # same URL listed twice in the plan

            # Fetch and extract content
            soup = self._readUrl(url)
            if soup:
                allResults.append({
                    "title": self._extractTitle(soup, url),
                    "url": url,
                    "sourceType": "directUrl",
                    "content": self._extractMainContent(soup),
                    "summary": ""  # Will be filled later
                })
                seenUrls.add(url)
        except Exception as e:
            logger.warning(f"Error processing URL {url}: {str(e)}")

    # Process search terms
    searchTerms = (researchPlan.get("searchTerms") or [])[:self.maxSearchTerms]
    for i, term in enumerate(searchTerms):
        # Check the quota before searching: a search fetches every hit's page,
        # which is wasted work once enough sources are collected
        if len(allResults) >= self.maxResults:
            break

        progress = 50 + int((i / len(searchTerms)) * 5)  # Progress from 50% to 55%
        report(f"Searching term {i+1}/{len(searchTerms)}...", progress)
        logger.info(f"Searching for: {term}")
        try:
            for result in self._searchWeb(term):
                # Check before appending so the quota is never exceeded
                if len(allResults) >= self.maxResults:
                    break
                if result["url"] in seenUrls:
                    continue

                allResults.append({
                    "title": result["title"],
                    "url": result["url"],
                    "sourceType": "searchResult",
                    "content": result["data"],
                    "snippet": result["snippet"],
                    "summary": ""  # Will be filled later
                })
                seenUrls.add(result["url"])
        except Exception as e:
            logger.warning(f"Error searching for {term}: {str(e)}")

    # Create summaries for all results
    allResults = await self._summarizeAllResults(allResults, researchPlan)

    return allResults
|
|
||||||
|
|
||||||
async def _summarizeAllResults(self, results: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Create summaries for all research results.

    Each result is summarized with one AI call; a failure for one result is
    recorded in that result's "summary" field and does not stop the others.

    Args:
        results: List of research results (modified in place)
        researchPlan: Research plan with questions and focus

    Returns:
        Results with added summaries
    """
    researchQuestions = researchPlan.get("researchQuestions", ["What relevant information does this page contain?"])
    contentFocus = researchPlan.get("contentFocus", "Relevant information")
    questionsText = ', '.join(str(question) for question in researchQuestions)

    for i, result in enumerate(results):
        # .get(): a result without a title must not abort the whole summarization
        title = result.get("title", "Untitled")
        logger.info(f"Summarizing result {i+1}/{len(results)}: {str(title)[:30]}...")

        try:
            # Limit content length to avoid token issues
            content = self._limitText(result.get("content", ""), maxChars=8000)

            # Create summary using AI
            summaryPrompt = f"""
            Summarize this web page content based on these research questions:
            {questionsText}

            Focus on: {contentFocus}

            Web page: {result.get('url', '')}
            Title: {title}

            Content:
            {content}

            Create a concise summary that:
            1. Directly answers the research questions if possible
            2. Extracts the most relevant information from the page
            3. Includes specific facts, figures, or quotes if available
            4. Is around 2000 characters long

            Only include information actually found in the content. No fabrications or assumptions.
            """

            # The summary is stored and reused as prose, so the system prompt
            # must not ask for JSON output
            summary = await self.service.base.callAi([
                {"role": "system", "content": "You are a research expert. Respond with the plain-text summary only."},
                {"role": "user", "content": summaryPrompt}
            ])

            # Add summary to result
            result["summary"] = (summary or "").strip()

        except Exception as e:
            logger.warning(f"Error summarizing result {i+1}: {str(e)}")
            result["summary"] = f"Error creating summary: {str(e)}"

    return results
|
|
||||||
|
|
||||||
async def _createOutputDocuments(self, prompt: str, results: List[Dict[str, Any]],
                                 outputSpecs: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Turn the research results into one document per output specification.

    Args:
        prompt: Original research prompt
        results: List of research results
        outputSpecs: Output specifications ("label" and "description"); when empty,
            a single markdown report is produced
        researchPlan: Research plan

    Returns:
        List of output documents, in the order of the specifications
    """
    async def build(spec: Dict[str, Any]) -> Dict[str, Any]:
        label = spec.get("label", "")
        description = spec.get("description", "")

        # The file extension of the label selects the builder
        kind = self._determineFormatType(label)
        if kind == "json":
            return await self._createJsonDocument(prompt, results, researchPlan, label)
        if kind == "csv":
            return await self._createCsvDocument(results, label)
        # markdown, html and text are all narrative reports
        return await self._createNarrativeDocument(
            prompt, results, researchPlan, kind, label, description
        )

    specs = outputSpecs or [{
        "label": "webResearchResults.md",
        "description": "Comprehensive web research results"
    }]

    # Built strictly one after another, in specification order
    return [await build(spec) for spec in specs]
|
|
||||||
|
|
||||||
async def _createNarrativeDocument(self, prompt: str, results: List[Dict[str, Any]],
                                   researchPlan: Dict[str, Any], formatType: str,
                                   outputLabel: str, outputDescription: str) -> Dict[str, Any]:
    """
    Create a narrative document (markdown, html, text) from research results.

    Args:
        prompt: Original research prompt
        results: Research results
        researchPlan: Research plan
        formatType: Output format (markdown, html, text)
        outputLabel: Output filename
        outputDescription: Output description

    Returns:
        Document object from self.formatAgentDocumentOutput. If report generation
        fails, a short error document in the requested format is returned instead.
    """
    from html import escape  # local import: only needed for the HTML error page

    maxSourcesChars = 10000  # budget for the sources JSON inside the prompt
    maxSummaryChars = 500    # per-summary cap applied when over budget

    # Create content based on format
    contentType = {"markdown": "text/markdown", "html": "text/html"}.get(formatType, "text/plain")
    templateFormat = formatType if formatType in ("markdown", "html") else "text"

    # Prepare research context
    researchQuestions = researchPlan.get("researchQuestions", [])
    searchTerms = researchPlan.get("searchTerms", [])

    # Create document structure based on results
    sourcesSummary = [
        {
            "title": result.get("title", "Untitled"),
            "url": result.get("url", ""),
            "summary": result.get("summary", ""),
            "snippet": result.get("snippet", "")
        }
        for result in results
    ]

    # Truncate content for prompt: shorten summaries one by one until the JSON
    # fits. Only summaries that are actually too long are touched; appending
    # "..." to a short one would make the JSON longer, not shorter.
    sourcesJson = json.dumps(sourcesSummary, indent=2)
    for source in sourcesSummary:
        if len(sourcesJson) <= maxSourcesChars:
            break
        summary = source["summary"]
        if isinstance(summary, str) and len(summary) > maxSummaryChars:
            source["summary"] = summary[:maxSummaryChars] + "..."
            sourcesJson = json.dumps(sourcesSummary, indent=2)

    # Create report prompt
    reportPrompt = f"""
    Create a comprehensive {formatType} research report based on the following web research:

    TASK: {prompt}

    RESEARCH QUESTIONS:
    {', '.join(str(question) for question in researchQuestions)}

    SEARCH TERMS USED:
    {', '.join(str(term) for term in searchTerms)}

    SOURCES AND FINDINGS:
    {sourcesJson}

    REPORT DETAILS:
    - Format: {templateFormat}
    - Filename: {outputLabel}
    - Description: {outputDescription}

    Create a well-structured report that:
    1. Includes an executive summary of key findings
    2. Addresses each research question directly
    3. Integrates information from all relevant sources
    4. Cites sources appropriately for each piece of information
    5. Provides a comprehensive synthesis of the research
    6. Is formatted professionally and appropriately for {templateFormat}

    The report should be scholarly, accurate, and focused on the original research task.
    """

    try:
        # Generate report with AI. The answer is used verbatim as the document
        # body, so the system prompt must ask for the report, not for JSON.
        reportContent = await self.service.base.callAi([
            {"role": "system", "content": "You are a research expert. Respond with the finished report only, in the requested format."},
            {"role": "user", "content": reportPrompt}
        ])

        # Convert to HTML if needed
        if formatType == "html":
            stripped = reportContent.lstrip()
            # A doctype counts as a complete document too; wrapping it again
            # would nest one <html> element inside another
            isFullDocument = stripped.lower().startswith(("<html", "<!doctype"))
            if not isFullDocument:
                # Check if it's markdown that needs conversion
                if stripped.startswith("#"):
                    reportContent = markdown.markdown(reportContent)
                # Wrap in basic HTML structure
                reportContent = f"<html><head><title>Web Research Results</title></head><body>{reportContent}</body></html>"

        return self.formatAgentDocumentOutput(outputLabel, reportContent, contentType)

    except Exception as e:
        logger.error(f"Error creating narrative document: {str(e)}")

        # Create error document
        if formatType == "markdown":
            content = f"# Web Research Error\n\nAn error occurred: {str(e)}"
        elif formatType == "html":
            # Escape the message so exception text cannot inject markup
            content = f"<html><body><h1>Web Research Error</h1><p>An error occurred: {escape(str(e))}</p></body></html>"
        else:
            content = f"WEB RESEARCH ERROR\n\nAn error occurred: {str(e)}"

        return self.formatAgentDocumentOutput(outputLabel, content, contentType)
|
|
||||||
|
|
||||||
async def _createJsonDocument(self, prompt: str, results: List[Dict[str, Any]],
                              researchPlan: Dict[str, Any], outputLabel: str) -> Dict[str, Any]:
    """
    Serialize the research results as a structured JSON report.

    Args:
        prompt: Original research prompt
        results: Research results
        researchPlan: Research plan
        outputLabel: Output filename

    Returns:
        Document object; on failure a JSON document holding only an "error" field
    """
    try:
        # One flat record per source (raw page content is left out)
        sources = [
            {
                "title": entry.get("title", "Untitled"),
                "url": entry.get("url", ""),
                "summary": entry.get("summary", ""),
                "snippet": entry.get("snippet", ""),
                "sourceType": entry.get("sourceType", "")
            }
            for entry in results
        ]

        report = {
            # What was asked, when, and how it was researched
            "metadata": {
                "query": prompt,
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
                "researchQuestions": researchPlan.get("researchQuestions", []),
                "searchTerms": researchPlan.get("searchTerms", [])
            },
            "summary": researchPlan.get("feedback", "Web research results"),
            "sources": sources
        }

        serialized = json.dumps(report, indent=2)
        return self.formatAgentDocumentOutput(outputLabel, serialized, "application/json")

    except Exception as e:
        logger.error(f"Error creating JSON document: {str(e)}")
        return self.formatAgentDocumentOutput(outputLabel, json.dumps({"error": str(e)}), "application/json")
|
|
||||||
|
|
||||||
async def _createCsvDocument(self, results: List[Dict[str, Any]], outputLabel: str) -> Dict[str, Any]:
    """
    Render the research results as a four-column CSV table.

    Args:
        results: Research results
        outputLabel: Output filename

    Returns:
        Document object; on failure a two-line CSV describing the error
    """
    try:
        columns = ("title", "url", "sourceType", "snippet")
        rows = ["Title,URL,Source Type,Snippet"]

        for entry in results:
            # Every cell is quoted; embedded quotes are doubled
            cells = [entry.get(column, "").replace('"', '""') for column in columns]
            rows.append(",".join(f'"{cell}"' for cell in cells))

        return self.formatAgentDocumentOutput(outputLabel, "\n".join(rows), "text/csv")

    except Exception as e:
        logger.error(f"Error creating CSV document: {str(e)}")
        return self.formatAgentDocumentOutput(outputLabel, "Error,Error\nFailed to create CSV,{0}".format(str(e)), "text/csv")
|
|
||||||
|
|
||||||
def _determineFormatType(self, outputLabel: str) -> str:
    """
    Determine the format type based on the filename.

    The check is case-insensitive and ignores surrounding whitespace. Besides
    the primary extensions, the common aliases ".markdown" and ".htm" are
    recognized. Unknown, missing or empty labels fall back to markdown.

    Args:
        outputLabel: Output filename

    Returns:
        Format type (markdown, html, text, json, csv)
    """
    # str.endswith accepts a tuple, so aliases share a single check
    suffixFormats = (
        ((".md", ".markdown"), "markdown"),
        ((".html", ".htm"), "html"),
        ((".txt",), "text"),
        ((".json",), "json"),
        ((".csv",), "csv"),
    )

    label = (outputLabel or "").strip().lower()
    for suffixes, formatType in suffixFormats:
        if label.endswith(suffixes):
            return formatType

    # Default to markdown
    return "markdown"
|
|
||||||
|
|
||||||
def _searchWeb(self, query: str) -> List[Dict[str, str]]:
    """
    Conduct a web search using SerpAPI and return the results.

    For every organic hit that has a link, the target page is fetched and its
    main text is stored under "data". Hits without a link are skipped.

    Args:
        query: The search query

    Returns:
        List of search results (title, url, snippet, data); an empty list when
        no API key is configured or the search request fails
    """
    if not self.srcApikey:
        return []

    # Get user language from the service if available. getattr keeps this from
    # raising (outside the try below) when no service has been attached yet.
    base = getattr(getattr(self, "service", None), "base", None)
    userLanguage = getattr(base, "userLanguage", None) or "en"  # Default language

    try:
        # Format the search request for SerpAPI
        params = {
            "engine": self.srcEngine,
            "q": query,
            "api_key": self.srcApikey,
            "num": self.maxResults,  # Number of results to return
            "hl": userLanguage  # Identified user language
        }

        # Make the API request
        response = requests.get("https://serpapi.com/search", params=params, timeout=self.timeout)
        response.raise_for_status()

        # Parse JSON response
        searchResults = response.json()

        if "organic_results" not in searchResults:
            logger.warning(f"No organic results found in SerpAPI response for: {query}")
            return []

        results = []
        # The slice already enforces the result limit
        for result in searchResults["organic_results"][:self.maxResults]:
            url = result.get("link")
            if not url:
                # Without a link there is no page to fetch and nothing to cite
                continue

            # Get actual page content
            try:
                content = self._extractMainContent(self._readUrl(url))
            except Exception as e:
                logger.warning(f"Error extracting content from {url}: {str(e)}")
                content = f"Error extracting content: {str(e)}"

            results.append({
                'title': result.get("title", "No title"),
                'url': url,
                'snippet': result.get("snippet", "No description"),
                'data': content
            })

        return results

    except Exception as e:
        logger.error(f"Error searching with SerpAPI for {query}: {str(e)}")
        return []
|
|
||||||
|
|
||||||
def _readUrl(self, url: str) -> "BeautifulSoup":
    """
    Read a URL and return a BeautifulSoup parser for the content.

    A 202 response is retried with increasing delays. If the server still
    answers 202 after the last retry, the page is treated as unavailable
    rather than parsing the 202 body as if it were the page.
    NOTE(review): this assumes a 202 body is never the wanted page — confirm.

    Args:
        url: The URL to read (must start with http:// or https://)

    Returns:
        BeautifulSoup object with the content or None on errors
    """
    # Non-string values (e.g. malformed plan entries) are rejected like bad URLs
    if not isinstance(url, str) or not url.startswith(('http://', 'https://')):
        return None

    headers = {
        'User-Agent': self.userAgent,
        'Accept': 'text/html,application/xhtml+xml,application/xml',
        'Accept-Language': 'en-US,en;q=0.9',
    }

    try:
        # Initial request
        response = requests.get(url, headers=headers, timeout=self.timeout)

        # Handling for status 202
        if response.status_code == 202:
            # Retry with backoff (blocking sleeps, up to 8.5 seconds in total)
            for waitTime in (0.5, 1.0, 2.0, 5.0):
                time.sleep(waitTime)
                response = requests.get(url, headers=headers, timeout=self.timeout)
                if response.status_code != 202:
                    break
            else:
                # Loop finished without break: every retry returned 202
                logger.warning(f"URL {url} still returned status 202 after retries")
                return None

        # Raise for error status codes
        response.raise_for_status()

        # Parse HTML
        return BeautifulSoup(response.text, 'html.parser')

    except Exception as e:
        logger.error(f"Error reading URL {url}: {str(e)}")
        return None
|
|
||||||
|
|
||||||
def _extractTitle(self, soup: "BeautifulSoup", url: str) -> str:
    """
    Extract the title from a webpage.

    The <title> tag is preferred; if it is missing or contains only
    whitespace, the first <h1> is used instead.

    Args:
        soup: BeautifulSoup object of the webpage
        url: URL of the webpage (only used in the error title)

    Returns:
        Extracted title, "No title" when neither tag yields text, or
        "Error with <url>" when no parsed page was provided
    """
    if not soup:
        return f"Error with {url}"

    # Try the candidates in priority order; an empty tag counts as missing
    for tagName in ('title', 'h1'):
        tag = soup.find(tagName)
        text = tag.text.strip() if tag else ""
        if text:
            return text

    return "No title"
|
|
||||||
|
|
||||||
def _extractMainContent(self, soup: BeautifulSoup, maxChars: int = 10000) -> str:
    """
    Pull the readable main text out of a parsed HTML page.

    Elements matching the noise selector are removed from the chosen
    container, which modifies the soup that was passed in.

    Args:
        soup: BeautifulSoup object of the webpage
        maxChars: Maximum number of characters to return

    Returns:
        Extracted main content as a string ("" when no page was provided)
    """
    if not soup:
        return ""

    # First container that exists wins; the generator stops probing after a hit
    containerSelectors = ('main', 'article', '#content', '.content', '#main', '.main')
    candidates = (soup.select_one(selector) for selector in containerSelectors)
    mainContent = next((node for node in candidates if node), None)

    # No dedicated container: fall back to <body>, then to the whole document
    if not mainContent:
        mainContent = soup.find('body') or soup

    # Strip elements that don't contribute to the main content
    noiseSelector = 'script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'
    for element in mainContent.select(noiseSelector):
        element.extract()

    # Flatten to text and cut to the requested length
    return mainContent.get_text(separator=' ', strip=True)[:maxChars]
|
|
||||||
|
|
||||||
def _limitText(self, text: str, maxChars: int = 10000) -> str:
    """
    Cut text down to at most maxChars characters plus a truncation marker.

    Args:
        text: Input text (None or empty yields "")
        maxChars: Maximum number of characters kept from the input

    Returns:
        The unchanged text when it fits, otherwise its first maxChars
        characters followed by a truncation notice
    """
    # Only over-long text is modified
    if text and len(text) > maxChars:
        return text[:maxChars] + "... [Content truncated due to length]"

    return text if text else ""
|
|
||||||
|
|
||||||
|
|
||||||
# Factory function for the Webcrawler agent
|
|
||||||
def getAgentWebcrawler():
    """Factory function: build a new AgentWebcrawler instance and return it."""
    agent = AgentWebcrawler()
    return agent
|
|
||||||
|
|
@ -12,7 +12,6 @@ from typing import Dict, Any, List, Optional, Union
|
||||||
import hashlib
|
import hashlib
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
from modules.shared.mimeUtils import isTextMimeType
|
|
||||||
from modules.interfaces.serviceChatAccess import ChatAccess
|
from modules.interfaces.serviceChatAccess import ChatAccess
|
||||||
from modules.interfaces.serviceChatModel import (
|
from modules.interfaces.serviceChatModel import (
|
||||||
ChatContent, ChatDocument, ChatStat, ChatMessage,
|
ChatContent, ChatDocument, ChatStat, ChatMessage,
|
||||||
|
|
@ -20,6 +19,7 @@ from modules.interfaces.serviceChatModel import (
|
||||||
Task, TaskPlan, UserInputRequest
|
Task, TaskPlan, UserInputRequest
|
||||||
)
|
)
|
||||||
from modules.interfaces.serviceAppModel import User
|
from modules.interfaces.serviceAppModel import User
|
||||||
|
from modules.workflow.managerDocument import DocumentManager
|
||||||
|
|
||||||
# DYNAMIC PART: Connectors to the Interface
|
# DYNAMIC PART: Connectors to the Interface
|
||||||
from modules.connectors.connectorDbJson import DatabaseConnector
|
from modules.connectors.connectorDbJson import DatabaseConnector
|
||||||
|
|
@ -46,6 +46,9 @@ class ChatInterface:
|
||||||
self.mandateId = currentUser.mandateId if currentUser else None
|
self.mandateId = currentUser.mandateId if currentUser else None
|
||||||
self.access = None # Will be set when user context is provided
|
self.access = None # Will be set when user context is provided
|
||||||
|
|
||||||
|
# Initialize services
|
||||||
|
self._initializeServices()
|
||||||
|
|
||||||
# Initialize database
|
# Initialize database
|
||||||
self._initializeDatabase()
|
self._initializeDatabase()
|
||||||
|
|
||||||
|
|
@ -53,6 +56,10 @@ class ChatInterface:
|
||||||
if currentUser:
|
if currentUser:
|
||||||
self.setUserContext(currentUser)
|
self.setUserContext(currentUser)
|
||||||
|
|
||||||
|
def _initializeServices(self):
    """Create the service objects this interface depends on."""
    # The document manager is constructed from this interface's service object.
    manager = DocumentManager(self.service)
    self.documentManager = manager
|
||||||
|
|
||||||
def setUserContext(self, currentUser: User):
|
def setUserContext(self, currentUser: User):
|
||||||
"""Sets the user context for the interface."""
|
"""Sets the user context for the interface."""
|
||||||
if not currentUser:
|
if not currentUser:
|
||||||
|
|
@ -380,23 +387,9 @@ class ChatInterface:
|
||||||
messageData["id"] = f"msg_{uuid.uuid4()}"
|
messageData["id"] = f"msg_{uuid.uuid4()}"
|
||||||
logger.warning(f"Automatically generated ID for workflow message: {messageData['id']}")
|
logger.warning(f"Automatically generated ID for workflow message: {messageData['id']}")
|
||||||
|
|
||||||
# Ensure required fields are present
|
|
||||||
if "startedAt" not in messageData and "createdAt" not in messageData:
|
|
||||||
messageData["startedAt"] = self._getCurrentTimestamp()
|
|
||||||
|
|
||||||
if "createdAt" in messageData and "startedAt" not in messageData:
|
|
||||||
messageData["startedAt"] = messageData["createdAt"]
|
|
||||||
del messageData["createdAt"]
|
|
||||||
|
|
||||||
# Set status if not present
|
# Set status if not present
|
||||||
if "status" not in messageData:
|
if "status" not in messageData:
|
||||||
messageData["status"] = "completed"
|
messageData["status"] = "step" # Default status for intermediate messages
|
||||||
|
|
||||||
# Set sequence number if not present
|
|
||||||
if "sequenceNo" not in messageData:
|
|
||||||
# Get current messages to determine next sequence number
|
|
||||||
existingMessages = self.getWorkflowMessages(workflowId)
|
|
||||||
messageData["sequenceNo"] = len(existingMessages) + 1
|
|
||||||
|
|
||||||
# Ensure role and agentName are present
|
# Ensure role and agentName are present
|
||||||
if "role" not in messageData:
|
if "role" not in messageData:
|
||||||
|
|
@ -427,10 +420,9 @@ class ChatInterface:
|
||||||
documents=[ChatDocument(**doc) for doc in createdMessage.get("documents", [])],
|
documents=[ChatDocument(**doc) for doc in createdMessage.get("documents", [])],
|
||||||
message=createdMessage.get("message"),
|
message=createdMessage.get("message"),
|
||||||
role=createdMessage.get("role", "assistant"),
|
role=createdMessage.get("role", "assistant"),
|
||||||
status=createdMessage.get("status", "completed"),
|
status=createdMessage.get("status", "step"),
|
||||||
sequenceNr=createdMessage.get("sequenceNo", 0),
|
sequenceNr=len(messageIds), # Set sequence number based on message position
|
||||||
startedAt=createdMessage.get("startedAt", self._getCurrentTimestamp()),
|
publishedAt=createdMessage.get("publishedAt", self._getCurrentTimestamp()),
|
||||||
finishedAt=createdMessage.get("finishedAt"),
|
|
||||||
stats=ChatStat(**createdMessage.get("stats", {})) if createdMessage.get("stats") else None
|
stats=ChatStat(**createdMessage.get("stats", {})) if createdMessage.get("stats") else None
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -848,7 +840,6 @@ class ChatInterface:
|
||||||
async def workflowStart(self, userInput: UserInputRequest, workflowId: Optional[str] = None) -> ChatWorkflow:
|
async def workflowStart(self, userInput: UserInputRequest, workflowId: Optional[str] = None) -> ChatWorkflow:
|
||||||
"""
|
"""
|
||||||
Starts a new workflow or continues an existing one.
|
Starts a new workflow or continues an existing one.
|
||||||
Corresponds to State 1 in the state machine documentation.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
userInput: The user input request containing workflow initialization data
|
userInput: The user input request containing workflow initialization data
|
||||||
|
|
@ -861,29 +852,40 @@ class ChatInterface:
|
||||||
# Get current timestamp
|
# Get current timestamp
|
||||||
currentTime = self._getCurrentTimestamp()
|
currentTime = self._getCurrentTimestamp()
|
||||||
|
|
||||||
|
# Process files if any
|
||||||
|
documents = []
|
||||||
|
if userInput.listFileId:
|
||||||
|
documents = await self._processFileIds(userInput.listFileId)
|
||||||
|
|
||||||
|
# Create initial message
|
||||||
|
initialMessage = ChatMessage(
|
||||||
|
id=str(uuid.uuid4()),
|
||||||
|
role="user",
|
||||||
|
content=userInput.prompt,
|
||||||
|
timestamp=currentTime,
|
||||||
|
documents=documents
|
||||||
|
)
|
||||||
|
|
||||||
if workflowId:
|
if workflowId:
|
||||||
# Continue existing workflow
|
# Continue existing workflow
|
||||||
workflow = self.getWorkflow(workflowId)
|
workflow = self.getWorkflow(workflowId)
|
||||||
if not workflow:
|
if not workflow:
|
||||||
raise ValueError(f"Workflow {workflowId} not found")
|
raise ValueError(f"Workflow {workflowId} not found")
|
||||||
|
|
||||||
# Update workflow status
|
# Add message to workflow
|
||||||
workflow.status = "running"
|
self.createWorkflowMessage({
|
||||||
workflow.lastActivity = currentTime
|
"workflowId": workflowId,
|
||||||
|
"messageId": initialMessage.id,
|
||||||
# Update in database
|
"role": initialMessage.role,
|
||||||
self.updateWorkflow(workflowId, {
|
"content": initialMessage.content,
|
||||||
"status": "running",
|
"timestamp": initialMessage.timestamp,
|
||||||
"lastActivity": currentTime
|
"documents": [doc.dict() for doc in initialMessage.documents]
|
||||||
})
|
})
|
||||||
|
|
||||||
# Add log entry
|
# Update workflow
|
||||||
self.createWorkflowLog({
|
self.updateWorkflow(workflowId, {
|
||||||
"workflowId": workflowId,
|
"lastActivity": currentTime,
|
||||||
"message": "Workflow continued",
|
"currentRound": workflow.currentRound + 1
|
||||||
"type": "info",
|
|
||||||
"status": "running",
|
|
||||||
"progress": 0
|
|
||||||
})
|
})
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
@ -895,10 +897,10 @@ class ChatInterface:
|
||||||
"lastActivity": currentTime,
|
"lastActivity": currentTime,
|
||||||
"currentRound": 1,
|
"currentRound": 1,
|
||||||
"mandateId": self.mandateId,
|
"mandateId": self.mandateId,
|
||||||
"messageIds": [],
|
"messageIds": [initialMessage.id],
|
||||||
"dataStats": {
|
"dataStats": {
|
||||||
"totalMessages": 0,
|
"totalMessages": 1,
|
||||||
"totalDocuments": 0,
|
"totalDocuments": len(documents),
|
||||||
"totalTokens": 0
|
"totalTokens": 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -906,6 +908,16 @@ class ChatInterface:
|
||||||
# Create workflow
|
# Create workflow
|
||||||
workflow = self.createWorkflow(workflowData)
|
workflow = self.createWorkflow(workflowData)
|
||||||
|
|
||||||
|
# Add initial message
|
||||||
|
self.createWorkflowMessage({
|
||||||
|
"workflowId": workflow.id,
|
||||||
|
"messageId": initialMessage.id,
|
||||||
|
"role": initialMessage.role,
|
||||||
|
"content": initialMessage.content,
|
||||||
|
"timestamp": initialMessage.timestamp,
|
||||||
|
"documents": [doc.dict() for doc in initialMessage.documents]
|
||||||
|
})
|
||||||
|
|
||||||
# Add log entry
|
# Add log entry
|
||||||
self.createWorkflowLog({
|
self.createWorkflowLog({
|
||||||
"workflowId": workflow.id,
|
"workflowId": workflow.id,
|
||||||
|
|
@ -916,8 +928,8 @@ class ChatInterface:
|
||||||
})
|
})
|
||||||
|
|
||||||
# Start workflow processing
|
# Start workflow processing
|
||||||
from modules.workflow.workflowManager import getWorkflowManager
|
from modules.workflow.managerWorkflow import WorkflowManager
|
||||||
workflowManager = await getWorkflowManager(self)
|
workflowManager = WorkflowManager(self)
|
||||||
asyncio.create_task(workflowManager.workflowProcess(userInput, workflow))
|
asyncio.create_task(workflowManager.workflowProcess(userInput, workflow))
|
||||||
|
|
||||||
return workflow
|
return workflow
|
||||||
|
|
@ -979,30 +991,22 @@ class ChatInterface:
|
||||||
"""
|
"""
|
||||||
documents = []
|
documents = []
|
||||||
for fileId in fileIds:
|
for fileId in fileIds:
|
||||||
try:
|
# Get file metadata
|
||||||
# Get file content
|
fileMetadata = self.service.functions.getFile(fileId)
|
||||||
fileContent = self.service.functions.getFileData(fileId)
|
if not fileMetadata:
|
||||||
if not fileContent:
|
logger.warning(f"File metadata not found for {fileId}")
|
||||||
continue
|
|
||||||
|
|
||||||
# Get file metadata
|
|
||||||
fileMetadata = self.service.functions.getFile(fileId)
|
|
||||||
if not fileMetadata:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Create ChatDocument
|
|
||||||
document = ChatDocument(
|
|
||||||
id=str(uuid.uuid4()),
|
|
||||||
fileId=fileId,
|
|
||||||
filename=fileMetadata.get("name", "Unknown"),
|
|
||||||
fileSize=fileMetadata.get("size", 0),
|
|
||||||
content=fileContent.decode('utf-8', errors='ignore'),
|
|
||||||
mimeType=fileMetadata.get("mimeType", "text/plain")
|
|
||||||
)
|
|
||||||
documents.append(document)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error processing file {fileId}: {str(e)}")
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Create ChatDocument
|
||||||
|
document = ChatDocument(
|
||||||
|
id=str(uuid.uuid4()),
|
||||||
|
fileId=fileId,
|
||||||
|
filename=fileMetadata.get("name", "Unknown"),
|
||||||
|
fileSize=fileMetadata.get("size", 0),
|
||||||
|
mimeType=fileMetadata.get("mimeType", "text/plain")
|
||||||
|
)
|
||||||
|
|
||||||
|
documents.append(document)
|
||||||
return documents
|
return documents
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,12 +4,38 @@ Chat model classes for the chat system.
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from typing import List, Dict, Any, Optional, Union
|
from typing import List, Dict, Any, Optional, Union
|
||||||
from datetime import datetime
|
from datetime import datetime, UTC
|
||||||
import uuid
|
import uuid
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
from modules.shared.attributeUtils import register_model_labels, ModelMixin
|
from modules.shared.attributeUtils import register_model_labels, ModelMixin
|
||||||
|
|
||||||
|
|
||||||
|
# ENUMS
|
||||||
|
|
||||||
|
class TaskStatus(str, Enum):
    """Status values a task can take; each member is also a plain string."""

    # Not started yet
    PENDING = "pending"
    # Currently executing
    RUNNING = "running"

    # Terminal outcomes
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
    ROLLED_BACK = "rolled_back"
|
||||||
|
|
||||||
|
# Register labels for TaskStatus
|
||||||
|
# First mapping labels the enum itself; the second maps each member name to
# its "en"/"fr" display label.
register_model_labels(
    "TaskStatus",
    {"en": "Task Status", "fr": "Statut de la tâche"},
    {
        "PENDING": {"en": "Pending", "fr": "En attente"},
        "RUNNING": {"en": "Running", "fr": "En cours"},
        "COMPLETED": {"en": "Completed", "fr": "Terminé"},
        "FAILED": {"en": "Failed", "fr": "Échec"},
        "CANCELLED": {"en": "Cancelled", "fr": "Annulé"},
        # Must differ from the CANCELLED label, otherwise the two states are
        # indistinguishable in French.
        "ROLLED_BACK": {"en": "Rolled Back", "fr": "Restauré"}
    }
)
|
||||||
|
|
||||||
# USER MODELS
|
# USER MODELS
|
||||||
|
|
||||||
class UserInputRequest(BaseModel, ModelMixin):
|
class UserInputRequest(BaseModel, ModelMixin):
|
||||||
|
|
@ -28,24 +54,49 @@ register_model_labels(
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# WORKFLOW MODELS
|
# DOCUMENT MODELS
|
||||||
|
|
||||||
class ChatContent(BaseModel, ModelMixin):
|
class ContentMetadata(BaseModel, ModelMixin):
    """Size, page count, error and media attributes of a content item."""

    # Generic attributes; only size is required
    size: int = Field(..., description="Content size in bytes")
    pages: Optional[int] = Field(default=None, description="Number of pages for multi-page content")
    error: Optional[str] = Field(default=None, description="Processing error if any")

    # Media-specific attributes
    width: Optional[int] = Field(default=None, description="Width in pixels for images/videos")
    height: Optional[int] = Field(default=None, description="Height in pixels for images/videos")
    colorMode: Optional[str] = Field(default=None, description="Color mode (e.g., RGB, CMYK, grayscale)")
    fps: Optional[float] = Field(default=None, description="Frames per second for videos")
    durationSec: Optional[float] = Field(default=None, description="Duration in seconds for videos/audio")
|
||||||
|
|
||||||
|
# Register labels for ContentMetadata
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
"ChatContent",
|
"ContentMetadata",
|
||||||
{"en": "Chat Content", "fr": "Contenu de chat"},
|
{"en": "Content Metadata", "fr": "Métadonnées du contenu"},
|
||||||
{
|
{
|
||||||
"sequenceNr": {"en": "Sequence Number", "fr": "Numéro de séquence"},
|
"size": {"en": "Size", "fr": "Taille"},
|
||||||
"name": {"en": "Name", "fr": "Nom"},
|
"pages": {"en": "Pages", "fr": "Pages"},
|
||||||
|
"error": {"en": "Error", "fr": "Erreur"},
|
||||||
|
"width": {"en": "Width", "fr": "Largeur"},
|
||||||
|
"height": {"en": "Height", "fr": "Hauteur"},
|
||||||
|
"colorMode": {"en": "Color Mode", "fr": "Mode de couleur"},
|
||||||
|
"fps": {"en": "FPS", "fr": "IPS"},
|
||||||
|
"durationSec": {"en": "Duration", "fr": "Durée"}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
class ContentItem(BaseModel, ModelMixin):
    """One labelled piece of text extracted from a document, with its metadata."""

    # All three fields are required
    label: str = Field(..., description="Content label (e.g., tab name, tag name)")
    data: str = Field(..., description="Extracted text content")
    metadata: ContentMetadata = Field(..., description="Content metadata")
|
||||||
|
|
||||||
|
# Register labels for ContentItem
|
||||||
|
register_model_labels(
|
||||||
|
"ContentItem",
|
||||||
|
{"en": "Content Item", "fr": "Élément de contenu"},
|
||||||
|
{
|
||||||
|
"label": {"en": "Label", "fr": "Étiquette"},
|
||||||
"data": {"en": "Data", "fr": "Données"},
|
"data": {"en": "Data", "fr": "Données"},
|
||||||
"mimeType": {"en": "MIME Type", "fr": "Type MIME"},
|
|
||||||
"metadata": {"en": "Metadata", "fr": "Métadonnées"}
|
"metadata": {"en": "Metadata", "fr": "Métadonnées"}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
@ -57,7 +108,7 @@ class ChatDocument(BaseModel, ModelMixin):
|
||||||
filename: str = Field(description="Name of the file")
|
filename: str = Field(description="Name of the file")
|
||||||
fileSize: int = Field(description="Size of the file")
|
fileSize: int = Field(description="Size of the file")
|
||||||
mimeType: str = Field(description="MIME type of the file")
|
mimeType: str = Field(description="MIME type of the file")
|
||||||
contents: List[ChatContent] = Field(default_factory=list, description="List of chat contents")
|
|
||||||
# Register labels for ChatDocument
|
# Register labels for ChatDocument
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
"ChatDocument",
|
"ChatDocument",
|
||||||
|
|
@ -67,11 +118,50 @@ register_model_labels(
|
||||||
"fileId": {"en": "File ID", "fr": "ID du fichier"},
|
"fileId": {"en": "File ID", "fr": "ID du fichier"},
|
||||||
"filename": {"en": "Filename", "fr": "Nom de fichier"},
|
"filename": {"en": "Filename", "fr": "Nom de fichier"},
|
||||||
"fileSize": {"en": "File Size", "fr": "Taille du fichier"},
|
"fileSize": {"en": "File Size", "fr": "Taille du fichier"},
|
||||||
|
"mimeType": {"en": "MIME Type", "fr": "Type MIME"}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
class TaskDocument(BaseModel, ModelMixin):
    """A file attached to a task, carried inline as base64 text."""

    # Defaults to a freshly generated UUID4 string when not supplied
    id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")

    # Required file payload and descriptors
    data: str = Field(..., description="Base64 encoded file data")
    filename: str = Field(..., description="Name of the file")
    fileSize: int = Field(..., description="Size of the file")
    mimeType: str = Field(..., description="MIME type of the file")
|
||||||
|
|
||||||
|
# Register labels for TaskDocument
|
||||||
|
register_model_labels(
|
||||||
|
"TaskDocument",
|
||||||
|
{"en": "Task Document", "fr": "Document de tâche"},
|
||||||
|
{
|
||||||
|
"id": {"en": "ID", "fr": "ID"},
|
||||||
|
"filename": {"en": "Filename", "fr": "Nom de fichier"},
|
||||||
|
"fileSize": {"en": "File Size", "fr": "Taille du fichier"},
|
||||||
"mimeType": {"en": "MIME Type", "fr": "Type MIME"},
|
"mimeType": {"en": "MIME Type", "fr": "Type MIME"},
|
||||||
|
"data": {"en": "Data", "fr": "Données"}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
class ExtractedContent(BaseModel, ModelMixin):
    """Content items extracted from one source document."""

    # Which object the content came from
    objectId: str = Field(..., description="Reference to source document")
    objectType: str = Field(..., description="Type of source object ('ChatDocument' or 'TaskDocument')")

    # Starts as an empty list when not supplied
    contents: List[ContentItem] = Field(default_factory=list, description="List of content items")
|
||||||
|
|
||||||
|
# Register labels for ExtractedContent
|
||||||
|
register_model_labels(
|
||||||
|
"ExtractedContent",
|
||||||
|
{"en": "Extracted Content", "fr": "Contenu extrait"},
|
||||||
|
{
|
||||||
|
"objectId": {"en": "Object ID", "fr": "ID de l'objet"},
|
||||||
|
"objectType": {"en": "Object Type", "fr": "Type d'objet"},
|
||||||
"contents": {"en": "Contents", "fr": "Contenus"}
|
"contents": {"en": "Contents", "fr": "Contenus"}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# WORKFLOW MODELS
|
||||||
|
|
||||||
class ChatStat(BaseModel, ModelMixin):
|
class ChatStat(BaseModel, ModelMixin):
|
||||||
"""Data model for chat statistics"""
|
"""Data model for chat statistics"""
|
||||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
|
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
|
||||||
|
|
@ -133,10 +223,9 @@ class ChatMessage(BaseModel, ModelMixin):
|
||||||
documents: List[ChatDocument] = Field(default_factory=list, description="Associated documents")
|
documents: List[ChatDocument] = Field(default_factory=list, description="Associated documents")
|
||||||
message: Optional[str] = Field(None, description="Message content")
|
message: Optional[str] = Field(None, description="Message content")
|
||||||
role: str = Field(description="Role of the message sender")
|
role: str = Field(description="Role of the message sender")
|
||||||
status: str = Field(description="Status of the message")
|
status: str = Field(description="Status of the message (first, step, last)")
|
||||||
sequenceNr: int = Field(description="Sequence number of the message")
|
sequenceNr: int = Field(description="Sequence number of the message (set automatically)")
|
||||||
startedAt: str = Field(description="When the message processing started")
|
publishedAt: str = Field(description="When the message was published")
|
||||||
finishedAt: Optional[str] = Field(None, description="When the message processing finished")
|
|
||||||
stats: Optional[ChatStat] = Field(None, description="Statistics for this message")
|
stats: Optional[ChatStat] = Field(None, description="Statistics for this message")
|
||||||
success: Optional[bool] = Field(None, description="Whether the message processing was successful")
|
success: Optional[bool] = Field(None, description="Whether the message processing was successful")
|
||||||
# Register labels for ChatMessage
|
# Register labels for ChatMessage
|
||||||
|
|
@ -153,29 +242,92 @@ register_model_labels(
|
||||||
"role": {"en": "Role", "fr": "Rôle"},
|
"role": {"en": "Role", "fr": "Rôle"},
|
||||||
"status": {"en": "Status", "fr": "Statut"},
|
"status": {"en": "Status", "fr": "Statut"},
|
||||||
"sequenceNr": {"en": "Sequence Number", "fr": "Numéro de séquence"},
|
"sequenceNr": {"en": "Sequence Number", "fr": "Numéro de séquence"},
|
||||||
"startedAt": {"en": "Started At", "fr": "Démarré le"},
|
"publishedAt": {"en": "Published At", "fr": "Publié le"},
|
||||||
"finishedAt": {"en": "Finished At", "fr": "Terminé le"},
|
|
||||||
"stats": {"en": "Statistics", "fr": "Statistiques"},
|
"stats": {"en": "Statistics", "fr": "Statistiques"},
|
||||||
"success": {"en": "Success", "fr": "Succès"}
|
"success": {"en": "Success", "fr": "Succès"}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
class AgentTask(BaseModel, ModelMixin):
    """Model for agent tasks"""

    # Identity (both required)
    id: str = Field(description="Unique task identifier")
    workflowId: str = Field(description="Associated workflow ID")

    # Execution state
    status: TaskStatus = Field(TaskStatus.PENDING, description="Current task status")
    error: Optional[str] = Field(default=None, description="Error message if task failed")
    startedAt: Optional[datetime] = Field(default=None, description="Task start timestamp")
    finishedAt: Optional[datetime] = Field(default=None, description="Task completion timestamp")

    # Work items and results
    actionList: List[Dict[str, Any]] = Field(default_factory=list, description="List of actions to execute")
    documentsOutput: List[Dict[str, Any]] = Field(default_factory=list, description="Output documents")

    # Retry and rollback policy
    retryCount: int = Field(0, description="Number of retry attempts")
    retryMax: int = Field(3, description="Maximum number of retry attempts")
    rollbackOnFailure: bool = Field(True, description="Whether to rollback on failure")

    dependencies: List[str] = Field(default_factory=list, description="List of dependent task IDs")
    thisTaskFeedback: Optional[Dict[str, Any]] = Field(default=None, description="Task feedback data")

    def isCompleted(self) -> bool:
        """Return True when the status equals TaskStatus.COMPLETED."""
        currentStatus = self.status
        return currentStatus == TaskStatus.COMPLETED

    def isFailed(self) -> bool:
        """Return True when the status equals TaskStatus.FAILED."""
        currentStatus = self.status
        return currentStatus == TaskStatus.FAILED

    def canRetry(self) -> bool:
        """Return True while retryCount is still below retryMax."""
        attemptsUsed = self.retryCount
        attemptsAllowed = self.retryMax
        return attemptsUsed < attemptsAllowed

    def start(self) -> None:
        """Set the status to RUNNING and stamp startedAt with the current UTC time."""
        self.status = TaskStatus.RUNNING
        now = datetime.now(UTC)
        self.startedAt = now

    def complete(self) -> None:
        """Set the status to COMPLETED and stamp finishedAt with the current UTC time."""
        self.status = TaskStatus.COMPLETED
        now = datetime.now(UTC)
        self.finishedAt = now

    def fail(self, error: str) -> None:
        """Set the status to FAILED, store the error text and stamp finishedAt (UTC)."""
        self.status = TaskStatus.FAILED
        self.error = error
        now = datetime.now(UTC)
        self.finishedAt = now

    def cancel(self) -> None:
        """Set the status to CANCELLED and stamp finishedAt with the current UTC time."""
        self.status = TaskStatus.CANCELLED
        now = datetime.now(UTC)
        self.finishedAt = now

    def rollback(self) -> None:
        """Set the status to ROLLED_BACK and stamp finishedAt with the current UTC time."""
        self.status = TaskStatus.ROLLED_BACK
        now = datetime.now(UTC)
        self.finishedAt = now

    def incrementRetry(self) -> None:
        """Add one to retryCount."""
        self.retryCount = self.retryCount + 1

    def addDependency(self, taskId: str) -> None:
        """Append taskId to dependencies unless it is already listed."""
        if taskId in self.dependencies:
            return
        self.dependencies.append(taskId)

    def removeDependency(self, taskId: str) -> None:
        """Remove taskId from dependencies; do nothing when it is not listed."""
        if taskId not in self.dependencies:
            return
        self.dependencies.remove(taskId)

    def addAction(self, action: Dict[str, Any]) -> None:
        """Append an action to actionList."""
        actions = self.actionList
        actions.append(action)

    def addDocumentOutput(self, document: Dict[str, Any]) -> None:
        """Append a document to documentsOutput."""
        outputs = self.documentsOutput
        outputs.append(document)

    def setFeedback(self, feedback: Dict[str, Any]) -> None:
        """Replace thisTaskFeedback with the given feedback."""
        self.thisTaskFeedback = feedback
|
||||||
|
|
||||||
# Register labels for AgentTask
|
# Register labels for AgentTask
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
"AgentTask",
|
"AgentTask",
|
||||||
|
|
@ -183,42 +335,21 @@ register_model_labels(
|
||||||
{
|
{
|
||||||
"id": {"en": "ID", "fr": "ID"},
|
"id": {"en": "ID", "fr": "ID"},
|
||||||
"workflowId": {"en": "Workflow ID", "fr": "ID du flux de travail"},
|
"workflowId": {"en": "Workflow ID", "fr": "ID du flux de travail"},
|
||||||
"agentName": {"en": "Agent Name", "fr": "Nom de l'agent"},
|
|
||||||
"status": {"en": "Status", "fr": "Statut"},
|
"status": {"en": "Status", "fr": "Statut"},
|
||||||
"progress": {"en": "Progress", "fr": "Progression"},
|
|
||||||
"prompt": {"en": "Prompt", "fr": "Invite"},
|
|
||||||
"userLanguage": {"en": "User Language", "fr": "Langue de l'utilisateur"},
|
|
||||||
"filesInput": {"en": "Input Files", "fr": "Fichiers d'entrée"},
|
|
||||||
"filesOutput": {"en": "Output Files", "fr": "Fichiers de sortie"},
|
|
||||||
"result": {"en": "Result", "fr": "Résultat"},
|
|
||||||
"error": {"en": "Error", "fr": "Erreur"},
|
"error": {"en": "Error", "fr": "Erreur"},
|
||||||
"startedAt": {"en": "Started At", "fr": "Démarré le"},
|
"startedAt": {"en": "Started At", "fr": "Démarré le"},
|
||||||
"finishedAt": {"en": "Finished At", "fr": "Terminé le"},
|
"finishedAt": {"en": "Finished At", "fr": "Terminé le"},
|
||||||
"performance": {"en": "Performance", "fr": "Performance"}
|
"actionList": {"en": "Action List", "fr": "Liste d'actions"},
|
||||||
|
"documentsOutput": {"en": "Output Documents", "fr": "Documents de sortie"},
|
||||||
|
"retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"},
|
||||||
|
"retryMax": {"en": "Max Retries", "fr": "Tentatives maximales"},
|
||||||
|
"rollbackOnFailure": {"en": "Rollback on Failure", "fr": "Annulation en cas d'échec"},
|
||||||
|
"dependencies": {"en": "Dependencies", "fr": "Dépendances"},
|
||||||
|
"thisTaskFeedback": {"en": "Task Feedback", "fr": "Retour sur la tâche"}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
class Agent(BaseModel, ModelMixin):
|
# WORKFLOW MODEL
|
||||||
"""Data model for an agent"""
|
|
||||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
|
|
||||||
name: str = Field(description="Name of the agent")
|
|
||||||
description: str = Field(description="Description of the agent")
|
|
||||||
capabilities: List[str] = Field(default_factory=list, description="List of agent capabilities")
|
|
||||||
performance: Optional[Dict[str, Any]] = Field(None, description="Performance metrics")
|
|
||||||
# Register labels for Agent
|
|
||||||
register_model_labels(
|
|
||||||
"Agent",
|
|
||||||
{"en": "Agent", "fr": "Agent"},
|
|
||||||
{
|
|
||||||
"id": {"en": "ID", "fr": "ID"},
|
|
||||||
"name": {"en": "Name", "fr": "Nom"},
|
|
||||||
"description": {"en": "Description", "fr": "Description"},
|
|
||||||
"capabilities": {"en": "Capabilities", "fr": "Capacités"},
|
|
||||||
"performance": {"en": "Performance", "fr": "Performance"}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# WORKFLOW MODELS
|
|
||||||
|
|
||||||
class ChatWorkflow(BaseModel, ModelMixin):
|
class ChatWorkflow(BaseModel, ModelMixin):
|
||||||
"""Data model for a chat workflow"""
|
"""Data model for a chat workflow"""
|
||||||
|
|
@ -251,125 +382,3 @@ register_model_labels(
|
||||||
"tasks": {"en": "Tasks", "fr": "Tâches"}
|
"tasks": {"en": "Tasks", "fr": "Tâches"}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# DOCUMENT MODELS
|
|
||||||
|
|
||||||
class DocumentExtraction(BaseModel, ModelMixin):
|
|
||||||
"""Data model for document extraction history"""
|
|
||||||
timestamp: str = Field(description="Timestamp of extraction")
|
|
||||||
type: str = Field(description="Type of document")
|
|
||||||
sections: List[str] = Field(default_factory=list, description="Extracted sections")
|
|
||||||
metadata: Dict[str, Any] = Field(default_factory=dict, description="Extraction metadata")
|
|
||||||
|
|
||||||
# Register labels for DocumentExtraction
|
|
||||||
register_model_labels(
|
|
||||||
"DocumentExtraction",
|
|
||||||
{"en": "Document Extraction", "fr": "Extraction de document"},
|
|
||||||
{
|
|
||||||
"timestamp": {"en": "Timestamp", "fr": "Horodatage"},
|
|
||||||
"type": {"en": "Type", "fr": "Type"},
|
|
||||||
"sections": {"en": "Sections", "fr": "Sections"},
|
|
||||||
"metadata": {"en": "Metadata", "fr": "Métadonnées"}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
class DocumentContext(BaseModel, ModelMixin):
|
|
||||||
"""Data model for document context"""
|
|
||||||
id: str = Field(description="Document ID")
|
|
||||||
extractionHistory: List[DocumentExtraction] = Field(default_factory=list, description="History of extractions")
|
|
||||||
relevantSections: List[str] = Field(default_factory=list, description="Relevant sections")
|
|
||||||
processingStatus: Dict[str, str] = Field(default_factory=dict, description="Processing status")
|
|
||||||
|
|
||||||
# Register labels for DocumentContext
|
|
||||||
register_model_labels(
|
|
||||||
"DocumentContext",
|
|
||||||
{"en": "Document Context", "fr": "Contexte de document"},
|
|
||||||
{
|
|
||||||
"id": {"en": "ID", "fr": "ID"},
|
|
||||||
"extractionHistory": {"en": "Extraction History", "fr": "Historique d'extraction"},
|
|
||||||
"relevantSections": {"en": "Relevant Sections", "fr": "Sections pertinentes"},
|
|
||||||
"processingStatus": {"en": "Processing Status", "fr": "Statut de traitement"}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
class DocumentMetadata(BaseModel, ModelMixin):
|
|
||||||
"""Data model for document metadata"""
|
|
||||||
type: str = Field(description="Document type")
|
|
||||||
format: str = Field(description="Document format")
|
|
||||||
size: int = Field(description="Document size in bytes")
|
|
||||||
pages: Optional[int] = Field(None, description="Number of pages")
|
|
||||||
sections: Optional[List[str]] = Field(None, description="Document sections")
|
|
||||||
error: Optional[str] = Field(None, description="Processing error if any")
|
|
||||||
|
|
||||||
# Register labels for DocumentMetadata
|
|
||||||
register_model_labels(
|
|
||||||
"DocumentMetadata",
|
|
||||||
{"en": "Document Metadata", "fr": "Métadonnées de document"},
|
|
||||||
{
|
|
||||||
"type": {"en": "Type", "fr": "Type"},
|
|
||||||
"format": {"en": "Format", "fr": "Format"},
|
|
||||||
"size": {"en": "Size", "fr": "Taille"},
|
|
||||||
"pages": {"en": "Pages", "fr": "Pages"},
|
|
||||||
"sections": {"en": "Sections", "fr": "Sections"},
|
|
||||||
"error": {"en": "Error", "fr": "Erreur"}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
class ImageData(BaseModel, ModelMixin):
|
|
||||||
"""Data model for image data"""
|
|
||||||
data: str = Field(description="Base64 encoded image data")
|
|
||||||
format: str = Field(description="Image format")
|
|
||||||
page: Optional[int] = Field(None, description="Page number if from a multi-page document")
|
|
||||||
index: Optional[int] = Field(None, description="Image index in the document")
|
|
||||||
|
|
||||||
# Register labels for ImageData
|
|
||||||
register_model_labels(
|
|
||||||
"ImageData",
|
|
||||||
{"en": "Image Data", "fr": "Données d'image"},
|
|
||||||
{
|
|
||||||
"data": {"en": "Image Data", "fr": "Données d'image"},
|
|
||||||
"format": {"en": "Format", "fr": "Format"},
|
|
||||||
"page": {"en": "Page", "fr": "Page"},
|
|
||||||
"index": {"en": "Index", "fr": "Index"}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
class DocumentContent(BaseModel, ModelMixin):
|
|
||||||
"""Data model for document content"""
|
|
||||||
text: Optional[str] = Field(None, description="Extracted text content")
|
|
||||||
data: Optional[Dict[str, Any]] = Field(None, description="Structured data content")
|
|
||||||
images: Optional[List[ImageData]] = Field(None, description="Extracted images")
|
|
||||||
metadata: DocumentMetadata = Field(description="Document metadata")
|
|
||||||
|
|
||||||
# Register labels for DocumentContent
|
|
||||||
register_model_labels(
|
|
||||||
"DocumentContent",
|
|
||||||
{"en": "Document Content", "fr": "Contenu de document"},
|
|
||||||
{
|
|
||||||
"text": {"en": "Text", "fr": "Texte"},
|
|
||||||
"data": {"en": "Data", "fr": "Données"},
|
|
||||||
"images": {"en": "Images", "fr": "Images"},
|
|
||||||
"metadata": {"en": "Metadata", "fr": "Métadonnées"}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
class ProcessedDocument(BaseModel, ModelMixin):
|
|
||||||
"""Data model for processed document"""
|
|
||||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Document ID")
|
|
||||||
name: str = Field(description="Document name")
|
|
||||||
contentType: str = Field(description="Content type")
|
|
||||||
content: DocumentContent = Field(description="Document content")
|
|
||||||
context: Optional[DocumentContext] = Field(None, description="Document context")
|
|
||||||
|
|
||||||
# Register labels for ProcessedDocument
|
|
||||||
register_model_labels(
|
|
||||||
"ProcessedDocument",
|
|
||||||
{"en": "Processed Document", "fr": "Document traité"},
|
|
||||||
{
|
|
||||||
"id": {"en": "ID", "fr": "ID"},
|
|
||||||
"name": {"en": "Name", "fr": "Nom"},
|
|
||||||
"contentType": {"en": "Content Type", "fr": "Type de contenu"},
|
|
||||||
"content": {"en": "Content", "fr": "Contenu"},
|
|
||||||
"context": {"en": "Context", "fr": "Contexte"}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,6 @@ from typing import Dict, Any, List, Optional, Union
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
from modules.shared.mimeUtils import isTextMimeType
|
|
||||||
from modules.interfaces.serviceManagementAccess import ManagementAccess
|
from modules.interfaces.serviceManagementAccess import ManagementAccess
|
||||||
from modules.interfaces.serviceManagementModel import (
|
from modules.interfaces.serviceManagementModel import (
|
||||||
Prompt, FileItem, FileData
|
Prompt, FileItem, FileData
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,12 @@ from enum import Enum
|
||||||
from typing import Dict, List, Optional, Any, Literal
|
from typing import Dict, List, Optional, Any, Literal
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class AuthSource(str, Enum):
|
class AuthSource(str, Enum):
|
||||||
|
"""Authentication source enumeration"""
|
||||||
LOCAL = "local"
|
LOCAL = "local"
|
||||||
MSFT = "msft"
|
MSFT = "msft"
|
||||||
GOOGLE = "google"
|
GOOGLE = "google"
|
||||||
|
|
@ -23,52 +27,122 @@ class MethodResult(BaseModel):
|
||||||
data: Dict[str, Any]
|
data: Dict[str, Any]
|
||||||
metadata: Dict[str, Any] = Field(default_factory=dict)
|
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||||
validation: List[str] = Field(default_factory=list)
|
validation: List[str] = Field(default_factory=list)
|
||||||
|
error: Optional[str] = Field(None, description="Error message if any")
|
||||||
|
|
||||||
class MethodBase:
|
class MethodBase:
|
||||||
"""Base class for all methods"""
|
"""Base class for all methods"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, serviceContainer: Any):
|
||||||
|
"""Initialize method with service container"""
|
||||||
|
self.service = serviceContainer
|
||||||
self.name: str
|
self.name: str
|
||||||
self.description: str
|
self.description: str
|
||||||
self.auth_source: AuthSource = AuthSource.LOCAL # Default to local auth
|
self.authSource: AuthSource = AuthSource.LOCAL # Default to local auth
|
||||||
|
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def actions(self) -> Dict[str, Dict[str, Any]]:
|
def actions(self) -> Dict[str, Dict[str, Any]]:
|
||||||
"""Available actions and their parameters"""
|
"""Available actions and their parameters"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
|
async def execute(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
|
||||||
"""Execute method action with authentication data"""
|
"""
|
||||||
|
Execute method action with authentication data
|
||||||
|
|
||||||
|
Args:
|
||||||
|
action: The action to execute
|
||||||
|
parameters: Action parameters
|
||||||
|
authData: Authentication data
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
MethodResult containing execution results
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If action is not supported
|
||||||
|
RuntimeError: If authentication fails
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Validate action
|
||||||
|
if action not in self.actions:
|
||||||
|
raise ValueError(f"Unsupported action: {action}")
|
||||||
|
|
||||||
|
# Validate parameters
|
||||||
|
if not await self.validateParameters(action, parameters):
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={},
|
||||||
|
error="Invalid parameters"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Validate authentication
|
||||||
|
if not self._validateAuth(authData):
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={},
|
||||||
|
error="Authentication failed"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Execute action
|
||||||
|
return await self._executeAction(action, parameters, authData)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error executing action {action}: {str(e)}")
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={},
|
||||||
|
error=str(e)
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _executeAction(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
|
||||||
|
"""Execute specific action - to be implemented by subclasses"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
async def validate_parameters(self, action: str, parameters: Dict[str, Any]) -> bool:
|
async def validateParameters(self, action: str, parameters: Dict[str, Any]) -> bool:
|
||||||
"""Validate action parameters"""
|
"""Validate action parameters"""
|
||||||
if action not in self.actions:
|
try:
|
||||||
return False
|
if action not in self.actions:
|
||||||
|
return False
|
||||||
|
|
||||||
|
actionDef = self.actions[action]
|
||||||
|
requiredParams = {k for k, v in actionDef['parameters'].items() if v['required']}
|
||||||
|
return all(param in parameters for param in requiredParams)
|
||||||
|
|
||||||
action_def = self.actions[action]
|
except Exception as e:
|
||||||
required_params = {k for k, v in action_def['parameters'].items() if v['required']}
|
self.logger.error(f"Error validating parameters: {str(e)}")
|
||||||
return all(param in parameters for param in required_params)
|
return False
|
||||||
|
|
||||||
async def rollback(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> None:
|
async def rollback(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> None:
|
||||||
"""Rollback action if needed"""
|
"""Rollback action if needed"""
|
||||||
|
try:
|
||||||
|
await self._rollbackAction(action, parameters, authData)
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error rolling back action {action}: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
async def _rollbackAction(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> None:
|
||||||
|
"""Rollback specific action - to be implemented by subclasses"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def _validate_auth(self, auth_data: Optional[Dict[str, Any]] = None) -> bool:
|
def _validateAuth(self, authData: Optional[Dict[str, Any]] = None) -> bool:
|
||||||
"""Validate authentication data"""
|
"""Validate authentication data"""
|
||||||
if self.auth_source == AuthSource.LOCAL:
|
try:
|
||||||
return True
|
if self.authSource == AuthSource.LOCAL:
|
||||||
return bool(auth_data and auth_data.get('source') == self.auth_source)
|
return True
|
||||||
|
return bool(authData and authData.get('source') == self.authSource)
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error validating auth: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
def _create_result(self, success: bool, data: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None) -> MethodResult:
|
def _createResult(self, success: bool, data: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None, error: Optional[str] = None) -> MethodResult:
|
||||||
"""Create a method result"""
|
"""Create a method result"""
|
||||||
return MethodResult(
|
return MethodResult(
|
||||||
success=success,
|
success=success,
|
||||||
data=data,
|
data=data,
|
||||||
metadata=metadata or {},
|
metadata=metadata or {},
|
||||||
validation=[]
|
validation=[],
|
||||||
|
error=error
|
||||||
)
|
)
|
||||||
|
|
||||||
def _add_validation_message(self, result: MethodResult, message: str) -> None:
|
def _addValidationMessage(self, result: MethodResult, message: str) -> None:
|
||||||
"""Add a validation message to the result"""
|
"""Add a validation message to the result"""
|
||||||
result.validation.append(message)
|
result.validation.append(message)
|
||||||
|
|
@ -14,7 +14,7 @@ class MethodCoder(MethodBase):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.name = "coder"
|
self.name = "coder"
|
||||||
self.description = "Handle code operations like analysis, generation, and refactoring"
|
self.description = "Handle code operations like analysis, generation, and refactoring"
|
||||||
self.auth_source = AuthSource.LOCAL # Code operations typically don't need auth
|
self.authSource = AuthSource.LOCAL # Code operations typically don't need auth
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def actions(self) -> Dict[str, Dict[str, Any]]:
|
def actions(self) -> Dict[str, Dict[str, Any]]:
|
||||||
|
|
@ -52,37 +52,37 @@ class MethodCoder(MethodBase):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
|
async def execute(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
|
||||||
"""Execute coder method"""
|
"""Execute coder method"""
|
||||||
try:
|
try:
|
||||||
# Validate parameters
|
# Validate parameters
|
||||||
if not await self.validate_parameters(action, parameters):
|
if not await self.validateParameters(action, parameters):
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Invalid parameters for {action}"}
|
data={"error": f"Invalid parameters for {action}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Execute action
|
# Execute action
|
||||||
if action == "analyze":
|
if action == "analyze":
|
||||||
return await self._analyze_code(parameters)
|
return await self._analyzeCode(parameters)
|
||||||
elif action == "generate":
|
elif action == "generate":
|
||||||
return await self._generate_code(parameters)
|
return await self._generateCode(parameters)
|
||||||
elif action == "refactor":
|
elif action == "refactor":
|
||||||
return await self._refactor_code(parameters)
|
return await self._refactorCode(parameters)
|
||||||
else:
|
else:
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Unknown action: {action}"}
|
data={"error": f"Unknown action: {action}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error executing coder {action}: {e}")
|
logger.error(f"Error executing coder {action}: {e}")
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": str(e)}
|
data={"error": str(e)}
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _analyze_code(self, parameters: Dict[str, Any]) -> MethodResult:
|
async def _analyzeCode(self, parameters: Dict[str, Any]) -> MethodResult:
|
||||||
"""Analyze code structure and quality"""
|
"""Analyze code structure and quality"""
|
||||||
try:
|
try:
|
||||||
code = parameters["code"]
|
code = parameters["code"]
|
||||||
|
|
@ -121,13 +121,13 @@ class MethodCoder(MethodBase):
|
||||||
if "complexity" in metrics:
|
if "complexity" in metrics:
|
||||||
for node in ast.walk(tree):
|
for node in ast.walk(tree):
|
||||||
if isinstance(node, ast.FunctionDef):
|
if isinstance(node, ast.FunctionDef):
|
||||||
body_lines = len(node.body)
|
bodyLines = len(node.body)
|
||||||
if body_lines > 20: # Arbitrary threshold
|
if bodyLines > 20: # Arbitrary threshold
|
||||||
analysis["issues"].append({
|
analysis["issues"].append({
|
||||||
"type": "long_function",
|
"type": "long_function",
|
||||||
"line": node.lineno,
|
"line": node.lineno,
|
||||||
"name": node.name,
|
"name": node.name,
|
||||||
"lines": body_lines
|
"lines": bodyLines
|
||||||
})
|
})
|
||||||
|
|
||||||
# Check for style issues
|
# Check for style issues
|
||||||
|
|
@ -149,18 +149,18 @@ class MethodCoder(MethodBase):
|
||||||
})
|
})
|
||||||
|
|
||||||
except SyntaxError as e:
|
except SyntaxError as e:
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Syntax error: {str(e)}"}
|
data={"error": f"Syntax error: {str(e)}"}
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# TODO: Implement analysis for other languages
|
# TODO: Implement analysis for other languages
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Unsupported language: {language}"}
|
data={"error": f"Unsupported language: {language}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"language": language,
|
"language": language,
|
||||||
|
|
@ -169,12 +169,12 @@ class MethodCoder(MethodBase):
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error analyzing code: {e}")
|
logger.error(f"Error analyzing code: {e}")
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Analysis failed: {str(e)}"}
|
data={"error": f"Analysis failed: {str(e)}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _generate_code(self, parameters: Dict[str, Any]) -> MethodResult:
|
async def _generateCode(self, parameters: Dict[str, Any]) -> MethodResult:
|
||||||
"""Generate code based on requirements"""
|
"""Generate code based on requirements"""
|
||||||
try:
|
try:
|
||||||
requirements = parameters["requirements"]
|
requirements = parameters["requirements"]
|
||||||
|
|
@ -185,8 +185,8 @@ class MethodCoder(MethodBase):
|
||||||
# This is a placeholder implementation
|
# This is a placeholder implementation
|
||||||
if language.lower() == "python":
|
if language.lower() == "python":
|
||||||
# Generate a simple Python class based on requirements
|
# Generate a simple Python class based on requirements
|
||||||
class_name = re.sub(r'[^a-zA-Z0-9]', '', requirements.split()[0].title())
|
className = re.sub(r'[^a-zA-Z0-9]', '', requirements.split()[0].title())
|
||||||
code = f"""class {class_name}:
|
code = f"""class {className}:
|
||||||
\"\"\"
|
\"\"\"
|
||||||
{requirements}
|
{requirements}
|
||||||
\"\"\"
|
\"\"\"
|
||||||
|
|
@ -198,12 +198,12 @@ class MethodCoder(MethodBase):
|
||||||
pass
|
pass
|
||||||
"""
|
"""
|
||||||
else:
|
else:
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Unsupported language: {language}"}
|
data={"error": f"Unsupported language: {language}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"language": language,
|
"language": language,
|
||||||
|
|
@ -212,12 +212,12 @@ class MethodCoder(MethodBase):
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error generating code: {e}")
|
logger.error(f"Error generating code: {e}")
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Generation failed: {str(e)}"}
|
data={"error": f"Generation failed: {str(e)}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _refactor_code(self, parameters: Dict[str, Any]) -> MethodResult:
|
async def _refactorCode(self, parameters: Dict[str, Any]) -> MethodResult:
|
||||||
"""Refactor code for better quality"""
|
"""Refactor code for better quality"""
|
||||||
try:
|
try:
|
||||||
code = parameters["code"]
|
code = parameters["code"]
|
||||||
|
|
@ -246,17 +246,17 @@ class MethodCoder(MethodBase):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
except SyntaxError as e:
|
except SyntaxError as e:
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Syntax error: {str(e)}"}
|
data={"error": f"Syntax error: {str(e)}"}
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Unsupported language: {language}"}
|
data={"error": f"Unsupported language: {language}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"language": language,
|
"language": language,
|
||||||
|
|
@ -266,7 +266,7 @@ class MethodCoder(MethodBase):
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error refactoring code: {e}")
|
logger.error(f"Error refactoring code: {e}")
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Refactoring failed: {str(e)}"}
|
data={"error": f"Refactoring failed: {str(e)}"}
|
||||||
)
|
)
|
||||||
|
|
@ -1,287 +1,215 @@
|
||||||
from typing import Dict, Any, Optional
|
"""
|
||||||
import logging
|
Document processing method module.
|
||||||
import os
|
Handles document operations using the document service.
|
||||||
from pathlib import Path
|
"""
|
||||||
import docx
|
|
||||||
import PyPDF2
|
|
||||||
import json
|
|
||||||
import yaml
|
|
||||||
import xml.etree.ElementTree as ET
|
|
||||||
from datetime import datetime, UTC
|
|
||||||
|
|
||||||
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
|
import logging
|
||||||
|
from typing import Dict, Any, List, Optional
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from modules.interfaces.serviceChatModel import (
|
||||||
|
ChatDocument,
|
||||||
|
TaskDocument,
|
||||||
|
ExtractedContent,
|
||||||
|
ContentItem
|
||||||
|
)
|
||||||
|
from modules.workflow.managerDocument import DocumentManager
|
||||||
|
from modules.methods.methodBase import MethodBase
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class MethodDocument(MethodBase):
|
class MethodDocument(MethodBase):
|
||||||
"""Document method implementation for document operations"""
|
"""Document processing method implementation"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, serviceContainer):
|
||||||
super().__init__()
|
"""Initialize the document method"""
|
||||||
self.name = "document"
|
super().__init__(serviceContainer)
|
||||||
self.description = "Handle document operations like reading, writing, and converting documents"
|
self.documentManager = DocumentManager(serviceContainer)
|
||||||
self.auth_source = AuthSource.LOCAL # Document operations typically don't need auth
|
|
||||||
|
async def process(self, action: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Process document operations
|
||||||
|
|
||||||
@property
|
Args:
|
||||||
def actions(self) -> Dict[str, Dict[str, Any]]:
|
action: The action to perform
|
||||||
"""Available actions and their parameters"""
|
parameters: Action parameters
|
||||||
return {
|
|
||||||
"read": {
|
|
||||||
"description": "Read document content",
|
|
||||||
"retryMax": 2,
|
|
||||||
"timeout": 30,
|
|
||||||
"parameters": {
|
|
||||||
"path": {"type": "string", "required": True},
|
|
||||||
"format": {"type": "string", "required": False},
|
|
||||||
"encoding": {"type": "string", "required": False},
|
|
||||||
"includeMetadata": {"type": "boolean", "required": False}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"write": {
|
|
||||||
"description": "Write content to document",
|
|
||||||
"retryMax": 2,
|
|
||||||
"timeout": 30,
|
|
||||||
"parameters": {
|
|
||||||
"path": {"type": "string", "required": True},
|
|
||||||
"content": {"type": "string", "required": True},
|
|
||||||
"format": {"type": "string", "required": False},
|
|
||||||
"encoding": {"type": "string", "required": False},
|
|
||||||
"template": {"type": "string", "required": False}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"convert": {
|
|
||||||
"description": "Convert document between formats",
|
|
||||||
"retryMax": 2,
|
|
||||||
"timeout": 60,
|
|
||||||
"parameters": {
|
|
||||||
"sourcePath": {"type": "string", "required": True},
|
|
||||||
"targetPath": {"type": "string", "required": True},
|
|
||||||
"sourceFormat": {"type": "string", "required": False},
|
|
||||||
"targetFormat": {"type": "string", "required": False},
|
|
||||||
"options": {"type": "object", "required": False}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
|
|
||||||
"""Execute document method"""
|
|
||||||
try:
|
|
||||||
# Validate parameters
|
|
||||||
if not await self.validate_parameters(action, parameters):
|
|
||||||
return self._create_result(
|
|
||||||
success=False,
|
|
||||||
data={"error": f"Invalid parameters for {action}"}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Execute action
|
Returns:
|
||||||
if action == "read":
|
Dictionary containing the operation result
|
||||||
return await self._read_document(parameters)
|
|
||||||
elif action == "write":
|
Raises:
|
||||||
return await self._write_document(parameters)
|
ValueError: If action is not supported
|
||||||
elif action == "convert":
|
"""
|
||||||
return await self._convert_document(parameters)
|
try:
|
||||||
|
if action == "extract":
|
||||||
|
return await self._extractContent(parameters)
|
||||||
|
elif action == "analyze":
|
||||||
|
return await self._analyzeDocument(parameters)
|
||||||
|
elif action == "summarize":
|
||||||
|
return await self._summarizeDocument(parameters)
|
||||||
else:
|
else:
|
||||||
return self._create_result(
|
raise ValueError(f"Unsupported action: {action}")
|
||||||
success=False,
|
except Exception as e:
|
||||||
data={"error": f"Unknown action: {action}"}
|
logger.error(f"Error processing document action {action}: {str(e)}")
|
||||||
)
|
raise
|
||||||
|
|
||||||
|
async def _extractContent(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Extract content from a document
|
||||||
|
|
||||||
|
Args:
|
||||||
|
parameters: Dictionary containing:
|
||||||
|
- documentId: ID of the document to process
|
||||||
|
- documentType: Type of document ('ChatDocument' or 'TaskDocument')
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary containing extracted content
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
documentId = parameters.get("documentId")
|
||||||
|
documentType = parameters.get("documentType", "ChatDocument")
|
||||||
|
|
||||||
|
if not documentId:
|
||||||
|
raise ValueError("documentId is required")
|
||||||
|
|
||||||
|
# Get document from database
|
||||||
|
if documentType == "ChatDocument":
|
||||||
|
document = await self._getChatDocument(documentId)
|
||||||
|
if not document:
|
||||||
|
raise ValueError(f"ChatDocument {documentId} not found")
|
||||||
|
extracted = await self.documentManager.extractFromChatDocument(document)
|
||||||
|
else:
|
||||||
|
document = await self._getTaskDocument(documentId)
|
||||||
|
if not document:
|
||||||
|
raise ValueError(f"TaskDocument {documentId} not found")
|
||||||
|
extracted = await self.documentManager.extractFromTaskDocument(document)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"content": extracted.dict(),
|
||||||
|
"metadata": await self.documentManager.getDocumentMetadata(document)
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error extracting content: {str(e)}")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": str(e)
|
||||||
|
}
|
||||||
|
|
||||||
|
async def _analyzeDocument(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Analyze document content
|
||||||
|
|
||||||
|
Args:
|
||||||
|
parameters: Dictionary containing:
|
||||||
|
- documentId: ID of the document to analyze
|
||||||
|
- documentType: Type of document
|
||||||
|
- analysisType: Type of analysis to perform
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary containing analysis results
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Extract content first
|
||||||
|
contentResult = await self._extractContent(parameters)
|
||||||
|
if not contentResult["success"]:
|
||||||
|
return contentResult
|
||||||
|
|
||||||
|
# Perform analysis based on type
|
||||||
|
analysisType = parameters.get("analysisType", "basic")
|
||||||
|
content = ExtractedContent(**contentResult["content"])
|
||||||
|
|
||||||
|
if analysisType == "basic":
|
||||||
|
# Basic analysis: count items, calculate statistics
|
||||||
|
stats = {
|
||||||
|
"totalItems": len(content.contents),
|
||||||
|
"totalSize": sum(item.metadata.size for item in content.contents),
|
||||||
|
"itemTypes": {}
|
||||||
|
}
|
||||||
|
|
||||||
|
for item in content.contents:
|
||||||
|
itemType = item.label
|
||||||
|
if itemType not in stats["itemTypes"]:
|
||||||
|
stats["itemTypes"][itemType] = 0
|
||||||
|
stats["itemTypes"][itemType] += 1
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"analysis": stats
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unsupported analysis type: {analysisType}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error executing document {action}: {e}")
|
logger.error(f"Error analyzing document: {str(e)}")
|
||||||
return self._create_result(
|
return {
|
||||||
success=False,
|
"success": False,
|
||||||
data={"error": str(e)}
|
"error": str(e)
|
||||||
)
|
|
||||||
|
|
||||||
async def _read_document(self, parameters: Dict[str, Any]) -> MethodResult:
|
|
||||||
"""Read document content"""
|
|
||||||
try:
|
|
||||||
path = Path(parameters["path"])
|
|
||||||
if not path.exists():
|
|
||||||
return self._create_result(
|
|
||||||
success=False,
|
|
||||||
data={"error": f"File not found: {path}"}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Determine format if not specified
|
|
||||||
format = parameters.get("format")
|
|
||||||
if not format:
|
|
||||||
format = path.suffix[1:] if path.suffix else "txt"
|
|
||||||
|
|
||||||
# Read content based on format
|
|
||||||
content = ""
|
|
||||||
encoding = parameters.get("encoding", "utf-8")
|
|
||||||
include_metadata = parameters.get("includeMetadata", False)
|
|
||||||
|
|
||||||
if format.lower() in ["txt", "md"]:
|
|
||||||
with open(path, "r", encoding=encoding) as f:
|
|
||||||
content = f.read()
|
|
||||||
elif format.lower() == "docx":
|
|
||||||
doc = docx.Document(path)
|
|
||||||
content = "\n".join([paragraph.text for paragraph in doc.paragraphs])
|
|
||||||
elif format.lower() == "pdf":
|
|
||||||
with open(path, "rb") as f:
|
|
||||||
pdf = PyPDF2.PdfReader(f)
|
|
||||||
content = "\n".join([page.extract_text() for page in pdf.pages])
|
|
||||||
elif format.lower() == "json":
|
|
||||||
with open(path, "r", encoding=encoding) as f:
|
|
||||||
content = json.load(f)
|
|
||||||
elif format.lower() == "yaml":
|
|
||||||
with open(path, "r", encoding=encoding) as f:
|
|
||||||
content = yaml.safe_load(f)
|
|
||||||
elif format.lower() == "xml":
|
|
||||||
tree = ET.parse(path)
|
|
||||||
root = tree.getroot()
|
|
||||||
content = ET.tostring(root, encoding=encoding).decode(encoding)
|
|
||||||
else:
|
|
||||||
return self._create_result(
|
|
||||||
success=False,
|
|
||||||
data={"error": f"Unsupported format: {format}"}
|
|
||||||
)
|
|
||||||
|
|
||||||
result = {
|
|
||||||
"path": str(path),
|
|
||||||
"format": format,
|
|
||||||
"content": content
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if include_metadata:
|
|
||||||
result["metadata"] = {
|
|
||||||
"size": path.stat().st_size,
|
|
||||||
"modified": datetime.fromtimestamp(path.stat().st_mtime, UTC).isoformat(),
|
|
||||||
"created": datetime.fromtimestamp(path.stat().st_ctime, UTC).isoformat()
|
|
||||||
}
|
|
||||||
|
|
||||||
return self._create_result(
|
|
||||||
success=True,
|
|
||||||
data=result
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error reading document: {e}")
|
|
||||||
return self._create_result(
|
|
||||||
success=False,
|
|
||||||
data={"error": f"Read failed: {str(e)}"}
|
|
||||||
)
|
|
||||||
|
|
||||||
async def _write_document(self, parameters: Dict[str, Any]) -> MethodResult:
|
async def _summarizeDocument(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Write content to document"""
|
"""
|
||||||
|
Generate document summary
|
||||||
|
|
||||||
|
Args:
|
||||||
|
parameters: Dictionary containing:
|
||||||
|
- documentId: ID of the document to summarize
|
||||||
|
- documentType: Type of document
|
||||||
|
- summaryType: Type of summary to generate
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary containing summary
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
path = Path(parameters["path"])
|
# Extract content first
|
||||||
|
contentResult = await self._extractContent(parameters)
|
||||||
|
if not contentResult["success"]:
|
||||||
|
return contentResult
|
||||||
|
|
||||||
# Create directory if it doesn't exist
|
# Generate summary based on type
|
||||||
path.parent.mkdir(parents=True, exist_ok=True)
|
summaryType = parameters.get("summaryType", "basic")
|
||||||
|
content = ExtractedContent(**contentResult["content"])
|
||||||
|
|
||||||
# Determine format if not specified
|
if summaryType == "basic":
|
||||||
format = parameters.get("format")
|
# Basic summary: concatenate all text content
|
||||||
if not format:
|
summary = "\n".join(
|
||||||
format = path.suffix[1:] if path.suffix else "txt"
|
item.data for item in content.contents
|
||||||
|
if item.label == "main"
|
||||||
# Write content based on format
|
|
||||||
encoding = parameters.get("encoding", "utf-8")
|
|
||||||
content = parameters["content"]
|
|
||||||
template = parameters.get("template")
|
|
||||||
|
|
||||||
if format.lower() in ["txt", "md"]:
|
|
||||||
with open(path, "w", encoding=encoding) as f:
|
|
||||||
f.write(content)
|
|
||||||
elif format.lower() == "docx":
|
|
||||||
if template:
|
|
||||||
doc = docx.Document(template)
|
|
||||||
else:
|
|
||||||
doc = docx.Document()
|
|
||||||
doc.add_paragraph(content)
|
|
||||||
doc.save(path)
|
|
||||||
elif format.lower() == "pdf":
|
|
||||||
# TODO: Implement PDF writing
|
|
||||||
return self._create_result(
|
|
||||||
success=False,
|
|
||||||
data={"error": "PDF writing not implemented yet"}
|
|
||||||
)
|
)
|
||||||
elif format.lower() == "json":
|
|
||||||
with open(path, "w", encoding=encoding) as f:
|
return {
|
||||||
json.dump(content, f, indent=2)
|
"success": True,
|
||||||
elif format.lower() == "yaml":
|
"summary": summary
|
||||||
with open(path, "w", encoding=encoding) as f:
|
}
|
||||||
yaml.dump(content, f)
|
|
||||||
elif format.lower() == "xml":
|
|
||||||
with open(path, "w", encoding=encoding) as f:
|
|
||||||
f.write(content)
|
|
||||||
else:
|
else:
|
||||||
return self._create_result(
|
raise ValueError(f"Unsupported summary type: {summaryType}")
|
||||||
success=False,
|
|
||||||
data={"error": f"Unsupported format: {format}"}
|
|
||||||
)
|
|
||||||
|
|
||||||
return self._create_result(
|
|
||||||
success=True,
|
|
||||||
data={
|
|
||||||
"path": str(path),
|
|
||||||
"format": format,
|
|
||||||
"size": path.stat().st_size,
|
|
||||||
"modified": datetime.now(UTC).isoformat()
|
|
||||||
}
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error writing document: {e}")
|
logger.error(f"Error summarizing document: {str(e)}")
|
||||||
return self._create_result(
|
return {
|
||||||
success=False,
|
"success": False,
|
||||||
data={"error": f"Write failed: {str(e)}"}
|
"error": str(e)
|
||||||
)
|
}
|
||||||
|
|
||||||
async def _convert_document(self, parameters: Dict[str, Any]) -> MethodResult:
|
async def _getChatDocument(self, documentId: str) -> Optional[ChatDocument]:
|
||||||
"""Convert document between formats"""
|
"""Get ChatDocument from database"""
|
||||||
try:
|
try:
|
||||||
source_path = Path(parameters["sourcePath"])
|
documentData = self.service.db.getRecord("chatDocuments", documentId)
|
||||||
target_path = Path(parameters["targetPath"])
|
if documentData:
|
||||||
|
return ChatDocument(**documentData)
|
||||||
if not source_path.exists():
|
return None
|
||||||
return self._create_result(
|
|
||||||
success=False,
|
|
||||||
data={"error": f"Source file not found: {source_path}"}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Determine formats if not specified
|
|
||||||
source_format = parameters.get("sourceFormat")
|
|
||||||
if not source_format:
|
|
||||||
source_format = source_path.suffix[1:] if source_path.suffix else "txt"
|
|
||||||
|
|
||||||
target_format = parameters.get("targetFormat")
|
|
||||||
if not target_format:
|
|
||||||
target_format = target_path.suffix[1:] if target_path.suffix else "txt"
|
|
||||||
|
|
||||||
# Read source content
|
|
||||||
source_content = await self._read_document({
|
|
||||||
"path": str(source_path),
|
|
||||||
"format": source_format
|
|
||||||
})
|
|
||||||
|
|
||||||
if not source_content.success:
|
|
||||||
return source_content
|
|
||||||
|
|
||||||
# Write target content
|
|
||||||
target_content = await self._write_document({
|
|
||||||
"path": str(target_path),
|
|
||||||
"content": source_content.data["content"],
|
|
||||||
"format": target_format
|
|
||||||
})
|
|
||||||
|
|
||||||
if not target_content.success:
|
|
||||||
return target_content
|
|
||||||
|
|
||||||
return self._create_result(
|
|
||||||
success=True,
|
|
||||||
data={
|
|
||||||
"sourcePath": str(source_path),
|
|
||||||
"targetPath": str(target_path),
|
|
||||||
"sourceFormat": source_format,
|
|
||||||
"targetFormat": target_format,
|
|
||||||
"size": target_path.stat().st_size,
|
|
||||||
"modified": datetime.now(UTC).isoformat()
|
|
||||||
}
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error converting document: {e}")
|
logger.error(f"Error getting ChatDocument {documentId}: {str(e)}")
|
||||||
return self._create_result(
|
return None
|
||||||
success=False,
|
|
||||||
data={"error": f"Conversion failed: {str(e)}"}
|
async def _getTaskDocument(self, documentId: str) -> Optional[TaskDocument]:
|
||||||
)
|
"""Get TaskDocument from database"""
|
||||||
|
try:
|
||||||
|
documentData = self.service.db.getRecord("taskDocuments", documentId)
|
||||||
|
if documentData:
|
||||||
|
return TaskDocument(**documentData)
|
||||||
|
return None
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting TaskDocument {documentId}: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
@ -15,7 +15,7 @@ class MethodOutlook(MethodBase):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.name = "outlook"
|
self.name = "outlook"
|
||||||
self.description = "Handle Outlook email operations like reading and sending emails"
|
self.description = "Handle Outlook email operations like reading and sending emails"
|
||||||
self.auth_source = AuthSource.MICROSOFT
|
self.authSource = AuthSource.MICROSOFT
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def actions(self) -> Dict[str, Dict[str, Any]]:
|
def actions(self) -> Dict[str, Dict[str, Any]]:
|
||||||
|
|
@ -47,54 +47,54 @@ class MethodOutlook(MethodBase):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
|
async def execute(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
|
||||||
"""Execute Outlook method"""
|
"""Execute Outlook method"""
|
||||||
try:
|
try:
|
||||||
# Validate parameters
|
# Validate parameters
|
||||||
if not await self.validate_parameters(action, parameters):
|
if not await self.validateParameters(action, parameters):
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Invalid parameters for {action}"}
|
data={"error": f"Invalid parameters for {action}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get UserConnection from auth_data
|
# Get UserConnection from auth_data
|
||||||
if not auth_data or "userConnection" not in auth_data:
|
if not authData or "userConnection" not in authData:
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": "UserConnection required for Outlook operations"}
|
data={"error": "UserConnection required for Outlook operations"}
|
||||||
)
|
)
|
||||||
|
|
||||||
user_connection: UserConnection = auth_data["userConnection"]
|
userConnection: UserConnection = authData["userConnection"]
|
||||||
|
|
||||||
# Execute action
|
# Execute action
|
||||||
if action == "readMails":
|
if action == "readMails":
|
||||||
return await self._read_mails(parameters, user_connection)
|
return await self._readMails(parameters, userConnection)
|
||||||
elif action == "sendMail":
|
elif action == "sendMail":
|
||||||
return await self._send_mail(parameters, user_connection)
|
return await self._sendMail(parameters, userConnection)
|
||||||
else:
|
else:
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Unknown action: {action}"}
|
data={"error": f"Unknown action: {action}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error executing Outlook {action}: {e}")
|
logger.error(f"Error executing Outlook {action}: {e}")
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": str(e)}
|
data={"error": str(e)}
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _read_mails(self, parameters: Dict[str, Any], user_connection: UserConnection) -> MethodResult:
|
async def _readMails(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
|
||||||
"""Read emails from Outlook"""
|
"""Read emails from Outlook"""
|
||||||
try:
|
try:
|
||||||
folder = parameters.get("folder", "inbox")
|
folder = parameters.get("folder", "inbox")
|
||||||
query = parameters.get("query")
|
query = parameters.get("query")
|
||||||
max_results = parameters.get("maxResults", 10)
|
maxResults = parameters.get("maxResults", 10)
|
||||||
include_attachments = parameters.get("includeAttachments", False)
|
includeAttachments = parameters.get("includeAttachments", False)
|
||||||
|
|
||||||
# Create Outlook account
|
# Create Outlook account
|
||||||
account = Account(
|
account = Account(
|
||||||
credentials=(user_connection.authToken, user_connection.refreshToken),
|
credentials=(userConnection.authToken, userConnection.refreshToken),
|
||||||
protocol=MSGraphProtocol()
|
protocol=MSGraphProtocol()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -102,18 +102,18 @@ class MethodOutlook(MethodBase):
|
||||||
mailbox = account.mailbox()
|
mailbox = account.mailbox()
|
||||||
|
|
||||||
# Get folder
|
# Get folder
|
||||||
target_folder = mailbox.folder(folder_name=folder)
|
targetFolder = mailbox.folder(folder_name=folder)
|
||||||
|
|
||||||
# Get messages
|
# Get messages
|
||||||
if query:
|
if query:
|
||||||
messages = target_folder.get_messages(query=query, limit=max_results)
|
messages = targetFolder.get_messages(query=query, limit=maxResults)
|
||||||
else:
|
else:
|
||||||
messages = target_folder.get_messages(limit=max_results)
|
messages = targetFolder.get_messages(limit=maxResults)
|
||||||
|
|
||||||
# Process messages
|
# Process messages
|
||||||
results = []
|
results = []
|
||||||
for message in messages:
|
for message in messages:
|
||||||
msg_data = {
|
msgData = {
|
||||||
"id": message.object_id,
|
"id": message.object_id,
|
||||||
"subject": message.subject,
|
"subject": message.subject,
|
||||||
"from": message.sender.address,
|
"from": message.sender.address,
|
||||||
|
|
@ -124,7 +124,7 @@ class MethodOutlook(MethodBase):
|
||||||
"hasAttachments": message.has_attachments
|
"hasAttachments": message.has_attachments
|
||||||
}
|
}
|
||||||
|
|
||||||
if include_attachments and message.has_attachments:
|
if includeAttachments and message.has_attachments:
|
||||||
attachments = []
|
attachments = []
|
||||||
for attachment in message.attachments:
|
for attachment in message.attachments:
|
||||||
attachments.append({
|
attachments.append({
|
||||||
|
|
@ -132,11 +132,11 @@ class MethodOutlook(MethodBase):
|
||||||
"contentType": attachment.content_type,
|
"contentType": attachment.content_type,
|
||||||
"size": attachment.size
|
"size": attachment.size
|
||||||
})
|
})
|
||||||
msg_data["attachments"] = attachments
|
msgData["attachments"] = attachments
|
||||||
|
|
||||||
results.append(msg_data)
|
results.append(msgData)
|
||||||
|
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"folder": folder,
|
"folder": folder,
|
||||||
|
|
@ -146,24 +146,24 @@ class MethodOutlook(MethodBase):
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error reading Outlook emails: {e}")
|
logger.error(f"Error reading Outlook emails: {e}")
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Read failed: {str(e)}"}
|
data={"error": f"Read failed: {str(e)}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _send_mail(self, parameters: Dict[str, Any], user_connection: UserConnection) -> MethodResult:
|
async def _sendMail(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
|
||||||
"""Send email through Outlook"""
|
"""Send email through Outlook"""
|
||||||
try:
|
try:
|
||||||
to_addresses = parameters["to"]
|
toAddresses = parameters["to"]
|
||||||
subject = parameters["subject"]
|
subject = parameters["subject"]
|
||||||
body = parameters["body"]
|
body = parameters["body"]
|
||||||
cc_addresses = parameters.get("cc", [])
|
ccAddresses = parameters.get("cc", [])
|
||||||
bcc_addresses = parameters.get("bcc", [])
|
bccAddresses = parameters.get("bcc", [])
|
||||||
attachments = parameters.get("attachments", [])
|
attachments = parameters.get("attachments", [])
|
||||||
|
|
||||||
# Create Outlook account
|
# Create Outlook account
|
||||||
account = Account(
|
account = Account(
|
||||||
credentials=(user_connection.authToken, user_connection.refreshToken),
|
credentials=(userConnection.authToken, userConnection.refreshToken),
|
||||||
protocol=MSGraphProtocol()
|
protocol=MSGraphProtocol()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -172,32 +172,32 @@ class MethodOutlook(MethodBase):
|
||||||
|
|
||||||
# Create new message
|
# Create new message
|
||||||
message = mailbox.new_message()
|
message = mailbox.new_message()
|
||||||
message.to.add(to_addresses)
|
message.to.add(toAddresses)
|
||||||
if cc_addresses:
|
if ccAddresses:
|
||||||
message.cc.add(cc_addresses)
|
message.cc.add(ccAddresses)
|
||||||
if bcc_addresses:
|
if bccAddresses:
|
||||||
message.bcc.add(bcc_addresses)
|
message.bcc.add(bccAddresses)
|
||||||
message.subject = subject
|
message.subject = subject
|
||||||
message.body = body
|
message.body = body
|
||||||
|
|
||||||
# Add attachments
|
# Add attachments
|
||||||
for attachment_path in attachments:
|
for attachmentPath in attachments:
|
||||||
message.attachments.add(attachment_path)
|
message.attachments.add(attachmentPath)
|
||||||
|
|
||||||
# Send message
|
# Send message
|
||||||
message.send()
|
message.send()
|
||||||
|
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"to": to_addresses,
|
"to": toAddresses,
|
||||||
"subject": subject,
|
"subject": subject,
|
||||||
"sent": datetime.now(UTC).isoformat()
|
"sent": datetime.now(UTC).isoformat()
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error sending Outlook email: {e}")
|
logger.error(f"Error sending Outlook email: {e}")
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Send failed: {str(e)}"}
|
data={"error": f"Send failed: {str(e)}"}
|
||||||
)
|
)
|
||||||
|
|
@ -4,6 +4,9 @@ import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
|
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
|
||||||
|
from modules.models.userConnection import UserConnection
|
||||||
|
from modules.models.account import Account
|
||||||
|
from modules.protocols.msGraphProtocol import MSGraphProtocol
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -14,7 +17,7 @@ class MethodPowerpoint(MethodBase):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.name = "powerpoint"
|
self.name = "powerpoint"
|
||||||
self.description = "Handle PowerPoint operations like reading, writing, and converting presentations"
|
self.description = "Handle PowerPoint operations like reading, writing, and converting presentations"
|
||||||
self.auth_source = AuthSource.MICROSOFT # PowerPoint operations need Microsoft auth
|
self.authSource = AuthSource.MICROSOFT # PowerPoint operations need Microsoft auth
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def actions(self) -> Dict[str, Dict[str, Any]]:
|
def actions(self) -> Dict[str, Dict[str, Any]]:
|
||||||
|
|
@ -50,52 +53,85 @@ class MethodPowerpoint(MethodBase):
|
||||||
"sourceFormat": {"type": "string", "required": False},
|
"sourceFormat": {"type": "string", "required": False},
|
||||||
"targetFormat": {"type": "string", "required": False}
|
"targetFormat": {"type": "string", "required": False}
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"createPresentation": {
|
||||||
|
"description": "Create a new PowerPoint presentation",
|
||||||
|
"retryMax": 2,
|
||||||
|
"timeout": 60,
|
||||||
|
"parameters": {
|
||||||
|
"title": {"type": "string", "required": True},
|
||||||
|
"template": {"type": "string", "required": False}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"addSlide": {
|
||||||
|
"description": "Add a new slide to presentation",
|
||||||
|
"retryMax": 2,
|
||||||
|
"timeout": 60,
|
||||||
|
"parameters": {
|
||||||
|
"presentationId": {"type": "string", "required": True},
|
||||||
|
"layout": {"type": "string", "required": False},
|
||||||
|
"title": {"type": "string", "required": False}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"addContent": {
|
||||||
|
"description": "Add content to a slide",
|
||||||
|
"retryMax": 2,
|
||||||
|
"timeout": 60,
|
||||||
|
"parameters": {
|
||||||
|
"presentationId": {"type": "string", "required": True},
|
||||||
|
"slideId": {"type": "string", "required": True},
|
||||||
|
"contentType": {"type": "string", "required": True},
|
||||||
|
"content": {"type": "object", "required": True},
|
||||||
|
"position": {"type": "object", "required": False}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> MethodResult:
|
async def execute(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
|
||||||
"""Execute powerpoint method"""
|
"""Execute PowerPoint method"""
|
||||||
try:
|
try:
|
||||||
# Validate parameters
|
# Validate parameters
|
||||||
if not await self.validate_parameters(action, parameters):
|
if not await self.validateParameters(action, parameters):
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Invalid parameters for {action}"}
|
data={"error": f"Invalid parameters for {action}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Validate authentication
|
# Get UserConnection from auth_data
|
||||||
if not await self.validate_auth(auth_data):
|
if not authData or "userConnection" not in authData:
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": "Authentication required for PowerPoint operations"}
|
data={"error": "UserConnection required for PowerPoint operations"}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
userConnection: UserConnection = authData["userConnection"]
|
||||||
|
|
||||||
# Execute action
|
# Execute action
|
||||||
if action == "read":
|
if action == "createPresentation":
|
||||||
return await self._read_presentation(parameters, auth_data)
|
return await self._createPresentation(parameters, userConnection)
|
||||||
elif action == "write":
|
elif action == "addSlide":
|
||||||
return await self._write_presentation(parameters, auth_data)
|
return await self._addSlide(parameters, userConnection)
|
||||||
elif action == "convert":
|
elif action == "addContent":
|
||||||
return await self._convert_presentation(parameters, auth_data)
|
return await self._addContent(parameters, userConnection)
|
||||||
else:
|
else:
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Unknown action: {action}"}
|
data={"error": f"Unknown action: {action}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error executing powerpoint {action}: {e}")
|
logger.error(f"Error executing PowerPoint {action}: {e}")
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": str(e)}
|
data={"error": str(e)}
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _read_presentation(self, parameters: Dict[str, Any], auth_data: Dict[str, Any]) -> MethodResult:
|
async def _read_presentation(self, parameters: Dict[str, Any], authData: Dict[str, Any]) -> MethodResult:
|
||||||
"""Read PowerPoint presentation content"""
|
"""Read PowerPoint presentation content"""
|
||||||
try:
|
try:
|
||||||
path = Path(parameters["path"])
|
path = Path(parameters["path"])
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"File not found: {path}"}
|
data={"error": f"File not found: {path}"}
|
||||||
)
|
)
|
||||||
|
|
@ -107,7 +143,7 @@ class MethodPowerpoint(MethodBase):
|
||||||
|
|
||||||
# TODO: Implement PowerPoint reading using Microsoft Graph API
|
# TODO: Implement PowerPoint reading using Microsoft Graph API
|
||||||
# This is a placeholder implementation
|
# This is a placeholder implementation
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"path": str(path),
|
"path": str(path),
|
||||||
|
|
@ -124,12 +160,12 @@ class MethodPowerpoint(MethodBase):
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error reading presentation: {e}")
|
logger.error(f"Error reading presentation: {e}")
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Read failed: {str(e)}"}
|
data={"error": f"Read failed: {str(e)}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _write_presentation(self, parameters: Dict[str, Any], auth_data: Dict[str, Any]) -> MethodResult:
|
async def _write_presentation(self, parameters: Dict[str, Any], authData: Dict[str, Any]) -> MethodResult:
|
||||||
"""Write content to PowerPoint presentation"""
|
"""Write content to PowerPoint presentation"""
|
||||||
try:
|
try:
|
||||||
path = Path(parameters["path"])
|
path = Path(parameters["path"])
|
||||||
|
|
@ -144,7 +180,7 @@ class MethodPowerpoint(MethodBase):
|
||||||
|
|
||||||
# TODO: Implement PowerPoint writing using Microsoft Graph API
|
# TODO: Implement PowerPoint writing using Microsoft Graph API
|
||||||
# This is a placeholder implementation
|
# This is a placeholder implementation
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"path": str(path),
|
"path": str(path),
|
||||||
|
|
@ -154,19 +190,19 @@ class MethodPowerpoint(MethodBase):
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error writing presentation: {e}")
|
logger.error(f"Error writing presentation: {e}")
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Write failed: {str(e)}"}
|
data={"error": f"Write failed: {str(e)}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _convert_presentation(self, parameters: Dict[str, Any], auth_data: Dict[str, Any]) -> MethodResult:
|
async def _convert_presentation(self, parameters: Dict[str, Any], authData: Dict[str, Any]) -> MethodResult:
|
||||||
"""Convert PowerPoint presentation between formats"""
|
"""Convert PowerPoint presentation between formats"""
|
||||||
try:
|
try:
|
||||||
source_path = Path(parameters["sourcePath"])
|
source_path = Path(parameters["sourcePath"])
|
||||||
target_path = Path(parameters["targetPath"])
|
target_path = Path(parameters["targetPath"])
|
||||||
|
|
||||||
if not source_path.exists():
|
if not source_path.exists():
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Source file not found: {source_path}"}
|
data={"error": f"Source file not found: {source_path}"}
|
||||||
)
|
)
|
||||||
|
|
@ -182,7 +218,7 @@ class MethodPowerpoint(MethodBase):
|
||||||
|
|
||||||
# TODO: Implement PowerPoint conversion using Microsoft Graph API
|
# TODO: Implement PowerPoint conversion using Microsoft Graph API
|
||||||
# This is a placeholder implementation
|
# This is a placeholder implementation
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"sourcePath": str(source_path),
|
"sourcePath": str(source_path),
|
||||||
|
|
@ -193,7 +229,148 @@ class MethodPowerpoint(MethodBase):
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error converting presentation: {e}")
|
logger.error(f"Error converting presentation: {e}")
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Conversion failed: {str(e)}"}
|
data={"error": f"Conversion failed: {str(e)}"}
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _createPresentation(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
|
||||||
|
"""Create a new PowerPoint presentation"""
|
||||||
|
try:
|
||||||
|
title = parameters["title"]
|
||||||
|
template = parameters.get("template")
|
||||||
|
|
||||||
|
# Create PowerPoint account
|
||||||
|
account = Account(
|
||||||
|
credentials=(userConnection.authToken, userConnection.refreshToken),
|
||||||
|
protocol=MSGraphProtocol()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get drive
|
||||||
|
drive = account.drive()
|
||||||
|
|
||||||
|
# Create presentation
|
||||||
|
if template:
|
||||||
|
# Copy template
|
||||||
|
templateFile = drive.get_item_by_path(template)
|
||||||
|
newFile = templateFile.copy(f"{title}.pptx")
|
||||||
|
else:
|
||||||
|
# Create blank presentation
|
||||||
|
newFile = drive.create_file(
|
||||||
|
name=f"{title}.pptx",
|
||||||
|
content_type="application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||||
|
)
|
||||||
|
|
||||||
|
return self._createResult(
|
||||||
|
success=True,
|
||||||
|
data={
|
||||||
|
"id": newFile.object_id,
|
||||||
|
"name": newFile.name,
|
||||||
|
"webUrl": newFile.web_url
|
||||||
|
}
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error creating PowerPoint presentation: {e}")
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={"error": f"Create failed: {str(e)}"}
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _addSlide(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
|
||||||
|
"""Add a new slide to presentation"""
|
||||||
|
try:
|
||||||
|
presentationId = parameters["presentationId"]
|
||||||
|
layout = parameters.get("layout", "title")
|
||||||
|
title = parameters.get("title")
|
||||||
|
|
||||||
|
# Create PowerPoint account
|
||||||
|
account = Account(
|
||||||
|
credentials=(userConnection.authToken, userConnection.refreshToken),
|
||||||
|
protocol=MSGraphProtocol()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get drive
|
||||||
|
drive = account.drive()
|
||||||
|
|
||||||
|
# Get presentation
|
||||||
|
presentation = drive.get_item_by_id(presentationId)
|
||||||
|
|
||||||
|
# Add slide
|
||||||
|
slide = presentation.add_slide(layout=layout)
|
||||||
|
if title:
|
||||||
|
slide.title = title
|
||||||
|
|
||||||
|
return self._createResult(
|
||||||
|
success=True,
|
||||||
|
data={
|
||||||
|
"slideId": slide.object_id,
|
||||||
|
"layout": layout,
|
||||||
|
"title": title
|
||||||
|
}
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error adding PowerPoint slide: {e}")
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={"error": f"Add slide failed: {str(e)}"}
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _addContent(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
    """Add a text box, picture or table to a slide.

    Required parameters: ``presentationId``, ``slideId``, ``contentType``
    (``"text"``, ``"image"`` or ``"table"``) and ``content``. For tables,
    ``content`` must provide ``rows`` and ``cols``. The optional
    ``position`` mapping supplies ``x``/``y``; a missing axis (or a missing
    or empty mapping) falls back to 0.

    Returns a successful result carrying ``shapeId``, ``contentType`` and
    ``position``. Any exception, including an unsupported content type, is
    logged and reported as a failed result.
    """
    try:
        presentationId = parameters["presentationId"]
        slideId = parameters["slideId"]
        contentType = parameters["contentType"]
        content = parameters["content"]
        position = parameters.get("position") or {"x": 0, "y": 0}

        # Reject unknown content types before any remote call is made
        if contentType not in ("text", "image", "table"):
            raise ValueError(f"Unsupported content type: {contentType}")

        # Tolerate a partially specified position instead of failing on a KeyError
        left = position.get("x", 0)
        top = position.get("y", 0)

        # Build the account object from the user's stored tokens
        account = Account(
            credentials=(userConnection.authToken, userConnection.refreshToken),
            protocol=MSGraphProtocol()
        )

        # Resolve the presentation in the drive, then the target slide
        drive = account.drive()
        presentation = drive.get_item_by_id(presentationId)
        slide = presentation.get_slide(slideId)

        if contentType == "text":
            shape = slide.add_text_box(text=content, left=left, top=top)
        elif contentType == "image":
            shape = slide.add_picture(image_path=content, left=left, top=top)
        else:  # "table" -- the only remaining validated type
            shape = slide.add_table(
                rows=content["rows"],
                cols=content["cols"],
                left=left,
                top=top
            )

        return self._createResult(
            success=True,
            data={
                "shapeId": shape.object_id,
                "contentType": contentType,
                "position": position
            }
        )
    except Exception as e:
        logger.error(f"Error adding PowerPoint content: {e}")
        return self._createResult(
            success=False,
            data={"error": f"Add content failed: {str(e)}"}
        )
|
||||||
|
|
@ -19,7 +19,7 @@ class MethodSharepoint(MethodBase):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.name = "sharepoint"
|
self.name = "sharepoint"
|
||||||
self.description = "Handle SharePoint document operations like search, read, and write"
|
self.description = "Handle SharePoint document operations like search, read, and write"
|
||||||
self.auth_source = AuthSource.MICROSOFT
|
self.authSource = AuthSource.MICROSOFT
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def actions(self) -> Dict[str, Dict[str, Any]]:
|
def actions(self) -> Dict[str, Dict[str, Any]]:
|
||||||
|
|
@ -55,65 +55,104 @@ class MethodSharepoint(MethodBase):
|
||||||
"content": {"type": "string", "required": True},
|
"content": {"type": "string", "required": True},
|
||||||
"contentType": {"type": "string", "required": False}
|
"contentType": {"type": "string", "required": False}
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"readList": {
|
||||||
|
"description": "Read items from SharePoint list",
|
||||||
|
"retryMax": 2,
|
||||||
|
"timeout": 30,
|
||||||
|
"parameters": {
|
||||||
|
"siteUrl": {"type": "string", "required": True},
|
||||||
|
"listName": {"type": "string", "required": True},
|
||||||
|
"query": {"type": "string", "required": False},
|
||||||
|
"fields": {"type": "array", "required": False}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"writeList": {
|
||||||
|
"description": "Write items to SharePoint list",
|
||||||
|
"retryMax": 2,
|
||||||
|
"timeout": 30,
|
||||||
|
"parameters": {
|
||||||
|
"siteUrl": {"type": "string", "required": True},
|
||||||
|
"listName": {"type": "string", "required": True},
|
||||||
|
"items": {"type": "array", "required": True}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"createList": {
|
||||||
|
"description": "Create a new SharePoint list",
|
||||||
|
"retryMax": 2,
|
||||||
|
"timeout": 30,
|
||||||
|
"parameters": {
|
||||||
|
"siteUrl": {"type": "string", "required": True},
|
||||||
|
"listName": {"type": "string", "required": True},
|
||||||
|
"description": {"type": "string", "required": False},
|
||||||
|
"template": {"type": "string", "required": False},
|
||||||
|
"fields": {"type": "array", "required": False}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async def execute(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
    """Validate the request and dispatch a SharePoint action to its handler.

    Supported actions: ``search``, ``read``, ``write``, ``readList``,
    ``writeList`` and ``createList``. ``authData`` must contain a
    ``userConnection`` entry, which is forwarded to the handler.

    Returns the handler's result, or a failed result when the parameters
    are invalid, the user connection is missing, the action is unknown, or
    an exception is raised.
    """
    try:
        parametersValid = await self.validateParameters(action, parameters)
        if not parametersValid:
            return self._createResult(
                success=False,
                data={"error": f"Invalid parameters for {action}"}
            )

        # Every SharePoint operation needs the caller's UserConnection
        if not authData or "userConnection" not in authData:
            return self._createResult(
                success=False,
                data={"error": "UserConnection required for SharePoint operations"}
            )
        userConnection = authData["userConnection"]

        # Action name -> name of the method implementing it
        handlerNames = {
            "search": "_search_documents",
            "read": "_read_document",
            "write": "_write_document",
            "readList": "_readList",
            "writeList": "_writeList",
            "createList": "_createList",
        }
        handlerName = handlerNames.get(action)
        if handlerName is None:
            return self._createResult(
                success=False,
                data={"error": f"Unknown action: {action}"}
            )

        handler = getattr(self, handlerName)
        return await handler(parameters, userConnection)

    except Exception as e:
        logger.error(f"Error executing SharePoint {action}: {e}")
        return self._createResult(
            success=False,
            data={"error": str(e)}
        )
|
||||||
|
|
||||||
async def _search_documents(self, parameters: Dict[str, Any], user_connection: UserConnection) -> MethodResult:
|
async def _search_documents(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
|
||||||
"""Search SharePoint documents"""
|
"""Search SharePoint documents"""
|
||||||
try:
|
try:
|
||||||
site_url = parameters["siteUrl"]
|
siteUrl = parameters["siteUrl"]
|
||||||
query = parameters["query"]
|
query = parameters["query"]
|
||||||
list_name = parameters.get("listName")
|
listName = parameters.get("listName")
|
||||||
max_results = parameters.get("maxResults", 10)
|
maxResults = parameters.get("maxResults", 10)
|
||||||
|
|
||||||
# Create SharePoint context
|
# Create SharePoint context
|
||||||
ctx = ClientContext(site_url).with_credentials(
|
ctx = ClientContext(siteUrl).with_credentials(
|
||||||
UserCredential(user_connection.authToken, user_connection.refreshToken)
|
UserCredential(userConnection.authToken, userConnection.refreshToken)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Search in specific list or entire site
|
# Search in specific list or entire site
|
||||||
if list_name:
|
if listName:
|
||||||
target_list = ctx.web.lists.get_by_title(list_name)
|
targetList = ctx.web.lists.get_by_title(listName)
|
||||||
items = target_list.items.filter(f"Title eq '{query}'").top(max_results).get().execute_query()
|
items = targetList.items.filter(f"Title eq '{query}'").top(maxResults).get().execute_query()
|
||||||
results = [{
|
results = [{
|
||||||
"title": item.properties["Title"],
|
"title": item.properties["Title"],
|
||||||
"url": item.properties["FileRef"],
|
"url": item.properties["FileRef"],
|
||||||
|
|
@ -128,9 +167,9 @@ class MethodSharepoint(MethodBase):
|
||||||
"url": result.properties["Path"],
|
"url": result.properties["Path"],
|
||||||
"modified": result.properties["LastModifiedTime"],
|
"modified": result.properties["LastModifiedTime"],
|
||||||
"created": result.properties["Created"]
|
"created": result.properties["Created"]
|
||||||
} for result in search_results[:max_results]]
|
} for result in search_results[:maxResults]]
|
||||||
|
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"query": query,
|
"query": query,
|
||||||
|
|
@ -139,30 +178,30 @@ class MethodSharepoint(MethodBase):
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error searching SharePoint documents: {e}")
|
logger.error(f"Error searching SharePoint documents: {e}")
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Search failed: {str(e)}"}
|
data={"error": f"Search failed: {str(e)}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _read_document(self, parameters: Dict[str, Any], user_connection: UserConnection) -> MethodResult:
|
async def _read_document(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
|
||||||
"""Read SharePoint document content"""
|
"""Read SharePoint document content"""
|
||||||
try:
|
try:
|
||||||
site_url = parameters["siteUrl"]
|
siteUrl = parameters["siteUrl"]
|
||||||
file_url = parameters["fileUrl"]
|
fileUrl = parameters["fileUrl"]
|
||||||
|
|
||||||
# Create SharePoint context
|
# Create SharePoint context
|
||||||
ctx = ClientContext(site_url).with_credentials(
|
ctx = ClientContext(siteUrl).with_credentials(
|
||||||
UserCredential(user_connection.authToken, user_connection.refreshToken)
|
UserCredential(userConnection.authToken, userConnection.refreshToken)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get file
|
# Get file
|
||||||
file = ctx.web.get_file_by_server_relative_url(file_url)
|
file = ctx.web.get_file_by_server_relative_url(fileUrl)
|
||||||
file_content = file.read().execute_query()
|
file_content = file.read().execute_query()
|
||||||
|
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"url": file_url,
|
"url": fileUrl,
|
||||||
"content": file_content.content.decode('utf-8'),
|
"content": file_content.content.decode('utf-8'),
|
||||||
"modified": file.properties["TimeLastModified"],
|
"modified": file.properties["TimeLastModified"],
|
||||||
"size": file.properties["Length"]
|
"size": file.properties["Length"]
|
||||||
|
|
@ -170,48 +209,182 @@ class MethodSharepoint(MethodBase):
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error reading SharePoint document: {e}")
|
logger.error(f"Error reading SharePoint document: {e}")
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Read failed: {str(e)}"}
|
data={"error": f"Read failed: {str(e)}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _write_document(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
    """Write text content to a SharePoint file, uploading it if lookup fails.

    Required parameters: ``siteUrl``, ``fileUrl`` and ``content`` (a string,
    stored UTF-8 encoded). The optional ``contentType`` parameter is
    accepted by the action schema but is not used by this method.

    Returns a successful result carrying ``url``, ``modified`` (current UTC
    time in ISO format) and ``size`` (encoded byte length). Any exception
    is logged and reported as a failed result.
    """
    try:
        siteUrl = parameters["siteUrl"]
        fileUrl = parameters["fileUrl"]
        content = parameters["content"]

        # Encode once; the same bytes are uploaded, written and sized below
        payload = content.encode('utf-8')

        # Create SharePoint context
        ctx = ClientContext(siteUrl).with_credentials(
            UserCredential(userConnection.authToken, userConnection.refreshToken)
        )

        # Get or create file
        try:
            file = ctx.web.get_file_by_server_relative_url(fileUrl)
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
            # are no longer swallowed here.
            # NOTE(review): the lookup may not contact the server until a
            # query is executed, in which case this fallback never runs -- confirm.
            folderUrl, _, fileName = fileUrl.rpartition("/")
            folder = ctx.web.get_folder_by_server_relative_url(folderUrl)
            file = folder.upload_file(fileName, payload).execute_query()

        # Update file content
        file.write(payload).execute_query()

        return self._createResult(
            success=True,
            data={
                "url": fileUrl,
                "modified": datetime.now(UTC).isoformat(),
                "size": len(payload)
            }
        )
    except Exception as e:
        logger.error(f"Error writing SharePoint document: {e}")
        return self._createResult(
            success=False,
            data={"error": f"Write failed: {str(e)}"}
        )
|
||||||
|
|
||||||
|
async def _readList(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
    """Read items from a SharePoint list.

    Required parameters: ``siteUrl`` and ``listName``. Optional: ``query``
    (only forwarded when truthy) and ``fields`` (defaults to ``["*"]``).

    Returns a successful result carrying ``siteUrl``, ``listName`` and the
    fetched ``items``. Any exception is logged and reported as a failed
    result.
    """
    try:
        siteUrl = parameters["siteUrl"]
        listName = parameters["listName"]
        query = parameters.get("query")
        fields = parameters.get("fields", ["*"])

        # Build the account object from the user's stored tokens
        tokenPair = (userConnection.authToken, userConnection.refreshToken)
        account = Account(credentials=tokenPair, protocol=MSGraphProtocol())

        # Resolve the site, then the list within it
        spList = account.get_site(siteUrl).get_list(listName)

        # Pass the query through only when the caller supplied one
        fetchArgs = {"query": query, "fields": fields} if query else {"fields": fields}
        items = spList.get_items(**fetchArgs)

        payload = {
            "siteUrl": siteUrl,
            "listName": listName,
            "items": items
        }
        return self._createResult(success=True, data=payload)
    except Exception as e:
        logger.error(f"Error reading SharePoint list: {e}")
        failure = {"error": f"Read failed: {str(e)}"}
        return self._createResult(success=False, data=failure)
|
||||||
|
|
||||||
|
async def _writeList(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
    """Add items to a SharePoint list.

    Required parameters: ``siteUrl``, ``listName`` and ``items`` (an
    iterable; each entry is passed to the list's ``add_item``).

    Returns a successful result carrying ``siteUrl``, ``listName`` and one
    ``{"id", "status"}`` entry per added item. Any exception is logged and
    reported as a failed result; entries added before the failure are not
    reported.
    """
    try:
        siteUrl = parameters["siteUrl"]
        listName = parameters["listName"]
        items = parameters["items"]

        # Build the account object from the user's stored tokens
        tokenPair = (userConnection.authToken, userConnection.refreshToken)
        account = Account(credentials=tokenPair, protocol=MSGraphProtocol())

        # Resolve the site, then the list within it
        spList = account.get_site(siteUrl).get_list(listName)

        # Add the entries in order, recording the id assigned to each
        results = [
            {"id": spList.add_item(entry).id, "status": "success"}
            for entry in items
        ]

        payload = {
            "siteUrl": siteUrl,
            "listName": listName,
            "results": results
        }
        return self._createResult(success=True, data=payload)
    except Exception as e:
        logger.error(f"Error writing to SharePoint list: {e}")
        failure = {"error": f"Write failed: {str(e)}"}
        return self._createResult(success=False, data=failure)
|
||||||
|
|
||||||
|
async def _createList(self, parameters: Dict[str, Any], userConnection: UserConnection) -> MethodResult:
    """Create a SharePoint list and add the requested fields to it.

    Required parameters: ``siteUrl`` and ``listName``. Optional:
    ``description``, ``template`` (defaults to ``"generic"``) and
    ``fields`` (defaults to an empty list). Each field entry must provide
    ``name`` and ``type`` and may provide ``required`` and ``description``.

    Returns a successful result carrying ``siteUrl``, ``listName``, ``id``
    and ``webUrl``. Any exception is logged and reported as a failed result.
    """
    try:
        siteUrl = parameters["siteUrl"]
        listName = parameters["listName"]
        description = parameters.get("description")
        template = parameters.get("template", "generic")
        fields = parameters.get("fields", [])

        # Build the account object from the user's stored tokens
        tokenPair = (userConnection.authToken, userConnection.refreshToken)
        account = Account(credentials=tokenPair, protocol=MSGraphProtocol())

        site = account.get_site(siteUrl)
        newList = site.create_list(name=listName, description=description, template=template)

        # Add each requested field to the freshly created list
        for fieldSpec in fields:
            newList.add_field(
                name=fieldSpec["name"],
                field_type=fieldSpec["type"],
                required=fieldSpec.get("required", False),
                description=fieldSpec.get("description")
            )

        payload = {
            "siteUrl": siteUrl,
            "listName": listName,
            "id": newList.id,
            "webUrl": newList.web_url
        }
        return self._createResult(success=True, data=payload)
    except Exception as e:
        logger.error(f"Error creating SharePoint list: {e}")
        failure = {"error": f"Create failed: {str(e)}"}
        return self._createResult(success=False, data=failure)
|
||||||
|
|
@ -8,6 +8,7 @@ import re
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
import requests
|
import requests
|
||||||
import time
|
import time
|
||||||
|
import json
|
||||||
|
|
||||||
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
|
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
@ -74,70 +75,197 @@ class MethodWeb(MethodBase):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async def execute(self, action: str, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
    """Validate the request and dispatch a web action to its handler.

    Supported actions: ``fetchUrl``, ``parseContent`` and ``extractData``.
    ``authData`` is accepted but not read by this method.

    Returns the handler's result, or a failed result when the parameters
    are invalid, the action is unknown, or an exception is raised.
    """
    try:
        parametersValid = await self.validateParameters(action, parameters)
        if not parametersValid:
            return self._createResult(
                success=False,
                data={"error": f"Invalid parameters for {action}"}
            )

        # Action name -> name of the method implementing it
        handlerNames = {
            "fetchUrl": "_fetchUrl",
            "parseContent": "_parseContent",
            "extractData": "_extractData",
        }
        handlerName = handlerNames.get(action)
        if handlerName is None:
            return self._createResult(
                success=False,
                data={"error": f"Unknown action: {action}"}
            )

        handler = getattr(self, handlerName)
        return await handler(parameters)

    except Exception as e:
        logger.error(f"Error executing web {action}: {e}")
        return self._createResult(
            success=False,
            data={"error": str(e)}
        )
|
||||||
|
|
||||||
|
async def _fetchUrl(self, parameters: Dict[str, Any]) -> MethodResult:
    """Perform an HTTP request and return its status, headers and body text.

    Required parameter: ``url``. Optional: ``method`` (defaults to
    ``"GET"``), ``headers`` (defaults to ``{}``), ``data`` and ``timeout``
    (defaults to 30; a plain number is used as the total timeout, an
    ``aiohttp.ClientTimeout`` is passed through unchanged).

    Returns a successful result carrying ``url``, ``status``, ``headers``
    and ``content`` for any response that was received, whatever its
    status code. Any exception is logged and reported as a failed result.
    """
    try:
        url = parameters["url"]
        method = parameters.get("method", "GET")
        headers = parameters.get("headers", {})
        data = parameters.get("data")
        timeout = parameters.get("timeout", 30)

        # aiohttp documents ``timeout`` as a ClientTimeout structure, so wrap
        # plain numbers explicitly instead of relying on implicit coercion
        if not isinstance(timeout, aiohttp.ClientTimeout):
            timeout = aiohttp.ClientTimeout(total=timeout)

        async with aiohttp.ClientSession() as session:
            async with session.request(
                method=method,
                url=url,
                headers=headers,
                data=data,
                timeout=timeout
            ) as response:
                content = await response.text()
                return self._createResult(
                    success=True,
                    data={
                        "url": url,
                        "status": response.status,
                        "headers": dict(response.headers),
                        "content": content
                    }
                )
    except Exception as e:
        logger.error(f"Error fetching URL: {e}")
        return self._createResult(
            success=False,
            data={"error": f"Fetch failed: {str(e)}"}
        )
|
||||||
|
|
||||||
|
async def _parseContent(self, parameters: Dict[str, Any]) -> MethodResult:
    """Parse HTML or JSON text into a structured result.

    Required parameter: ``content``. Optional: ``contentType`` (``"html"``
    by default, or ``"json"``).

    For HTML the result carries ``type``, ``title`` (``None`` when the
    document has no title element), ``text``, ``links`` (``href`` values)
    and ``images`` (``src`` values). For JSON it carries ``type`` and the
    decoded ``data``. Any exception, including an unsupported content type,
    is logged and reported as a failed result.
    """
    try:
        rawContent = parameters["content"]
        contentType = parameters.get("contentType", "html")

        if contentType == "html":
            soup = BeautifulSoup(rawContent, "html.parser")
            pageTitle = soup.title.string if soup.title else None
            pageText = soup.get_text()
            hrefs = [anchor.get("href") for anchor in soup.find_all("a", href=True)]
            sources = [image.get("src") for image in soup.find_all("img", src=True)]
            parsed = {
                "type": "html",
                "title": pageTitle,
                "text": pageText,
                "links": hrefs,
                "images": sources
            }
        elif contentType == "json":
            parsed = {
                "type": "json",
                "data": json.loads(rawContent)
            }
        else:
            raise ValueError(f"Unsupported content type: {contentType}")

        return self._createResult(success=True, data=parsed)
    except Exception as e:
        logger.error(f"Error parsing content: {e}")
        failure = {"error": f"Parse failed: {str(e)}"}
        return self._createResult(success=False, data=failure)
|
||||||
|
|
||||||
|
async def _extractData(self, parameters: Dict[str, Any]) -> MethodResult:
    """Extract named values from HTML or JSON text.

    Required parameters: ``content`` and ``selectors`` (a mapping of result
    key to selector). Optional: ``contentType`` (``"html"`` by default, or
    ``"json"``).

    For HTML each selector is a CSS selector; a single match yields its
    stripped text, any other number of matches yields a list of stripped
    texts. For JSON each selector is a dot-separated path whose parts are
    dict keys or decimal list indices; a path that cannot be followed
    (missing key, index out of range, or stepping into a scalar) yields
    ``None`` for that key only.

    Returns a successful result carrying ``type`` and ``results``. Any
    exception, including an unsupported content type, is logged and
    reported as a failed result.
    """
    try:
        content = parameters["content"]
        contentType = parameters.get("contentType", "html")
        selectors = parameters["selectors"]

        if contentType == "html":
            soup = BeautifulSoup(content, "html.parser")
            results = {}

            for key, selector in selectors.items():
                elements = soup.select(selector)
                if len(elements) == 1:
                    results[key] = elements[0].get_text().strip()
                else:
                    results[key] = [el.get_text().strip() for el in elements]

            return self._createResult(
                success=True,
                data={
                    "type": "html",
                    "results": results
                }
            )
        elif contentType == "json":
            data = json.loads(content)
            results = {}

            for key, path in selectors.items():
                value = data
                for part in path.split("."):
                    if isinstance(value, dict):
                        value = value.get(part)
                    elif isinstance(value, list) and part.isdecimal():
                        # isdecimal() only accepts characters int() can parse
                        index = int(part)
                        # An out-of-range index resolves to None, like a
                        # missing dict key, instead of failing every selector
                        value = value[index] if index < len(value) else None
                    else:
                        value = None
                        break
                results[key] = value

            return self._createResult(
                success=True,
                data={
                    "type": "json",
                    "results": results
                }
            )
        else:
            raise ValueError(f"Unsupported content type: {contentType}")
    except Exception as e:
        logger.error(f"Error extracting data: {e}")
        return self._createResult(
            success=False,
            data={"error": f"Extract failed: {str(e)}"}
        )
|
||||||
|
|
||||||
async def _search_web(self, parameters: Dict[str, Any]) -> MethodResult:
    """Run a web search with the requested engine.

    Required parameter: ``query``. Optional: ``maxResults`` (defaults to
    10), ``filters`` (defaults to ``{}``) and ``searchEngine`` (defaults to
    ``"google"``; matched case-insensitively). Only ``google`` and ``bing``
    are handled; any other engine yields a failed result.

    Returns a successful result carrying ``query``, ``engine`` (as
    supplied) and ``results``. Any exception is logged and reported as a
    failed result.
    """
    try:
        query = parameters["query"]
        maxResults = parameters.get("maxResults", 10)
        filters = parameters.get("filters", {})
        searchEngine = parameters.get("searchEngine", "google")

        engineKey = searchEngine.lower()
        if engineKey not in ("google", "bing"):
            return self._createResult(
                success=False,
                data={"error": f"Unsupported search engine: {searchEngine}"}
            )

        if engineKey == "google":
            # TODO: Implement Google Custom Search API integration
            results = await self._google_search(query, maxResults, filters)
        else:
            # TODO: Implement Bing Web Search API integration
            results = await self._bing_search(query, maxResults, filters)

        payload = {
            "query": query,
            "engine": searchEngine,
            "results": results
        }
        return self._createResult(success=True, data=payload)
    except Exception as e:
        logger.error(f"Error searching web: {e}")
        failure = {"error": f"Search failed: {str(e)}"}
        return self._createResult(success=False, data=failure)
|
||||||
|
|
@ -173,14 +301,14 @@ class MethodWeb(MethodBase):
|
||||||
try:
|
try:
|
||||||
url = parameters["url"]
|
url = parameters["url"]
|
||||||
depth = parameters.get("depth", 1)
|
depth = parameters.get("depth", 1)
|
||||||
follow_links = parameters.get("followLinks", False)
|
followLinks = parameters.get("followLinks", False)
|
||||||
include_images = parameters.get("includeImages", False)
|
includeImages = parameters.get("includeImages", False)
|
||||||
respect_robots = parameters.get("respectRobots", True)
|
respectRobots = parameters.get("respectRobots", True)
|
||||||
|
|
||||||
# Check robots.txt if required
|
# Check robots.txt if required
|
||||||
if respect_robots:
|
if respectRobots:
|
||||||
if not await self._check_robots_txt(url):
|
if not await self._check_robots_txt(url):
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": "Crawling not allowed by robots.txt"}
|
data={"error": "Crawling not allowed by robots.txt"}
|
||||||
)
|
)
|
||||||
|
|
@ -198,114 +326,57 @@ class MethodWeb(MethodBase):
|
||||||
"title": soup.title.string if soup.title else None,
|
"title": soup.title.string if soup.title else None,
|
||||||
"description": self._get_meta_description(soup),
|
"description": self._get_meta_description(soup),
|
||||||
"links": [],
|
"links": [],
|
||||||
"images": [] if include_images else None,
|
"images": [] if includeImages else None,
|
||||||
"text": soup.get_text(strip=True),
|
"text": soup.get_text(strip=True),
|
||||||
"crawled": datetime.now(UTC).isoformat()
|
"crawled": datetime.now(UTC).isoformat()
|
||||||
}
|
}
|
||||||
|
|
||||||
# Extract links if followLinks is True
|
# Extract links if followLinks is True
|
||||||
if follow_links:
|
if followLinks:
|
||||||
base_url = url
|
baseUrl = url
|
||||||
for link in soup.find_all('a'):
|
for link in soup.find_all('a'):
|
||||||
href = link.get('href')
|
href = link.get('href')
|
||||||
if href:
|
if href:
|
||||||
absolute_url = urljoin(base_url, href)
|
absoluteUrl = urljoin(baseUrl, href)
|
||||||
if self._is_valid_url(absolute_url):
|
if self._is_valid_url(absoluteUrl):
|
||||||
result["links"].append({
|
result["links"].append({
|
||||||
"url": absolute_url,
|
"url": absoluteUrl,
|
||||||
"text": link.get_text(strip=True)
|
"text": link.get_text(strip=True)
|
||||||
})
|
})
|
||||||
|
|
||||||
# Extract images if includeImages is True
|
# Extract images if includeImages is True
|
||||||
if include_images:
|
if includeImages:
|
||||||
for img in soup.find_all('img'):
|
for img in soup.find_all('img'):
|
||||||
src = img.get('src')
|
src = img.get('src')
|
||||||
if src:
|
if src:
|
||||||
absolute_src = urljoin(url, src)
|
absoluteSrc = urljoin(url, src)
|
||||||
result["images"].append({
|
result["images"].append({
|
||||||
"url": absolute_src,
|
"url": absoluteSrc,
|
||||||
"alt": img.get('alt', ''),
|
"alt": img.get('alt', ''),
|
||||||
"title": img.get('title', '')
|
"title": img.get('title', '')
|
||||||
})
|
})
|
||||||
|
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=True,
|
success=True,
|
||||||
data=result
|
data=result
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Failed to fetch URL: {response.status}"}
|
data={"error": f"Failed to fetch URL: {response.status}"}
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error crawling page: {e}")
|
logger.error(f"Error crawling page: {e}")
|
||||||
return self._create_result(
|
return self._createResult(
|
||||||
success=False,
|
success=False,
|
||||||
data={"error": f"Crawl failed: {str(e)}"}
|
data={"error": f"Crawl failed: {str(e)}"}
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _extract_content(self, parameters: Dict[str, Any]) -> MethodResult:
    """Fetch a web page and extract content from it.

    Args:
        parameters: Must contain "url". Optional keys:
            "selectors" - iterable of CSS selectors; when given, the content
            maps each selector to the text of the elements it matches.
            "includeMetadata" - when truthy, a "metadata" entry is added.
            A "format" key used to be read here but was never used, so it
            is ignored.

    Returns:
        The value of self._create_result. On success its data holds "url"
        and "content"; on failure its data holds an "error" message.
    """
    try:
        url = parameters["url"]
        selectors = parameters.get("selectors")
        include_metadata = parameters.get("includeMetadata", False)

        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                if response.status != 200:
                    return self._create_result(
                        success=False,
                        data={"error": f"Failed to fetch URL: {response.status}"}
                    )

                html = await response.text()
                soup = BeautifulSoup(html, 'html.parser')

                # Page-wide text and links are computed once. The metadata
                # needs them even when selectors are used; previously it read
                # content["text"] / content["links"], which only exist in the
                # default extraction, so selectors + includeMetadata always
                # ended in a KeyError and a failed result.
                page_text = soup.get_text(strip=True)
                page_links = [a.get('href') for a in soup.find_all('a')]

                if selectors:
                    # Extract content based on selectors
                    content = {
                        selector: [elem.get_text() for elem in soup.select(selector)]
                        for selector in selectors
                    }
                else:
                    # Default extraction
                    content = {
                        "title": soup.title.string if soup.title else None,
                        "text": page_text,
                        "links": page_links
                    }

                # Add metadata if requested
                if include_metadata:
                    content["metadata"] = {
                        "url": url,
                        "crawled": datetime.now(UTC).isoformat(),
                        "language": self._detect_language(soup),
                        "wordCount": len(page_text.split()),
                        "linksCount": len(page_links)
                    }

                return self._create_result(
                    success=True,
                    data={
                        "url": url,
                        "content": content
                    }
                )
    except Exception as e:
        # Boundary handler: any failure (missing "url", network error, parse
        # error) is reported to the caller as an unsuccessful result.
        logger.error(f"Error extracting content: {e}")
        return self._create_result(
            success=False,
            data={"error": f"Extraction failed: {str(e)}"}
        )
|
|
||||||
|
|
||||||
def _get_meta_description(self, soup: BeautifulSoup) -> Optional[str]:
    """Return the content of the page's <meta name="description"> tag.

    Returns None when the document has no such tag.
    """
    description_tag = soup.find('meta', attrs={'name': 'description'})
    return description_tag.get('content') if description_tag else None
|
||||||
|
|
||||||
def _is_valid_url(self, url: str) -> bool:
|
def _is_valid_url(self, url: str) -> bool:
|
||||||
|
|
@ -319,31 +390,31 @@ class MethodWeb(MethodBase):
|
||||||
async def _check_robots_txt(self, url: str) -> bool:
|
async def _check_robots_txt(self, url: str) -> bool:
|
||||||
"""Check if URL is allowed by robots.txt"""
|
"""Check if URL is allowed by robots.txt"""
|
||||||
try:
|
try:
|
||||||
parsed_url = urlparse(url)
|
parsedUrl = urlparse(url)
|
||||||
robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt"
|
robotsUrl = f"{parsedUrl.scheme}://{parsedUrl.netloc}/robots.txt"
|
||||||
|
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
async with session.get(robots_url, headers={"User-Agent": self.userAgent}, timeout=self.timeout) as response:
|
async with session.get(robotsUrl, headers={"User-Agent": self.userAgent}, timeout=self.timeout) as response:
|
||||||
if response.status == 200:
|
if response.status == 200:
|
||||||
robots_content = await response.text()
|
robotsContent = await response.text()
|
||||||
|
|
||||||
# Parse robots.txt content
|
# Parse robots.txt content
|
||||||
user_agent = "*" # Default to all user agents
|
userAgent = "*" # Default to all user agents
|
||||||
disallow_paths = []
|
disallowPaths = []
|
||||||
|
|
||||||
for line in robots_content.splitlines():
|
for line in robotsContent.splitlines():
|
||||||
line = line.strip().lower()
|
line = line.strip().lower()
|
||||||
if line.startswith("user-agent:"):
|
if line.startswith("user-agent:"):
|
||||||
user_agent = line[11:].strip()
|
userAgent = line[11:].strip()
|
||||||
elif line.startswith("disallow:") and user_agent in ["*", self.userAgent.lower()]:
|
elif line.startswith("disallow:") and userAgent in ["*", self.userAgent.lower()]:
|
||||||
path = line[9:].strip()
|
path = line[9:].strip()
|
||||||
if path:
|
if path:
|
||||||
disallow_paths.append(path)
|
disallowPaths.append(path)
|
||||||
|
|
||||||
# Check if URL path is disallowed
|
# Check if URL path is disallowed
|
||||||
url_path = parsed_url.path
|
urlPath = parsedUrl.path
|
||||||
for disallow_path in disallow_paths:
|
for disallowPath in disallowPaths:
|
||||||
if url_path.startswith(disallow_path):
|
if urlPath.startswith(disallowPath):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
@ -364,32 +435,32 @@ class MethodWeb(MethodBase):
|
||||||
return soup.html.get('lang')
|
return soup.html.get('lang')
|
||||||
|
|
||||||
# Try to get language from meta tag
|
# Try to get language from meta tag
|
||||||
meta_lang = soup.find('meta', attrs={'http-equiv': 'content-language'})
|
metaLang = soup.find('meta', attrs={'http-equiv': 'content-language'})
|
||||||
if meta_lang:
|
if metaLang:
|
||||||
return meta_lang.get('content', 'en')
|
return metaLang.get('content', 'en')
|
||||||
|
|
||||||
# Try to get language from meta charset
|
# Try to get language from meta charset
|
||||||
meta_charset = soup.find('meta', attrs={'charset': True})
|
metaCharset = soup.find('meta', attrs={'charset': True})
|
||||||
if meta_charset:
|
if metaCharset:
|
||||||
charset = meta_charset.get('charset', '').lower()
|
charset = metaCharset.get('charset', '').lower()
|
||||||
if 'utf-8' in charset:
|
if 'utf-8' in charset:
|
||||||
return 'en' # Default to English for UTF-8
|
return 'en' # Default to English for UTF-8
|
||||||
|
|
||||||
# Try to detect language from content
|
# Try to detect language from content
|
||||||
# This is a simple heuristic based on common words
|
# This is a simple heuristic based on common words
|
||||||
text = soup.get_text().lower()
|
text = soup.get_text().lower()
|
||||||
common_words = {
|
commonWords = {
|
||||||
'en': ['the', 'and', 'of', 'to', 'in', 'is', 'that', 'for', 'it', 'with'],
|
'en': ['the', 'and', 'of', 'to', 'in', 'is', 'that', 'for', 'it', 'with'],
|
||||||
'es': ['el', 'la', 'los', 'las', 'de', 'y', 'en', 'que', 'por', 'con'],
|
'es': ['el', 'la', 'los', 'las', 'de', 'y', 'en', 'que', 'por', 'con'],
|
||||||
'fr': ['le', 'la', 'les', 'de', 'et', 'en', 'que', 'pour', 'avec', 'dans'],
|
'fr': ['le', 'la', 'les', 'de', 'et', 'en', 'que', 'pour', 'avec', 'dans'],
|
||||||
'de': ['der', 'die', 'das', 'und', 'in', 'den', 'von', 'zu', 'für', 'mit']
|
'de': ['der', 'die', 'das', 'und', 'in', 'den', 'von', 'zu', 'für', 'mit']
|
||||||
}
|
}
|
||||||
|
|
||||||
word_counts = {lang: sum(1 for word in words if f' {word} ' in f' {text} ')
|
wordCounts = {lang: sum(1 for word in words if f' {word} ' in f' {text} ')
|
||||||
for lang, words in common_words.items()}
|
for lang, words in commonWords.items()}
|
||||||
|
|
||||||
if word_counts:
|
if wordCounts:
|
||||||
return max(word_counts.items(), key=lambda x: x[1])[0]
|
return max(wordCounts.items(), key=lambda x: x[1])[0]
|
||||||
|
|
||||||
return 'en' # Default to English if no language detected
|
return 'en' # Default to English if no language detected
|
||||||
|
|
||||||
|
|
|
||||||
368
modules/neutralizer/neutralizer.py
Normal file
368
modules/neutralizer/neutralizer.py
Normal file
|
|
@ -0,0 +1,368 @@
|
||||||
|
"""
|
||||||
|
DSGVO-konformer Daten-Neutralisierer für KI-Agentensysteme
|
||||||
|
Unterstützt TXT, JSON, CSV, Excel und Word-Dateien
|
||||||
|
Mehrsprachig: DE, EN, FR, IT
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
import pandas as pd
|
||||||
|
import docx
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, List, Tuple, Any, Union, Optional
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import uuid
|
||||||
|
import logging
|
||||||
|
import traceback
|
||||||
|
import csv
|
||||||
|
from datetime import datetime
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
from io import StringIO
|
||||||
|
from patterns import Pattern, HeaderPatterns, DataPatterns, get_pattern_for_header, find_patterns_in_text, TextTablePatterns
|
||||||
|
import base64
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@dataclass
class TableData:
    """Tabular data: one list of column headers plus a list of rows."""
    # Column names, in column order.
    headers: List[str]
    # Row values; each row is indexed by the position of its header.
    rows: List[List[str]]
    # Origin of the table: 'csv', 'json', 'xml' or 'text_table'.
    source_type: str
|
||||||
|
|
||||||
|
@dataclass
class PlainText:
    """A section of ordinary (non-tabular) text."""
    # The text itself.
    content: str
    # Origin of the text: 'txt', 'docx' or 'text_plain'.
    source_type: str
|
||||||
|
|
||||||
|
@dataclass
class ProcessResult:
    """Outcome of one content-processing run."""
    # The processed (anonymized) content; its type depends on the input format.
    data: Any
    # Original value -> placeholder that replaced it.
    mapping: Dict[str, str]
    # Names of the fields in which values were replaced.
    replaced_fields: List[str]
    # Additional processing information (e.g. a 'type' entry).
    processed_info: Dict[str, Any]
|
||||||
|
|
||||||
|
class DataAnonymizer:
    """Main class for data anonymization of CSV, JSON, XML and plain text.

    Sensitive values are replaced by numbered placeholders built from the
    matching pattern's ``replacement_template``. ``self.mapping`` maps each
    original value to its placeholder and lives on the instance, so a value
    seen several times (also across calls) always gets the same placeholder.
    """

    def __init__(self):
        """Initialize the anonymizer with the header and data pattern sets."""
        self.header_patterns = HeaderPatterns.patterns
        self.data_patterns = DataPatterns.patterns
        self.replaced_fields = set()
        self.mapping = {}
        self.processing_info = []

    @staticmethod
    def _is_missing(value: Any) -> bool:
        """Return True for None and for float NaN (how pandas reports empty cells)."""
        return value is None or (isinstance(value, float) and value != value)

    def _placeholder_for(self, original: str, pattern: Pattern) -> str:
        """Return the placeholder for *original*, registering it on first use."""
        if original not in self.mapping:
            self.mapping[original] = pattern.replacement_template.format(len(self.mapping) + 1)
        return self.mapping[original]

    def _data_pattern_by_name(self, pattern_name: str) -> Optional[Pattern]:
        """Return the data pattern called *pattern_name*, or None if there is none."""
        return next((p for p in self.data_patterns if p.name == pattern_name), None)

    def _normalize_whitespace(self, text: str) -> str:
        """Collapse every whitespace run (including line breaks) to one space and trim."""
        # A line-ending normalisation used to follow the substitution, but it
        # could never match: r'\s+' has already replaced all '\r' and '\n'.
        return re.sub(r'\s+', ' ', text).strip()

    def _is_table_line(self, line: str) -> bool:
        """Check if a line looks like a table row: 'key: value' or two tab-separated cells."""
        return bool(re.match(r'^\s*[^:]+:\s*[^:]+$', line) or
                    re.match(r'^\s*[^\t]+\t[^\t]+$', line))

    def _extract_tables_from_text(self, content: str) -> Tuple[List[TableData], List[PlainText]]:
        """
        Extract tables and plain text from content

        No table detection is performed: the whole content is returned as a
        single plain-text section and the table list is always empty.

        Args:
            content: Content to process

        Returns:
            Tuple of (list of tables, list of plain text sections)
        """
        tables = []
        plain_texts = [PlainText(content=content, source_type='text_plain')]
        return tables, plain_texts

    def _anonymize_table(self, table: TableData) -> TableData:
        """Return a copy of *table* with all values in sensitive columns replaced.

        A column is sensitive when get_pattern_for_header finds a pattern for
        its header. Missing cells (None / NaN) are left untouched, so empty
        CSV cells are not turned into placeholders for the string "nan".

        Raises:
            Exception: any error is logged and re-raised.
        """
        try:
            anonymized_table = TableData(
                headers=table.headers.copy(),
                rows=[row.copy() for row in table.rows],
                source_type=table.source_type
            )

            for i, header in enumerate(anonymized_table.headers):
                pattern = get_pattern_for_header(header, self.header_patterns)
                if not pattern:
                    continue
                for row in anonymized_table.rows:
                    # Tolerate ragged rows and skip empty cells.
                    if i >= len(row) or self._is_missing(row[i]):
                        continue
                    row[i] = self._placeholder_for(str(row[i]), pattern)

            return anonymized_table

        except Exception as e:
            logger.error(f"Error anonymizing table: {str(e)}")
            logger.debug(traceback.format_exc())
            raise

    def _anonymize_plain_text(self, text: PlainText) -> PlainText:
        """Replace every data-pattern match in *text* by its placeholder.

        Matches are tuples of (pattern name, matched text, start, end) as
        returned by find_patterns_in_text.

        Raises:
            Exception: any error is logged and re-raised.
        """
        try:
            # Process the entire text at once instead of line by line
            current_text = text.content
            matches = find_patterns_in_text(current_text, self.data_patterns)

            # Choose a set of non-overlapping matches, preferring the earliest
            # start and, for equal starts, the longest match. Splicing
            # overlapping or duplicate spans would reuse offsets that a
            # previous replacement has already invalidated.
            selected = []
            covered_until = 0
            for pattern_name, matched_text, start, end in sorted(matches, key=lambda m: (m[2], -m[3])):
                if start < covered_until:
                    continue
                # Skip if the matched text is already a placeholder
                if re.match(r'\[[A-Z_]+\d+\]', matched_text):
                    continue
                pattern = self._data_pattern_by_name(pattern_name)
                if not pattern:
                    continue
                selected.append((pattern, pattern_name, matched_text, start, end))
                covered_until = end

            # Apply from the end of the text so earlier offsets stay valid.
            for pattern, pattern_name, matched_text, start, end in reversed(selected):
                replacement = self._placeholder_for(matched_text, pattern)
                # Never log or print the original value: it is the personal
                # data this class exists to remove.
                logger.debug("Replacing %s match at %d-%d with %s", pattern_name, start, end, replacement)
                current_text = current_text[:start] + replacement + current_text[end:]

            return PlainText(content=current_text, source_type=text.source_type)

        except Exception as e:
            logger.error(f"Error anonymizing plain text: {str(e)}")
            logger.debug(traceback.format_exc())
            raise

    def _anonymize_json_value(self, value: Any, key: str = None) -> Any:
        """
        Recursively anonymize JSON values based on their keys and content

        A string is replaced as a whole when its key matches a header pattern
        or when the string itself contains a data-pattern match. Non-string
        scalars are returned unchanged.

        Args:
            value: Value to anonymize
            key: Key name (if part of a key-value pair)

        Returns:
            Anonymized value
        """
        if isinstance(value, dict):
            return {k: self._anonymize_json_value(v, k) for k, v in value.items()}
        if isinstance(value, list):
            # NOTE(review): list items are processed without the parent key, so
            # strings inside a list under a sensitive key are only caught by
            # the data patterns - confirm this is intended.
            return [self._anonymize_json_value(item) for item in value]
        if not isinstance(value, str):
            return value

        # Check if this is a key we should process
        if key:
            pattern = get_pattern_for_header(key, self.header_patterns)
            if pattern:
                return self._placeholder_for(value, pattern)

        # Check if the value itself matches any patterns
        matches = find_patterns_in_text(value, self.data_patterns)
        if not matches:
            return value

        # Use the first match's pattern
        pattern_name = matches[0][0]
        pattern = self._data_pattern_by_name(pattern_name)
        if pattern:
            # Same "[NAME_n]" placeholder format as the table, text and XML
            # paths (this branch used to emit "NAME_n" without brackets).
            return self._placeholder_for(value, pattern)
        if value not in self.mapping:
            self.mapping[value] = f"{pattern_name.upper()}_{len(self.mapping) + 1}"
        return self.mapping[value]

    def _anonymize_xml_element(self, element: ET.Element, indent: str = '') -> str:
        """
        Recursively process XML element and return formatted string

        Attribute values are replaced when the attribute name matches a header
        pattern or the value contains a data-pattern match; element text is
        replaced when it contains a data-pattern match. Attribute values and
        text are XML-escaped on output.

        NOTE(review): element.tail is never read, so text that follows a child
        element inside its parent is not part of the output.

        Args:
            element: XML element to process
            indent: Current indentation level

        Returns:
            Formatted XML string
        """
        # Local import: the escaping helpers are only needed here.
        from xml.sax.saxutils import escape

        def anonymize_by_content(raw: str) -> str:
            """Return the placeholder for *raw* if a data pattern matches, else *raw*."""
            matches = find_patterns_in_text(raw, self.data_patterns)
            if matches:
                pattern = self._data_pattern_by_name(matches[0][0])
                if pattern:
                    return self._placeholder_for(raw, pattern)
            return raw

        # Process attributes
        processed_attrs = {}
        for attr_name, attr_value in element.attrib.items():
            # Check if attribute name matches any header patterns
            pattern = get_pattern_for_header(attr_name, self.header_patterns)
            if pattern:
                processed_attrs[attr_name] = self._placeholder_for(attr_value, pattern)
            else:
                # Check if attribute value matches any data patterns
                processed_attrs[attr_name] = anonymize_by_content(attr_value)

        # The parser hands back unescaped values; escape them again (including
        # the double quote used as delimiter) so the output stays well-formed.
        attrs = ' '.join(
            '{}="{}"'.format(k, escape(v, {'"': '&quot;'}))
            for k, v in processed_attrs.items()
        )
        attrs = f' {attrs}' if attrs else ''

        # Process text content
        text = element.text.strip() if element.text and element.text.strip() else ''
        if text:
            text = escape(anonymize_by_content(text))

        # Process child elements
        children = [self._anonymize_xml_element(child, indent + ' ') for child in element]

        # Build element string
        if not children and not text:
            return f"{indent}<{element.tag}{attrs}/>"
        if not children:
            return f"{indent}<{element.tag}{attrs}>{text}</{element.tag}>"

        result = [f"{indent}<{element.tag}{attrs}>"]
        if text:
            result.append(f"{indent} {text}")
        result.extend(children)
        result.append(f"{indent}</{element.tag}>")
        return '\n'.join(result)

    def process_content(self, content: str, content_type: str) -> ProcessResult:
        """
        Process content and return anonymized data

        Args:
            content: Content to process. ``bytes`` are accepted as well and
                decoded as UTF-8; undecodable bytes are reported as binary.
            content_type: Type of content ('csv', 'json', 'xml', 'text');
                any other value is handled like 'text'.

        Returns:
            ProcessResult: Contains anonymized data, mapping, replaced fields and processing info.
            The data is a pandas DataFrame for CSV, the parsed and anonymized
            object for JSON, and a string for XML and text. On error the data
            is None and processed_info is {'type': 'error', 'error': <message>}.
        """
        try:
            # Binary detection. The earlier check ran base64.b64decode() on the
            # input and treated any failure as "binary"; b64decode silently
            # drops non-alphabet characters, so ordinary text, JSON and CSV
            # were almost always misclassified and returned un-anonymized.
            # A str is text by definition; only raw bytes can be binary.
            if isinstance(content, (bytes, bytearray)):
                try:
                    content = bytes(content).decode('utf-8')
                except UnicodeDecodeError:
                    # TODO: Implement binary data neutralization
                    # This would require:
                    # 1. Detecting binary data types (images, audio, video, etc.)
                    # 2. Implementing specific neutralization for each type
                    # 3. Handling metadata and embedded content
                    # 4. Preserving binary integrity while removing sensitive data
                    return ProcessResult(content, self.mapping, [], {'type': 'binary', 'status': 'not_implemented'})

            replaced_fields = []
            processed_info = {}

            if content_type == 'json':
                # Process JSON recursively
                result = self._anonymize_json_value(json.loads(content))
                processed_info['type'] = 'json'
                return ProcessResult(result, self.mapping, replaced_fields, processed_info)

            if content_type == 'xml':
                # Process XML recursively with proper formatting
                result = self._anonymize_xml_element(ET.fromstring(content))
                processed_info['type'] = 'xml'
                return ProcessResult(result, self.mapping, replaced_fields, processed_info)

            if content_type == 'csv':
                # Handle as table
                df = pd.read_csv(StringIO(content), encoding='utf-8')
                table = TableData(
                    headers=df.columns.tolist(),
                    rows=df.values.tolist(),
                    source_type='csv'
                )
                processed_info['type'] = 'table'
                processed_info['headers'] = table.headers
                processed_info['row_count'] = len(table.rows)

                if not table.rows:
                    return ProcessResult(None, self.mapping, [], processed_info)

                anonymized_table = self._anonymize_table(table)

                # Track replaced fields: each header once, in column order.
                # The identity test keeps untouched NaN cells (NaN != NaN)
                # from being reported as replaced.
                for i, header in enumerate(anonymized_table.headers):
                    changed = any(
                        i < len(orig_row) and i < len(anon_row)
                        and anon_row[i] is not orig_row[i]
                        and anon_row[i] != orig_row[i]
                        for orig_row, anon_row in zip(table.rows, anonymized_table.rows)
                    )
                    if changed and header not in replaced_fields:
                        replaced_fields.append(header)

                # Convert back to original format
                result = pd.DataFrame(anonymized_table.rows, columns=anonymized_table.headers)
                return ProcessResult(result, self.mapping, replaced_fields, processed_info)

            # Handle as text
            # First, identify what needs to be replaced using table detection
            tables, plain_texts = self._extract_tables_from_text(content)
            processed_info['type'] = 'text'
            processed_info['tables'] = [{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]

            # Process plain text sections
            anonymized_texts = [self._anonymize_plain_text(text) for text in plain_texts]

            # Combine all processed content
            result = content
            for text, anonymized_text in zip(plain_texts, anonymized_texts):
                if text.content != anonymized_text.content:
                    result = result.replace(text.content, anonymized_text.content)

            return ProcessResult(result, self.mapping, replaced_fields, processed_info)

        except Exception as e:
            logger.error(f"Error processing content: {str(e)}")
            logger.debug(traceback.format_exc())
            return ProcessResult(None, self.mapping, [], {'type': 'error', 'error': str(e)})
|
||||||
402
modules/neutralizer/patterns.py
Normal file
402
modules/neutralizer/patterns.py
Normal file
|
|
@ -0,0 +1,402 @@
|
||||||
|
"""
|
||||||
|
Pattern definitions for data anonymization
|
||||||
|
Separates header patterns from data patterns
|
||||||
|
"""
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import List, Optional, Tuple
|
||||||
|
import re
|
||||||
|
|
||||||
|
@dataclass
class Pattern:
    """A named group of regular expressions that share one placeholder format."""
    # Category name, e.g. "name", "email", "phone".
    name: str
    # Regular-expression source strings belonging to this category.
    patterns: List[str]
    # Placeholder format with one positional "{}" slot, e.g. "[NAME_{}]".
    replacement_template: str
|
||||||
|
|
||||||
|
class HeaderPatterns:
    """Patterns for identifying sensitive data in headers

    Each Pattern groups regular expressions for one category of label
    (column header, key, attribute name) in English, German, French and
    Italian spellings, together with the placeholder template used for
    values found under such a label.
    """
    patterns = [
        # Name patterns
        Pattern(
            name="name",
            patterns=[
                # Simple variations
                r'\b(?:name|first[-_\s]*name|last[-_\s]*name|full[-_\s]*name)\b',
                r'\b(?:customer[-_\s]*name|client[-_\s]*name|user[-_\s]*name)\b',
                r'\b(?:given[-_\s]*name|family[-_\s]*name|surname)\b',
                # German variations
                r'\b(?:vorname|nachname|vollständiger[-_\s]*name|name)\b',
                r'\b(?:kunden[-_\s]*name|kunde[-_\s]*name|benutzer[-_\s]*name)\b',
                # French variations
                r'\b(?:prénom|nom|nom[-_\s]*complet)\b',
                r'\b(?:nom[-_\s]*du[-_\s]*client|nom[-_\s]*d\'utilisateur)\b',
                # Italian variations
                r'\b(?:nome|cognome|nome[-_\s]*completo)\b',
                r'\b(?:nome[-_\s]*cliente|nome[-_\s]*utente)\b',
                # Common variations
                r'\b(?:nom|name|nome|naam)\b'
            ],
            replacement_template="[NAME_{}]"
        ),

        # Email patterns
        Pattern(
            name="email",
            patterns=[
                # Simple variations - only labels
                # (each label may be followed by optional whitespace and a colon)
                r'\b(?:email|e[-_\s]*mail|mail)\s*:?\b',
                r'\b(?:contact[-_\s]*email|user[-_\s]*email|client[-_\s]*email)\s*:?\b',
                r'\b(?:customer[-_\s]*email|customer[-_\s]*mail|customer[-_\s]*e[-_\s]*mail)\s*:?\b',
                # German variations - only labels
                r'\b(?:e[-_\s]*mail|e[-_\s]*post|mail[-_\s]*adresse)\s*:?\b',
                r'\b(?:kontakt[-_\s]*email|benutzer[-_\s]*email|kunden[-_\s]*email)\s*:?\b',
                r'\b(?:kunden[-_\s]*mail|kunden[-_\s]*e[-_\s]*mail|kunden[-_\s]*e[-_\s]*post)\s*:?\b',
                # French variations - only labels
                r'\b(?:courriel|e[-_\s]*mail|adresse[-_\s]*e[-_\s]*mail)\s*:?\b',
                r'\b(?:courriel[-_\s]*de[-_\s]*contact|e[-_\s]*mail[-_\s]*client)\s*:?\b',
                r'\b(?:courriel[-_\s]*client|courriel[-_\s]*utilisateur|mail[-_\s]*client)\s*:?\b',
                # Italian variations - only labels
                r'\b(?:posta[-_\s]*elettronica|e[-_\s]*mail|indirizzo[-_\s]*e[-_\s]*mail)\s*:?\b',
                r'\b(?:email[-_\s]*cliente|email[-_\s]*utente)\s*:?\b',
                r'\b(?:mail[-_\s]*cliente|mail[-_\s]*utente|posta[-_\s]*cliente)\s*:?\b'
            ],
            replacement_template="[EMAIL_{}]"
        ),

        # Phone patterns
        Pattern(
            name="phone",
            patterns=[
                # Simple variations
                r'\b(?:phone|tel|telephone|mobile)\b',
                r'\b(?:contact[-_\s]*number|phone[-_\s]*number|tel[-_\s]*number)\b',
                # German variations
                r'\b(?:telefon|mobil|handy|telefon[-_\s]*nummer)\b',
                r'\b(?:kontakt[-_\s]*nummer|telefon[-_\s]*nummer|tel[-_\s]*nummer)\b',
                # French variations
                r'\b(?:téléphone|portable|mobile|numéro[-_\s]*de[-_\s]*téléphone)\b',
                r'\b(?:numéro[-_\s]*de[-_\s]*contact|tél[-_\s]*fixe|tél[-_\s]*mobile)\b',
                # Italian variations
                r'\b(?:telefono|cellulare|mobile|numero[-_\s]*di[-_\s]*telefono)\b',
                r'\b(?:numero[-_\s]*di[-_\s]*contatto|tel[-_\s]*fisso|tel[-_\s]*mobile)\b'
            ],
            replacement_template="[PHONE_{}]"
        ),

        # IBAN patterns
        # NOTE(review): this group also contains credit-card and payment
        # labels, so values under those headers receive "[IBAN_n]"
        # placeholders - confirm that a shared placeholder is intended.
        Pattern(
            name="iban",
            patterns=[
                # Simple variations
                r'\b(?:iban|bank[-_\s]*account|account[-_\s]*number)\b',
                r'\b(?:bank[-_\s]*details|account[-_\s]*details|banking[-_\s]*info)\b',
                # German variations
                r'\b(?:iban|bank[-_\s]*konto|konto[-_\s]*nummer)\b',
                r'\b(?:bank[-_\s]*verbindung|konto[-_\s]*verbindung|bank[-_\s]*daten)\b',
                # French variations
                r'\b(?:iban|compte[-_\s]*bancaire|numéro[-_\s]*de[-_\s]*compte)\b',
                r'\b(?:coordonnées[-_\s]*bancaires|détails[-_\s]*bancaires)\b',
                # Credit card variations in French
                r'\b(?:carte[-_\s]*de[-_\s]*credit|carte[-_\s]*credit|numero[-_\s]*carte[-_\s]*credit)\b',
                r'\b(?:carte[-_\s]*bancaire|carte[-_\s]*de[-_\s]*paiement)\b',
                r'\b(?:carte[-_\s]*de[-_\s]*crédit|carte[-_\s]*crédit|numéro[-_\s]*carte[-_\s]*crédit)\b',
                r'\b(?:carte[-_\s]*de[-_\s]*débit|carte[-_\s]*débit|numéro[-_\s]*carte[-_\s]*débit)\b',
                # Italian variations
                r'\b(?:iban|conto[-_\s]*bancario|numero[-_\s]*di[-_\s]*conto)\b',
                r'\b(?:coordinate[-_\s]*bancarie|dettagli[-_\s]*bancari)\b',
                # Common variations
                r'\b(?:bankkonto|bank[-_\s]*konto|conto[-_\s]*di[-_\s]*banca)\b',
                # Credit card variations
                r'\b(?:credit[-_\s]*card|credit[-_\s]*card[-_\s]*number|credit[-_\s]*card[-_\s]*no)\b',
                r'\b(?:credit[-_\s]*card[-_\s]*nr|credit[-_\s]*card[-_\s]*num)\b',
                r'\b(?:credit[-_\s]*card[-_\s]*id|credit[-_\s]*card[-_\s]*code)\b',
                r'\b(?:credit[-_\s]*card[-_\s]*reference|credit[-_\s]*card[-_\s]*ref)\b',
                r'\b(?:credit[-_\s]*card[-_\s]*details|credit[-_\s]*card[-_\s]*info)\b',
                r'\b(?:credit[-_\s]*card[-_\s]*data|credit[-_\s]*card[-_\s]*account)\b',
                # Credit card variations in other languages
                r'\b(?:kredit[-_\s]*karte|kreditkarte|kredit[-_\s]*karten[-_\s]*nummer)\b',
                r'\b(?:carta[-_\s]*di[-_\s]*credito|carta[-_\s]*credito|numero[-_\s]*carta[-_\s]*credito)\b',
                # Payment variations
                r'\b(?:payment[-_\s]*details|payment[-_\s]*info|payment[-_\s]*data)\b',
                r'\b(?:zahlungs[-_\s]*details|zahlungs[-_\s]*informationen|zahlungs[-_\s]*daten)\b',
                r'\b(?:détails[-_\s]*de[-_\s]*paiement|informations[-_\s]*de[-_\s]*paiement)\b',
                r'\b(?:dettagli[-_\s]*di[-_\s]*pagamento|informazioni[-_\s]*di[-_\s]*pagamento)\b',
                # Common credit card abbreviations
                r'\b(?:cc[-_\s]*number|cc[-_\s]*no|cc[-_\s]*nr)\b',
                r'\b(?:cc[-_\s]*num|cc[-_\s]*id|cc[-_\s]*code)\b',
                r'\b(?:cc[-_\s]*ref|cc[-_\s]*details|cc[-_\s]*info)\b',
                r'\b(?:cc[-_\s]*data|cc[-_\s]*account)\b',
                # Simple credit card
                # (repeats alternatives already listed under "Credit card variations")
                r'\b(?:credit[-_\s]*card|credit[-_\s]*card[-_\s]*number)\b',
                # Additional credit card variations
                r'\b(?:card[-_\s]*number|card[-_\s]*no|card[-_\s]*nr)\b',
                r'\b(?:card[-_\s]*num|card[-_\s]*id|card[-_\s]*code)\b',
                r'\b(?:card[-_\s]*ref|card[-_\s]*details|card[-_\s]*info)\b',
                r'\b(?:card[-_\s]*data|card[-_\s]*account)\b'
            ],
            replacement_template="[IBAN_{}]"
        ),

        # Address patterns
        Pattern(
            name="address",
            patterns=[
                # English variations
                r'\b(?:address|street[-_\s]*address|mailing[-_\s]*address)\b',
                r'\b(?:home[-_\s]*address|work[-_\s]*address|billing[-_\s]*address)\b',
                r'\b(?:.*address.*)\b',  # Match any text containing "address"
                # German variations
                r'\b(?:adresse|strassen[-_\s]*adresse|post[-_\s]*adresse)\b',
                r'\b(?:wohn[-_\s]*adresse|geschäfts[-_\s]*adresse|rechnungs[-_\s]*adresse)\b',
                r'\b(?:.*adresse.*)\b',  # Match any text containing "adresse"
                # French variations
                r'\b(?:adresse|adresse[-_\s]*postale|adresse[-_\s]*de[-_\s]*livraison)\b',
                r'\b(?:adresse[-_\s]*personnelle|adresse[-_\s]*professionnelle)\b',
                r'\b(?:.*adresse.*)\b',  # Match any text containing "adresse"
                # Italian variations
                r'\b(?:indirizzo|indirizzo[-_\s]*postale|indirizzo[-_\s]*di[-_\s]*consegna)\b',
                r'\b(?:indirizzo[-_\s]*personale|indirizzo[-_\s]*professionale)\b',
                r'\b(?:.*indirizzo.*)\b',  # Match any text containing "indirizzo"
                # Common variations
                r'\b(?:location|place|residence|domicile)\b',
                r'\b(?:standort|ort|wohnort|domizil)\b',
                r'\b(?:lieu|emplacement|résidence|domicile)\b',
                r'\b(?:luogo|posizione|residenza|domicilio)\b'
            ],
            replacement_template="[ADDRESS_{}]"
        ),

        # Date patterns
        Pattern(
            name="date",
            patterns=[
                # English variations
                r'\b(?:date|birth[-_\s]*date|date[-_\s]*of[-_\s]*birth)\b',
                r'\b(?:dob|birthday|anniversary)\b',
                # German variations
                r'\b(?:datum|geburt[-_\s]*datum|geboren[-_\s]*am)\b',
                r'\b(?:geburtstag|jubiläum|feier[-_\s]*tag)\b',
                r'\b(?:geboren|geb\.|geboren[-_\s]*am)\b',
                # French variations
                r'\b(?:date|date[-_\s]*de[-_\s]*naissance|né[-_\s]*le)\b',
                r'\b(?:anniversaire|date[-_\s]*anniversaire)\b',
                r'\b(?:né|née|né[-_\s]*le)\b',
                # Italian variations
                r'\b(?:data|data[-_\s]*di[-_\s]*nascita|nato[-_\s]*il)\b',
                r'\b(?:compleanno|anniversario)\b',
                r'\b(?:nato|nata|nato[-_\s]*il)\b',
                # Common variations
                r'\b(?:birth|born|geboren|né|nato)\b'
            ],
            replacement_template="[DATE_{}]"
        ),

        # SSN patterns
        Pattern(
            name="ssn",
            patterns=[
                # English variations
                r'\b(?:ssn|social[-_\s]*security[-_\s]*number|tax[-_\s]*id)\b',
                r'\b(?:tax[-_\s]*identification|national[-_\s]*id)\b',
                # German variations
                r'\b(?:ahv[-_\s]*nummer|sozial[-_\s]*versicherungs[-_\s]*nummer)\b',
                r'\b(?:steuer[-_\s]*nummer|steuer[-_\s]*id|svn)\b',
                r'\b(?:ahv[-_\s]*nr|ahv[-_\s]*no|ahv[-_\s]*num)\b',
                # French variations
                r'\b(?:numéro[-_\s]*avs|numéro[-_\s]*de[-_\s]*sécurité[-_\s]*sociale)\b',
                r'\b(?:numéro[-_\s]*fiscal|numéro[-_\s]*d\'identification)\b',
                # Italian variations
                r'\b(?:numero[-_\s]*avs|numero[-_\s]*di[-_\s]*sicurezza[-_\s]*sociale)\b',
                r'\b(?:numero[-_\s]*fiscale|codice[-_\s]*fiscale)\b',
                # Common variations
                r'\b(?:ahv|svn|nss|avs)\b',
                # Additional AHV variations
                # (several entries below list the same alternative twice)
                r'\b(?:ahv_nummer|ahvnummer|ahv-nummer|ahv_number)\b',
                r'\b(?:ahv[-_\s]*nr|ahv[-_\s]*no|ahv[-_\s]*num)\b',
                r'\b(?:ahv[-_\s]*number|ahv[-_\s]*number)\b',
                r'\b(?:ahv[-_\s]*id|ahv[-_\s]*id)\b',
                r'\b(?:ahv[-_\s]*code|ahv[-_\s]*code)\b',
                r'\b(?:ahv[-_\s]*reference|ahv[-_\s]*reference)\b',
                r'\b(?:ahv[-_\s]*reference[-_\s]*number|ahv[-_\s]*reference[-_\s]*number)\b',
                r'\b(?:ahv[-_\s]*reference[-_\s]*no|ahv[-_\s]*reference[-_\s]*no)\b',
                r'\b(?:ahv[-_\s]*reference[-_\s]*nr|ahv[-_\s]*reference[-_\s]*nr)\b',
                r'\b(?:ahv[-_\s]*reference[-_\s]*num|ahv[-_\s]*reference[-_\s]*num)\b',
                r'\b(?:ahv[-_\s]*reference[-_\s]*id|ahv[-_\s]*reference[-_\s]*id)\b',
                r'\b(?:ahv[-_\s]*reference[-_\s]*code|ahv[-_\s]*reference[-_\s]*code)\b'
            ],
            replacement_template="[SSN_{}]"
        )
    ]
|
||||||
|
|
||||||
|
class DataPatterns:
    """Patterns for identifying sensitive data in content.

    ``patterns`` is a list of ``Pattern`` entries. Each entry carries a
    ``name``, the list of regular expressions for that kind of value and a
    ``replacement_template`` string.
    """

    patterns = [
        # Name patterns
        Pattern(
            name="name",
            patterns=[
                # Person names with titles and academic degrees
                r'\b(?:Dr\.|Prof\.|PhD\.?|MD\.?|Herr|Frau|Mr\.|Mrs\.|Ms\.|Monsieur|Madame|Signore|Signora)\s+[A-Z][a-z]{2,}(?:\s+[A-Za-z]{2,}){1,2}\b'
            ],
            replacement_template="[NAME_{}]"
        ),

        # Email pattern for plain text
        Pattern(
            name="email",
            patterns=[
                # Basic email pattern
                r'[A-Za-z0-9._%+-]+@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)*'
            ],
            replacement_template="[EMAIL_{}]"
        ),

        # Phone patterns
        Pattern(
            name="phone",
            patterns=[
                # International format
                r'\+\d{1,3}[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}\b',
                # Swiss format
                r'\b(?:0\d{1,2}|0041\d{1,2})[-.\s]?\d{3}[-.\s]?\d{2}[-.\s]?\d{2}\b',
                # German format
                r'\b(?:0\d{1,4}|0049\d{1,4})[-.\s]?\d{3,}[-.\s]?\d{3,}\b',
                # French format
                r'\b(?:0\d{1,2}|0033\d{1,2})[-.\s]?\d{1,2}[-.\s]?\d{2}[-.\s]?\d{2}[-.\s]?\d{2}\b',
                # Italian format
                r'\b(?:0\d{1,3}|0039\d{1,3})[-.\s]?\d{3,}[-.\s]?\d{3,}\b',
                # Mobile numbers
                r'\b(?:07|00417|004917|00337|00397)\d{8,9}\b',
                # Emergency numbers
                r'\b(?:112|911|118|117|144|1414)\b'
            ],
            replacement_template="[PHONE_{}]"
        ),

        # IBAN patterns
        Pattern(
            name="iban",
            patterns=[
                # Original 26-character forms (24 digits after the country
                # code). No CH/DE/FR/IT IBAN has this length; they are kept
                # unchanged only so existing matches stay identical.
                r'\b(?:CH|DE|FR|IT)\d{2}\s?(?:\d{4}\s?){5}\d{2}\b',
                r'\b(?:CH|DE|FR|IT)\d{2}(?:\d{4}){5}\d{2}\b',
                # Switzerland: 21 characters - check digits, 5-digit bank
                # code, 12 alphanumeric account characters
                r'\bCH\d{2}\s?\d{4}\s?\d[A-Z0-9]{3}(?:\s?[A-Z0-9]{4}){2}\s?[A-Z0-9]\b',
                # Germany: 22 characters, digits only after the country code
                r'\bDE\d{2}(?:\s?\d{4}){4}\s?\d{2}\b',
                # France and Italy: 27 characters, alphanumeric account part
                r'\b(?:FR|IT)\d{2}(?:\s?[A-Z0-9]{4}){5}\s?[A-Z0-9]{3}\b'
            ],
            replacement_template="[IBAN_{}]"
        ),

        # Address patterns
        Pattern(
            name="address",
            patterns=[
                r'\b(?:[A-Za-zäöüßÄÖÜ]+(?:strasse|str\.|gasse|weg|platz|allee|boulevard|avenue|via|strada|rue|chemin|route))\s+\d{1,4}(?:[a-z])?\b',
                r'\b\d{4}\s+[A-Za-zäöüßÄÖÜ]+\b'
            ],
            replacement_template="[ADDRESS_{}]"
        ),

        # Date patterns
        Pattern(
            name="date",
            patterns=[
                # Specific date formats with context
                r'\b(?:geboren|birth|né|nato)\s+am\s+[0-9]{2}[./-][0-9]{2}[./-][0-9]{4}\b',  # Birth dates
                r'\b(?:geboren|birth|né|nato)\s+am\s+[0-9]{4}[./-][0-9]{2}[./-][0-9]{2}\b',  # Birth dates
                r'\b(?:vertrag|contract|contrat|contratto)\s+vom\s+[0-9]{2}[./-][0-9]{2}[./-][0-9]{4}\b',  # Contract dates
                r'\b(?:vertrag|contract|contrat|contratto)\s+vom\s+[0-9]{4}[./-][0-9]{2}[./-][0-9]{2}\b',  # Contract dates
                # Specific date formats with month names
                r'\b(?:geboren|birth|né|nato)\s+am\s+(?:jan|feb|mar|apr|mai|jun|jul|aug|sep|okt|nov|dez|januar|februar|märz|april|mai|juni|juli|august|september|oktober|november|dezember)[a-z]*\s+\d{4}\b',  # Birth dates with month
                r'\b(?:vertrag|contract|contrat|contratto)\s+vom\s+(?:jan|feb|mar|apr|mai|jun|jul|aug|sep|okt|nov|dez|januar|februar|märz|april|mai|juni|juli|august|september|oktober|november|dezember)[a-z]*\s+\d{4}\b'  # Contract dates with month
            ],
            replacement_template="[DATE_{}]"
        ),

        # SSN patterns
        Pattern(
            name="ssn",
            patterns=[
                r'\b(?:756|757|758|759)\.\d{4}\.\d{4}\.\d{2}\b',  # Swiss AHV
                r'\b(?:CHE|DE|FR|IT)-\d{3}\.\d{3}\.\d{3}\b',  # Company IDs
                r'\b\d{3}\.\d{3}\.\d{3}\b'  # Generic SSN format
            ],
            replacement_template="[SSN_{}]"
        )
    ]
|
||||||
|
|
||||||
|
class TextTablePatterns:
    """Helpers for recognising ``key: value`` / ``key = value`` lines in text."""

    @staticmethod
    def get_patterns() -> List[Tuple[str, str]]:
        """Return the (regex, separator) pairs, in the order they are tried."""
        colon = ':'
        equals = '='
        return [
            # key: value pattern (with optional whitespace)
            (r'^([^:]+):\s*(.+)$', colon),
            # key = value pattern (with optional whitespace)
            (r'^([^=]+)=\s*(.+)$', equals),
            # key = value pattern (with required whitespace)
            (r'^([^=]+)\s+=\s+(.+)$', equals),
            # key: value pattern (with required whitespace)
            (r'^([^:]+)\s+:\s+(.+)$', colon),
        ]

    @staticmethod
    def is_table_line(line: str) -> bool:
        """Tell whether the stripped line matches at least one table pattern."""
        candidate = line.strip()
        for regex, _separator in TextTablePatterns.get_patterns():
            if re.match(regex, candidate):
                return True
        return False

    @staticmethod
    def extract_key_value(line: str) -> Optional[Tuple[str, str]]:
        """Return the stripped (key, value) of the first matching pattern, else None."""
        candidate = line.strip()
        attempts = (
            re.match(regex, candidate)
            for regex, _separator in TextTablePatterns.get_patterns()
        )
        # First successful match wins, mirroring the declared pattern order.
        hit = next((found for found in attempts if found), None)
        if hit is None:
            return None
        return hit.group(1).strip(), hit.group(2).strip()
|
||||||
|
|
||||||
|
def get_pattern_for_header(header: str, patterns: List[Pattern]) -> Optional[Pattern]:
    """
    Look up the first pattern entry whose regexes match a header.

    Args:
        header: The header text; it is lower-cased and stripped before matching
        patterns: Pattern entries, checked in order

    Returns:
        Optional[Pattern]: The first entry with a matching regex, or None when
        the header is empty or nothing matches
    """
    if not header:
        return None

    normalized = header.lower().strip()

    # Lazily scan entries; the first one with any matching regex is returned.
    return next(
        (
            candidate
            for candidate in patterns
            if any(re.search(regex, normalized, re.IGNORECASE) for regex in candidate.patterns)
        ),
        None,
    )
|
||||||
|
|
||||||
|
def find_patterns_in_text(text: str, patterns: List[Pattern]) -> List[tuple]:
    """
    Find all pattern matches in text

    Every regex of every pattern entry is applied case-insensitively. Nothing
    is written to stdout: the matched values are the sensitive data this
    function exists to locate, so they must not be echoed.

    Args:
        text: Text to search
        patterns: List of patterns to check

    Returns:
        List[tuple]: List of (pattern_name, match, start, end), ordered by
        start position; matches with the same start keep discovery order
    """
    matches = []
    for pattern in patterns:
        for p in pattern.patterns:
            for match in re.finditer(p, text, re.IGNORECASE):
                matches.append((pattern.name, match.group(0), match.start(), match.end()))
    return sorted(matches, key=lambda x: x[2])  # Sort by start position
|
||||||
|
|
@ -1,67 +0,0 @@
|
||||||
"""
|
|
||||||
Utility functions for MIME type handling and file format determination.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def isTextMimeType(mimeType: str) -> bool:
    """
    Tell whether a MIME type denotes textual content (no base64 needed).

    Args:
        mimeType: The MIME type to check

    Returns:
        True for any "text/..." type and for the listed textual application
        and image types, False otherwise
    """
    if mimeType.startswith("text/"):
        return True

    # Non-"text/" types that are nevertheless plain text on the wire.
    textualTypes = (
        "application/json",
        "application/xml",
        "application/javascript",
        "application/x-python",
        "image/svg+xml",
    )
    return mimeType in textualTypes
|
|
||||||
|
|
||||||
def determineContentEncoding(fileName: str, content: any, mimeType: str = None) -> bool:
    """
    Decide whether content has to be base64 encoded.

    Args:
        fileName: Name of the file including extension
        content: The content of the file
        mimeType: Optional MIME type of the content

    Returns:
        True if content should be base64 encoded, False otherwise
    """
    contentIsText = isinstance(content, str)

    # A textual MIME type settles it: only str content can go out unencoded.
    if mimeType and isTextMimeType(mimeType):
        return not contentIsText

    # Imported locally, as in the original implementation
    import os

    suffix = os.path.splitext(fileName)[1].lower().lstrip('.')

    # Textual extension carrying str content: no encoding needed.
    if contentIsText and suffix in {'txt', 'csv', 'json', 'xml', 'html', 'md', 'svg', 'js', 'css', 'py'}:
        return False

    # Known binary extensions are always encoded, whatever the content type.
    if suffix in {'jpg', 'jpeg', 'png', 'gif', 'pdf', 'doc', 'docx', 'xls', 'xlsx', 'zip', 'rar'}:
        return True

    # Everything else: bytes (and any other non-str value) is encoded,
    # remaining str content is not.
    return not contentIsText
|
|
||||||
106
modules/workflow/documentService.py
Normal file
106
modules/workflow/documentService.py
Normal file
|
|
@ -0,0 +1,106 @@
|
||||||
|
"""
|
||||||
|
Document Manager Module for handling document operations and content extraction.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import logging
|
||||||
|
from typing import List, Optional, Dict, Any, Union
|
||||||
|
from pathlib import Path
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from modules.interfaces.serviceChatModel import (
|
||||||
|
ChatDocument,
|
||||||
|
TaskDocument,
|
||||||
|
ExtractedContent,
|
||||||
|
ContentItem,
|
||||||
|
ContentMetadata
|
||||||
|
)
|
||||||
|
from modules.workflow.serviceContainer import ServiceContainer
|
||||||
|
from modules.workflow.processorDocument import DocumentProcessor
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class DocumentManager:
    """Coordinates document conversion and content extraction."""

    def __init__(self, serviceContainer: ServiceContainer):
        """Keep the service container and create a fresh DocumentProcessor."""
        self.service = serviceContainer
        self._processor = DocumentProcessor()

    async def extractFromChatDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
        """
        Run extraction on a ChatDocument.

        The chat document is first converted to a TaskDocument, processed, and
        the result is then re-pointed at the original chat document.

        Args:
            prompt: Prompt handed to the processor
            document: The ChatDocument to process

        Returns:
            The processor's result with objectId set to the chat document's id
            and objectType set to "ChatDocument"
        """
        converted = await self._convertToTaskDocument(document)
        result = await self._processor.processDocument(converted, prompt)

        # The result must reference the chat document, not the temporary
        # task document created for processing.
        result.objectId, result.objectType = document.id, "ChatDocument"
        return result

    async def extractFromTaskDocument(self, prompt: str, document: TaskDocument) -> ExtractedContent:
        """
        Run extraction directly on a task document.

        Args:
            prompt: Prompt handed to the processor
            document: The task document to extract content from

        Returns:
            Whatever the processor returns for this document

        Raises:
            Exception: Any processor failure is logged and re-raised unchanged
        """
        try:
            extracted = await self._processor.processDocument(document, prompt)
        except Exception as err:
            logger.error(f"Error extracting from task document: {str(err)}")
            raise
        return extracted

    async def _convertToTaskDocument(self, chatDoc: ChatDocument) -> TaskDocument:
        """
        Build a TaskDocument from a ChatDocument.

        The file data is fetched through the service container, base64
        encoded, and wrapped in a TaskDocument with a newly generated id.

        Args:
            chatDoc: The chat document to convert

        Returns:
            TaskDocument carrying the chat document's filename, size, MIME
            type and the base64-encoded data

        Raises:
            ValueError: If no file content is returned for chatDoc.fileId
            Exception: Any other failure is logged and re-raised unchanged
        """
        try:
            rawContent = await self.service.functions.getFileData(chatDoc.fileId)
            if not rawContent:
                raise ValueError(f"Could not get content for file {chatDoc.fileId}")

            encoded = base64.b64encode(rawContent).decode('utf-8')

            converted = TaskDocument(
                id=str(uuid.uuid4()),
                filename=chatDoc.filename,
                fileSize=chatDoc.fileSize,
                mimeType=chatDoc.mimeType,
                data=encoded
            )
        except Exception as err:
            logger.error(f"Error converting chat document to task document: {str(err)}")
            raise
        return converted
|
||||||
|
|
@ -2,15 +2,18 @@ import logging
|
||||||
import importlib
|
import importlib
|
||||||
import pkgutil
|
import pkgutil
|
||||||
import inspect
|
import inspect
|
||||||
from typing import Dict, Any, Optional, List, Type
|
from typing import Dict, Any, Optional, List, Type, Callable, Awaitable
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
import json
|
import json
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import base64
|
||||||
|
|
||||||
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
|
from modules.methods.methodBase import MethodBase, AuthSource, MethodResult
|
||||||
from modules.workflow.serviceContainer import ServiceContainer
|
from modules.workflow.serviceContainer import ServiceContainer
|
||||||
from modules.interfaces.serviceChatModel import AgentTask, AgentAction, AgentResult, Action, TaskStatus
|
from modules.interfaces.serviceChatModel import (
|
||||||
from modules.workflow.managerPrompt import AIPromptManager
|
AgentTask, AgentAction, AgentResult, Action, TaskStatus, ChatWorkflow,
|
||||||
|
ChatMessage, ChatDocument, ChatStat, ExtractedContent, ContentItem
|
||||||
|
)
|
||||||
from modules.workflow.processorDocument import DocumentProcessor
|
from modules.workflow.processorDocument import DocumentProcessor
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
|
|
@ -21,33 +24,34 @@ class ChatManager:
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.service = ServiceContainer()
|
self.service = ServiceContainer()
|
||||||
self._discover_methods()
|
self._discoverMethods()
|
||||||
self.workflow = None
|
self.workflow: Optional[ChatWorkflow] = None
|
||||||
self.current_task = None
|
self.currentTask: Optional[AgentTask] = None
|
||||||
self.workflow_history = []
|
self.workflowHistory: List[ChatMessage] = []
|
||||||
|
self.documentProcessor = DocumentProcessor()
|
||||||
|
|
||||||
def _discover_methods(self):
|
def _discoverMethods(self):
|
||||||
"""Dynamically discover all method classes in modules.methods package"""
|
"""Dynamically discover all method classes in modules.methods package"""
|
||||||
try:
|
try:
|
||||||
# Import the methods package
|
# Import the methods package
|
||||||
methods_package = importlib.import_module('modules.methods')
|
methodsPackage = importlib.import_module('modules.methods')
|
||||||
|
|
||||||
# Discover all modules in the package
|
# Discover all modules in the package
|
||||||
for _, name, is_pkg in pkgutil.iter_modules(methods_package.__path__):
|
for _, name, isPkg in pkgutil.iter_modules(methodsPackage.__path__):
|
||||||
if not is_pkg and name.startswith('method'):
|
if not isPkg and name.startswith('method'):
|
||||||
try:
|
try:
|
||||||
# Import the module
|
# Import the module
|
||||||
module = importlib.import_module(f'modules.methods.{name}')
|
module = importlib.import_module(f'modules.methods.{name}')
|
||||||
|
|
||||||
# Find all classes in the module that inherit from MethodBase
|
# Find all classes in the module that inherit from MethodBase
|
||||||
for item_name, item in inspect.getmembers(module):
|
for itemName, item in inspect.getmembers(module):
|
||||||
if (inspect.isclass(item) and
|
if (inspect.isclass(item) and
|
||||||
issubclass(item, MethodBase) and
|
issubclass(item, MethodBase) and
|
||||||
item != MethodBase):
|
item != MethodBase):
|
||||||
# Instantiate the method and add to service
|
# Instantiate the method and add to service
|
||||||
method_instance = item()
|
methodInstance = item()
|
||||||
self.service.methods[method_instance.name] = method_instance
|
self.service.methods[methodInstance.name] = methodInstance
|
||||||
logger.info(f"Discovered method: {method_instance.name}")
|
logger.info(f"Discovered method: {methodInstance.name}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error loading method module {name}: {str(e)}")
|
logger.error(f"Error loading method module {name}: {str(e)}")
|
||||||
|
|
@ -55,37 +59,58 @@ class ChatManager:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error discovering methods: {str(e)}")
|
logger.error(f"Error discovering methods: {str(e)}")
|
||||||
|
|
||||||
async def initialize(self, workflow: Any, context: Dict[str, Any]) -> None:
|
async def initialize(self, workflow: ChatWorkflow) -> None:
|
||||||
"""Initialize chat manager with workflow and context"""
|
"""Initialize chat manager with workflow"""
|
||||||
self.service.workflow = workflow
|
self.service.workflow = workflow
|
||||||
self.service.context = context
|
|
||||||
|
|
||||||
# Initialize AI model
|
# Initialize AI model
|
||||||
self.service.model = {
|
self.service.model = {
|
||||||
'callAiBasic': self._call_ai_basic,
|
'callAiBasic': self._callAiBasic,
|
||||||
'callAiAdvanced': self._call_ai_advanced
|
'callAiAdvanced': self._callAiAdvanced
|
||||||
}
|
}
|
||||||
|
|
||||||
# Initialize document processor
|
# Initialize document processor
|
||||||
self.service.document_processor.initialize(context)
|
self.service.documentProcessor.initialize()
|
||||||
|
|
||||||
async def create_initial_task(self, user_input: Dict[str, Any]) -> AgentTask:
|
def _generatePrompt(self, task: str, document: ChatDocument, examples: List[Dict[str, str]] = None) -> str:
|
||||||
|
"""Generate a prompt based on task and document"""
|
||||||
|
try:
|
||||||
|
# Create base prompt
|
||||||
|
prompt = f"""Task: {task}
|
||||||
|
Document: {document.filename} ({document.mimeType})
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Add examples if provided
|
||||||
|
if examples:
|
||||||
|
prompt += "\nExamples:\n"
|
||||||
|
for example in examples:
|
||||||
|
prompt += f"Input: {example.get('input', '')}\n"
|
||||||
|
prompt += f"Output: {example.get('output', '')}\n\n"
|
||||||
|
|
||||||
|
return prompt
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error generating prompt: {str(e)}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
async def createInitialTask(self, userInput: Dict[str, Any]) -> AgentTask:
|
||||||
"""Create initial task from user input"""
|
"""Create initial task from user input"""
|
||||||
# Get available methods and their actions
|
# Get available methods and their actions
|
||||||
method_catalog = self.service.get_available_methods()
|
methodCatalog = self.service.getAvailableMethods()
|
||||||
|
|
||||||
# Process user input with AI
|
# Process user input with AI
|
||||||
processed_input = await self._process_user_input(user_input, method_catalog)
|
processedInput = await self._processUserInput(userInput, methodCatalog)
|
||||||
|
|
||||||
# Create actions from processed input
|
# Create actions from processed input
|
||||||
actions = await self._create_actions(processed_input['actions'])
|
actions = await self._createActions(processedInput['actions'])
|
||||||
|
|
||||||
# Create task
|
# Create task
|
||||||
task = AgentTask(
|
task = AgentTask(
|
||||||
id=f"task_{datetime.now(UTC).timestamp()}",
|
id=f"task_{datetime.now(UTC).timestamp()}",
|
||||||
workflowId=self.workflow.id,
|
workflowId=self.workflow.id,
|
||||||
userInput=processed_input['objective'],
|
userInput=processedInput['objective'],
|
||||||
dataList=user_input.get('connections', []),
|
dataList=userInput.get('connections', []),
|
||||||
actionList=actions,
|
actionList=actions,
|
||||||
status=TaskStatus.PENDING,
|
status=TaskStatus.PENDING,
|
||||||
createdAt=datetime.now(UTC),
|
createdAt=datetime.now(UTC),
|
||||||
|
|
@ -96,15 +121,15 @@ class ChatManager:
|
||||||
self.service.tasks['current'] = task
|
self.service.tasks['current'] = task
|
||||||
return task
|
return task
|
||||||
|
|
||||||
async def execute_current_task(self) -> None:
|
async def executeCurrentTask(self) -> None:
|
||||||
"""Execute current task"""
|
"""Execute current task"""
|
||||||
task = self.service.tasks.get('current')
|
task = self.service.tasks.get('current')
|
||||||
if not task:
|
if not task:
|
||||||
raise ValueError("No current task to execute")
|
raise ValueError("No current task to execute")
|
||||||
|
|
||||||
await self.service.execute_task(task)
|
await self.service.executeTask(task)
|
||||||
|
|
||||||
async def define_next_task(self) -> Optional[AgentTask]:
|
async def defineNextTask(self) -> Optional[AgentTask]:
|
||||||
"""Define next task based on current task results"""
|
"""Define next task based on current task results"""
|
||||||
current_task = self.service.tasks.get('current')
|
current_task = self.service.tasks.get('current')
|
||||||
if not current_task:
|
if not current_task:
|
||||||
|
|
@ -112,7 +137,7 @@ class ChatManager:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Analyze task results
|
# Analyze task results
|
||||||
analysis = await self._analyze_task_results(current_task)
|
analysis = await self._analyzeTaskResults(current_task)
|
||||||
|
|
||||||
# If workflow is complete, update task status
|
# If workflow is complete, update task status
|
||||||
if analysis['isComplete']:
|
if analysis['isComplete']:
|
||||||
|
|
@ -122,7 +147,7 @@ class ChatManager:
|
||||||
|
|
||||||
# If more actions needed, create next task
|
# If more actions needed, create next task
|
||||||
if not analysis['isComplete']:
|
if not analysis['isComplete']:
|
||||||
next_task = self._create_next_task(current_task, analysis)
|
next_task = self._createNextTask(current_task, analysis)
|
||||||
self.service.tasks['previous'] = current_task
|
self.service.tasks['previous'] = current_task
|
||||||
self.service.tasks['current'] = next_task
|
self.service.tasks['current'] = next_task
|
||||||
return next_task
|
return next_task
|
||||||
|
|
@ -133,15 +158,15 @@ class ChatManager:
|
||||||
current_task.updatedAt = datetime.now(UTC)
|
current_task.updatedAt = datetime.now(UTC)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def _process_user_input(self, user_input: Dict[str, Any], method_catalog: Dict[str, Any]) -> Dict[str, Any]:
|
async def _processUserInput(self, userInput: Dict[str, Any], methodCatalog: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Process user input with AI to extract objectives and actions"""
|
"""Process user input with AI to extract objectives and actions"""
|
||||||
# Create prompt with available methods and actions
|
# Create prompt with available methods and actions
|
||||||
prompt = f"""Given the following user input and available methods/actions, extract the objective and required actions:
|
prompt = f"""Given the following user input and available methods/actions, extract the objective and required actions:
|
||||||
|
|
||||||
User Input: {user_input.get('message', '')}
|
User Input: {userInput.get('message', '')}
|
||||||
|
|
||||||
Available Methods and Actions:
|
Available Methods and Actions:
|
||||||
{json.dumps(method_catalog, indent=2)}
|
{json.dumps(methodCatalog, indent=2)}
|
||||||
|
|
||||||
Please provide a JSON response with:
|
Please provide a JSON response with:
|
||||||
1. objective: The main goal or task to accomplish
|
1. objective: The main goal or task to accomplish
|
||||||
|
|
@ -164,22 +189,22 @@ Example format:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Call AI service
|
# Call AI service
|
||||||
response = await self.service.model['callAiBasic'](prompt)
|
response = await self._callAiBasic(prompt)
|
||||||
return json.loads(response)
|
return json.loads(response)
|
||||||
|
|
||||||
async def _create_actions(self, actions_data: List[Dict[str, Any]]) -> List[AgentAction]:
|
async def _createActions(self, actionsData: List[Dict[str, Any]]) -> List[AgentAction]:
|
||||||
"""Create action objects from processed input"""
|
"""Create action objects from processed input"""
|
||||||
actions = []
|
actions = []
|
||||||
for action_data in actions_data:
|
for actionData in actionsData:
|
||||||
method = self.service.get_method(action_data['method'])
|
method = self.service.getMethod(actionData['method'])
|
||||||
if not method:
|
if not method:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
action = AgentAction(
|
action = AgentAction(
|
||||||
id=f"action_{datetime.now(UTC).timestamp()}",
|
id=f"action_{datetime.now(UTC).timestamp()}",
|
||||||
method=action_data['method'],
|
method=actionData['method'],
|
||||||
action=action_data['action'],
|
action=actionData['action'],
|
||||||
parameters=action_data.get('parameters', {}),
|
parameters=actionData.get('parameters', {}),
|
||||||
status=TaskStatus.PENDING,
|
status=TaskStatus.PENDING,
|
||||||
createdAt=datetime.now(UTC),
|
createdAt=datetime.now(UTC),
|
||||||
updatedAt=datetime.now(UTC)
|
updatedAt=datetime.now(UTC)
|
||||||
|
|
@ -188,7 +213,7 @@ Example format:
|
||||||
|
|
||||||
return actions
|
return actions
|
||||||
|
|
||||||
async def _summarize_workflow(self) -> str:
|
async def _summarizeWorkflow(self) -> str:
|
||||||
"""Summarize workflow history"""
|
"""Summarize workflow history"""
|
||||||
if not self.workflow.messages:
|
if not self.workflow.messages:
|
||||||
return ""
|
return ""
|
||||||
|
|
@ -203,12 +228,12 @@ Example format:
|
||||||
4. Any issues or blockers
|
4. Any issues or blockers
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return await self.service.model['callAiBasic'](prompt)
|
return await self._callAiBasic(prompt)
|
||||||
|
|
||||||
async def _analyze_task_results(self, task: AgentTask) -> Dict[str, Any]:
|
async def _analyzeTaskResults(self, task: AgentTask) -> Dict[str, Any]:
|
||||||
"""Analyze task results to determine next steps"""
|
"""Analyze task results to determine next steps"""
|
||||||
# Get workflow summary
|
# Get workflow summary
|
||||||
summary = await self._summarize_workflow()
|
summary = await self._summarizeWorkflow()
|
||||||
|
|
||||||
# Create prompt for analysis
|
# Create prompt for analysis
|
||||||
prompt = f"""Analyze the following task results and workflow history to determine next steps:
|
prompt = f"""Analyze the following task results and workflow history to determine next steps:
|
||||||
|
|
@ -240,10 +265,10 @@ Example format:
|
||||||
}}
|
}}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
response = await self.service.model['callAiBasic'](prompt)
|
response = await self._callAiBasic(prompt)
|
||||||
return json.loads(response)
|
return json.loads(response)
|
||||||
|
|
||||||
def _create_next_task(self, current_task: AgentTask, analysis: Dict[str, Any]) -> AgentTask:
|
def _createNextTask(self, current_task: AgentTask, analysis: Dict[str, Any]) -> AgentTask:
|
||||||
"""Create next task based on analysis"""
|
"""Create next task based on analysis"""
|
||||||
# Create actions for next task
|
# Create actions for next task
|
||||||
actions = []
|
actions = []
|
||||||
|
|
@ -271,20 +296,20 @@ Example format:
|
||||||
updatedAt=datetime.now(UTC)
|
updatedAt=datetime.now(UTC)
|
||||||
)
|
)
|
||||||
|
|
||||||
async def process_task(self, task: Any) -> Dict[str, Any]:
|
async def processTask(self, task: AgentTask) -> Dict[str, Any]:
|
||||||
"""Process a task with improved error handling and AI integration"""
|
"""Process a task with improved error handling and AI integration"""
|
||||||
try:
|
try:
|
||||||
# Execute task
|
# Execute task
|
||||||
await self.service.execute_task(task)
|
await self.service.executeTask(task)
|
||||||
|
|
||||||
# Process results
|
# Process results
|
||||||
if task.status == 'success':
|
if task.status == TaskStatus.COMPLETED:
|
||||||
# Generate feedback using AI
|
# Generate feedback using AI
|
||||||
feedback = await self._process_task_results(task)
|
feedback = await self._processTaskResults(task)
|
||||||
task.thisTaskFeedback = feedback
|
task.thisTaskFeedback = feedback
|
||||||
|
|
||||||
# Create output documents
|
# Create output documents
|
||||||
documents = await self._create_output_documents(task)
|
documents = await self._createOutputDocuments(task)
|
||||||
task.documentsOutput = documents
|
task.documentsOutput = documents
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|
@ -307,89 +332,168 @@ Example format:
|
||||||
"feedback": f"Error processing task: {str(e)}"
|
"feedback": f"Error processing task: {str(e)}"
|
||||||
}
|
}
|
||||||
|
|
||||||
async def _process_task_results(self, task: Any) -> str:
|
def _generateDocumentPrompt(self, task: str) -> str:
|
||||||
"""Process task results and generate feedback using AI"""
|
"""Generate a prompt for document generation"""
|
||||||
|
return f"""Generate output documents for the following task:
|
||||||
|
|
||||||
|
Task: {task}
|
||||||
|
|
||||||
|
For each document you need to generate, provide a TaskDocument object with the following structure:
|
||||||
|
{{
|
||||||
|
"filename": "string", # Filename with extension
|
||||||
|
"mimeType": "string", # MIME type of the file
|
||||||
|
"data": "string", # File content as text or base64
|
||||||
|
"base64Encoded": boolean # True if data is base64 encoded
|
||||||
|
}}
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
1. For text files (txt, json, xml, etc.), provide content directly in the data field
|
||||||
|
2. For binary files (images, videos, etc.), encode content in base64 and set base64Encoded to true
|
||||||
|
3. Use appropriate MIME types (e.g., text/plain, image/jpeg, application/pdf)
|
||||||
|
4. Include file extensions in filenames
|
||||||
|
|
||||||
|
Return a JSON array of TaskDocument objects.
|
||||||
|
"""
|
||||||
|
|
||||||
|
async def _processTaskResults(self, task: AgentTask) -> str:
|
||||||
|
"""Process task results and generate feedback"""
|
||||||
try:
|
try:
|
||||||
# Create context for AI
|
# Generate document prompt
|
||||||
context = {
|
docPrompt = self._generateDocumentPrompt(task.userInput)
|
||||||
"task": "Process task results",
|
|
||||||
"document": {"name": "Task Results", "type": "json"}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Generate prompt
|
# Get AI response for document generation
|
||||||
prompt = self.service.prompt_manager.generate_prompt(
|
docResponse = await self._callAiBasic(docPrompt)
|
||||||
context,
|
|
||||||
[
|
|
||||||
{"input": "Task results", "output": "Generate summary"}
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Call AI
|
# Parse response into TaskDocument objects
|
||||||
response = await self.service.model['callAiBasic'](
|
try:
|
||||||
f"""Process task results and generate feedback:
|
taskDocs = json.loads(docResponse)
|
||||||
Task Input: {task.userInput}
|
task.documentsOutput = taskDocs
|
||||||
Method Results: {task.result}
|
except json.JSONDecodeError as e:
|
||||||
Generated Documents: {task.documentsOutput}
|
logger.error(f"Error parsing document response: {str(e)}")
|
||||||
|
return f"Error processing results: {str(e)}"
|
||||||
|
|
||||||
|
# Generate feedback
|
||||||
|
feedback = await self._callAiBasic(
|
||||||
|
f"""Generate feedback for the completed task:
|
||||||
|
Task: {task.userInput}
|
||||||
|
Generated Documents: {len(task.documentsOutput)} files
|
||||||
|
|
||||||
{prompt}
|
Provide a concise summary of what was accomplished.
|
||||||
|
|
||||||
Please provide:
|
|
||||||
1. Summary of completed actions
|
|
||||||
2. Generated document descriptions
|
|
||||||
3. Next steps or completion status
|
|
||||||
|
|
||||||
Format your response as JSON:
|
|
||||||
{{
|
|
||||||
"summary": "string",
|
|
||||||
"documents": ["string"],
|
|
||||||
"nextSteps": ["string"]
|
|
||||||
}}
|
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
# Parse and validate response
|
return feedback
|
||||||
try:
|
|
||||||
result = json.loads(response)
|
|
||||||
return result.get("summary", "Task completed successfully")
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
return response.strip()
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error processing task results: {str(e)}")
|
logger.error(f"Error processing task results: {str(e)}")
|
||||||
return f"Error processing results: {str(e)}"
|
return f"Error processing results: {str(e)}"
|
||||||
|
|
||||||
async def _create_output_documents(self, task: Any) -> List[Dict[str, Any]]:
|
async def _createOutputDocuments(self, task: AgentTask) -> List[ChatDocument]:
|
||||||
"""Create output documents from task results"""
|
"""Create output documents from task results"""
|
||||||
try:
|
try:
|
||||||
documents = []
|
fileIds = []
|
||||||
|
|
||||||
# Process each document
|
# Process each TaskDocument from AI output
|
||||||
for doc in task.documentsOutput:
|
for taskDoc in task.documentsOutput:
|
||||||
processed = self.service.document_processor.process_with_context(
|
# Store file in database
|
||||||
doc,
|
fileItem = self.service.functions.createFile(
|
||||||
{
|
name=taskDoc.filename,
|
||||||
"id": doc.get("id", ""),
|
mimeType=taskDoc.mimeType
|
||||||
"extractionHistory": doc.get("extractionHistory", []),
|
|
||||||
"relevantSections": doc.get("relevantSections", []),
|
|
||||||
"processingStatus": doc.get("processingStatus", {})
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if processed:
|
# Store file content
|
||||||
documents.append(processed)
|
if taskDoc.base64Encoded:
|
||||||
|
# Decode base64 content
|
||||||
|
content = base64.b64decode(taskDoc.data)
|
||||||
|
else:
|
||||||
|
# Use text content directly
|
||||||
|
content = taskDoc.data.encode('utf-8')
|
||||||
|
|
||||||
|
# Store file data
|
||||||
|
self.service.functions.createFileData(fileItem.id, content)
|
||||||
|
fileIds.append(fileItem.id)
|
||||||
|
|
||||||
return documents
|
# Convert all files to ChatDocuments in one call
|
||||||
|
if fileIds:
|
||||||
|
return await self.service.chat.processFileIds(fileIds)
|
||||||
|
return []
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error creating output documents: {str(e)}")
|
logger.error(f"Error creating output documents: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
async def _call_ai_basic(self, prompt: str) -> str:
|
async def _callAiBasic(self, prompt: str) -> str:
|
||||||
"""Call basic AI model"""
|
"""Call basic AI service"""
|
||||||
# TODO: Implement actual AI call
|
try:
|
||||||
return "AI response placeholder"
|
if not self.service or not self.service.base:
|
||||||
|
raise ValueError("Service or base interface not initialized")
|
||||||
|
return await self.service.base.callAi([
|
||||||
|
{"role": "system", "content": "You are an AI assistant that helps process user requests."},
|
||||||
|
{"role": "user", "content": prompt}
|
||||||
|
])
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error calling AI service: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
async def _call_ai_advanced(self, prompt: str, context: Dict[str, Any]) -> str:
|
async def _callAiAdvanced(self, prompt: str, context: Dict[str, Any]) -> str:
|
||||||
"""Call advanced AI model with context"""
|
"""Call advanced AI model with context"""
|
||||||
# TODO: Implement actual AI call
|
# TODO: Implement actual AI call
|
||||||
return "AI response placeholder"
|
return "AI response placeholder"
|
||||||
|
|
||||||
|
async def generateWorkflowFeedback(self, workflow: ChatWorkflow) -> str:
|
||||||
|
"""
|
||||||
|
Generates a final feedback message for the workflow in the user's language.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
workflow: The completed workflow to generate feedback for
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: The generated feedback message
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Get workflow summary
|
||||||
|
workflowSummary = {
|
||||||
|
"status": workflow.status,
|
||||||
|
"totalMessages": len(workflow.messages),
|
||||||
|
"totalDocuments": sum(len(msg.documents) for msg in workflow.messages),
|
||||||
|
"duration": (datetime.now(UTC) - datetime.fromisoformat(workflow.startedAt)).total_seconds()
|
||||||
|
}
|
||||||
|
|
||||||
|
# Get user language from workflow mandate
|
||||||
|
userLanguage = workflow.mandateId.split('_')[0] if workflow.mandateId else 'en'
|
||||||
|
|
||||||
|
# Prepare messages for AI context
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": f"You are an AI assistant providing a summary of a completed workflow. "
|
||||||
|
f"Please respond in '{userLanguage}' language. "
|
||||||
|
f"Summarize the workflow's activities, outcomes, and any important points. "
|
||||||
|
f"Be concise but informative. Use a professional but friendly tone."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": f"Please provide a summary of this workflow:\n"
|
||||||
|
f"Status: {workflowSummary['status']}\n"
|
||||||
|
f"Total Messages: {workflowSummary['totalMessages']}\n"
|
||||||
|
f"Total Documents: {workflowSummary['totalDocuments']}\n"
|
||||||
|
f"Duration: {workflowSummary['duration']:.1f} seconds"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
# Add relevant workflow messages for context
|
||||||
|
for msg in workflow.messages:
|
||||||
|
if msg.role == "user" or msg.status in ["first", "last"]:
|
||||||
|
messages.append({
|
||||||
|
"role": msg.role,
|
||||||
|
"content": msg.message
|
||||||
|
})
|
||||||
|
|
||||||
|
# Generate feedback using AI
|
||||||
|
feedback = await self.service.aiService.callApi(messages, temperature=0.7)
|
||||||
|
|
||||||
|
return feedback
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error generating workflow feedback: {str(e)}")
|
||||||
|
return "Workflow completed successfully."
|
||||||
|
|
@ -1,478 +1,106 @@
|
||||||
from typing import Dict, Any, Optional, List
|
"""
|
||||||
import logging
|
Document Manager Module for handling document operations and content extraction.
|
||||||
import json
|
"""
|
||||||
import os
|
|
||||||
from datetime import datetime, UTC
|
|
||||||
from pathlib import Path
|
|
||||||
import mimetypes
|
|
||||||
import hashlib
|
|
||||||
import shutil
|
|
||||||
import uuid
|
|
||||||
import base64
|
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import logging
|
||||||
|
from typing import List, Optional, Dict, Any, Union
|
||||||
|
from pathlib import Path
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from modules.interfaces.serviceChatModel import (
|
||||||
|
ChatDocument,
|
||||||
|
TaskDocument,
|
||||||
|
ExtractedContent,
|
||||||
|
ContentItem,
|
||||||
|
ContentMetadata
|
||||||
|
)
|
||||||
|
from modules.workflow.serviceContainer import ServiceContainer
|
||||||
from modules.workflow.processorDocument import DocumentProcessor
|
from modules.workflow.processorDocument import DocumentProcessor
|
||||||
from modules.shared.configuration import APP_CONFIG
|
|
||||||
from modules.interfaces.serviceChatModel import ChatDocument, ChatContent
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class DocumentManager:
|
class DocumentManager:
|
||||||
"""Document manager with enhanced operations and file handling"""
|
"""Manager for document operations and content extraction"""
|
||||||
|
|
||||||
_instance = None
|
def __init__(self, serviceContainer: ServiceContainer):
|
||||||
|
self.service = serviceContainer
|
||||||
|
self._processor = DocumentProcessor()
|
||||||
|
|
||||||
@classmethod
|
async def extractFromChatDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
|
||||||
def getInstance(cls):
|
"""
|
||||||
"""Return a singleton instance of the document manager."""
|
Extract content from a ChatDocument with AI processing.
|
||||||
if cls._instance is None:
|
|
||||||
cls._instance = cls()
|
Args:
|
||||||
return cls._instance
|
prompt: Prompt for AI content extraction
|
||||||
|
document: The ChatDocument to process
|
||||||
def __init__(self):
|
|
||||||
"""Initialize document manager"""
|
|
||||||
if DocumentManager._instance is not None:
|
|
||||||
raise RuntimeError("Singleton instance already exists - use getInstance()")
|
|
||||||
|
|
||||||
self.processor = DocumentProcessor()
|
Returns:
|
||||||
self.document_cache = {}
|
ExtractedContent containing the processed content
|
||||||
self.temp_dir = Path(APP_CONFIG.get('temp_dir', 'temp'))
|
"""
|
||||||
self.output_dir = Path(APP_CONFIG.get('output_dir', 'output'))
|
# Convert ChatDocument to TaskDocument
|
||||||
self.service = None
|
taskDoc = await self._convertToTaskDocument(document)
|
||||||
|
|
||||||
async def initialize(self, context: Dict[str, Any], service=None) -> None:
|
# Process document using processor
|
||||||
"""Initialize document manager with context and service"""
|
extractedContent = await self._processor.processDocument(taskDoc, prompt)
|
||||||
# Initialize processor
|
|
||||||
self.processor.initialize(context)
|
|
||||||
|
|
||||||
# Initialize service container
|
# Update the objectId and objectType to reference the original ChatDocument
|
||||||
if service:
|
extractedContent.objectId = document.id
|
||||||
# Validate required interfaces
|
extractedContent.objectType = "ChatDocument"
|
||||||
required_interfaces = ['base', 'msft', 'google']
|
|
||||||
missing_interfaces = []
|
return extractedContent
|
||||||
for interface in required_interfaces:
|
|
||||||
if not hasattr(service, interface):
|
async def extractFromTaskDocument(self, prompt: str, document: TaskDocument) -> ExtractedContent:
|
||||||
missing_interfaces.append(interface)
|
"""
|
||||||
|
Extract content directly from a task document.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt: The prompt to use for content extraction
|
||||||
|
document: The task document to extract content from
|
||||||
|
|
||||||
if missing_interfaces:
|
Returns:
|
||||||
logger.warning(f"Service container missing required interfaces: {', '.join(missing_interfaces)}")
|
ExtractedContent containing the processed content
|
||||||
return False
|
|
||||||
|
Raises:
|
||||||
self.service = service
|
ValueError: If document is invalid
|
||||||
|
IOError: If file cannot be read
|
||||||
# Create directories if they don't exist
|
"""
|
||||||
self.temp_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
# Clear temporary directory
|
|
||||||
self._clear_temp_directory()
|
|
||||||
|
|
||||||
def _clear_temp_directory(self) -> None:
|
|
||||||
"""Clear temporary directory"""
|
|
||||||
try:
|
try:
|
||||||
if self.temp_dir.exists():
|
return await self._processor.processDocument(document, prompt)
|
||||||
shutil.rmtree(self.temp_dir)
|
|
||||||
self.temp_dir.mkdir(parents=True)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error clearing temp directory: {str(e)}")
|
logger.error(f"Error extracting from task document: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
async def process_document(self, document: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
|
async def _convertToTaskDocument(self, chatDoc: ChatDocument) -> TaskDocument:
|
||||||
"""Process a document with context"""
|
"""
|
||||||
try:
|
Convert a ChatDocument to a TaskDocument.
|
||||||
# Generate document ID if not present
|
|
||||||
if 'id' not in document:
|
Args:
|
||||||
document['id'] = self._generate_document_id(document)
|
chatDoc: The chat document to convert
|
||||||
|
|
||||||
# Process document content
|
Returns:
|
||||||
processed = await self.processor.process_with_context(document, context)
|
TaskDocument containing the converted data
|
||||||
|
|
||||||
# Add metadata
|
Raises:
|
||||||
processed['metadata'] = {
|
ValueError: If document is invalid
|
||||||
'processedAt': datetime.now(UTC).isoformat(),
|
IOError: If file cannot be read
|
||||||
'processor': 'DocumentManager',
|
"""
|
||||||
'version': '1.0'
|
|
||||||
}
|
|
||||||
|
|
||||||
# Cache document
|
|
||||||
self.document_cache[document['id']] = processed
|
|
||||||
|
|
||||||
return processed
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error processing document: {str(e)}")
|
|
||||||
return {
|
|
||||||
'id': document.get('id', ''),
|
|
||||||
'error': str(e),
|
|
||||||
'status': 'error'
|
|
||||||
}
|
|
||||||
|
|
||||||
async def extract_content(self, file_id: str) -> Optional[ChatDocument]:
|
|
||||||
"""Extract content from a file"""
|
|
||||||
try:
|
try:
|
||||||
# Get file content
|
# Get file content
|
||||||
file_content = await self.get_file_content(file_id)
|
fileContent = await self.service.functions.getFileData(chatDoc.fileId)
|
||||||
if not file_content:
|
if not fileContent:
|
||||||
return None
|
raise ValueError(f"Could not get content for file {chatDoc.fileId}")
|
||||||
|
|
||||||
# Get file metadata
|
|
||||||
file_metadata = await self.get_file_metadata(file_id)
|
|
||||||
if not file_metadata:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Create ChatDocument
|
# Convert to base64
|
||||||
return ChatDocument(
|
base64Data = base64.b64encode(fileContent).decode('utf-8')
|
||||||
|
|
||||||
|
return TaskDocument(
|
||||||
id=str(uuid.uuid4()),
|
id=str(uuid.uuid4()),
|
||||||
fileId=file_id,
|
filename=chatDoc.filename,
|
||||||
filename=file_metadata.get("name", "Unknown"),
|
fileSize=chatDoc.fileSize,
|
||||||
fileSize=file_metadata.get("size", 0),
|
mimeType=chatDoc.mimeType,
|
||||||
content=file_content.decode('utf-8', errors='ignore'),
|
data=base64Data
|
||||||
mimeType=file_metadata.get("mimeType", "text/plain")
|
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error extracting content from file {file_id}: {str(e)}")
|
logger.error(f"Error converting chat document to task document: {str(e)}")
|
||||||
return None
|
|
||||||
|
|
||||||
async def get_file_content(self, file_id: str) -> Optional[bytes]:
|
|
||||||
"""Get file content"""
|
|
||||||
try:
|
|
||||||
if not self.service or not self.service.functions:
|
|
||||||
logger.error("Service or functions not initialized")
|
|
||||||
return None
|
|
||||||
return self.service.functions.getFileData(file_id)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error getting file content for {file_id}: {str(e)}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
async def get_file_metadata(self, file_id: str) -> Optional[Dict[str, Any]]:
|
|
||||||
"""Get file metadata"""
|
|
||||||
try:
|
|
||||||
if not self.service or not self.service.functions:
|
|
||||||
logger.error("Service or functions not initialized")
|
|
||||||
return None
|
|
||||||
return self.service.functions.getFile(file_id)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error getting file metadata for {file_id}: {str(e)}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
async def save_file(self, filename: str, content: bytes, mime_type: str) -> Optional[int]:
|
|
||||||
"""Save a new file"""
|
|
||||||
try:
|
|
||||||
if not self.service or not self.service.base:
|
|
||||||
logger.error("Service or base interface not initialized")
|
|
||||||
return None
|
|
||||||
return await self.service.base.saveFile(filename, content, mime_type)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error saving file {filename}: {str(e)}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
async def delete_file(self, file_id: str) -> bool:
|
|
||||||
"""Delete a file"""
|
|
||||||
try:
|
|
||||||
if not self.service or not self.service.functions:
|
|
||||||
logger.error("Service or functions not initialized")
|
|
||||||
return False
|
|
||||||
return self.service.functions.deleteFile(file_id)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error deleting file {file_id}: {str(e)}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def convert_file_ref_to_id(self, ref: str) -> Optional[int]:
|
|
||||||
"""Convert file reference to ID"""
|
|
||||||
try:
|
|
||||||
if isinstance(ref, str) and ';' in ref:
|
|
||||||
return int(ref.split(';')[1])
|
|
||||||
return int(ref)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error converting file reference to ID: {str(e)}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def convert_file_id_to_ref(self, file_id: str) -> Optional[str]:
|
|
||||||
"""Convert file ID to reference"""
|
|
||||||
try:
|
|
||||||
if not self.service or not self.service.functions:
|
|
||||||
logger.error("Service or functions not initialized")
|
|
||||||
return None
|
|
||||||
|
|
||||||
file = self.service.functions.getFile(file_id)
|
|
||||||
if not file:
|
|
||||||
return None
|
|
||||||
return f"{file.filename};{file_id}"
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error converting file ID to reference: {str(e)}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
async def convert_data_format(self, data: Any, format: str) -> Any:
|
|
||||||
"""Convert data between formats"""
|
|
||||||
try:
|
|
||||||
if format == 'json':
|
|
||||||
if isinstance(data, str):
|
|
||||||
return json.loads(data)
|
|
||||||
return json.dumps(data)
|
|
||||||
elif format == 'base64':
|
|
||||||
if isinstance(data, str):
|
|
||||||
return base64.b64encode(data.encode('utf-8')).decode('utf-8')
|
|
||||||
return base64.b64encode(data).decode('utf-8')
|
|
||||||
return data
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error converting data format: {str(e)}")
|
|
||||||
return data
|
|
||||||
|
|
||||||
async def create_agent_input_file_list(self, files: List[str]) -> List[Dict[str, Any]]:
|
|
||||||
"""Create list of input files for agent processing"""
|
|
||||||
try:
|
|
||||||
input_files = []
|
|
||||||
for file in files:
|
|
||||||
file_id = await self.convert_file_ref_to_id(file)
|
|
||||||
if file_id:
|
|
||||||
file_data = await self.get_file_metadata(file_id)
|
|
||||||
if file_data:
|
|
||||||
content = await self.get_file_content(file_id)
|
|
||||||
input_files.append({
|
|
||||||
'id': file_id,
|
|
||||||
'name': file_data['name'],
|
|
||||||
'mimeType': file_data['mimeType'],
|
|
||||||
'content': content
|
|
||||||
})
|
|
||||||
return input_files
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error creating agent input file list: {str(e)}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
async def save_agent_output_files(self, files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
||||||
"""Save output files from agent processing"""
|
|
||||||
try:
|
|
||||||
saved_files = []
|
|
||||||
for file in files:
|
|
||||||
file_meta = await self.save_file(
|
|
||||||
filename=file['name'],
|
|
||||||
content=file['content'],
|
|
||||||
mimeType=file.get('mimeType', 'application/octet-stream')
|
|
||||||
)
|
|
||||||
|
|
||||||
if file_meta:
|
|
||||||
saved_files.append({
|
|
||||||
'id': file_meta,
|
|
||||||
'name': file['name'],
|
|
||||||
'mimeType': file.get('mimeType', 'application/octet-stream')
|
|
||||||
})
|
|
||||||
return saved_files
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error saving agent output files: {str(e)}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
async def content_with_prompt(self, document: Dict[str, Any], prompt: str) -> Optional[Dict[str, Any]]:
|
|
||||||
"""Extract content using AI with specific prompt"""
|
|
||||||
try:
|
|
||||||
# Get document content
|
|
||||||
chat_doc = await self.extract_content(document.get('id'))
|
|
||||||
if not chat_doc:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Prepare content
|
|
||||||
content = chat_doc.content
|
|
||||||
mime_type = chat_doc.mimeType
|
|
||||||
|
|
||||||
# Process large files in chunks
|
|
||||||
if len(content) > 100000:
|
|
||||||
chunks = self._split_content_into_chunks(content, mime_type)
|
|
||||||
extracted_chunks = []
|
|
||||||
|
|
||||||
for chunk in chunks:
|
|
||||||
chunk_result = await self._process_content_chunk(chunk, prompt)
|
|
||||||
if chunk_result:
|
|
||||||
extracted_chunks.append(chunk_result)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"content": self._merge_chunk_results(extracted_chunks),
|
|
||||||
"metadata": {
|
|
||||||
"original_size": len(content),
|
|
||||||
"chunks_processed": len(chunks),
|
|
||||||
"mime_type": mime_type
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
result = await self._process_content_chunk(content, prompt)
|
|
||||||
return {
|
|
||||||
"content": result,
|
|
||||||
"metadata": {
|
|
||||||
"original_size": len(content),
|
|
||||||
"chunks_processed": 1,
|
|
||||||
"mime_type": mime_type
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in content_with_prompt: {str(e)}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _split_content_into_chunks(self, content: str, mime_type: str) -> List[str]:
|
|
||||||
"""Split content into manageable chunks"""
|
|
||||||
try:
|
|
||||||
if mime_type.startswith('text/'):
|
|
||||||
return [chunk.strip() for chunk in content.split('\n\n') if chunk.strip()]
|
|
||||||
elif mime_type == 'application/json':
|
|
||||||
data = json.loads(content)
|
|
||||||
if isinstance(data, list):
|
|
||||||
return [json.dumps(item) for item in data]
|
|
||||||
return [content]
|
|
||||||
else:
|
|
||||||
return [content[i:i+10000] for i in range(0, len(content), 10000)]
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error splitting content: {str(e)}")
|
|
||||||
return [content]
|
|
||||||
|
|
||||||
async def _process_content_chunk(self, chunk: str, prompt: str) -> Optional[str]:
|
|
||||||
"""Process content chunk with AI"""
|
|
||||||
try:
|
|
||||||
if not self.service or not self.service.base:
|
|
||||||
logger.error("Service or base interface not initialized")
|
|
||||||
return None
|
|
||||||
|
|
||||||
ai_prompt = f"""
|
|
||||||
Extract relevant information from this content based on the following prompt:
|
|
||||||
|
|
||||||
PROMPT: {prompt}
|
|
||||||
|
|
||||||
CONTENT:
|
|
||||||
{chunk}
|
|
||||||
|
|
||||||
Return ONLY the extracted information in a clear, concise format.
|
|
||||||
"""
|
|
||||||
|
|
||||||
response = await self.service.base.callAi([
|
|
||||||
{"role": "system", "content": "You are an expert at extracting relevant information from documents."},
|
|
||||||
{"role": "user", "content": ai_prompt}
|
|
||||||
])
|
|
||||||
|
|
||||||
return response.strip()
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error processing content chunk: {str(e)}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _merge_chunk_results(self, chunks: List[str]) -> str:
|
|
||||||
"""Merge processed content chunks"""
|
|
||||||
try:
|
|
||||||
chunks = [chunk for chunk in chunks if chunk and chunk.strip()]
|
|
||||||
return "\n\n".join(chunks)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error merging chunk results: {str(e)}")
|
|
||||||
return ""
|
|
||||||
|
|
||||||
async def save_document(self, document: Dict[str, Any], format: str = 'json') -> str:
|
|
||||||
"""Save document to output directory"""
|
|
||||||
try:
|
|
||||||
filename = f"{document['id']}.{format}"
|
|
||||||
filepath = self.output_dir / filename
|
|
||||||
|
|
||||||
if format == 'json':
|
|
||||||
with open(filepath, 'w', encoding='utf-8') as f:
|
|
||||||
json.dump(document, f, indent=2)
|
|
||||||
else:
|
|
||||||
content = document.get('content', '')
|
|
||||||
if isinstance(content, str):
|
|
||||||
with open(filepath, 'w', encoding='utf-8') as f:
|
|
||||||
f.write(content)
|
|
||||||
else:
|
|
||||||
with open(filepath, 'wb') as f:
|
|
||||||
f.write(content)
|
|
||||||
|
|
||||||
return str(filepath)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error saving document: {str(e)}")
|
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def load_document(self, filepath: str) -> Dict[str, Any]:
|
|
||||||
"""Load document from file"""
|
|
||||||
try:
|
|
||||||
path = Path(filepath)
|
|
||||||
if not path.exists():
|
|
||||||
raise FileNotFoundError(f"Document not found: {filepath}")
|
|
||||||
|
|
||||||
format = path.suffix[1:].lower()
|
|
||||||
|
|
||||||
if format == 'json':
|
|
||||||
with open(path, 'r', encoding='utf-8') as f:
|
|
||||||
document = json.load(f)
|
|
||||||
else:
|
|
||||||
mime_type = mimetypes.guess_type(filepath)[0]
|
|
||||||
if mime_type and mime_type.startswith('text/'):
|
|
||||||
with open(path, 'r', encoding='utf-8') as f:
|
|
||||||
content = f.read()
|
|
||||||
else:
|
|
||||||
with open(path, 'rb') as f:
|
|
||||||
content = f.read()
|
|
||||||
|
|
||||||
document = {
|
|
||||||
'id': path.stem,
|
|
||||||
'content': content,
|
|
||||||
'format': format,
|
|
||||||
'mime_type': mime_type
|
|
||||||
}
|
|
||||||
|
|
||||||
document['metadata'] = {
|
|
||||||
'loadedAt': datetime.now(UTC).isoformat(),
|
|
||||||
'filepath': str(path),
|
|
||||||
'size': path.stat().st_size
|
|
||||||
}
|
|
||||||
|
|
||||||
return document
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error loading document: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
async def convert_document(self, document: Dict[str, Any], target_format: str) -> Dict[str, Any]:
|
|
||||||
"""Convert document to target format"""
|
|
||||||
try:
|
|
||||||
current_format = document.get('format', 'json')
|
|
||||||
|
|
||||||
if current_format == 'json' and target_format == 'text':
|
|
||||||
content = json.dumps(document, indent=2)
|
|
||||||
return {
|
|
||||||
'id': document['id'],
|
|
||||||
'content': content,
|
|
||||||
'format': 'text',
|
|
||||||
'mime_type': 'text/plain'
|
|
||||||
}
|
|
||||||
elif current_format == 'text' and target_format == 'json':
|
|
||||||
try:
|
|
||||||
content = json.loads(document['content'])
|
|
||||||
return {
|
|
||||||
'id': document['id'],
|
|
||||||
'content': content,
|
|
||||||
'format': 'json',
|
|
||||||
'mime_type': 'application/json'
|
|
||||||
}
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
return {
|
|
||||||
'id': document['id'],
|
|
||||||
'content': document['content'],
|
|
||||||
'format': 'json',
|
|
||||||
'mime_type': 'application/json'
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unsupported conversion: {current_format} to {target_format}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error converting document: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
def _generate_document_id(self, document: Dict[str, Any]) -> str:
|
|
||||||
"""Generate unique document ID"""
|
|
||||||
if 'content' in document:
|
|
||||||
content = str(document['content'])
|
|
||||||
return hashlib.md5(content.encode()).hexdigest()
|
|
||||||
return f"doc_{int(datetime.now(UTC).timestamp())}"
|
|
||||||
|
|
||||||
async def cleanup(self) -> None:
|
|
||||||
"""Clean up temporary files and cache"""
|
|
||||||
try:
|
|
||||||
self._clear_temp_directory()
|
|
||||||
self.document_cache.clear()
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error during cleanup: {str(e)}")
|
|
||||||
|
|
||||||
# Singleton factory for the document manager
|
|
||||||
def getDocumentManager():
|
|
||||||
return DocumentManager.getInstance()
|
|
||||||
|
|
@ -1,182 +0,0 @@
|
||||||
from typing import Dict, Any, List, Optional
|
|
||||||
import logging
|
|
||||||
import json
|
|
||||||
from datetime import datetime, UTC
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
class AIPromptManager:
|
|
||||||
"""Manages AI prompts and response validation"""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.prompt_templates = {}
|
|
||||||
self.response_schemas = {}
|
|
||||||
self._load_templates()
|
|
||||||
|
|
||||||
def _load_templates(self) -> None:
|
|
||||||
"""Load prompt templates and schemas"""
|
|
||||||
# Basic templates
|
|
||||||
self.prompt_templates = {
|
|
||||||
"task_analysis": {
|
|
||||||
"template": """Analyze the following task and determine required actions:
|
|
||||||
Task: {task}
|
|
||||||
Context: {context}
|
|
||||||
Available Methods: {methods}
|
|
||||||
|
|
||||||
Please provide:
|
|
||||||
1. Main objective
|
|
||||||
2. Required actions
|
|
||||||
3. Required data sources
|
|
||||||
4. Document processing requirements
|
|
||||||
5. Expected output format
|
|
||||||
|
|
||||||
Format your response as JSON:
|
|
||||||
{{
|
|
||||||
"objective": "string",
|
|
||||||
"actions": [
|
|
||||||
{{
|
|
||||||
"method": "string",
|
|
||||||
"action": "string",
|
|
||||||
"parameters": {{
|
|
||||||
"param1": "value1"
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
],
|
|
||||||
"dataSources": ["string"],
|
|
||||||
"documentRequirements": ["string"],
|
|
||||||
"outputFormat": "string"
|
|
||||||
}}
|
|
||||||
""",
|
|
||||||
"schema": {
|
|
||||||
"type": "object",
|
|
||||||
"required": ["objective", "actions"],
|
|
||||||
"properties": {
|
|
||||||
"objective": {"type": "string"},
|
|
||||||
"actions": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "object",
|
|
||||||
"required": ["method", "action"],
|
|
||||||
"properties": {
|
|
||||||
"method": {"type": "string"},
|
|
||||||
"action": {"type": "string"},
|
|
||||||
"parameters": {"type": "object"}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"dataSources": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {"type": "string"}
|
|
||||||
},
|
|
||||||
"documentRequirements": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {"type": "string"}
|
|
||||||
},
|
|
||||||
"outputFormat": {"type": "string"}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"result_analysis": {
|
|
||||||
"template": """Analyze the following task results and determine next steps:
|
|
||||||
Task Results: {results}
|
|
||||||
Workflow History: {history}
|
|
||||||
|
|
||||||
Please provide:
|
|
||||||
1. Task completion status
|
|
||||||
2. Next required actions
|
|
||||||
3. Required documents
|
|
||||||
4. Method recommendations
|
|
||||||
|
|
||||||
Format your response as JSON:
|
|
||||||
{{
|
|
||||||
"isComplete": boolean,
|
|
||||||
"nextActions": ["string"],
|
|
||||||
"requiredDocuments": ["string"],
|
|
||||||
"recommendedMethods": ["string"]
|
|
||||||
}}
|
|
||||||
""",
|
|
||||||
"schema": {
|
|
||||||
"type": "object",
|
|
||||||
"required": ["isComplete"],
|
|
||||||
"properties": {
|
|
||||||
"isComplete": {"type": "boolean"},
|
|
||||||
"nextActions": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {"type": "string"}
|
|
||||||
},
|
|
||||||
"requiredDocuments": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {"type": "string"}
|
|
||||||
},
|
|
||||||
"recommendedMethods": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {"type": "string"}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def generate_prompt(self, context: Dict[str, Any], examples: List[Dict]) -> str:
    """Build a prompt from the template selected by ``context["type"]``.

    The template is looked up in ``self.prompt_templates`` (default type:
    ``"task_analysis"``) and its ``"template"`` string is formatted with the
    ``task``, ``context``, ``methods``, ``results`` and ``history`` entries of
    ``context``; all but ``task`` are rendered as indented JSON.

    Args:
        context: Values for the template placeholders plus the optional
            ``"type"`` key choosing the template.
        examples: Few-shot examples; each must provide ``"input"`` and
            ``"output"`` keys. An empty list adds no examples section.

    Returns:
        The formatted prompt, followed by an ``Examples:`` section when
        examples were given.

    Raises:
        ValueError: If no template is registered for the resolved type.
        Exception: Any formatting/serialisation error is logged and re-raised.
    """
    try:
        # Resolve the type once so the error message names the type that was
        # actually looked up (including the default), not a possibly missing key.
        prompt_type = context.get("type", "task_analysis")
        template = self.prompt_templates.get(prompt_type)
        if not template:
            raise ValueError(f"Unknown prompt type: {prompt_type}")

        # Format prompt
        prompt = template["template"].format(
            task=context.get("task", ""),
            context=json.dumps(context.get("context", {}), indent=2),
            methods=json.dumps(context.get("methods", {}), indent=2),
            results=json.dumps(context.get("results", {}), indent=2),
            history=json.dumps(context.get("history", []), indent=2)
        )

        # Add examples if provided
        if examples:
            example_lines = [f"- {ex['input']} => {ex['output']}\n" for ex in examples]
            prompt += "\nExamples:\n" + "".join(example_lines)

        return prompt

    except Exception as e:
        logger.error(f"Error generating prompt: {str(e)}")
        raise
|
|
||||||
|
|
||||||
def validate_response(self, response: str, schema: Dict) -> bool:
    """Check an AI response against a JSON schema.

    A string response is parsed as JSON first; text that is not valid JSON is
    rejected immediately (without logging). The parsed value is then validated
    with ``jsonschema.validate``.

    Returns:
        True when validation succeeds; False for unparsable text or when
        anything raises during validation (the error is logged).
    """
    try:
        payload = response
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except json.JSONDecodeError:
                return False

        # Imported lazily, as in the original, so the dependency is only
        # needed when validation actually runs.
        import jsonschema
        jsonschema.validate(instance=payload, schema=schema)
    except Exception as e:
        logger.error(f"Error validating response: {str(e)}")
        return False
    return True
|
|
||||||
|
|
||||||
def get_schema(self, prompt_type: str) -> Optional[Dict]:
    """Return the ``"schema"`` entry of the template registered for ``prompt_type``.

    Returns None when no (non-empty) template is registered under that name.
    """
    entry = self.prompt_templates.get(prompt_type)
    if not entry:
        return None
    return entry.get("schema")
|
|
||||||
|
|
||||||
def add_template(self, name: str, template: str, schema: Dict) -> None:
    """Register (or overwrite) the prompt template stored under ``name``.

    The entry keeps the template text and its response schema together.
    """
    entry = {}
    entry["template"] = template
    entry["schema"] = schema
    self.prompt_templates[name] = entry
|
|
||||||
|
|
||||||
def remove_template(self, name: str) -> None:
    """Drop the prompt template registered under ``name``; unknown names are ignored."""
    try:
        del self.prompt_templates[name]
    except KeyError:
        # Removing a template that was never registered is a no-op.
        pass
|
|
||||||
|
|
@ -1,239 +1,147 @@
|
||||||
from typing import Dict, Any, Optional, List
|
from typing import Dict, Any
|
||||||
import logging
|
import logging
|
||||||
import json
|
|
||||||
import asyncio
|
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
|
from modules.interfaces.serviceChatModel import (
|
||||||
|
AgentTask, AgentResult, TaskStatus, ChatMessage,
|
||||||
|
UserInputRequest, ChatWorkflow, ChatDocument
|
||||||
|
)
|
||||||
|
from modules.interfaces.serviceChatClass import ChatInterface
|
||||||
from modules.workflow.managerChat import ChatManager
|
from modules.workflow.managerChat import ChatManager
|
||||||
from modules.workflow.managerDocument import DocumentManager
|
|
||||||
from modules.interfaces.serviceChatModel import AgentTask, TaskStatus, ActionStatus
|
|
||||||
from modules.shared.configuration import APP_CONFIG
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class WorkflowStoppedException(Exception):
|
||||||
|
"""Exception raised when workflow is stopped by user"""
|
||||||
|
pass
|
||||||
|
|
||||||
class WorkflowManager:
|
class WorkflowManager:
|
||||||
"""Workflow manager with improved task management and error recovery"""
|
"""Manages workflow execution lifecycle"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, chatInterface: ChatInterface):
|
||||||
self.chat_manager = ChatManager()
|
|
||||||
self.document_manager = DocumentManager()
|
|
||||||
self.workflow = None
|
self.workflow = None
|
||||||
self.context = {}
|
self.isRunning = False
|
||||||
self.task_queue = asyncio.Queue()
|
self.chatInterface = chatInterface
|
||||||
self.active_tasks = {}
|
self.chatManager = ChatManager()
|
||||||
self.task_history = []
|
|
||||||
|
def _checkWorkflowStopped(self, workflow: ChatWorkflow) -> None:
|
||||||
async def initialize(self, workflow: Any, context: Dict[str, Any]) -> None:
|
if workflow.status == "stopped":
|
||||||
"""Initialize workflow manager with workflow and context"""
|
logger.info(f"Workflow {workflow.id} stopped by user")
|
||||||
self.workflow = workflow
|
raise WorkflowStoppedException("User stopped workflow")
|
||||||
self.context = context
|
|
||||||
|
async def workflowProcess(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> None:
|
||||||
# Initialize managers
|
"""Main workflow execution process"""
|
||||||
await self.chat_manager.initialize(workflow, context)
|
|
||||||
await self.document_manager.initialize(context)
|
|
||||||
|
|
||||||
# Start task processor
|
|
||||||
asyncio.create_task(self._process_task_queue())
|
|
||||||
|
|
||||||
async def process_workflow(self, user_input: Dict[str, Any]) -> Dict[str, Any]:
|
|
||||||
"""Process workflow with user input"""
|
|
||||||
try:
|
try:
|
||||||
|
self.workflow = workflow
|
||||||
|
self.isRunning = True
|
||||||
|
|
||||||
|
# Process documents from userInput using ChatInterface's method
|
||||||
|
documents = []
|
||||||
|
if userInput.listFileId:
|
||||||
|
documents = await self.chatInterface.processFileIds(userInput.listFileId)
|
||||||
|
|
||||||
|
# Create initial ChatMessage from userInput
|
||||||
|
initialMessage = ChatMessage(
|
||||||
|
id=str(uuid.uuid4()),
|
||||||
|
workflowId=workflow.id,
|
||||||
|
role="user",
|
||||||
|
message=userInput.prompt,
|
||||||
|
status="first", # First message in workflow
|
||||||
|
documents=documents
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add message to workflow
|
||||||
|
await self.chatInterface.createWorkflowMessage(initialMessage.dict())
|
||||||
|
|
||||||
# Create initial task
|
# Create initial task
|
||||||
task = await self.chat_manager.create_initial_task(user_input)
|
task = await self.chatInterface.createInitialTask(workflow, initialMessage)
|
||||||
|
if not task:
|
||||||
|
logger.error("Failed to create initial task")
|
||||||
|
workflow.status = "error"
|
||||||
|
workflow.error = "Failed to create initial task"
|
||||||
|
return
|
||||||
|
|
||||||
# Add to queue
|
# Main workflow loop
|
||||||
await self.task_queue.put(task)
|
while self.isRunning and workflow.status == "running":
|
||||||
|
|
||||||
# Wait for completion
|
self._checkWorkflowStopped(workflow)
|
||||||
while not task.is_complete() and not task.has_failed():
|
|
||||||
await asyncio.sleep(0.1)
|
# Execute task
|
||||||
|
result = AgentResult(
|
||||||
# Process results
|
id=task.id,
|
||||||
if task.status == TaskStatus.SUCCESS:
|
status=TaskStatus.PENDING,
|
||||||
return {
|
createdAt=datetime.now(UTC),
|
||||||
"status": "success",
|
updatedAt=datetime.now(UTC)
|
||||||
"result": task.result,
|
|
||||||
"documents": task.documentsOutput
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
return {
|
|
||||||
"status": "error",
|
|
||||||
"error": task.error,
|
|
||||||
"feedback": task.thisTaskFeedback
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error processing workflow: {str(e)}")
|
|
||||||
return {
|
|
||||||
"status": "error",
|
|
||||||
"error": str(e)
|
|
||||||
}
|
|
||||||
|
|
||||||
async def _process_task_queue(self) -> None:
    """Consume tasks from ``self.task_queue`` forever, one at a time.

    For each task: run it through ``self.chat_manager.process_task``, copy the
    outcome onto the task, append a summary entry to ``self.task_history`` and,
    if the task reports itself as not complete, enqueue the follow-up task
    returned by ``self._define_next_task``.

    Every successful ``get()`` is paired with exactly one ``task_done()``, even
    when processing raises, so ``task_queue.join()`` cannot hang on a failed
    task. A task whose processing raised before its outcome was recorded is
    marked ``TaskStatus.FAILED`` with the exception text, so code polling the
    task for a terminal state is not left waiting on a task nobody will touch
    again.
    """
    while True:
        task = None
        outcome_recorded = False
        try:
            # Get task from queue
            task = await self.task_queue.get()

            # Process task
            result = await self.chat_manager.process_task(task)

            # Update task status
            if result["status"] == "success":
                task.status = TaskStatus.SUCCESS
                task.result = result.get("result")
                task.documentsOutput = result.get("documents", [])
            else:
                task.status = TaskStatus.FAILED
                task.error = result.get("error")
            outcome_recorded = True

            # Add to history
            self.task_history.append({
                "id": task.id,
                "status": task.status,
                "startedAt": task.startedAt,
                "finishedAt": datetime.now(UTC).isoformat(),
                "error": task.error
            })

            # Check for next task
            if not task.is_complete():
                next_task = await self._define_next_task(task)
                if next_task:
                    await self.task_queue.put(next_task)

        except Exception as e:
            logger.error(f"Error processing task queue: {str(e)}")
            # Only overwrite the status when no outcome was stored yet; a
            # failure while scheduling the follow-up must not turn a recorded
            # success into a failure.
            if task is not None and not outcome_recorded:
                task.status = TaskStatus.FAILED
                task.error = str(e)
            await asyncio.sleep(1)  # Prevent tight loop on error
        finally:
            # Mark task as done (also on error/cancellation) to keep the
            # queue's unfinished-task counter balanced.
            if task is not None:
                self.task_queue.task_done()
|
|
||||||
|
|
||||||
async def _define_next_task(self, current_task: AgentTask) -> Optional[AgentTask]:
|
|
||||||
"""Define next task based on current task results"""
|
|
||||||
try:
|
|
||||||
# Analyze current task
|
|
||||||
analysis = await self.chat_manager._analyze_task_results(current_task)
|
|
||||||
|
|
||||||
# Check if next task needed
|
|
||||||
if not analysis.get("isComplete", True):
|
|
||||||
# Create next task
|
|
||||||
next_task = await self.chat_manager.create_next_task(
|
|
||||||
current_task,
|
|
||||||
analysis.get("nextActions", []),
|
|
||||||
analysis.get("requiredDocuments", [])
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add dependencies
|
# Execute each action
|
||||||
next_task.dependencies = [current_task.id]
|
for action in task.actionList:
|
||||||
|
|
||||||
return next_task
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error defining next task: {str(e)}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
async def handle_error(self, task: AgentTask, error: str) -> None:
    """Record a task failure and apply the recovery strategy.

    The task is marked failed with ``error``. If the error text is classified
    as retryable and the task still has retries left, the retry counter is
    bumped, the task is flagged for retry and re-queued, and nothing else
    happens. Otherwise the task is rolled back when it asks for that, and the
    workflow itself is put into the ``"error"`` state.

    Exceptions raised while handling are logged and swallowed.
    """
    try:
        logger.error(f"Task {task.id} failed: {error}")

        task.status = TaskStatus.FAILED
        task.error = error

        # Retry path: transient error and retry budget not yet exhausted.
        can_retry = self._is_retryable_error(error) and task.retryCount < task.retryMax
        if can_retry:
            task.retryCount += 1
            task.status = TaskStatus.RETRY
            await self.task_queue.put(task)
            return

        # Terminal path: optional rollback, then fail the workflow.
        if task.rollback_on_failure:
            await self._rollback_task(task)

        self.workflow.status = "error"
        self.workflow.error = error

    except Exception as e:
        logger.error(f"Error handling task error: {str(e)}")
|
|
||||||
|
|
||||||
async def _rollback_task(self, task: AgentTask) -> None:
    """Undo every action of ``task`` that finished successfully.

    For each such action the method registered under ``action.method`` in the
    chat manager's service is asked to roll back, receiving the action name,
    its parameters and the task's auth data for the action's auth source.
    Actions without a registered method are skipped. Any exception stops the
    rollback and is logged, not propagated.
    """
    try:
        for step in task.actionList:
            if step.status != ActionStatus.SUCCESS:
                continue

            handler = self.chat_manager.service.methods.get(step.method)
            if not handler:
                continue

            credentials = task.get_auth_data(step.auth_source)
            await handler.rollback(step.action, step.parameters, credentials)

    except Exception as e:
        logger.error(f"Error rolling back task: {str(e)}")
|
|
||||||
|
|
||||||
def _is_retryable_error(self, error: str) -> bool:
    """Tell whether ``error`` mentions one of the known transient failure markers.

    Matching is a case-insensitive substring test against the error text.
    """
    markers = (
        "timeout",
        "rate limit",
        "temporary",
        "connection",
        "server error",
    )
    lowered = error.lower()
    for marker in markers:
        if marker in lowered:
            return True
    return False
|
|
||||||
|
|
||||||
async def cleanup(self) -> None:
    """Release workflow resources.

    Cleans up the chat manager and then the document manager, discards every
    task still waiting in the queue (marking each as done) and empties the
    active-task registry. Any exception is logged and swallowed.
    """
    try:
        # Managers first, in the same order as they were set up.
        await self.chat_manager.cleanup()
        await self.document_manager.cleanup()

        # Drain whatever is still queued without processing it.
        backlog = self.task_queue
        while True:
            if backlog.empty():
                break
            backlog.get_nowait()
            backlog.task_done()

        self.active_tasks.clear()

    except Exception as e:
        logger.error(f"Error during cleanup: {str(e)}")
|
|
||||||
|
|
||||||
async def get_workflow_status(self, workflow_id: str) -> Dict[str, Any]:
|
self._checkWorkflowStopped(workflow)
|
||||||
"""Get current status of workflow"""
|
|
||||||
current_task = self.chat_manager.service.tasks.get('current')
|
try:
|
||||||
previous_task = self.chat_manager.service.tasks.get('previous')
|
# Execute action
|
||||||
|
actionResult = await action.execute()
|
||||||
return {
|
|
||||||
'workflowId': workflow_id,
|
# Update action status
|
||||||
'currentTask': current_task.dict() if current_task else None,
|
action.status = TaskStatus.COMPLETED if actionResult.success else TaskStatus.FAILED
|
||||||
'previousTask': previous_task.dict() if previous_task else None,
|
action.result = actionResult
|
||||||
'status': self.chat_manager.workflow.status if self.chat_manager.workflow else None
|
|
||||||
}
|
# Check for failure
|
||||||
|
if not actionResult.success:
|
||||||
async def stop_workflow(self, workflow_id: str) -> None:
|
result.status = TaskStatus.FAILED
|
||||||
"""Stop workflow execution"""
|
result.error = actionResult.error
|
||||||
if self.chat_manager.workflow and self.chat_manager.workflow.id == workflow_id:
|
break
|
||||||
self.chat_manager.workflow.status = TaskStatus.STOPPED
|
|
||||||
self.chat_manager.workflow.updatedAt = datetime.now(UTC)
|
except Exception as e:
|
||||||
|
logger.error(f"Action error: {str(e)}")
|
||||||
|
action.status = TaskStatus.FAILED
|
||||||
|
result.status = TaskStatus.FAILED
|
||||||
|
result.error = str(e)
|
||||||
|
break
|
||||||
|
|
||||||
|
# Update result status
|
||||||
|
if result.status != TaskStatus.FAILED:
|
||||||
|
result.status = TaskStatus.COMPLETED
|
||||||
|
|
||||||
|
result.updatedAt = datetime.now(UTC)
|
||||||
|
|
||||||
|
self._checkWorkflowStopped(workflow)
|
||||||
|
|
||||||
|
# Update workflow with result
|
||||||
|
await self.chatInterface.addTaskResult(workflow, result)
|
||||||
|
|
||||||
|
# Get next task
|
||||||
|
task = await self.chatInterface.getNextTask(workflow)
|
||||||
|
if not task:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Check if should continue
|
||||||
|
if not await self.chatInterface.shouldContinue(workflow):
|
||||||
|
break
|
||||||
|
|
||||||
# Stop current task if any
|
# Generate final feedback message using ChatManager
|
||||||
current_task = self.chat_manager.service.tasks.get('current')
|
finalFeedback = await self.chatManager.generateWorkflowFeedback(workflow)
|
||||||
if current_task:
|
|
||||||
current_task.status = TaskStatus.STOPPED
|
# Create final message with "last" status
|
||||||
current_task.updatedAt = datetime.now(UTC)
|
self._checkWorkflowStopped(workflow)
|
||||||
|
finalMessage = ChatMessage(
|
||||||
|
id=str(uuid.uuid4()),
|
||||||
|
workflowId=workflow.id,
|
||||||
|
role="assistant",
|
||||||
|
message=finalFeedback,
|
||||||
|
status="last" # Last message in workflow
|
||||||
|
)
|
||||||
|
await self.chatInterface.createWorkflowMessage(finalMessage.dict())
|
||||||
|
|
||||||
|
# Complete workflow
|
||||||
|
if workflow.status != "failed":
|
||||||
|
workflow.status = "completed"
|
||||||
|
workflow.lastActivity = datetime.now(UTC).isoformat()
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Workflow error: {str(e)}")
|
||||||
|
if self.workflow:
|
||||||
|
self.workflow.status = "error"
|
||||||
|
self.workflow.error = str(e)
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,17 +1,26 @@
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Any, Optional
|
from typing import Dict, Any, List, Optional
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
|
import json
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from modules.methods import MethodBase, MethodResult
|
from modules.methods import MethodBase, MethodResult
|
||||||
from modules.interfaces.serviceChatModel import AgentTask, AgentAction, AgentResult, Action, TaskStatus, ActionStatus
|
from modules.interfaces.serviceChatModel import AgentTask, AgentAction, AgentResult, Action, TaskStatus, ActionStatus
|
||||||
|
from modules.interfaces.serviceManagementClass import ServiceManagement
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class ServiceContainer:
|
class ServiceContainer:
|
||||||
"""Service container with improved state management"""
|
"""Service container for dependency injection and service management."""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
self.methods = {}
|
||||||
|
self.context = {}
|
||||||
|
self.workflow = None
|
||||||
|
self.model = {}
|
||||||
|
self.functions = {}
|
||||||
|
self.documentProcessor = None
|
||||||
self.state = {
|
self.state = {
|
||||||
'status': TaskStatus.PENDING,
|
'status': TaskStatus.PENDING,
|
||||||
'retryCount': 0,
|
'retryCount': 0,
|
||||||
|
|
@ -20,15 +29,236 @@ class ServiceContainer:
|
||||||
'lastError': None,
|
'lastError': None,
|
||||||
'lastErrorTime': None
|
'lastErrorTime': None
|
||||||
}
|
}
|
||||||
self.methods: Dict[str, MethodBase] = {}
|
|
||||||
self.tasks: Dict[str, Any] = {} # Will be populated with AgentTask instances
|
self.tasks: Dict[str, Any] = {} # Will be populated with AgentTask instances
|
||||||
|
|
||||||
def register_method(self, method: MethodBase) -> None:
|
# Initialize service management
|
||||||
"""Register a method in the container"""
|
self.serviceManagement = ServiceManagement()
|
||||||
self.methods[method.name] = method
|
|
||||||
logger.info(f"Registered method: {method.name}")
|
|
||||||
|
|
||||||
async def execute_task(self, task: Any) -> None: # task: AgentTask
|
# Initialize file-related functions
|
||||||
|
self.functions = {
|
||||||
|
'getFileData': self.serviceManagement.getFileData,
|
||||||
|
'saveFileData': self.serviceManagement.saveFileData,
|
||||||
|
'getFileMetadata': self.serviceManagement.getFileMetadata,
|
||||||
|
'saveFileMetadata': self.serviceManagement.saveFileMetadata,
|
||||||
|
'deleteFile': self.serviceManagement.deleteFile,
|
||||||
|
'getFile': self.serviceManagement.getFile,
|
||||||
|
'getMimeType': self.serviceManagement.getMimeType,
|
||||||
|
'calculateFileHash': self.serviceManagement.calculateFileHash,
|
||||||
|
'checkForDuplicateFile': self.serviceManagement.checkForDuplicateFile
|
||||||
|
}
|
||||||
|
|
||||||
|
def initialize(self) -> None:
    """Initialization hook of the service container; currently does nothing."""
    return None
|
||||||
|
|
||||||
|
def registerMethod(self, methodName: str, methodInstance: Any) -> None:
    """Store ``methodInstance`` under ``methodName``, replacing any previous entry."""
    registry = self.methods
    registry[methodName] = methodInstance
|
||||||
|
|
||||||
|
def getMethod(self, methodName: str) -> Optional[Any]:
    """Look up the method registered under ``methodName``; None if there is none."""
    registry = self.methods
    found = registry.get(methodName, None)
    return found
|
||||||
|
|
||||||
|
def removeMethod(self, methodName: str) -> None:
    """Unregister ``methodName``; names that are not registered are ignored."""
    try:
        del self.methods[methodName]
    except KeyError:
        # Nothing registered under that name: nothing to remove.
        pass
|
||||||
|
|
||||||
|
def hasMethod(self, methodName: str) -> bool:
    """Report whether a method is registered under ``methodName``."""
    registry = self.methods
    return methodName in registry
|
||||||
|
|
||||||
|
def listMethods(self) -> List[str]:
    """Return the names of all registered methods, in registration order, as a new list."""
    return [name for name in self.methods.keys()]
|
||||||
|
|
||||||
|
def getMethodInfo(self, methodName: str) -> Dict[str, Any]:
    """Collect everything the container can report about one method.

    Returns an empty dict when ``getMethod`` finds nothing for ``methodName``.
    Otherwise the result holds the name plus one entry per ``getMethod*``
    accessor, in the fixed order listed below.
    """
    if not self.getMethod(methodName):
        return {}

    # (result key, accessor) pairs; order defines the key order of the result.
    accessors = (
        ("description", self.getMethodDescription),
        ("version", self.getMethodVersion),
        ("author", self.getMethodAuthor),
        ("license", self.getMethodLicense),
        ("dependencies", self.getMethodDependencies),
        ("tags", self.getMethodTags),
        ("examples", self.getMethodExamples),
        ("documentation", self.getMethodDocumentation),
        ("source", self.getMethodSource),
        ("tests", self.getMethodTests),
        ("benchmarks", self.getMethodBenchmarks),
        ("metrics", self.getMethodMetrics),
        ("logs", self.getMethodLogs),
        ("history", self.getMethodHistory),
        ("usage", self.getMethodUsage),
        ("errors", self.getMethodErrors),
        ("warnings", self.getMethodWarnings),
    )
    info = {"name": methodName}
    for key, accessor in accessors:
        info[key] = accessor(methodName)
    return info
|
||||||
|
|
||||||
|
def getMethodSchema(self, methodName: str) -> Optional[Dict[str, Any]]:
    """Return the ``schema`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.schema
|
||||||
|
|
||||||
|
def getMethodParameters(self, methodName: str) -> Optional[Dict[str, Any]]:
    """Return the ``parameters`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.parameters
|
||||||
|
|
||||||
|
def getMethodReturnType(self, methodName: str) -> Optional[str]:
    """Return the ``returnType`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.returnType
|
||||||
|
|
||||||
|
def getMethodDescription(self, methodName: str) -> Optional[str]:
    """Return the ``description`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.description
|
||||||
|
|
||||||
|
def getMethodVersion(self, methodName: str) -> Optional[str]:
    """Return the ``version`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.version
|
||||||
|
|
||||||
|
def getMethodAuthor(self, methodName: str) -> Optional[str]:
    """Return the ``author`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.author
|
||||||
|
|
||||||
|
def getMethodLicense(self, methodName: str) -> Optional[str]:
    """Return the ``license`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.license
|
||||||
|
|
||||||
|
def getMethodDependencies(self, methodName: str) -> Optional[List[str]]:
    """Return the ``dependencies`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.dependencies
|
||||||
|
|
||||||
|
def getMethodTags(self, methodName: str) -> Optional[List[str]]:
    """Return the ``tags`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.tags
|
||||||
|
|
||||||
|
def getMethodExamples(self, methodName: str) -> Optional[List[Dict[str, Any]]]:
    """Return the ``examples`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.examples
|
||||||
|
|
||||||
|
def getMethodDocumentation(self, methodName: str) -> Optional[str]:
    """Return the ``documentation`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.documentation
|
||||||
|
|
||||||
|
def getMethodSource(self, methodName: str) -> Optional[str]:
    """Return the ``source`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.source
|
||||||
|
|
||||||
|
def getMethodTests(self, methodName: str) -> Optional[List[Dict[str, Any]]]:
    """Return the ``tests`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.tests
|
||||||
|
|
||||||
|
def getMethodBenchmarks(self, methodName: str) -> Optional[List[Dict[str, Any]]]:
    """Return the ``benchmarks`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.benchmarks
|
||||||
|
|
||||||
|
def getMethodMetrics(self, methodName: str) -> Optional[Dict[str, Any]]:
    """Return the ``metrics`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.metrics
|
||||||
|
|
||||||
|
def getMethodLogs(self, methodName: str) -> Optional[List[Dict[str, Any]]]:
    """Return the ``logs`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.logs
|
||||||
|
|
||||||
|
def getMethodHistory(self, methodName: str) -> Optional[List[Dict[str, Any]]]:
    """Return the ``history`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.history
|
||||||
|
|
||||||
|
def getMethodUsage(self, methodName: str) -> Optional[Dict[str, Any]]:
    """Return the ``usage`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.usage
|
||||||
|
|
||||||
|
def getMethodErrors(self, methodName: str) -> Optional[List[Dict[str, Any]]]:
    """Return the ``errors`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.errors
|
||||||
|
|
||||||
|
def getMethodWarnings(self, methodName: str) -> Optional[List[Dict[str, Any]]]:
    """Return the ``warnings`` attribute of the registered method; None when ``getMethod`` yields nothing (or a falsy object)."""
    registered = self.getMethod(methodName)
    if not registered:
        return None
    return registered.warnings
|
||||||
|
|
||||||
|
def executeTask(self, task: Any) -> None:
    """Run every action of ``task`` through its registered method, in order.

    Actions whose method name is not registered are skipped silently. The
    first exception is logged and re-raised, so later actions do not run.
    """
    try:
        for step in task.actionList:
            handler = self.getMethod(step.method)
            if not handler:
                continue
            handler.executeAction(step.action, step.parameters)

    except Exception as e:
        logger.error(f"Error executing task: {str(e)}")
        raise
|
||||||
|
|
||||||
|
def getFileData(self, fileId: str) -> bytes:
    """Return the raw content of the file ``fileId`` via the registered handler.

    The handler is looked up under the key ``'getFileData'`` in
    ``self.functions``. That container is a dict, so the lookup must be a key
    lookup: ``hasattr`` on a dict never finds the entry, which made this
    method always return ``b""``. An attribute-style container is still
    honoured for backward compatibility.

    Returns:
        The handler's result, or ``b""`` when no handler is registered or the
        handler raises (the error is logged).
    """
    try:
        functions = self.functions
        if isinstance(functions, dict):
            handler = functions.get('getFileData')
        else:
            handler = getattr(functions, 'getFileData', None)
        if handler is None:
            return b""
        return handler(fileId)

    except Exception as e:
        logger.error(f"Error getting file data: {str(e)}")
        return b""
|
||||||
|
|
||||||
|
def saveFileData(self, fileId: str, data: bytes) -> bool:
    """Store ``data`` as the content of file ``fileId`` via the registered handler.

    The handler is looked up under the key ``'saveFileData'`` in
    ``self.functions``. That container is a dict, so the lookup must be a key
    lookup: ``hasattr`` on a dict never finds the entry, which made this
    method always return False without saving. An attribute-style container
    is still honoured for backward compatibility.

    Returns:
        The handler's result, or False when no handler is registered or the
        handler raises (the error is logged).
    """
    try:
        functions = self.functions
        if isinstance(functions, dict):
            handler = functions.get('saveFileData')
        else:
            handler = getattr(functions, 'saveFileData', None)
        if handler is None:
            return False
        return handler(fileId, data)

    except Exception as e:
        logger.error(f"Error saving file data: {str(e)}")
        return False
|
||||||
|
|
||||||
|
def getFileMetadata(self, fileId: str) -> Dict[str, Any]:
    """Return the metadata of file ``fileId`` via the registered handler.

    The handler is looked up under the key ``'getFileMetadata'`` in
    ``self.functions``. That container is a dict, so the lookup must be a key
    lookup: ``hasattr`` on a dict never finds the entry, which made this
    method always return ``{}``. An attribute-style container is still
    honoured for backward compatibility.

    Returns:
        The handler's result, or ``{}`` when no handler is registered or the
        handler raises (the error is logged).
    """
    try:
        functions = self.functions
        if isinstance(functions, dict):
            handler = functions.get('getFileMetadata')
        else:
            handler = getattr(functions, 'getFileMetadata', None)
        if handler is None:
            return {}
        return handler(fileId)

    except Exception as e:
        logger.error(f"Error getting file metadata: {str(e)}")
        return {}
|
||||||
|
|
||||||
|
def saveFileMetadata(self, fileId: str, metadata: Dict[str, Any]) -> bool:
    """Store ``metadata`` for file ``fileId`` via the registered handler.

    The handler is looked up under the key ``'saveFileMetadata'`` in
    ``self.functions``. That container is a dict, so the lookup must be a key
    lookup: ``hasattr`` on a dict never finds the entry, which made this
    method always return False without saving. An attribute-style container
    is still honoured for backward compatibility.

    Returns:
        The handler's result, or False when no handler is registered or the
        handler raises (the error is logged).
    """
    try:
        functions = self.functions
        if isinstance(functions, dict):
            handler = functions.get('saveFileMetadata')
        else:
            handler = getattr(functions, 'saveFileMetadata', None)
        if handler is None:
            return False
        return handler(fileId, metadata)

    except Exception as e:
        logger.error(f"Error saving file metadata: {str(e)}")
        return False
|
||||||
|
|
||||||
|
async def executeTaskImproved(self, task: Any) -> None: # task: AgentTask
|
||||||
"""Execute task with improved error handling and timeout"""
|
"""Execute task with improved error handling and timeout"""
|
||||||
try:
|
try:
|
||||||
# Check for timeout
|
# Check for timeout
|
||||||
|
|
@ -38,64 +268,64 @@ class ServiceContainer:
|
||||||
|
|
||||||
# Execute actions
|
# Execute actions
|
||||||
for action in task.actionList:
|
for action in task.actionList:
|
||||||
if not task.can_execute_action(action):
|
if not task.canExecuteAction(action):
|
||||||
if not task.get_auth_data(action.auth_source):
|
if not task.getAuthData(action.authSource):
|
||||||
action.status = ActionStatus.FAILED
|
action.status = ActionStatus.FAILED
|
||||||
task.error = f"Missing authentication for {action.auth_source}"
|
task.error = f"Missing authentication for {action.authSource}"
|
||||||
else:
|
else:
|
||||||
action.status = ActionStatus.DEPENDENCY_FAILED
|
action.status = ActionStatus.DEPENDENCY_FAILED
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Get method
|
# Get method
|
||||||
method = self.methods.get(action.method)
|
method = self.getMethod(action.method)
|
||||||
if not method:
|
if not method:
|
||||||
raise ValueError(f"Unknown method: {action.method}")
|
raise ValueError(f"Unknown method: {action.method}")
|
||||||
|
|
||||||
# Validate parameters
|
# Validate parameters
|
||||||
if not await method.validate_parameters(action.action, action.parameters):
|
if not await method.validateParameters(action.action, action.parameters):
|
||||||
raise ValueError(f"Invalid parameters for {action.method}:{action.action}")
|
raise ValueError(f"Invalid parameters for {action.method}:{action.action}")
|
||||||
|
|
||||||
# Get auth data if needed
|
# Get auth data if needed
|
||||||
auth_data = None
|
authData = None
|
||||||
if action.auth_source and action.auth_source != "local":
|
if action.authSource and action.authSource != "local":
|
||||||
auth_data = task.get_auth_data(action.auth_source)
|
authData = task.getAuthData(action.authSource)
|
||||||
if not auth_data:
|
if not authData:
|
||||||
raise ValueError(f"Missing authentication data for {action.auth_source}")
|
raise ValueError(f"Missing authentication data for {action.authSource}")
|
||||||
|
|
||||||
# Execute with timeout
|
# Execute with timeout
|
||||||
result = await asyncio.wait_for(
|
result = await asyncio.wait_for(
|
||||||
method.execute(action.action, action.parameters, auth_data),
|
method.execute(action.action, action.parameters, authData),
|
||||||
timeout=action.timeout or 60
|
timeout=action.timeout or 60
|
||||||
)
|
)
|
||||||
|
|
||||||
if result.success:
|
if result.success:
|
||||||
action.status = ActionStatus.SUCCESS
|
action.status = ActionStatus.SUCCESS
|
||||||
else:
|
else:
|
||||||
if self._should_retry(result.data.get('error')):
|
if self._shouldRetry(result.data.get('error')):
|
||||||
action.retryCount += 1
|
action.retryCount += 1
|
||||||
if action.retryCount > action.retryMax:
|
if action.retryCount > action.retryMax:
|
||||||
action.status = ActionStatus.FAILED
|
action.status = ActionStatus.FAILED
|
||||||
if action.rollback_on_failure:
|
if action.rollbackOnFailure:
|
||||||
await method.rollback(action.action, action.parameters, auth_data)
|
await method.rollback(action.action, action.parameters, authData)
|
||||||
else:
|
else:
|
||||||
action.status = ActionStatus.RETRY
|
action.status = ActionStatus.RETRY
|
||||||
else:
|
else:
|
||||||
action.status = ActionStatus.FAILED
|
action.status = ActionStatus.FAILED
|
||||||
if action.rollback_on_failure:
|
if action.rollbackOnFailure:
|
||||||
await method.rollback(action.action, action.parameters, auth_data)
|
await method.rollback(action.action, action.parameters, authData)
|
||||||
|
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
action.status = ActionStatus.TIMEOUT
|
action.status = ActionStatus.TIMEOUT
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
action.status = ActionStatus.FAILED
|
action.status = ActionStatus.FAILED
|
||||||
if action.rollback_on_failure:
|
if action.rollbackOnFailure:
|
||||||
await method.rollback(action.action, action.parameters, auth_data)
|
await method.rollback(action.action, action.parameters, authData)
|
||||||
|
|
||||||
# Update task status
|
# Update task status
|
||||||
if task.has_failed():
|
if task.hasFailed():
|
||||||
task.status = TaskStatus.FAILED
|
task.status = TaskStatus.FAILED
|
||||||
elif task.is_complete():
|
elif task.isComplete():
|
||||||
task.status = TaskStatus.SUCCESS
|
task.status = TaskStatus.SUCCESS
|
||||||
task.finishedAt = datetime.now(UTC).isoformat()
|
task.finishedAt = datetime.now(UTC).isoformat()
|
||||||
|
|
||||||
|
|
@ -103,22 +333,18 @@ class ServiceContainer:
|
||||||
task.status = TaskStatus.FAILED
|
task.status = TaskStatus.FAILED
|
||||||
task.error = str(e)
|
task.error = str(e)
|
||||||
|
|
||||||
def _should_retry(self, error: str) -> bool:
|
def _shouldRetry(self, error: str) -> bool:
|
||||||
"""Determine if error is retryable"""
|
"""Determine if error is retryable"""
|
||||||
retryable_errors = [
|
retryableErrors = [
|
||||||
"AI down",
|
"AI down",
|
||||||
"Document not found",
|
"Document not found",
|
||||||
"Content extraction failed",
|
"Content extraction failed",
|
||||||
"Network error",
|
"Network error",
|
||||||
"Temporary failure"
|
"Temporary failure"
|
||||||
]
|
]
|
||||||
return any(err in error for err in retryable_errors)
|
return any(err in error for err in retryableErrors)
|
||||||
|
|
||||||
def get_method(self, name: str) -> Optional[MethodBase]:
|
def getAvailableMethodsCatalog(self) -> Dict[str, Dict[str, Any]]:
|
||||||
"""Get a method by name"""
|
|
||||||
return self.methods.get(name)
|
|
||||||
|
|
||||||
def get_available_methods(self) -> Dict[str, Dict[str, Any]]:
|
|
||||||
"""Get catalog of available methods and their actions"""
|
"""Get catalog of available methods and their actions"""
|
||||||
return {
|
return {
|
||||||
name: {
|
name: {
|
||||||
|
|
|
||||||
|
|
@ -1,122 +1,14 @@
|
||||||
....................... TASKS
|
Clean
|
||||||
|
|
||||||
|
- Clean up ServiceContainer: keep only the functions that are actually used, no spares!
|
||||||
We need to adapt the agent's orchestration. At its center is the handover mechanism, which decides, based on the result of the previous step and the history of results, which step to take next in order to complete the user prompt. The mechanism shall ensure a stepwise procedure that uses the needed tools from self.service and the existing model classes in @serviceChatModel.py
|
- all the definitions used in serviceChatModel?
|
||||||
|
- Route all AI calls through the AI module (AI basic, AI special, AI ...)
|
||||||
|
|
||||||
1. to remove object AgentHandover and to use AgentTask instead (to adapt in workflowManager and chatManager).
|
|
||||||
|
|
||||||
2. ChatMessage model to change:
|
|
||||||
- "success" attribute to be boolean
|
|
||||||
- adapt references to the object in the codebase
|
|
||||||
|
|
||||||
3. Orchestration logic:
|
|
||||||
- workflowManager.workflowProcess to keep, but to be adapted using AgentTask parameter "agentTask" instead of "handover"
|
|
||||||
- createInitialHandover --> rename to createInitialTask
|
|
||||||
- defineNextHandover --> rename to createNextTask
|
|
||||||
|
|
||||||
4. chatManager to adapt:
|
|
||||||
- to work with self.service object only
|
|
||||||
- functions "createInitialTask" and "createNextTask" only to be different in handling the result of the last task, but the preparation of the next task to be the same routine "defineNextTask" to deliver AgentTask object.
|
|
||||||
- All agentTask objects to store in self.service as self.service.tasks.history[]
|
|
||||||
- self.service.tasks.next: to be the next task reference --> initially None
|
|
||||||
- self.service.tasks.previous: to be the previous task reference --> createInitialTask() to set it to None; createNextTask to set self.service.tasks.previous = self.service.tasks.next
|
|
||||||
- function "defineNextTask" to:
|
|
||||||
- update self.service.state, error handling, update counters and stats, check if limits are reached
|
|
||||||
- analyse result with AI call and produce:
|
|
||||||
- message object to give feedback to the user in his language
|
|
||||||
- decision whether user input completed, or to retry with different approach, or to do next task step towards user input to complete --> to put into feedback from previous task
|
|
||||||
- create a new instance of AgentTask, to add it to self.service.tasks.history, to set reference in self.service.tasks.next
|
|
||||||
- HELP: HERE TOO COMPLEX: HOW TO DO IT using AI to have a generic approach to read document contents with dedicated prompts, then to handle any user request. E.g. "Search all sharepoint documents from valueon account and extract parts containing customer data into a summary excel file" or "find websites for product 'shampoo' and generate a marketing flyer for our product to show usp"
|
|
||||||
- to process actionMessages of tasks.next --> results to integrate in tasks.next object
|
|
||||||
- error handling and return agentTask object
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
6. Adapt self.service object:
|
|
||||||
|
|
||||||
HELP: How to structure and organize this object to be used in the context?
|
|
||||||
|
|
||||||
- self.service.state:
|
|
||||||
- roundsMax
|
|
||||||
- roundsCount
|
|
||||||
|
|
||||||
- self.service.tasks part to add and to use in the code
|
|
||||||
|
|
||||||
- self.service.context to add:
|
|
||||||
- userInput: UserInputMessage
|
|
||||||
- dataConnections: list of UserConnection
|
|
||||||
- methodList: list of MethodObject
|
|
||||||
|
|
||||||
7. Adapt AgentTask object:
|
|
||||||
- userInput: summary for ai prompt what finally to deliver to the user based on UserInputMessage
|
|
||||||
- dataList: list of user connections for AI prompt (in the format "authority":"externalUsername" from user's connections object UserConnection)
|
|
||||||
- methodList: list of methods for AI prompt
|
|
||||||
- chatHistory: summary from the chat messages in the workflow before the user input (message summary with file list per message, as existing workflow could be continued)
|
|
||||||
- taskHistory: summary of the messages with file list per message after user input message id (there could also be user inputs before this message in former chat rounds)
|
|
||||||
- previousTaskFeedback
|
|
||||||
- thisTaskFeedback
|
|
||||||
- status: One of pending, success, failed, retry
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
5. what to ensure for "promptTaskGeneration"
|
|
||||||
- to have clear ai prompt for the task to do and for the result format to deliver. the result shall
|
|
||||||
- feedback from previous task to include
|
|
||||||
- instruction on how to use methodList catalog
|
|
||||||
- HELP: How to do the prompt?
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
***************************
|
|
||||||
|
|
||||||
TO include...
|
|
||||||
- Dict: agents objects accessible by "name"
|
|
||||||
|
|
||||||
|
|
||||||
Core:
|
|
||||||
logAdd: Logging functionality
|
|
||||||
workflow: Direct reference to workflow object
|
|
||||||
user: User information containing:
|
|
||||||
id: User ID
|
|
||||||
name: User name
|
|
||||||
language: User's preferred language (defaults to 'en')
|
|
||||||
|
|
||||||
Function Components:
|
|
||||||
functions: Dictionary containing utility functions:
|
|
||||||
forEach: Lambda function for iterating over items
|
|
||||||
while: Lambda function for while loop operations
|
|
||||||
getFile: Function to get file information
|
|
||||||
|
|
||||||
Model Components:
|
|
||||||
model: Dictionary containing AI model operations:
|
|
||||||
callAiBasic: Basic AI call function
|
|
||||||
callAiComplex: Complex AI call function
|
|
||||||
callAiImage: Image AI call function
|
|
||||||
|
|
||||||
Document Operations:
|
|
||||||
document: Dictionary containing document-related functions:
|
|
||||||
extract: Extract content from documents
|
|
||||||
convertFileRefToFileId: Convert file references to file IDs
|
|
||||||
convertFileIdToFileRef: Convert file IDs to file references
|
|
||||||
convertDataFormat: Convert data formats
|
|
||||||
agentInputFilesCreate: Create agent input file lists
|
|
||||||
agentOutputFilesSave: Save agent output files
|
|
||||||
|
|
||||||
Data Operations:
|
|
||||||
connections: Connection data storage
|
|
||||||
|
|
||||||
msft: Microsoft service functions and metadata
|
|
||||||
google: Google service functions and metadata
|
|
||||||
|
|
||||||
Document Operations:
|
|
||||||
document: Dictionary for document operations (populated by agentManager)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
********************
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
187
notes/data_specification.md
Normal file
187
notes/data_specification.md
Normal file
|
|
@ -0,0 +1,187 @@
|
||||||
|
# Document Management Refactoring Specification
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
This specification outlines the refactoring of document management in the system, focusing on proper model separation, centralized content extraction, and future-proof neutralization integration.
|
||||||
|
|
||||||
|
## Model Structure
|
||||||
|
|
||||||
|
### Base Document Models
|
||||||
|
```python
|
||||||
|
class ContentMetadata(BaseModel, ModelMixin):
|
||||||
|
"""Metadata for content items"""
|
||||||
|
size: int = Field(description="Content size in bytes")
|
||||||
|
pages: Optional[int] = Field(None, description="Number of pages for multi-page content")
|
||||||
|
error: Optional[str] = Field(None, description="Processing error if any")
|
||||||
|
# Media-specific attributes
|
||||||
|
width: Optional[int] = Field(None, description="Width in pixels for images/videos")
|
||||||
|
height: Optional[int] = Field(None, description="Height in pixels for images/videos")
|
||||||
|
colorMode: Optional[str] = Field(None, description="Color mode (e.g., RGB, CMYK, grayscale)")
|
||||||
|
fps: Optional[float] = Field(None, description="Frames per second for videos")
|
||||||
|
durationSec: Optional[float] = Field(None, description="Duration in seconds for videos/audio")
|
||||||
|
|
||||||
|
class ContentItem(BaseModel, ModelMixin):
|
||||||
|
"""Individual content item from a document"""
|
||||||
|
label: str = Field(description="Content label (e.g., tab name, tag name)")
|
||||||
|
data: str = Field(description="Text content")
|
||||||
|
metadata: ContentMetadata = Field(description="Content metadata")
|
||||||
|
|
||||||
|
class ChatDocument(BaseModel, ModelMixin):
|
||||||
|
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
||||||
|
fileId: str
|
||||||
|
filename: str
|
||||||
|
fileSize: int
|
||||||
|
mimeType: str
|
||||||
|
|
||||||
|
class TaskDocument(BaseModel, ModelMixin):
|
||||||
|
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
||||||
|
filename: str
|
||||||
|
fileSize: int
|
||||||
|
mimeType: str
|
||||||
|
data: str # Base64 encoded file data
|
||||||
|
|
||||||
|
class ExtractedContent(BaseModel, ModelMixin):
|
||||||
|
objectId: str # Reference to source document
|
||||||
|
objectType: str = Field(description="Type of source object ('ChatDocument' or 'TaskDocument')")
|
||||||
|
contents: List[ContentItem]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Service Layer Structure
|
||||||
|
|
||||||
|
### Document Service
|
||||||
|
```python
|
||||||
|
class DocumentService:
|
||||||
|
def __init__(self, service_container):
|
||||||
|
self.service = service_container
|
||||||
|
self.neutralizer_enabled = False # Flag for neutralization feature
|
||||||
|
|
||||||
|
async def extractFromChatDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
|
||||||
|
"""
|
||||||
|
Extract content from a ChatDocument by converting it to TaskDocument first.
|
||||||
|
"""
|
||||||
|
# Convert ChatDocument to TaskDocument
|
||||||
|
task_doc = await self._convertToTaskDocument(document)
|
||||||
|
return await self.getDocumentContent(task_doc, prompt)
|
||||||
|
|
||||||
|
async def extractFromTaskDocument(self, prompt: str, document: TaskDocument) -> ExtractedContent:
|
||||||
|
"""
|
||||||
|
Extract content directly from a TaskDocument.
|
||||||
|
"""
|
||||||
|
return await self.getDocumentContent(document, prompt)
|
||||||
|
|
||||||
|
async def getDocumentContent(self, document: TaskDocument, prompt: str) -> ExtractedContent:
|
||||||
|
"""
|
||||||
|
Helper function for centralized content extraction.
|
||||||
|
Handles the actual content extraction and optional neutralization.
|
||||||
|
"""
|
||||||
|
# Extract content based on mimeType
|
||||||
|
content = await self._extractRawContent(document)
|
||||||
|
|
||||||
|
# Apply neutralization if enabled
|
||||||
|
if self.neutralizer_enabled:
|
||||||
|
from modules.neutralizer import neutralizer
|
||||||
|
content = await neutralizer.process_content(content)
|
||||||
|
|
||||||
|
# Process content with AI using prompt
|
||||||
|
processed_content = await self._processWithAI(content, prompt)
|
||||||
|
|
||||||
|
return ExtractedContent(
|
||||||
|
objectId=document.id,
|
||||||
|
objectType="TaskDocument",
|
||||||
|
contents=processed_content
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Implementation Steps
|
||||||
|
|
||||||
|
1. **Model Cleanup**
|
||||||
|
- Create new model classes in `serviceChatModel.py`
|
||||||
|
- Remove deprecated models:
|
||||||
|
- DocumentExtraction
|
||||||
|
- DocumentContext
|
||||||
|
- ProcessedDocument
|
||||||
|
- ChatContent (replaced by ContentItem)
|
||||||
|
- Update ChatDocument to remove contents attribute
|
||||||
|
- Convert all snake_case to camelCase in manager*.py and method*.py
|
||||||
|
|
||||||
|
2. **Service Implementation**
|
||||||
|
- Create new `DocumentService` class in `serviceDocument.py`
|
||||||
|
- Implement the three main methods:
|
||||||
|
- extractFromChatDocument
|
||||||
|
- extractFromTaskDocument
|
||||||
|
- getDocumentContent (helper function)
|
||||||
|
- Add neutralization integration with feature flag
|
||||||
|
|
||||||
|
3. **UserInput Processing**
|
||||||
|
- Update `UserInputRequest` processing to use `ChatMessage`
|
||||||
|
- Implement `processFileIds` in `serviceChatClass`
|
||||||
|
- Update all references to use new model structure
|
||||||
|
|
||||||
|
4. **Method Module Updates**
|
||||||
|
- Update all method*.py modules to use new service layer
|
||||||
|
- Remove direct file access
|
||||||
|
- Implement proper error handling and logging
|
||||||
|
|
||||||
|
5. **Testing and Validation**
|
||||||
|
- Create unit tests for new models and services
|
||||||
|
- Test document processing with various file types
|
||||||
|
- Validate content extraction and neutralization
|
||||||
|
- Test error handling and edge cases
|
||||||
|
|
||||||
|
## Files to be Removed/Modified
|
||||||
|
|
||||||
|
### To be Removed
|
||||||
|
1. `DocumentExtraction` class from serviceChatModel.py
|
||||||
|
2. `DocumentContext` class from serviceChatModel.py
|
||||||
|
3. `ProcessedDocument` class from serviceChatModel.py
|
||||||
|
4. `ChatContent` class from serviceChatModel.py
|
||||||
|
5. Direct file access methods from method*.py modules
|
||||||
|
|
||||||
|
### To be Modified
|
||||||
|
1. `serviceChatModel.py`
|
||||||
|
- Add new model classes
|
||||||
|
- Remove deprecated classes
|
||||||
|
- Update existing classes
|
||||||
|
|
||||||
|
2. `managerDocument.py`
|
||||||
|
- Move core functionality to DocumentService
|
||||||
|
- Update to use new model structure
|
||||||
|
- Remove redundant methods
|
||||||
|
|
||||||
|
3. `method*.py` modules
|
||||||
|
- Update to use DocumentService
|
||||||
|
- Remove direct file access
|
||||||
|
- Update error handling
|
||||||
|
|
||||||
|
4. `serviceChatClass.py`
|
||||||
|
- Implement processFileIds
|
||||||
|
- Update document handling
|
||||||
|
|
||||||
|
## Neutralization Integration
|
||||||
|
|
||||||
|
The neutralization feature is integrated into the `getDocumentContent` method with a feature flag. When enabled, it will process content through the neutralizer before sending it to AI processing.
|
||||||
|
|
||||||
|
```python
|
||||||
|
# In getDocumentContent method
|
||||||
|
if self.neutralizer_enabled:
|
||||||
|
from modules.neutralizer import neutralizer
|
||||||
|
content = await neutralizer.process_content(content)
|
||||||
|
```
|
||||||
|
|
||||||
|
This allows for easy enabling/disabling of the feature and future expansion of neutralization capabilities.
|
||||||
|
|
||||||
|
## Migration Strategy
|
||||||
|
|
||||||
|
1. Create new models and services
|
||||||
|
2. Implement new functionality alongside existing code
|
||||||
|
3. Gradually migrate method modules to use new services
|
||||||
|
4. Remove deprecated code once migration is complete
|
||||||
|
5. Enable neutralization feature when ready
|
||||||
|
|
||||||
|
## Testing Requirements
|
||||||
|
|
||||||
|
1. Unit tests for all new model classes
|
||||||
|
2. Integration tests for DocumentService
|
||||||
|
3. Tests for content extraction with various file types
|
||||||
|
4. Tests for neutralization integration
|
||||||
|
5. Performance tests for large file handling
|
||||||
|
6. Error handling and edge case tests
|
||||||
|
|
@ -1,643 +0,0 @@
|
||||||
# Agent Chat System Handbook
|
|
||||||
|
|
||||||
# Einführung in das Agent Chat System Handbuch
|
|
||||||
|
|
||||||
## Zweck und Umfang des Dokuments
|
|
||||||
|
|
||||||
Willkommen zum "Agent Chat System Handbook". Dieses Handbuch dient als umfassende Anleitung für die Implementierung und Verwaltung eines Agent Chat Systems unter Verwendung von FastAPI. Es richtet sich an technische Fachkräfte, die für die Einrichtung, Verwaltung und Optimierung von Chat-Systemen verantwortlich sind. Ziel ist es, Ihnen die notwendigen Kenntnisse und Werkzeuge an die Hand zu geben, um ein effizientes und sicheres Chat-System zu entwickeln und zu betreiben.
|
|
||||||
|
|
||||||
## Kontext und Hintergrundinformationen
|
|
||||||
|
|
||||||
In der heutigen digitalen Welt sind Chat-Systeme ein wesentlicher Bestandteil der Kundenkommunikation und des Supports. Mit der zunehmenden Integration von Künstlicher Intelligenz (KI) in diese Systeme wird es immer wichtiger, robuste und skalierbare Lösungen zu entwickeln. FastAPI bietet eine moderne und leistungsstarke Plattform zur Erstellung von Web-APIs, die sich ideal für die Entwicklung eines solchen Systems eignet. Dieses Handbuch basiert auf den bereitgestellten FastAPI-Anwendungsdateien und bietet eine detaillierte Anleitung zur Implementierung eines Agent Chat Systems.
|
|
||||||
|
|
||||||
## Inhalt des Dokuments
|
|
||||||
|
|
||||||
Im "Agent Chat System Handbook" finden Sie detaillierte Informationen zu folgenden Themen:
|
|
||||||
|
|
||||||
- **FastAPI Setup**: Schritt-für-Schritt-Anleitung zur Einrichtung der FastAPI-Umgebung.
|
|
||||||
- **Benutzerverwaltung**: Methoden zur Verwaltung von Benutzerkonten und -rollen.
|
|
||||||
- **KI-Integration**: Implementierung von KI-Funktionen zur Verbesserung der Chat-Interaktionen.
|
|
||||||
- **Authentifizierung**: Sicherstellung der Sicherheit und Integrität des Systems durch robuste Authentifizierungsmechanismen.
|
|
||||||
- **Mandatsverwaltung**: Verwaltung von Benutzerrechten und -mandaten innerhalb des Systems.
|
|
||||||
- **Attributverwaltung**: Umgang mit benutzerdefinierten Attributen und deren Verwaltung.
|
|
||||||
- **Prompt-Management**: Erstellung und Verwaltung von Eingabeaufforderungen für die KI-Interaktion.
|
|
||||||
- **Dateioperationen**: Verwaltung und Verarbeitung von Dateien innerhalb des Systems.
|
|
||||||
- **Workflow-Management**: Optimierung und Automatisierung von Arbeitsabläufen im Chat-System.
|
|
||||||
|
|
||||||
## Ton und Zielgruppe
|
|
||||||
|
|
||||||
Dieses Handbuch ist in einem formellen und technischen Ton verfasst, um den Anforderungen einer professionellen Leserschaft gerecht zu werden. Es richtet sich an Entwickler, Systemadministratoren und technische Projektleiter, die mit der Implementierung und Verwaltung von Chat-Systemen betraut sind. Wir empfehlen, dass die Leser über grundlegende Kenntnisse in FastAPI und Web-API-Entwicklung verfügen, um den maximalen Nutzen aus diesem Handbuch zu ziehen.
|
|
||||||
|
|
||||||
Wir hoffen, dass dieses Handbuch Ihnen als wertvolle Ressource dient und Sie bei der erfolgreichen Implementierung Ihres Agent Chat Systems unterstützt.
|
|
||||||
|
|
||||||
# Einführung
|
|
||||||
|
|
||||||
## Zweck des Handbuchs
|
|
||||||
|
|
||||||
Das "Agent Chat System Handbook" dient als umfassende Anleitung zur Implementierung und Nutzung des Agenten-Chat-Systems, das auf der FastAPI-Plattform basiert. Dieses Handbuch richtet sich an technische Anwender, die eine detaillierte Anleitung zur Einrichtung, Verwaltung und Optimierung des Systems benötigen. Es bietet eine strukturierte Übersicht über die verschiedenen Komponenten und Funktionen des Systems, um eine effiziente Nutzung und Anpassung zu gewährleisten. Ziel ist es, den Anwendern ein tiefes Verständnis der Systemarchitektur und der zugrunde liegenden Prozesse zu vermitteln, um eine reibungslose Integration und Verwaltung zu ermöglichen.
|
|
||||||
|
|
||||||
## Systemübersicht
|
|
||||||
|
|
||||||
Das Agenten-Chat-System ist eine leistungsstarke Plattform, die entwickelt wurde, um die Kommunikation zwischen Agenten und Nutzern zu optimieren. Es nutzt die FastAPI-Technologie, um eine schnelle und skalierbare Lösung zu bieten. Die Hauptkomponenten des Systems umfassen:
|
|
||||||
|
|
||||||
- **Anwendungssetup**: Die FastAPI-Anwendung wird mit spezifischen Konfigurationen für Logging, CORS (Cross-Origin Resource Sharing) und Authentifizierung eingerichtet. Diese Konfigurationen sind entscheidend für die Sicherheit und Leistung der Anwendung.
|
|
||||||
|
|
||||||
- **Benutzerverwaltung**: Ein robustes Modul zur Verwaltung von Benutzerkonten, das die Erstellung, Aktualisierung und Löschung von Benutzerprofilen ermöglicht. Es stellt sicher, dass nur autorisierte Benutzer Zugriff auf das System haben.
|
|
||||||
|
|
||||||
- **Mandatsverwaltung**: Diese Komponente ermöglicht die Verwaltung von Mandaten, die den Zugriff und die Berechtigungen innerhalb des Systems regeln. Sie ist essenziell für die Einhaltung von Sicherheitsrichtlinien.
|
|
||||||
|
|
||||||
- **Attributverwaltung**: Ein flexibles System zur Verwaltung von Attributen, die zur Personalisierung und Anpassung der Benutzererfahrung verwendet werden können.
|
|
||||||
|
|
||||||
- **Prompt-Management**: Diese Funktion ermöglicht die Verwaltung von Eingabeaufforderungen, die zur Interaktion mit den Nutzern verwendet werden. Sie ist entscheidend für die Anpassung der Kommunikation an spezifische Anforderungen.
|
|
||||||
|
|
||||||
- **Dateioperationen**: Ein Modul zur effizienten Handhabung von Dateivorgängen, das das Hochladen, Herunterladen und Verwalten von Dateien innerhalb des Systems unterstützt.
|
|
||||||
|
|
||||||
- **Workflow-Management**: Diese Komponente ermöglicht die Definition und Verwaltung von Arbeitsabläufen, um die Effizienz und Konsistenz der Prozesse zu gewährleisten.
|
|
||||||
|
|
||||||
- **KI-Integration**: Das System bietet eine nahtlose Integration von KI-Technologien, um die Interaktion und Entscheidungsfindung zu verbessern.
|
|
||||||
|
|
||||||
- **Authentifizierung**: Ein sicheres Authentifizierungssystem, das sicherstellt, dass nur berechtigte Benutzer Zugriff auf die Anwendung haben.
|
|
||||||
|
|
||||||
Dieses Handbuch wird detaillierte Anleitungen und Beispiele für jede dieser Komponenten bieten, um eine umfassende Unterstützung bei der Implementierung und Verwaltung des Agenten-Chat-Systems zu gewährleisten.
|
|
||||||
|
|
||||||
# Application Setup
|
|
||||||
|
|
||||||
In diesem Abschnitt des "Agent Chat System Handbook" wird die Einrichtung der Anwendung detailliert beschrieben. Diese Anleitung richtet sich an technische Benutzer und bietet eine umfassende Übersicht über die Initialisierung der FastAPI-Anwendung, die Konfiguration von statischen Dateien und die allgemeinen Endpunkte.
|
|
||||||
|
|
||||||
## FastAPI Initialization
|
|
||||||
|
|
||||||
Die Initialisierung der FastAPI-Anwendung ist der erste Schritt zur Einrichtung des Agent Chat Systems. Hierbei werden grundlegende Parameter und Konfigurationen festgelegt, die für den Betrieb der Anwendung erforderlich sind.
|
|
||||||
|
|
||||||
### Schritte zur Initialisierung:
|
|
||||||
|
|
||||||
1. **Anwendungserstellung**:
|
|
||||||
- Die FastAPI-Anwendung wird mit einem Titel und einer Beschreibung initialisiert. Diese Informationen sind nützlich für die Dokumentation und API-Dokumentationsseiten.
|
|
||||||
- Beispiel:
|
|
||||||
```python
|
|
||||||
from fastapi import FastAPI
|
|
||||||
|
|
||||||
app = FastAPI(
|
|
||||||
title="Agent Chat System",
|
|
||||||
description="Ein System zur Verwaltung von Agenten-Chats"
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **Lebenszyklus-Management**:
|
|
||||||
- Die Anwendung verwendet einen Lebenszyklus-Manager, um Ereignisse beim Start und Herunterfahren der Anwendung zu verwalten. Dies ist entscheidend für die ordnungsgemäße Ressourcenverwaltung.
|
|
||||||
- Beispiel:
|
|
||||||
```python
|
|
||||||
@app.on_event("startup")
|
|
||||||
async def startup_event():
|
|
||||||
# Initialisierungslogik hier
|
|
||||||
|
|
||||||
@app.on_event("shutdown")
|
|
||||||
async def shutdown_event():
|
|
||||||
# Bereinigungslogik hier
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **CORS-Konfiguration**:
|
|
||||||
- Die Cross-Origin Resource Sharing (CORS) Einstellungen werden konfiguriert, um den Zugriff von verschiedenen Ursprüngen zu ermöglichen, was besonders wichtig für Webanwendungen ist, die auf verschiedenen Domains gehostet werden.
|
|
||||||
|
|
||||||
## Static Files Setup
|
|
||||||
|
|
||||||
Die Konfiguration von statischen Dateien ermöglicht es der Anwendung, Ressourcen wie Bilder, CSS-Dateien und JavaScript-Dateien bereitzustellen, die für die Benutzeroberfläche benötigt werden.
|
|
||||||
|
|
||||||
### Schritte zur Konfiguration:
|
|
||||||
|
|
||||||
1. **Verzeichnis für statische Dateien**:
|
|
||||||
- Ein Verzeichnis wird definiert, in dem alle statischen Dateien gespeichert werden. Dieses Verzeichnis wird in der Regel relativ zum Projektverzeichnis angegeben.
|
|
||||||
- Beispiel:
|
|
||||||
```python
|
|
||||||
from fastapi.staticfiles import StaticFiles
|
|
||||||
|
|
||||||
app.mount("/static", StaticFiles(directory="static"), name="static")
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **Zugriff auf statische Dateien**:
|
|
||||||
- Die Anwendung stellt sicher, dass die statischen Dateien über einen bestimmten URL-Pfad zugänglich sind, was die Bereitstellung und den Zugriff auf diese Ressourcen erleichtert.
|
|
||||||
|
|
||||||
## Endpoints Overview
|
|
||||||
|
|
||||||
Die Endpunkte der Anwendung sind die Schnittstellen, über die externe Systeme und Benutzer mit der Anwendung interagieren können. Eine klare Übersicht über die verfügbaren Endpunkte ist entscheidend für die Integration und Nutzung der Anwendung.
|
|
||||||
|
|
||||||
### Allgemeine Endpunkte:
|
|
||||||
|
|
||||||
1. **Benutzerverwaltung**:
|
|
||||||
- Endpunkte zur Erstellung, Aktualisierung und Löschung von Benutzern.
|
|
||||||
- Beispiel:
|
|
||||||
```python
|
|
||||||
@app.post("/users/")
|
|
||||||
async def create_user(user: User):
|
|
||||||
# Logik zur Benutzererstellung
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **Mandatsverwaltung**:
|
|
||||||
- Endpunkte zur Verwaltung von Mandaten, einschließlich der Zuweisung und Verwaltung von Berechtigungen.
|
|
||||||
|
|
||||||
3. **Attributverwaltung**:
|
|
||||||
- Endpunkte zur Verwaltung von Attributen, die für die Anpassung und Personalisierung der Agenten-Chats verwendet werden.
|
|
||||||
|
|
||||||
4. **Prompt-Management**:
|
|
||||||
- Endpunkte zur Verwaltung von Eingabeaufforderungen, die für die Interaktion mit Benutzern verwendet werden.
|
|
||||||
|
|
||||||
Diese detaillierte Anleitung zur Einrichtung der Anwendung stellt sicher, dass technische Benutzer die FastAPI-Anwendung korrekt initialisieren und konfigurieren können, um eine reibungslose Funktionalität des Agent Chat Systems zu gewährleisten.
|
|
||||||
|
|
||||||
# Logging
|
|
||||||
|
|
||||||
In diesem Abschnitt des "Agent Chat System Handbook" wird die Konfiguration und Einrichtung des Loggings im Rahmen der FastAPI-Anwendung beschrieben. Eine ordnungsgemäße Protokollierung ist entscheidend für die Überwachung und Fehlerbehebung der Anwendung. Dieser Abschnitt ist in zwei Hauptunterabschnitte unterteilt: Initialisierung und Handler.
|
|
||||||
|
|
||||||
## Initialisierung
|
|
||||||
|
|
||||||
Die Initialisierung des Loggings ist ein wesentlicher Schritt, um sicherzustellen, dass alle Ereignisse innerhalb der Anwendung korrekt erfasst werden. Die Konfiguration des Loggings erfolgt in der Regel zu Beginn der Anwendung, um sicherzustellen, dass alle nachfolgenden Prozesse und Ereignisse protokolliert werden.
|
|
||||||
|
|
||||||
### Beispiel für die Logging-Initialisierung
|
|
||||||
|
|
||||||
```python
|
|
||||||
import logging
|
|
||||||
|
|
||||||
def initialize_logging():
|
|
||||||
logging.basicConfig(
|
|
||||||
level=logging.INFO,
|
|
||||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
|
||||||
handlers=[
|
|
||||||
logging.FileHandler("app.log"),
|
|
||||||
logging.StreamHandler()
|
|
||||||
]
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
In diesem Beispiel wird das Logging mit einem Basislevel von `INFO` konfiguriert. Die Formatierung der Log-Nachrichten umfasst das Datum und die Uhrzeit, den Namen des Loggers, die Log-Stufe und die eigentliche Nachricht. Zwei Handler werden eingerichtet: ein `FileHandler`, der die Logs in eine Datei schreibt, und ein `StreamHandler`, der die Logs auf der Konsole ausgibt.
|
|
||||||
|
|
||||||
## Handler
|
|
||||||
|
|
||||||
Handler sind ein wesentlicher Bestandteil des Loggings, da sie bestimmen, wohin die Log-Nachrichten gesendet werden. In der FastAPI-Anwendung können verschiedene Arten von Handlern eingerichtet werden, um die Protokollierung flexibel und anpassbar zu gestalten.
|
|
||||||
|
|
||||||
### Einrichtung von Handlers
|
|
||||||
|
|
||||||
1. **FileHandler**: Dieser Handler schreibt Log-Nachrichten in eine Datei. Er ist nützlich für die langfristige Speicherung von Logs und die spätere Analyse.
|
|
||||||
|
|
||||||
```python
|
|
||||||
file_handler = logging.FileHandler('app.log')
|
|
||||||
file_handler.setLevel(logging.INFO)
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **StreamHandler**: Dieser Handler gibt Log-Nachrichten auf der Konsole aus. Er ist besonders nützlich für die Echtzeitüberwachung während der Entwicklung und des Debuggings.
|
|
||||||
|
|
||||||
```python
|
|
||||||
stream_handler = logging.StreamHandler()
|
|
||||||
stream_handler.setLevel(logging.DEBUG)
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **Custom Handler**: Bei Bedarf können benutzerdefinierte Handler erstellt werden, um spezielle Anforderungen zu erfüllen, wie z.B. das Senden von Logs an externe Systeme oder Dienste.
|
|
||||||
|
|
||||||
### Beispiel für die Handler-Konfiguration
|
|
||||||
|
|
||||||
```python
|
|
||||||
logger = logging.getLogger('agent_chat_system')
|
|
||||||
logger.setLevel(logging.DEBUG)
|
|
||||||
|
|
||||||
# Hinzufügen der Handler zum Logger
|
|
||||||
logger.addHandler(file_handler)
|
|
||||||
logger.addHandler(stream_handler)
|
|
||||||
```
|
|
||||||
|
|
||||||
In diesem Beispiel wird ein Logger mit dem Namen `agent_chat_system` erstellt und auf das Level `DEBUG` gesetzt. Die zuvor definierten `FileHandler` und `StreamHandler` werden dem Logger hinzugefügt, um die Log-Nachrichten sowohl in eine Datei als auch auf der Konsole auszugeben.
|
|
||||||
|
|
||||||
Durch die sorgfältige Konfiguration von Logging und Handlers kann die FastAPI-Anwendung effektiv überwacht und gewartet werden, was zu einer verbesserten Stabilität und Fehlerbehebung führt.
|
|
||||||
|
|
||||||
# Benutzerverwaltung
|
|
||||||
|
|
||||||
In diesem Abschnitt des "Agent Chat System Handbook" wird die Benutzerverwaltung detailliert beschrieben. Die Benutzerverwaltung ist ein zentraler Bestandteil des Systems, da sie die Zuweisung von Rollen und Berechtigungen sowie die Authentifizierungsmechanismen umfasst. Diese Aspekte sind entscheidend für die Sicherheit und Effizienz des Systems.
|
|
||||||
|
|
||||||
## Rollen und Berechtigungen
|
|
||||||
|
|
||||||
### Benutzerrollen
|
|
||||||
|
|
||||||
Benutzerrollen definieren die verschiedenen Zugriffsebenen und Verantwortlichkeiten innerhalb des Agent Chat Systems. Jede Rolle hat spezifische Berechtigungen, die den Zugriff auf bestimmte Funktionen und Daten steuern. Die Hauptrollen sind:
|
|
||||||
|
|
||||||
- **Administrator**: Hat umfassende Berechtigungen, einschließlich der Verwaltung von Benutzern, Rollen und Systemeinstellungen.
|
|
||||||
- **Agent**: Kann auf die Chat-Funktionalitäten zugreifen und mit Kunden interagieren.
|
|
||||||
- **Supervisor**: Überwacht die Aktivitäten der Agenten und hat Zugriff auf Berichte und Analysen.
|
|
||||||
|
|
||||||
### Berechtigungen
|
|
||||||
|
|
||||||
Berechtigungen sind spezifische Rechte, die einer Rolle zugewiesen werden. Sie bestimmen, welche Aktionen ein Benutzer innerhalb des Systems ausführen kann. Beispiele für Berechtigungen sind:
|
|
||||||
|
|
||||||
- Zugriff auf das Dashboard
|
|
||||||
- Verwaltung von Benutzerkonten
|
|
||||||
- Einsicht in Berichte und Statistiken
|
|
||||||
- Konfiguration von Systemeinstellungen
|
|
||||||
|
|
||||||
Die Zuweisung von Rollen und Berechtigungen erfolgt über die Administrationsoberfläche des Systems, wo Administratoren die Möglichkeit haben, Benutzerkonten zu erstellen und zu verwalten.
|
|
||||||
|
|
||||||
## Authentifizierung
|
|
||||||
|
|
||||||
### Authentifizierungsmechanismen
|
|
||||||
|
|
||||||
Die Authentifizierung ist ein kritischer Sicherheitsaspekt des Agent Chat Systems. Sie stellt sicher, dass nur autorisierte Benutzer Zugriff auf das System erhalten. Die gängigen Authentifizierungsmechanismen umfassen:
|
|
||||||
|
|
||||||
- **Passwortbasierte Authentifizierung**: Benutzer melden sich mit einem Benutzernamen und einem Passwort an. Es wird empfohlen, starke Passwörter zu verwenden und regelmäßige Passwortänderungen durchzuführen.
|
|
||||||
|
|
||||||
- **Zwei-Faktor-Authentifizierung (2FA)**: Erhöht die Sicherheit, indem ein zusätzlicher Verifizierungsschritt hinzugefügt wird, z.B. ein einmaliger Code, der an das Mobiltelefon des Benutzers gesendet wird.
|
|
||||||
|
|
||||||
- **OAuth 2.0**: Ermöglicht die Authentifizierung über Drittanbieter, wie Google oder Facebook, was den Anmeldeprozess für Benutzer vereinfacht und die Sicherheit erhöht.
|
|
||||||
|
|
||||||
### Implementierung in FastAPI
|
|
||||||
|
|
||||||
Die FastAPI-Anwendung implementiert diese Authentifizierungsmechanismen durch die Integration von Sicherheitsprotokollen und Middleware. Die Konfiguration erfolgt in der `app.py` Datei, wo die Authentifizierungslogik definiert ist. Hier ein Beispiel für die Implementierung der passwortbasierten Authentifizierung:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from fastapi import FastAPI, Depends
|
|
||||||
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
|
|
||||||
|
|
||||||
app = FastAPI()
|
|
||||||
|
|
||||||
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
|
|
||||||
|
|
||||||
@app.post("/token")
|
|
||||||
async def login(form_data: OAuth2PasswordRequestForm = Depends()):
|
|
||||||
# Authentifizierungslogik hier
|
|
||||||
return {"access_token": "token", "token_type": "bearer"}
|
|
||||||
```
|
|
||||||
|
|
||||||
Diese Struktur ermöglicht eine flexible und sichere Verwaltung der Benutzerzugriffe und gewährleistet, dass das System den aktuellen Sicherheitsstandards entspricht.
|
|
||||||
|
|
||||||
Durch die sorgfältige Verwaltung von Rollen, Berechtigungen und Authentifizierungsmechanismen wird sichergestellt, dass das Agent Chat System sowohl sicher als auch effizient betrieben werden kann.
|
|
||||||
|
|
||||||
# Mandate Management
|
|
||||||
|
|
||||||
In diesem Abschnitt des "Agent Chat System Handbook" wird das Mandatsmanagement detailliert beschrieben. Das Mandatsmanagement ist ein wesentlicher Bestandteil des Systems, der die Erstellung und Verwaltung von Mandaten umfasst. Diese Funktionen sind entscheidend für die Organisation und den Betrieb des Agenten-Chat-Systems.
|
|
||||||
|
|
||||||
## Mandate Creation
|
|
||||||
|
|
||||||
### Erstellung von Mandaten
|
|
||||||
|
|
||||||
Die Erstellung von Mandaten ist der erste Schritt im Mandatsmanagement. Ein Mandat definiert die spezifischen Aufgaben und Verantwortlichkeiten, die einem Agenten oder einer Gruppe von Agenten zugewiesen werden. Die Erstellung eines Mandats erfolgt in mehreren Schritten:
|
|
||||||
|
|
||||||
1. **Initialisierung**: Beginnen Sie mit der Definition der grundlegenden Parameter des Mandats, einschließlich des Titels, der Beschreibung und der beteiligten Agenten.
|
|
||||||
|
|
||||||
2. **Zuweisung von Aufgaben**: Weisen Sie spezifische Aufgaben oder Ziele zu, die im Rahmen des Mandats erreicht werden sollen. Diese Aufgaben sollten klar definiert und messbar sein.
|
|
||||||
|
|
||||||
3. **Festlegung von Fristen**: Bestimmen Sie die zeitlichen Rahmenbedingungen für das Mandat, einschließlich Start- und Enddatum sowie Meilensteine.
|
|
||||||
|
|
||||||
4. **Ressourcenzuweisung**: Stellen Sie sicher, dass die notwendigen Ressourcen, wie z.B. technische Tools oder Daten, den Agenten zur Verfügung stehen.
|
|
||||||
|
|
||||||
5. **Genehmigung**: Das Mandat muss von einer autorisierten Person oder einem Gremium genehmigt werden, bevor es aktiv wird.
|
|
||||||
|
|
||||||
## Mandate Lifecycle
|
|
||||||
|
|
||||||
### Lebenszyklus eines Mandats
|
|
||||||
|
|
||||||
Der Lebenszyklus eines Mandats umfasst mehrere Phasen, die sicherstellen, dass das Mandat effektiv verwaltet und abgeschlossen wird:
|
|
||||||
|
|
||||||
1. **Initiierung**: Nach der Erstellung wird das Mandat offiziell gestartet. Alle beteiligten Parteien werden informiert und die notwendigen Ressourcen bereitgestellt.
|
|
||||||
|
|
||||||
2. **Durchführung**: In dieser Phase arbeiten die Agenten an den zugewiesenen Aufgaben. Fortschritte werden regelmäßig überwacht und dokumentiert.
|
|
||||||
|
|
||||||
3. **Überwachung und Anpassung**: Der Fortschritt des Mandats wird kontinuierlich überwacht. Bei Bedarf werden Anpassungen vorgenommen, um sicherzustellen, dass die Ziele erreicht werden.
|
|
||||||
|
|
||||||
4. **Abschluss**: Nach Erreichen der Ziele oder dem Ende der Laufzeit wird das Mandat abgeschlossen. Eine abschließende Bewertung wird durchgeführt, um den Erfolg zu messen und Erkenntnisse für zukünftige Mandate zu gewinnen.
|
|
||||||
|
|
||||||
5. **Archivierung**: Alle relevanten Dokumente und Berichte werden archiviert, um eine Nachverfolgbarkeit und Referenz für zukünftige Projekte zu gewährleisten.
|
|
||||||
|
|
||||||
## Managing Mandates
|
|
||||||
|
|
||||||
### Verwaltung von Mandaten
|
|
||||||
|
|
||||||
Die Verwaltung von Mandaten erfordert eine kontinuierliche Überwachung und Anpassung, um sicherzustellen, dass die gesetzten Ziele erreicht werden. Zu den wichtigsten Verwaltungsaufgaben gehören:
|
|
||||||
|
|
||||||
- **Statusüberprüfung**: Regelmäßige Überprüfung des Status und Fortschritts des Mandats.
|
|
||||||
- **Kommunikation**: Sicherstellen, dass alle Beteiligten über den Fortschritt und etwaige Änderungen informiert sind.
|
|
||||||
- **Risikomanagement**: Identifizierung und Management potenzieller Risiken, die den Erfolg des Mandats gefährden könnten.
|
|
||||||
- **Berichterstattung**: Erstellung regelmäßiger Berichte, um den Fortschritt zu dokumentieren und Transparenz zu gewährleisten.
|
|
||||||
|
|
||||||
Durch die sorgfältige Erstellung und Verwaltung von Mandaten wird sichergestellt, dass das Agenten-Chat-System effizient und effektiv arbeitet, um die gesteckten Ziele zu erreichen.
|
|
||||||
|
|
||||||
# Attribute Handling
|
|
||||||
|
|
||||||
In diesem Abschnitt des "Agent Chat System Handbook" wird die Handhabung von Attributen im System detailliert beschrieben. Attribute sind wesentliche Komponenten, die zur Verwaltung und Organisation von Daten innerhalb des Systems verwendet werden. Dieser Abschnitt behandelt die verschiedenen Typen von Attributen und die Operationen, die auf ihnen ausgeführt werden können.
|
|
||||||
|
|
||||||
## Typen von Attributen
|
|
||||||
|
|
||||||
Attribute im Agent Chat System sind in verschiedene Kategorien unterteilt, die jeweils spezifische Funktionen und Anwendungsbereiche haben. Die wichtigsten Attributtypen sind:
|
|
||||||
|
|
||||||
### 1. Systemattribute
|
|
||||||
Systemattribute sind vordefinierte Attribute, die für die grundlegende Funktionalität des Systems erforderlich sind. Sie werden automatisch vom System verwaltet und können nicht vom Benutzer geändert werden. Beispiele für Systemattribute sind Benutzer-ID, Erstellungsdatum und Änderungsdatum.
|
|
||||||
|
|
||||||
### 2. Benutzerdefinierte Attribute
|
|
||||||
Benutzerdefinierte Attribute werden von den Benutzern erstellt, um spezifische Anforderungen zu erfüllen. Diese Attribute bieten Flexibilität und Anpassungsfähigkeit, indem sie es den Benutzern ermöglichen, zusätzliche Informationen zu speichern, die nicht durch Systemattribute abgedeckt sind. Ein Beispiel könnte ein Attribut für die bevorzugte Sprache eines Benutzers sein.
|
|
||||||
|
|
||||||
### 3. Temporäre Attribute
|
|
||||||
Temporäre Attribute werden für kurzfristige Zwecke erstellt und haben eine begrenzte Lebensdauer. Sie werden häufig in Sitzungen oder für bestimmte Operationen verwendet, bei denen die Daten nicht dauerhaft gespeichert werden müssen. Ein Beispiel wäre ein Attribut, das während einer Chat-Sitzung verwendet wird, um den aktuellen Status eines Gesprächs zu verfolgen.
|
|
||||||
|
|
||||||
## Operationen auf Attributen
|
|
||||||
|
|
||||||
Die Verwaltung von Attributen umfasst eine Vielzahl von Operationen, die es ermöglichen, Attribute zu erstellen, zu ändern, zu löschen und zu analysieren. Die wichtigsten Operationen sind:
|
|
||||||
|
|
||||||
### 1. Erstellung von Attributen
|
|
||||||
Die Erstellung von Attributen erfolgt entweder automatisch durch das System (für Systemattribute) oder manuell durch den Benutzer (für benutzerdefinierte Attribute). Bei der Erstellung eines Attributs müssen der Attributtyp, der Name und der Datentyp spezifiziert werden.
|
|
||||||
|
|
||||||
### 2. Aktualisierung von Attributen
|
|
||||||
Attribute können aktualisiert werden, um Änderungen in den Daten widerzuspiegeln. Dies umfasst das Ändern von Attributwerten oder das Aktualisieren von Attributmetadaten. Beispielsweise kann ein Benutzer das Attribut "Telefonnummer" aktualisieren, um eine neue Nummer zu speichern.
|
|
||||||
|
|
||||||
### 3. Löschung von Attributen
|
|
||||||
Nicht mehr benötigte Attribute können gelöscht werden. Bei der Löschung von Attributen ist Vorsicht geboten, da dies irreversible Änderungen an den gespeicherten Daten zur Folge haben kann. Systemattribute können in der Regel nicht gelöscht werden, um die Integrität des Systems zu gewährleisten.
|
|
||||||
|
|
||||||
### 4. Abfrage von Attributen
|
|
||||||
Das System ermöglicht die Abfrage von Attributen, um Informationen zu extrahieren und Berichte zu erstellen. Dies ist besonders nützlich für die Analyse von Daten und die Generierung von Einblicken. Beispielsweise kann ein Administrator eine Abfrage durchführen, um alle Benutzer mit einem bestimmten Attributwert zu identifizieren.
|
|
||||||
|
|
||||||
### 5. Validierung von Attributen
|
|
||||||
Die Validierung von Attributen stellt sicher, dass die eingegebenen Daten den festgelegten Kriterien entsprechen. Dies umfasst die Überprüfung von Datentypen, Wertebereichen und anderen Einschränkungen. Eine korrekte Validierung ist entscheidend, um Datenintegrität und -konsistenz zu gewährleisten.
|
|
||||||
|
|
||||||
Durch das Verständnis der verschiedenen Attributtypen und der auf ihnen ausführbaren Operationen können Benutzer das Agent Chat System effektiver nutzen und an ihre spezifischen Bedürfnisse anpassen.
|
|
||||||
|
|
||||||
# Prompt Management
|
|
||||||
|
|
||||||
In diesem Abschnitt des "Agent Chat System Handbook" wird die Verwaltung von Prompts behandelt. Prompts sind wesentliche Bestandteile des Agent Chat Systems, da sie die Interaktion zwischen Benutzern und dem System steuern. Dieser Abschnitt bietet eine detaillierte Anleitung zur Erstellung und Nutzung von Prompts.
|
|
||||||
|
|
||||||
## Erstellen von Prompts
|
|
||||||
|
|
||||||
Die Erstellung von Prompts ist ein zentraler Bestandteil der Systemkonfiguration und ermöglicht es, spezifische Anfragen oder Anweisungen für die Interaktion mit dem System zu definieren.
|
|
||||||
|
|
||||||
### Schritte zur Erstellung von Prompts
|
|
||||||
|
|
||||||
1. **Identifikation des Bedarfs**: Bestimmen Sie den spezifischen Bedarf oder das Szenario, für das ein Prompt erforderlich ist. Dies könnte eine häufig gestellte Frage oder eine spezifische Anweisung sein, die regelmäßig benötigt wird.
|
|
||||||
|
|
||||||
2. **Definition des Inhalts**: Formulieren Sie den Inhalt des Prompts klar und präzise. Der Inhalt sollte direkt und verständlich sein, um Missverständnisse zu vermeiden.
|
|
||||||
|
|
||||||
3. **Formatierung**: Achten Sie darauf, dass der Prompt in einem konsistenten Format erstellt wird, das mit den anderen Systemkomponenten kompatibel ist. Nutzen Sie Markdown oder andere unterstützte Formate, um die Lesbarkeit zu verbessern.
|
|
||||||
|
|
||||||
4. **Implementierung im System**: Integrieren Sie den erstellten Prompt in das System. Dies kann durch die Anpassung der entsprechenden Konfigurationsdateien oder durch die Nutzung der API-Schnittstellen erfolgen.
|
|
||||||
|
|
||||||
### Beispiel
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"prompt_id": "faq_shipping",
|
|
||||||
"content": "Wie lange dauert der Versand?",
|
|
||||||
"response": "Der Versand dauert in der Regel 3-5 Werktage."
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Nutzung von Prompts
|
|
||||||
|
|
||||||
Die Nutzung von Prompts ist entscheidend, um eine effiziente und konsistente Kommunikation innerhalb des Agent Chat Systems sicherzustellen.
|
|
||||||
|
|
||||||
### Schritte zur Nutzung von Prompts
|
|
||||||
|
|
||||||
1. **Abrufen von Prompts**: Verwenden Sie die API-Endpunkte, um verfügbare Prompts abzurufen. Dies ermöglicht es Agenten, schnell auf vorgefertigte Antworten zuzugreifen.
|
|
||||||
|
|
||||||
2. **Anpassung an den Kontext**: Stellen Sie sicher, dass der ausgewählte Prompt dem aktuellen Kontext der Benutzeranfrage entspricht. Passen Sie den Inhalt gegebenenfalls an, um spezifische Details oder Variationen zu berücksichtigen.
|
|
||||||
|
|
||||||
3. **Feedback und Optimierung**: Sammeln Sie regelmäßig Feedback zur Effektivität der Prompts und optimieren Sie diese basierend auf den Rückmeldungen. Dies kann durch die Analyse von Benutzerinteraktionen und die Anpassung der Inhalte erfolgen.
|
|
||||||
|
|
||||||
### Beispiel
|
|
||||||
|
|
||||||
Ein Agent erhält eine Anfrage zu den Versandzeiten. Anstatt die Antwort manuell zu formulieren, ruft der Agent den entsprechenden Prompt ab und liefert eine konsistente und schnelle Antwort.
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"user_query": "Wann kommt meine Bestellung an?",
|
|
||||||
"prompt_used": "faq_shipping",
|
|
||||||
"response": "Der Versand dauert in der Regel 3-5 Werktage."
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Durch die strukturierte Verwaltung und Nutzung von Prompts wird die Effizienz des Agent Chat Systems erheblich gesteigert, was zu einer verbesserten Benutzererfahrung führt.
|
|
||||||
|
|
||||||
# File Operations
|
|
||||||
|
|
||||||
In diesem Abschnitt des "Agent Chat System Handbook" werden die wesentlichen Aspekte der Dateiverwaltung und -speicherung im Rahmen des Agent Chat Systems behandelt. Diese Informationen sind entscheidend für die technische Verwaltung und den Betrieb des Systems. Der Abschnitt ist in zwei Hauptunterabschnitte unterteilt: "Handling Files" und "Storing Files".
|
|
||||||
|
|
||||||
## Handling Files
|
|
||||||
|
|
||||||
Die Handhabung von Dateien ist ein zentraler Bestandteil des Agent Chat Systems, da es die Interaktion mit verschiedenen Dateitypen ermöglicht, die für die Funktionalität des Systems erforderlich sind.
|
|
||||||
|
|
||||||
### Dateiverwaltung
|
|
||||||
|
|
||||||
- **Öffnen und Schließen von Dateien**: Dateien sollten mit geeigneten Methoden geöffnet und geschlossen werden, um Datenverlust oder -beschädigung zu vermeiden. In Python wird dies häufig mit dem `with`-Statement erreicht, das sicherstellt, dass Dateien nach der Verwendung ordnungsgemäß geschlossen werden.
|
|
||||||
|
|
||||||
```python
|
|
||||||
with open('datei.txt', 'r') as file:
|
|
||||||
inhalt = file.read()
|
|
||||||
```
|
|
||||||
|
|
||||||
- **Lesen und Schreiben von Dateien**: Das System unterstützt sowohl das Lesen als auch das Schreiben von Dateien. Es ist wichtig, die korrekten Modi (`'r'` für Lesen, `'w'` für Schreiben, `'a'` für Anhängen) zu verwenden, um die Integrität der Daten zu gewährleisten.
|
|
||||||
|
|
||||||
- **Fehlerbehandlung**: Beim Umgang mit Dateien können verschiedene Fehler auftreten, wie z.B. `FileNotFoundError` oder `IOError`. Eine robuste Fehlerbehandlung ist notwendig, um das System vor unerwarteten Abstürzen zu schützen.
|
|
||||||
|
|
||||||
```python
|
|
||||||
try:
|
|
||||||
with open('datei.txt', 'r') as file:
|
|
||||||
inhalt = file.read()
|
|
||||||
except FileNotFoundError:
|
|
||||||
print("Die Datei wurde nicht gefunden.")
|
|
||||||
```
|
|
||||||
|
|
||||||
## Storing Files
|
|
||||||
|
|
||||||
Die Speicherung von Dateien ist ein weiterer kritischer Aspekt, der sicherstellt, dass Daten sicher und effizient abgelegt werden.
|
|
||||||
|
|
||||||
### Dateispeicherung
|
|
||||||
|
|
||||||
- **Verzeichnisstruktur**: Eine gut organisierte Verzeichnisstruktur ist entscheidend für die effiziente Speicherung und den schnellen Zugriff auf Dateien. Es wird empfohlen, Dateien in thematisch geordneten Unterverzeichnissen zu speichern.
|
|
||||||
|
|
||||||
- **Datenbankintegration**: In einigen Fällen kann es notwendig sein, Dateien in einer Datenbank zu speichern, insbesondere wenn Metadaten oder eine hohe Zugriffsgeschwindigkeit erforderlich sind. Das System kann Datenbanklösungen wie PostgreSQL oder MongoDB integrieren, um Dateien und ihre Metadaten zu verwalten.
|
|
||||||
|
|
||||||
- **Sicherheitsaspekte**: Bei der Speicherung von Dateien müssen Sicherheitsaspekte berücksichtigt werden, insbesondere wenn es sich um sensible Daten handelt. Dies umfasst die Verschlüsselung von Dateien und die Implementierung von Zugriffsberechtigungen.
|
|
||||||
|
|
||||||
- **Backup und Wiederherstellung**: Regelmäßige Backups sind unerlässlich, um Datenverlust zu vermeiden. Das System sollte über Mechanismen zur automatisierten Sicherung und Wiederherstellung von Dateien verfügen.
|
|
||||||
|
|
||||||
Durch die Beachtung dieser Richtlinien und Praktiken wird sichergestellt, dass das Agent Chat System Dateien effizient und sicher handhabt und speichert, was zu einem reibungslosen Betrieb und einer hohen Zuverlässigkeit des Systems beiträgt.
|
|
||||||
|
|
||||||
# Workflow Management
|
|
||||||
|
|
||||||
In diesem Abschnitt des "Agent Chat System Handbook" wird das Workflow-Management detailliert beschrieben. Das Ziel ist es, den technischen Benutzern ein umfassendes Verständnis für die Erstellung und Ausführung von Workflows innerhalb des Agent Chat Systems zu vermitteln.
|
|
||||||
|
|
||||||
## Inhaltsverzeichnis
|
|
||||||
1. [Erstellung von Workflows](#erstellung-von-workflows)
|
|
||||||
2. [Ausführung von Workflows](#ausführung-von-workflows)
|
|
||||||
|
|
||||||
## Erstellung von Workflows
|
|
||||||
|
|
||||||
Die Erstellung von Workflows ist ein zentraler Bestandteil des Workflow-Managements im Agent Chat System. Ein Workflow definiert eine Abfolge von Schritten, die automatisiert oder manuell ausgeführt werden können, um spezifische Aufgaben oder Prozesse zu steuern.
|
|
||||||
|
|
||||||
### Schritte zur Erstellung eines Workflows
|
|
||||||
|
|
||||||
1. **Identifikation der Anforderungen**: Bestimmen Sie die spezifischen Anforderungen und Ziele des Workflows. Dies könnte die Automatisierung von Kundenanfragen oder die Verwaltung von Support-Tickets umfassen.
|
|
||||||
|
|
||||||
2. **Definition der Schritte**: Listen Sie die einzelnen Schritte auf, die zur Erreichung der Workflow-Ziele erforderlich sind. Jeder Schritt sollte klar definiert und in einer logischen Reihenfolge angeordnet sein.
|
|
||||||
|
|
||||||
3. **Konfiguration der Aktionen**: Weisen Sie jedem Schritt spezifische Aktionen zu. Diese Aktionen könnten API-Aufrufe, Datenbankabfragen oder Benachrichtigungen umfassen.
|
|
||||||
|
|
||||||
4. **Erstellung von Bedingungen**: Definieren Sie Bedingungen, die den Fluss des Workflows steuern. Bedingungen können auf Ereignissen, Datenwerten oder Benutzerinteraktionen basieren.
|
|
||||||
|
|
||||||
5. **Testen des Workflows**: Vor der Implementierung sollte der Workflow in einer Testumgebung ausgeführt werden, um sicherzustellen, dass alle Schritte korrekt funktionieren.
|
|
||||||
|
|
||||||
6. **Dokumentation**: Dokumentieren Sie den Workflow umfassend, einschließlich der Ziele, Schritte, Bedingungen und erwarteten Ergebnisse.
|
|
||||||
|
|
||||||
### Beispiel
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
- name: "Kundenanfrage-Workflow"
|
|
||||||
steps:
|
|
||||||
- step: "Anfrage erhalten"
|
|
||||||
action: "API-Aufruf"
|
|
||||||
- step: "Anfrage analysieren"
|
|
||||||
action: "AI-Analyse"
|
|
||||||
- step: "Antwort generieren"
|
|
||||||
action: "Textgenerierung"
|
|
||||||
- step: "Antwort senden"
|
|
||||||
action: "Benachrichtigung"
|
|
||||||
conditions:
|
|
||||||
- if: "Anfrage enthält 'dringend'"
|
|
||||||
then: "Priorität hochsetzen"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Ausführung von Workflows
|
|
||||||
|
|
||||||
Die Ausführung von Workflows ist der Prozess, bei dem die definierten Schritte eines Workflows in der Praxis umgesetzt werden. Dies kann manuell durch einen Benutzer oder automatisch durch das System erfolgen.
|
|
||||||
|
|
||||||
### Schritte zur Ausführung eines Workflows
|
|
||||||
|
|
||||||
1. **Initiierung**: Der Workflow wird entweder durch ein Ereignis, eine Benutzeraktion oder einen Zeitplan initiiert.
|
|
||||||
|
|
||||||
2. **Verarbeitung der Schritte**: Jeder Schritt des Workflows wird in der festgelegten Reihenfolge ausgeführt. Das System überwacht den Fortschritt und stellt sicher, dass alle Bedingungen erfüllt sind, bevor zum nächsten Schritt übergegangen wird.
|
|
||||||
|
|
||||||
3. **Überwachung und Protokollierung**: Während der Ausführung werden alle Aktionen und Ergebnisse protokolliert. Dies ermöglicht eine spätere Analyse und Fehlerbehebung.
|
|
||||||
|
|
||||||
4. **Fehlerbehandlung**: Bei Auftreten eines Fehlers wird der Workflow entweder pausiert oder abgebrochen, je nach Konfiguration. Fehlerprotokolle werden erstellt, um die Ursache zu identifizieren und zu beheben.
|
|
||||||
|
|
||||||
5. **Abschluss**: Nach erfolgreicher Ausführung aller Schritte wird der Workflow abgeschlossen und eine Zusammenfassung der Ergebnisse erstellt.
|
|
||||||
|
|
||||||
### Beispiel
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"workflow_id": "12345",
|
|
||||||
"status": "in_progress",
|
|
||||||
"current_step": "Anfrage analysieren",
|
|
||||||
"logs": [
|
|
||||||
{"timestamp": "2023-10-01T10:00:00Z", "message": "Anfrage erhalten"},
|
|
||||||
{"timestamp": "2023-10-01T10:01:00Z", "message": "Anfrage analysieren gestartet"}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Durch die sorgfältige Erstellung und Ausführung von Workflows können Unternehmen die Effizienz und Genauigkeit ihrer Prozesse erheblich verbessern. Das Agent Chat System bietet die Flexibilität und Kontrolle, die erforderlich sind, um komplexe Workflows effektiv zu verwalten.
|
|
||||||
|
|
||||||
# AI Integration
|
|
||||||
|
|
||||||
In diesem Abschnitt wird die Integration von Künstlicher Intelligenz (KI) in das Agent Chat System detailliert beschrieben. Der Fokus liegt auf den verwendeten KI-Modellen und den Integrationspunkten innerhalb des Systems. Diese Informationen sind entscheidend für das Verständnis der technischen Architektur und der Funktionsweise der KI-Komponenten.
|
|
||||||
|
|
||||||
## AI Models
|
|
||||||
|
|
||||||
### Verwendete KI-Modelle
|
|
||||||
|
|
||||||
Das Agent Chat System nutzt fortschrittliche KI-Modelle, um die Interaktion zwischen Agenten und Nutzern zu optimieren. Diese Modelle sind darauf ausgelegt, natürliche Sprachverarbeitung (NLP) zu unterstützen und kontextbezogene Antworten zu generieren. Die wichtigsten Modelle umfassen:
|
|
||||||
|
|
||||||
- **GPT-3**: Ein leistungsstarkes Sprachmodell, das für die Generierung von menschenähnlichen Texten verwendet wird. Es ist in der Lage, komplexe Anfragen zu verstehen und relevante Antworten zu liefern.
|
|
||||||
- **BERT**: Ein Modell, das für Aufgaben des Sprachverständnisses eingesetzt wird, insbesondere bei der Analyse von Benutzeranfragen und der Extraktion von Schlüsselinformationen.
|
|
||||||
- **Custom Sentiment Analysis Model**: Ein speziell entwickeltes Modell zur Analyse der Stimmung in Benutzeranfragen, um die Reaktionen der Agenten entsprechend anzupassen.
|
|
||||||
|
|
||||||
Diese Modelle werden kontinuierlich aktualisiert und optimiert, um die Effizienz und Genauigkeit der Interaktionen zu verbessern.
|
|
||||||
|
|
||||||
## Integration
|
|
||||||
|
|
||||||
### Integrationspunkte
|
|
||||||
|
|
||||||
Die Integration der KI-Modelle erfolgt an mehreren strategischen Punkten innerhalb des Agent Chat Systems. Diese Integrationspunkte sind entscheidend für die nahtlose Funktionalität und umfassen:
|
|
||||||
|
|
||||||
- **Anfrageverarbeitung**: Bei der Eingabe einer Benutzeranfrage wird diese zunächst durch das NLP-Modul geleitet, das die Anfrage analysiert und an das entsprechende KI-Modell weiterleitet.
|
|
||||||
- **Antwortgenerierung**: Die Antworten werden durch das GPT-3-Modell generiert und anschließend durch das Sentiment Analysis Model überprüft, um sicherzustellen, dass die Antwort dem emotionalen Kontext des Benutzers entspricht.
|
|
||||||
- **Datenanalyse**: Die gesammelten Daten aus den Interaktionen werden durch BERT analysiert, um Muster und Trends zu identifizieren, die zur Verbesserung der Systemleistung beitragen können.
|
|
||||||
- **Feedback-Schleife**: Eine kontinuierliche Feedback-Schleife ermöglicht es, die Modelle basierend auf Benutzerinteraktionen und Agenten-Feedback zu verfeinern und anzupassen.
|
|
||||||
|
|
||||||
### Technische Implementierung
|
|
||||||
|
|
||||||
Die Implementierung der KI-Integration erfolgt über spezialisierte APIs, die in die FastAPI-Anwendung eingebettet sind. Diese APIs ermöglichen eine effiziente Kommunikation zwischen den verschiedenen Modulen und den KI-Modellen. Die Integration ist so gestaltet, dass sie skalierbar und erweiterbar ist, um zukünftige Anforderungen und technologische Fortschritte zu berücksichtigen.
|
|
||||||
|
|
||||||
Durch die sorgfältige Auswahl und Integration dieser KI-Modelle wird sichergestellt, dass das Agent Chat System nicht nur effizient, sondern auch flexibel und anpassungsfähig bleibt, um den sich ständig ändernden Anforderungen der Benutzer gerecht zu werden.
|
|
||||||
|
|
||||||
## Authentication
|
|
||||||
|
|
||||||
```md
|
|
||||||
# Authentication
|
|
||||||
|
|
||||||
In diesem Abschnitt des "Agent Chat System Handbook" wird das Authentifizierungssystem detailliert beschrieben. Die Authentifizierung ist ein kritischer Bestandteil des Systems, der sicherstellt, dass nur autorisierte Benutzer Zugriff auf die Anwendung und ihre Funktionen haben. Wir werden die verschiedenen Authentifizierungsmethoden sowie die Sicherheitsmaßnahmen, die implementiert wurden, um die Integrität und Vertraulichkeit der Benutzerdaten zu gewährleisten, untersuchen.
|
|
||||||
|
|
||||||
## Methoden
|
|
||||||
|
|
||||||
### Authentifizierungsmethoden
|
|
||||||
|
|
||||||
Das Agent Chat System unterstützt mehrere Authentifizierungsmethoden, um Flexibilität und Sicherheit zu bieten. Die wichtigsten Methoden sind:
|
|
||||||
|
|
||||||
1. **Token-basierte Authentifizierung**:
|
|
||||||
- **Beschreibung**: Diese Methode verwendet JSON Web Tokens (JWT), um Benutzer zu authentifizieren. Nach erfolgreicher Anmeldung erhält der Benutzer ein Token, das bei jeder Anfrage an den Server gesendet wird.
|
|
||||||
- **Vorteile**: Erhöhte Sicherheit durch zeitlich begrenzte Token und die Möglichkeit, Token zu widerrufen.
|
|
||||||
- **Implementierung**: Das Token wird im Header der HTTP-Anfrage übermittelt und vom Server validiert.
|
|
||||||
|
|
||||||
2. **OAuth 2.0**:
|
|
||||||
- **Beschreibung**: OAuth 2.0 ist ein weit verbreitetes Protokoll, das es Benutzern ermöglicht, sich mit ihren bestehenden Konten von Drittanbietern (z.B. Google, Facebook) anzumelden.
|
|
||||||
- **Vorteile**: Benutzerfreundlichkeit und erhöhte Sicherheit, da keine Passwörter direkt im System gespeichert werden müssen.
|
|
||||||
- **Implementierung**: Die Anwendung leitet den Benutzer zur Authentifizierungsseite des Drittanbieters weiter und erhält nach erfolgreicher Authentifizierung ein Zugriffstoken.
|
|
||||||
|
|
||||||
3. **Zwei-Faktor-Authentifizierung (2FA)**:
|
|
||||||
- **Beschreibung**: Diese Methode fügt eine zusätzliche Sicherheitsebene hinzu, indem sie einen zweiten Authentifizierungsfaktor erfordert, z.B. einen SMS-Code oder eine Authentifizierungs-App.
|
|
||||||
- **Vorteile**: Erhöhte Sicherheit durch die Kombination von etwas, das der Benutzer kennt (Passwort), und etwas, das der Benutzer hat (zweiter Faktor).
|
|
||||||
- **Implementierung**: Nach der Eingabe des Passworts wird der Benutzer aufgefordert, den zweiten Faktor einzugeben, bevor der Zugriff gewährt wird.
|
|
||||||
|
|
||||||
## Sicherheit
|
|
||||||
|
|
||||||
### Sicherheitsmaßnahmen
|
|
||||||
|
|
||||||
Um die Sicherheit der Authentifizierung im Agent Chat System zu gewährleisten, wurden mehrere Maßnahmen implementiert:
|
|
||||||
|
|
||||||
1. **Datenverschlüsselung**:
|
|
||||||
- Alle sensiblen Daten, einschließlich Passwörter und Token, werden mit starken Verschlüsselungsalgorithmen gespeichert und übertragen. Dies schützt die Daten vor unbefugtem Zugriff und Manipulation.
|
|
||||||
|
|
||||||
2. **Sichere Passwortspeicherung**:
|
|
||||||
- Passwörter werden nicht im Klartext gespeichert. Stattdessen werden sie mit einem sicheren Hashing-Algorithmus (z.B. bcrypt) gehasht, bevor sie in der Datenbank gespeichert werden.
|
|
||||||
|
|
||||||
3. **Regelmäßige Sicherheitsüberprüfungen**:
|
|
||||||
- Das System wird regelmäßig auf Sicherheitslücken überprüft, und es werden Patches und Updates angewendet, um bekannte Schwachstellen zu beheben.
|
|
||||||
|
|
||||||
4. **Sitzungsverwaltung**:
|
|
||||||
- Sitzungen werden überwacht und bei Inaktivität automatisch beendet, um das Risiko von Sitzungsentführungen (Session Hijacking) zu minimieren.
|
|
||||||
|
|
||||||
5. **Protokollierung und Überwachung**:
|
|
||||||
- Alle Authentifizierungsversuche und sicherheitsrelevanten Ereignisse werden protokolliert und überwacht, um verdächtige Aktivitäten frühzeitig zu erkennen und darauf zu reagieren.
|
|
||||||
|
|
||||||
Durch die Implementierung dieser Methoden und Sicherheitsmaßnahmen stellt das Agent Chat System sicher, dass die Authentifizierung sowohl benutzerfreundlich als auch sicher ist, und schützt die Integrität und Vertraulichkeit der Benutzerdaten effektiv.
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## Conclusion
|
|
||||||
|
|
||||||
```md
|
|
||||||
## Fazit
|
|
||||||
|
|
||||||
In diesem Handbuch zum "Agent Chat System" haben wir die wesentlichen Komponenten und Prozesse detailliert beschrieben, die für den erfolgreichen Einsatz und die Verwaltung eines Chat-Agenten-Systems erforderlich sind. Die behandelten Themen umfassen die Einrichtung der FastAPI-Anwendung, das Benutzer- und Mandatsmanagement, die Attributverwaltung, das Prompt-Management, Dateioperationen, das Workflow-Management, die Integration von Künstlicher Intelligenz sowie die Authentifizierung.
|
|
||||||
|
|
||||||
### Zusammenfassung der Hauptpunkte
|
|
||||||
|
|
||||||
1. **FastAPI Setup**: Wir haben die Schritte zur Einrichtung und Konfiguration der FastAPI-Anwendung erläutert, um eine stabile Grundlage für das Agentensystem zu schaffen.
|
|
||||||
|
|
||||||
2. **Benutzerverwaltung**: Die Verwaltung von Benutzern und deren Rollen ist entscheidend für die Sicherheit und Effizienz des Systems. Wir haben die Methoden zur Erstellung, Aktualisierung und Löschung von Benutzerkonten behandelt.
|
|
||||||
|
|
||||||
3. **AI-Integration**: Die Integration von KI-Technologien ermöglicht es dem System, intelligentere und kontextbezogene Antworten zu generieren. Wir haben die Implementierung und Optimierung dieser Funktionalität beschrieben.
|
|
||||||
|
|
||||||
4. **Authentifizierung**: Sicherheit ist ein zentrales Element jeder Anwendung. Wir haben die Authentifizierungsmechanismen und deren Implementierung im System detailliert dargestellt.
|
|
||||||
|
|
||||||
### Empfehlungen und nächste Schritte
|
|
||||||
|
|
||||||
- **Regelmäßige Updates**: Stellen Sie sicher, dass alle Systemkomponenten regelmäßig aktualisiert werden, um Sicherheitslücken zu schließen und die Leistung zu optimieren.
|
|
||||||
- **Erweiterung der AI-Funktionalitäten**: Erwägen Sie die Implementierung fortschrittlicherer KI-Modelle, um die Interaktionsqualität weiter zu verbessern.
|
|
||||||
- **Benutzerfeedback einholen**: Nutzen Sie das Feedback der Benutzer, um kontinuierlich Verbesserungen am System vorzunehmen.
|
|
||||||
|
|
||||||
### Bedeutung des Dokuments
|
|
||||||
|
|
||||||
Dieses Handbuch dient als umfassende Ressource für technische Fachleute, die für die Implementierung und Wartung des Agent Chat Systems verantwortlich sind. Es bietet nicht nur eine detaillierte Anleitung zur Einrichtung und Verwaltung des Systems, sondern auch wertvolle Einblicke in die Optimierung der Benutzererfahrung und der Systemleistung. Mit diesem Wissen sind Sie bestens gerüstet, um ein effizientes und sicheres Chat-Agenten-System zu betreiben.
|
|
||||||
|
|
||||||
Wir hoffen, dass dieses Handbuch Ihnen als wertvolle Referenz dient und Sie bei der erfolgreichen Implementierung und Verwaltung Ihres Agent Chat Systems unterstützt.
|
|
||||||
```
|
|
||||||
|
|
@ -175,9 +175,9 @@ class ServiceContainer:
|
||||||
}
|
}
|
||||||
self.methods: Dict[str, MethodBase] = {}
|
self.methods: Dict[str, MethodBase] = {}
|
||||||
self.tasks: Dict[str, AgentTask] = {}
|
self.tasks: Dict[str, AgentTask] = {}
|
||||||
self.prompt_manager = AIPromptManager()
|
self.promptManager = AIPromptManager()
|
||||||
self.task_state_manager = TaskStateManager()
|
self.taskStateManager = TaskStateManager()
|
||||||
self.document_processor = DocumentProcessor()
|
self.documentProcessor = DocumentProcessor()
|
||||||
|
|
||||||
async def execute_task(self, task: AgentTask) -> None:
|
async def execute_task(self, task: AgentTask) -> None:
|
||||||
"""Execute task with improved error handling and timeout"""
|
"""Execute task with improved error handling and timeout"""
|
||||||
|
|
@ -257,7 +257,7 @@ class ServiceContainer:
|
||||||
class AIPromptManager:
|
class AIPromptManager:
|
||||||
"""Manages AI prompts and response validation"""
|
"""Manages AI prompts and response validation"""
|
||||||
|
|
||||||
def generate_prompt(self, context: Dict[str, Any], examples: List[Dict]) -> str:
|
def generatePrompt(self, context: Dict[str, Any], examples: List[Dict]) -> str:
|
||||||
"""Generate a context-aware prompt with few-shot examples"""
|
"""Generate a context-aware prompt with few-shot examples"""
|
||||||
prompt = (
|
prompt = (
|
||||||
f"Task: {context['task']}\n"
|
f"Task: {context['task']}\n"
|
||||||
|
|
@ -269,7 +269,7 @@ class AIPromptManager:
|
||||||
prompt += "Extract the most relevant information for the task above."
|
prompt += "Extract the most relevant information for the task above."
|
||||||
return prompt
|
return prompt
|
||||||
|
|
||||||
def validate_response(self, response: str, schema: Dict) -> bool:
|
def validateResponse(self, response: str, schema: Dict) -> bool:
|
||||||
"""Validate AI response against a schema"""
|
"""Validate AI response against a schema"""
|
||||||
import jsonschema
|
import jsonschema
|
||||||
try:
|
try:
|
||||||
|
|
@ -282,20 +282,20 @@ class TaskStateManager:
|
||||||
"""Manages task state and retry tracking"""
|
"""Manages task state and retry tracking"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.task_states = {}
|
self.taskStates = {}
|
||||||
|
|
||||||
def track_state(self, task: AgentTask):
|
def trackState(self, task: AgentTask):
|
||||||
"""Track task state"""
|
"""Track task state"""
|
||||||
self.task_states[task.id] = {
|
self.taskStates[task.id] = {
|
||||||
"status": task.status,
|
"status": task.status,
|
||||||
"retryState": getattr(task, "retryState", {}),
|
"retryState": getattr(task, "retryState", {}),
|
||||||
"history": getattr(task, "history", [])
|
"history": getattr(task, "history", [])
|
||||||
}
|
}
|
||||||
|
|
||||||
def can_retry(self, task: AgentTask, method: str) -> bool:
|
def canRetry(self, task: AgentTask, method: str) -> bool:
|
||||||
"""Check if task can be retried"""
|
"""Check if task can be retried"""
|
||||||
retry_state = self.task_states[task.id].get("retryState", {})
|
retryState = self.taskStates[task.id].get("retryState", {})
|
||||||
return retry_state.get(method, 0) < getattr(task, "retryMax", 3)
|
return retryState.get(method, 0) < getattr(task, "retryMax", 3)
|
||||||
|
|
||||||
class DocumentContext(BaseModel):
|
class DocumentContext(BaseModel):
|
||||||
"""Model for document context"""
|
"""Model for document context"""
|
||||||
|
|
@ -748,7 +748,7 @@ async def _processUserInput(self, input: str, documents: List[str]) -> str:
|
||||||
examples = [
|
examples = [
|
||||||
{"input": "Search documents", "output": "Extract relevant information"}
|
{"input": "Search documents", "output": "Extract relevant information"}
|
||||||
]
|
]
|
||||||
prompt = self.service.prompt_manager.generate_prompt(context, examples)
|
prompt = self.service.promptManager.generatePrompt(context, examples)
|
||||||
|
|
||||||
return await self.service.model['callAiBasic'](
|
return await self.service.model['callAiBasic'](
|
||||||
f"""Analyze user request and documents:
|
f"""Analyze user request and documents:
|
||||||
|
|
@ -787,7 +787,7 @@ async def _analyzeTaskResults(self, task: AgentTask) -> str:
|
||||||
examples = [
|
examples = [
|
||||||
{"input": "Task completed", "output": "Generate next steps"}
|
{"input": "Task completed", "output": "Generate next steps"}
|
||||||
]
|
]
|
||||||
prompt = self.service.prompt_manager.generate_prompt(context, examples)
|
prompt = self.service.promptManager.generatePrompt(context, examples)
|
||||||
|
|
||||||
return await self.service.model['callAiBasic'](
|
return await self.service.model['callAiBasic'](
|
||||||
f"""Analyze task results and determine next steps:
|
f"""Analyze task results and determine next steps:
|
||||||
|
|
@ -826,7 +826,7 @@ async def _processTaskResults(self, task: AgentTask) -> str:
|
||||||
examples = [
|
examples = [
|
||||||
{"input": "Task results", "output": "Generate summary"}
|
{"input": "Task results", "output": "Generate summary"}
|
||||||
]
|
]
|
||||||
prompt = self.service.prompt_manager.generate_prompt(context, examples)
|
prompt = self.service.promptManager.generatePrompt(context, examples)
|
||||||
|
|
||||||
return await self.service.model['callAiBasic'](
|
return await self.service.model['callAiBasic'](
|
||||||
f"""Process task results and generate feedback:
|
f"""Process task results and generate feedback:
|
||||||
Loading…
Reference in a new issue