commit
460bb70ef7
38 changed files with 3055 additions and 4529 deletions
27
config.ini
27
config.ini
|
|
@ -36,20 +36,6 @@ Security_LOCK_DURATION_MINUTES = 30
|
|||
# Content Neutralization configuration
|
||||
Content_Neutralization_ENABLED = False
|
||||
|
||||
# Agent Webcrawler configuration
|
||||
Agent_Webcrawler_SERPAPI_ENGINE = google
|
||||
Agent_Webcrawler_SERPAPI_APIKEY = 7304bd34bca767aa52dd3233297e30a9edc0abc57871f702b3f8238b9d3ee7bc
|
||||
Agent_Webcrawler_SERPAPI_MAX_URLS = 3
|
||||
Agent_Webcrawler_SERPAPI_MAX_SEARCH_KEYWORDS = 3
|
||||
Agent_Webcrawler_SERPAPI_MAX_SEARCH_RESULTS = 5
|
||||
Agent_Webcrawler_SERPAPI_TIMEOUT = 10
|
||||
Agent_Webcrawler_SERPAPI_USER_AGENT = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36
|
||||
|
||||
# Agent Coder configuration
|
||||
Agent_Coder_INSTALL_TIMEOUT = 180
|
||||
Agent_Coder_EXECUTION_TIMEOUT = 60
|
||||
Agent_Coder_EXECUTION_RETRY = 5
|
||||
|
||||
# Agent Mail configuration
|
||||
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||
Service_MSFT_CLIENT_SECRET = Kxf8Q~2lJIteZ~JaI32kMf1lfaWKATqxXiNiFbzV
|
||||
|
|
@ -58,3 +44,16 @@ Service_MSFT_TENANT_ID = common
|
|||
# Google Service configuration
|
||||
Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
||||
Service_GOOGLE_CLIENT_SECRET = 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
|
||||
|
||||
# Tavily Web Search configuration
|
||||
Connector_WebTavily_API_KEY = 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
|
||||
|
||||
# Web Search configuration
|
||||
Web_Search_MAX_QUERY_LENGTH = 400
|
||||
Web_Search_MAX_RESULTS = 20
|
||||
Web_Search_MIN_RESULTS = 1
|
||||
|
||||
# Web Crawl configuration
|
||||
Web_Crawl_TIMEOUT = 30
|
||||
Web_Crawl_MAX_RETRIES = 3
|
||||
Web_Crawl_RETRY_DELAY = 2
|
||||
|
|
@ -341,7 +341,7 @@ class DocumentExtraction:
|
|||
|
||||
|
||||
# Use documentUtility for mime type
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
|
||||
return [ContentItem(
|
||||
label="main",
|
||||
data=content,
|
||||
|
|
@ -360,7 +360,7 @@ class DocumentExtraction:
|
|||
"""Process CSV document with robust encoding detection"""
|
||||
try:
|
||||
content = self._robustTextDecode(fileData, fileName)
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
|
||||
return [ContentItem(
|
||||
label="main",
|
||||
data=content,
|
||||
|
|
@ -380,7 +380,7 @@ class DocumentExtraction:
|
|||
try:
|
||||
content = self._robustTextDecode(fileData, fileName)
|
||||
jsonData = json.loads(content)
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
|
||||
return [ContentItem(
|
||||
label="main",
|
||||
data=content,
|
||||
|
|
@ -399,7 +399,7 @@ class DocumentExtraction:
|
|||
"""Process XML document with robust encoding detection"""
|
||||
try:
|
||||
content = self._robustTextDecode(fileData, fileName)
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
|
||||
return [ContentItem(
|
||||
label="main",
|
||||
data=content,
|
||||
|
|
@ -418,7 +418,7 @@ class DocumentExtraction:
|
|||
"""Process HTML document with robust encoding detection"""
|
||||
try:
|
||||
content = self._robustTextDecode(fileData, fileName)
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
|
||||
return [ContentItem(
|
||||
label="main",
|
||||
data=content,
|
||||
|
|
@ -512,7 +512,7 @@ class DocumentExtraction:
|
|||
# Combine all meaningful content
|
||||
final_content = "\n".join(meaningful_content)
|
||||
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
|
||||
return [ContentItem(
|
||||
label="svg_content",
|
||||
data=final_content,
|
||||
|
|
|
|||
|
|
@ -98,26 +98,12 @@ class DocumentGenerator:
|
|||
|
||||
logger.info(f"Document {document_name} has content: {len(content)} characters")
|
||||
|
||||
# Create file in system
|
||||
file_id = self.service.createFile(
|
||||
fileName=document_name,
|
||||
mimeType=mime_type,
|
||||
content=content,
|
||||
base64encoded=False
|
||||
)
|
||||
if not file_id:
|
||||
logger.error(f"Failed to create file for document {document_name}")
|
||||
continue
|
||||
|
||||
logger.info(f"Created file with ID: {file_id}")
|
||||
|
||||
# Create document object using existing file ID
|
||||
# Create document with file in one step
|
||||
document = self.service.createDocument(
|
||||
fileName=document_name,
|
||||
mimeType=mime_type,
|
||||
content=content,
|
||||
base64encoded=False,
|
||||
existing_file_id=file_id
|
||||
base64encoded=False
|
||||
)
|
||||
if document:
|
||||
# Set workflow context on the document if possible
|
||||
|
|
|
|||
|
|
@ -1,51 +1,160 @@
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def getFileExtension(fileName: str) -> str:
    """Extract the file extension from fileName (without dot, lowercased).

    Only the final path component is inspected, so a dot inside a directory
    name (e.g. ``"release.v2/README"``) is not mistaken for an extension.

    Args:
        fileName: File name or path. May be empty or None.

    Returns:
        str: The text after the last dot of the final path component,
        lowercased, or an empty string when there is no dot.
    """
    if not fileName:
        return ''

    # Treat both "/" and "\" as separators so Windows-style paths work too.
    baseName = fileName.replace('\\', '/').rsplit('/', 1)[-1]

    if '.' in baseName:
        return baseName.rsplit('.', 1)[-1].lower()
    return ''
|
||||
|
||||
def getMimeTypeFromExtension(extension: str, service=None) -> str:
    """
    Get MIME type based on file extension.
    This method consolidates MIME type detection from extension.

    Args:
        extension: File extension (with or without dot, any letter case).
            None or an empty string yields the generic binary type.
        service: Optional legacy hook. When given and it exposes a callable
            ``getMimeTypeFromExtension``, the lookup is delegated to it with
            the extension exactly as passed in. Callers that pass only
            ``extension`` always get the built-in table.

    Returns:
        str: MIME type for the extension, or 'application/octet-stream'
        when the extension is unknown.
    """
    # Backward compatibility: older callers pass a service that owns the mapping.
    delegate = getattr(service, 'getMimeTypeFromExtension', None) if service else None
    if callable(delegate):
        return delegate(extension)

    if not extension:
        return 'application/octet-stream'

    # Normalize: trim whitespace, lowercase, then drop one leading dot.
    normalized = extension.strip().lower()
    if normalized.startswith('.'):
        normalized = normalized[1:]

    # Map extensions to MIME types
    mime_types = {
        'txt': 'text/plain',
        'md': 'text/markdown',
        'csv': 'text/csv',
        'json': 'application/json',
        'xml': 'application/xml',
        'html': 'text/html',
        'htm': 'text/html',
        'css': 'text/css',
        'js': 'application/javascript',
        'py': 'text/x-python',
        'pdf': 'application/pdf',
        'doc': 'application/msword',
        'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'xls': 'application/vnd.ms-excel',
        'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'ppt': 'application/vnd.ms-powerpoint',
        'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        'svg': 'image/svg+xml',
        'jpg': 'image/jpeg',
        'jpeg': 'image/jpeg',
        'png': 'image/png',
        'gif': 'image/gif',
        'bmp': 'image/bmp',
        'webp': 'image/webp',
        'zip': 'application/zip',
        'rar': 'application/x-rar-compressed',
        '7z': 'application/x-7z-compressed',
        'tar': 'application/x-tar',
        'gz': 'application/gzip',
    }
    return mime_types.get(normalized, 'application/octet-stream')
|
||||
|
||||
def detectContentTypeFromData(fileData: bytes, fileName: str) -> str:
    """
    Detect content type from file data and fileName.

    The file extension is checked first; only when it is missing or unknown
    are the leading bytes of the data inspected for known signatures.

    Args:
        fileData: Raw file data as bytes. May be empty or None.
        fileName: Name of the file. May be empty or None.

    Returns:
        str: Detected MIME type, or 'application/octet-stream' when nothing
        matches or detection fails.
    """
    try:
        # Check file extension first
        ext = os.path.splitext(fileName)[1].lower() if fileName else ''
        if ext:
            # Map common extensions to MIME types
            # NOTE(review): '.py' maps to 'application/x-python' here while
            # getMimeTypeFromExtension uses 'text/x-python'. Left unchanged
            # because callers may compare against this value; confirm which
            # one is intended.
            extToMime = {
                '.txt': 'text/plain',
                '.md': 'text/markdown',
                '.csv': 'text/csv',
                '.json': 'application/json',
                '.xml': 'application/xml',
                '.js': 'application/javascript',
                '.py': 'application/x-python',
                '.svg': 'image/svg+xml',
                '.jpg': 'image/jpeg',
                '.jpeg': 'image/jpeg',
                '.png': 'image/png',
                '.gif': 'image/gif',
                '.bmp': 'image/bmp',
                '.webp': 'image/webp',
                '.pdf': 'application/pdf',
                '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                '.doc': 'application/msword',
                '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                '.xls': 'application/vnd.ms-excel',
                '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
                '.ppt': 'application/vnd.ms-powerpoint',
                '.html': 'text/html',
                '.htm': 'text/html',
                '.css': 'text/css',
                '.zip': 'application/zip',
                '.rar': 'application/x-rar-compressed',
                '.7z': 'application/x-7z-compressed',
                '.tar': 'application/x-tar',
                '.gz': 'application/gzip',
            }
            if ext in extToMime:
                return extToMime[ext]

        # Nothing to sniff: avoid going through the error path for empty input.
        if not fileData:
            return 'application/octet-stream'

        # Try to detect from content
        if fileData.startswith(b'%PDF'):
            return 'application/pdf'
        if fileData.startswith(b'PK\x03\x04'):
            # ZIP container; docx/xlsx/pptx share this signature and are not
            # told apart here.
            return 'application/zip'
        if fileData.startswith(b'<'):
            # Markup: errors='ignore' means decode cannot raise, so no
            # try/except is needed around it.
            lowered = fileData.decode('utf-8', errors='ignore').lower()
            if '<svg' in lowered:
                return 'image/svg+xml'
            if '<html' in lowered:
                return 'text/html'
            return 'application/xml'
        if fileData.startswith(b'\x89PNG\r\n\x1a\n'):
            return 'image/png'
        if fileData.startswith(b'\xff\xd8\xff'):
            return 'image/jpeg'
        if fileData.startswith((b'GIF87a', b'GIF89a')):
            return 'image/gif'
        if fileData.startswith(b'BM'):
            return 'image/bmp'
        if fileData.startswith(b'RIFF') and fileData[8:12] == b'WEBP':
            return 'image/webp'

        return 'application/octet-stream'

    except Exception as e:
        # Best effort: detection must never break the caller.
        logger.error(f"Error detecting content type from data: {str(e)}")
        return 'application/octet-stream'
|
||||
|
||||
def detectMimeTypeFromData(file_bytes: bytes, fileName: str, service=None) -> str:
    """Detect MIME type from file bytes and fileName using a service if provided.

    Args:
        file_bytes: Raw file data as bytes.
        fileName: Name of the file.
        service: Optional object exposing ``detectContentTypeFromData``. Its
            answer is used unless it is empty or the generic
            'application/octet-stream'.

    Returns:
        str: Detected MIME type, or 'application/octet-stream' when detection
        fails.
    """
    try:
        detector = getattr(service, 'detectContentTypeFromData', None) if service else None
        if callable(detector):
            try:
                detected = detector(file_bytes, fileName)
            except Exception as e:
                # A failing service must not prevent the local fallback below.
                logger.warning(f"Service MIME type detection failed for {fileName}: {str(e)}")
                detected = None
            if detected and detected != 'application/octet-stream':
                return detected

        # Fallback: use our consolidated function
        return detectContentTypeFromData(file_bytes, fileName)

    except Exception as e:
        logger.warning(f"Error in MIME type detection for {fileName}: {str(e)}")
        return 'application/octet-stream'
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ class HandlingTasks:
|
|||
# Log the full task planning prompt being sent to AI for debugging
|
||||
logger.info("=== TASK PLANNING PROMPT SENT TO AI ===")
|
||||
logger.info(f"User Input: {userInput}")
|
||||
logger.info(f"Available Documents: {len(available_docs) if available_docs else 0}")
|
||||
logger.info(f"Available Documents: {available_docs}")
|
||||
logger.info("=== FULL TASK PLANNING PROMPT ===")
|
||||
logger.info(task_planning_prompt)
|
||||
logger.info("=== END TASK PLANNING PROMPT ===")
|
||||
|
|
@ -192,7 +192,8 @@ class HandlingTasks:
|
|||
|
||||
task_plan = TaskPlan(
|
||||
overview=task_plan_dict.get('overview', ''),
|
||||
tasks=tasks
|
||||
tasks=tasks,
|
||||
userMessage=task_plan_dict.get('userMessage', '')
|
||||
)
|
||||
|
||||
# Set workflow totals for progress tracking
|
||||
|
|
@ -217,24 +218,19 @@ class HandlingTasks:
|
|||
"""Create a chat message containing the task plan with user-friendly messages"""
|
||||
try:
|
||||
# Build task plan summary
|
||||
task_summary = f"📋 **Task Plan Generated**\n\n"
|
||||
task_summary += f"**Overview:** {task_plan.overview}\n\n"
|
||||
task_summary += f"**Total Tasks:** {len(task_plan.tasks)}\n\n"
|
||||
|
||||
# Add each task with its user message
|
||||
for i, task in enumerate(task_plan.tasks):
|
||||
task_summary += f"**Task {i+1}:** {task.objective}\n"
|
||||
if task.userMessage:
|
||||
task_summary += f" 💬 {task.userMessage}\n"
|
||||
if task.success_criteria:
|
||||
criteria_str = ', '.join(task.success_criteria)
|
||||
task_summary += f" ✅ Success Criteria: {criteria_str}\n"
|
||||
task_summary += "\n"
|
||||
|
||||
task_summary = f"📋 **Task Plan**\n\n"
|
||||
|
||||
# Get overall user message from task plan if available
|
||||
overall_message = task_plan.userMessage
|
||||
if overall_message:
|
||||
task_summary += f"**Plan Summary:** {overall_message}\n\n"
|
||||
task_summary += f"{overall_message}\n\n"
|
||||
|
||||
# Add each task with its user message
|
||||
for i, task in enumerate(task_plan.tasks):
|
||||
if task.userMessage:
|
||||
task_summary += f"💬 {task.userMessage}\n"
|
||||
task_summary += "\n"
|
||||
|
||||
|
||||
# Create workflow message
|
||||
message_data = {
|
||||
|
|
@ -269,76 +265,6 @@ class HandlingTasks:
|
|||
except Exception as e:
|
||||
logger.error(f"Error creating task plan message: {str(e)}")
|
||||
|
||||
async def createDocumentContextMessage(self, documents: List, workflow):
|
||||
"""Create a chat message with document context and workflow labeling"""
|
||||
try:
|
||||
# Get current workflow context and stats
|
||||
workflow_context = self.service.getWorkflowContext()
|
||||
workflow_stats = self.service.getWorkflowStats()
|
||||
|
||||
# Create a simple document context message without AI dependency
|
||||
message_text = f"📄 **Document Context**\n\n"
|
||||
message_text += f"**Total Documents:** {len(documents)}\n\n"
|
||||
|
||||
# Add workflow context information
|
||||
current_round = workflow_context.get('currentRound', 0)
|
||||
current_task = workflow_context.get('currentTask', 0)
|
||||
total_tasks = workflow_stats.get('totalTasks', 0)
|
||||
current_action = workflow_context.get('currentAction', 0)
|
||||
total_actions = workflow_stats.get('totalActions', 0)
|
||||
|
||||
message_text += f"**Workflow Context:**\n"
|
||||
message_text += f"- Round: {current_round}\n"
|
||||
if total_tasks > 0:
|
||||
message_text += f"- Task: {current_task}/{total_tasks}\n"
|
||||
else:
|
||||
message_text += f"- Task: {current_task}\n"
|
||||
if total_actions > 0:
|
||||
message_text += f"- Action: {current_action}/{total_actions}\n"
|
||||
else:
|
||||
message_text += f"- Action: {current_action}\n"
|
||||
message_text += f"- Status: {workflow_stats.get('workflowStatus', 'unknown')}\n\n"
|
||||
|
||||
# Add document list
|
||||
if documents:
|
||||
message_text += "**Available Documents:**\n"
|
||||
for i, doc in enumerate(documents[:5]): # Show first 5 documents
|
||||
message_text += f"- {doc.fileName if hasattr(doc, 'fileName') else f'Document {i+1}'}\n"
|
||||
if len(documents) > 5:
|
||||
message_text += f"- ... and {len(documents) - 5} more documents\n"
|
||||
message_text += "\n"
|
||||
|
||||
message_text += "Document context information is available for processing."
|
||||
|
||||
# Create workflow message
|
||||
message_data = {
|
||||
"workflowId": workflow.id,
|
||||
"role": "assistant",
|
||||
"message": message_text,
|
||||
"status": "step",
|
||||
"sequenceNr": len(workflow.messages) + 1,
|
||||
"publishedAt": get_utc_timestamp(),
|
||||
"documentsLabel": "document_context",
|
||||
"documents": [], # Empty documents for context message
|
||||
# Add workflow context fields
|
||||
"roundNumber": workflow_context.get('currentRound', 0),
|
||||
"taskNumber": workflow_context.get('currentTask', 0),
|
||||
"actionNumber": workflow_context.get('currentAction', 0),
|
||||
# Add progress status
|
||||
"taskProgress": "pending",
|
||||
"actionProgress": "pending"
|
||||
}
|
||||
|
||||
message = self.chatInterface.createWorkflowMessage(message_data)
|
||||
if message:
|
||||
workflow.messages.append(message)
|
||||
logger.info(f"Document context message created with {len(documents)} documents")
|
||||
else:
|
||||
logger.error("Failed to create document context message")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating document context message: {str(e)}")
|
||||
|
||||
async def generateTaskActions(self, task_step, workflow, previous_results=None, enhanced_context=None) -> List[TaskAction]:
|
||||
"""Generate actions for a given task step."""
|
||||
try:
|
||||
|
|
@ -386,12 +312,8 @@ class HandlingTasks:
|
|||
|
||||
# Log available resources for debugging
|
||||
logger.info("=== AVAILABLE RESOURCES FOR ACTION GENERATION ===")
|
||||
logger.info(f"Available Documents: {len(available_docs) if available_docs else 0}")
|
||||
if available_docs:
|
||||
for i, doc in enumerate(available_docs[:5]): # Show first 5
|
||||
logger.info(f" Doc {i+1}: {doc}")
|
||||
if len(available_docs) > 5:
|
||||
logger.info(f" ... and {len(available_docs) - 5} more documents")
|
||||
logger.info(f"Available Documents: {available_docs}")
|
||||
# Note: available_docs is now a string description, not a list
|
||||
logger.info(f"Available Connections: {len(available_connections) if available_connections else 0}")
|
||||
if available_connections:
|
||||
for i, conn in enumerate(available_connections[:5]): # Show first 5
|
||||
|
|
@ -450,7 +372,7 @@ class HandlingTasks:
|
|||
logger.info(f"Task Step ID: {action_context.task_step.id if action_context.task_step else 'None'}")
|
||||
logger.info(f"Task Step Objective: {action_context.task_step.objective if action_context.task_step else 'None'}")
|
||||
logger.info(f"Workflow ID: {action_context.workflow_id}")
|
||||
logger.info(f"Available Documents Count: {len(action_context.available_documents) if action_context.available_documents else 0}")
|
||||
logger.info(f"Available Documents: {action_context.available_documents or 'No documents available'}")
|
||||
logger.info(f"Available Connections Count: {len(action_context.available_connections) if action_context.available_connections else 0}")
|
||||
logger.info(f"Previous Results Count: {len(action_context.previous_results) if action_context.previous_results else 0}")
|
||||
logger.info(f"Retry Count: {action_context.retry_count}")
|
||||
|
|
@ -546,25 +468,13 @@ class HandlingTasks:
|
|||
|
||||
# Create database log entry for task start in format expected by frontend
|
||||
if task_index is not None:
|
||||
if total_tasks is not None:
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"Executing task {task_index}/{total_tasks}",
|
||||
"type": "info"
|
||||
})
|
||||
else:
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"Executing task {task_index}/?",
|
||||
"type": "info"
|
||||
})
|
||||
|
||||
|
||||
# Create a task start message for the user
|
||||
task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index)
|
||||
task_start_message = {
|
||||
"workflowId": workflow.id,
|
||||
"role": "assistant",
|
||||
"message": f"🚀 Starting Task {task_progress}\n\nObjective: {task_step.objective}",
|
||||
"message": f"🚀 **Task {task_progress}**",
|
||||
"status": "step",
|
||||
"sequenceNr": len(workflow.messages) + 1,
|
||||
"publishedAt": get_utc_timestamp(),
|
||||
|
|
@ -617,11 +527,6 @@ class HandlingTasks:
|
|||
logger.error("No actions defined for task step, aborting task execution")
|
||||
break
|
||||
|
||||
# Create document context message if documents are available
|
||||
available_docs = self.service.getAvailableDocuments(workflow)
|
||||
if available_docs:
|
||||
await self.createDocumentContextMessage(available_docs, workflow)
|
||||
|
||||
action_results = []
|
||||
for action_idx, action in enumerate(actions):
|
||||
# Check workflow status before each action execution
|
||||
|
|
@ -639,18 +544,11 @@ class HandlingTasks:
|
|||
# Log action start in format expected by frontend
|
||||
logger.info(f"Task {task_index} - Starting action {action_number}/{total_actions}")
|
||||
|
||||
# Create database log entry for action start
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"Task {task_index} - Starting action {action_number}/{total_actions}",
|
||||
"type": "info"
|
||||
})
|
||||
|
||||
# Create an action start message for the user
|
||||
action_start_message = {
|
||||
"workflowId": workflow.id,
|
||||
"role": "assistant",
|
||||
"message": f"⚡ Task {task_index} - Action {action_number}/{total_actions}\n\nMethod: {action.execMethod}.{action.execAction}",
|
||||
"message": f"⚡ **Action {action_number}/{total_actions}** (Method {action.execMethod}.{action.execAction})",
|
||||
"status": "step",
|
||||
"sequenceNr": len(workflow.messages) + 1,
|
||||
"publishedAt": get_utc_timestamp(),
|
||||
|
|
@ -694,34 +592,19 @@ class HandlingTasks:
|
|||
if success:
|
||||
logger.info(f"=== TASK {task_index or '?'} COMPLETED SUCCESSFULLY: {task_step.objective} ===")
|
||||
|
||||
# Create database log entry for task completion
|
||||
if total_tasks is not None:
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"🎯 Task {task_index}/{total_tasks} completed",
|
||||
"type": "success"
|
||||
})
|
||||
else:
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"🎯 Task {task_index}/? completed",
|
||||
"type": "success"
|
||||
})
|
||||
|
||||
# Create a task completion message for the user
|
||||
task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index)
|
||||
|
||||
# Enhanced completion message with criteria details
|
||||
completion_message = f"🎯 Task {task_progress} Completed Successfully!\n\nObjective: {task_step.objective}\n\nFeedback: {feedback or 'Task completed successfully'}"
|
||||
completion_message = f"🎯 **Task {task_progress}**\n\n✅ {feedback or 'Task completed successfully'}"
|
||||
|
||||
# Add criteria status if available
|
||||
if hasattr(review_result, 'met_criteria') and review_result.met_criteria:
|
||||
completion_message += f"\n\n✅ **Success Criteria Met:**\n"
|
||||
for criterion in review_result.met_criteria:
|
||||
completion_message += f"• {criterion}\n"
|
||||
completion_message += f"\n• {criterion}"
|
||||
|
||||
if hasattr(review_result, 'quality_score'):
|
||||
completion_message += f"\n📊 **Quality Score:** {review_result.quality_score}/10"
|
||||
completion_message += f"\n📊 Score {review_result.quality_score}/10"
|
||||
|
||||
task_completion_message = {
|
||||
"workflowId": workflow.id,
|
||||
|
|
@ -740,10 +623,6 @@ class HandlingTasks:
|
|||
"taskProgress": "success"
|
||||
}
|
||||
|
||||
# Add user-friendly message if available
|
||||
if task_step.userMessage:
|
||||
task_completion_message["message"] += f"\n\n💬 {task_step.userMessage}"
|
||||
|
||||
message = self.chatInterface.createWorkflowMessage(task_completion_message)
|
||||
if message:
|
||||
workflow.messages.append(message)
|
||||
|
|
@ -824,7 +703,7 @@ class HandlingTasks:
|
|||
retry_message = {
|
||||
"workflowId": workflow.id,
|
||||
"role": "assistant",
|
||||
"message": f"🔄 Task {task_index} requires retry: {review_result.improvements}",
|
||||
"message": f"🔄 **Task {task_index}** needs retry: {review_result.improvements}",
|
||||
"status": "step",
|
||||
"sequenceNr": len(workflow.messages) + 1,
|
||||
"publishedAt": get_utc_timestamp(),
|
||||
|
|
@ -843,19 +722,19 @@ class HandlingTasks:
|
|||
continue
|
||||
else:
|
||||
logger.error(f"=== TASK {task_index or '?'} FAILED: {task_step.objective} after {attempt+1} attempts ===")
|
||||
|
||||
task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index)
|
||||
|
||||
# Create user-facing error message for task failure
|
||||
error_message = f"❌ Task {task_index or '?'} - '{task_step.objective}' failed after {attempt+1} attempts\n\n"
|
||||
error_message += f"Objective: {task_step.objective}\n\n"
|
||||
error_message = f"**Task {task_progress}**\n\n❌ '{task_step.objective}' {attempt+1}x failed\n\n"
|
||||
|
||||
# Add specific error details if available
|
||||
if review_result and hasattr(review_result, 'reason') and review_result.reason:
|
||||
error_message += f"Reason: {review_result.reason}\n\n"
|
||||
error_message += f"{review_result.reason}\n\n"
|
||||
|
||||
# Add criteria progress information if available
|
||||
if retry_context and hasattr(retry_context, 'criteria_progress'):
|
||||
progress = retry_context.criteria_progress
|
||||
error_message += f"📊 **Progress Summary:**\n"
|
||||
error_message += f"📊 **Details**\n"
|
||||
if progress.get('met_criteria'):
|
||||
error_message += f"✅ Met criteria: {', '.join(progress['met_criteria'])}\n"
|
||||
if progress.get('unmet_criteria'):
|
||||
|
|
@ -908,19 +787,18 @@ class HandlingTasks:
|
|||
logger.error(f"=== TASK {task_index or '?'} FAILED AFTER ALL RETRIES: {task_step.objective} ===")
|
||||
|
||||
# Create user-facing error message for task failure
|
||||
error_message = f"❌ Task {task_index or '?'} - '{task_step.objective}' failed after all retries\n\n"
|
||||
error_message += f"Objective: {task_step.objective}\n\n"
|
||||
error_message = f"**Task {task_index or '?'}**\n\n❌ '{task_step.objective}' failed after all retries\n\n"
|
||||
error_message += f"{task_step.objective}\n\n"
|
||||
|
||||
# Add specific error details if available
|
||||
if retry_context and hasattr(retry_context, 'previous_review_result') and retry_context.previous_review_result:
|
||||
reason = retry_context.previous_review_result.get('reason', '')
|
||||
if reason and reason != "Task failed after all retries.":
|
||||
error_message += f"Reason: {reason}\n\n"
|
||||
error_message += f"{reason}\n\n"
|
||||
|
||||
# Add retry information
|
||||
error_message += f"Retries attempted: {retry_context.retry_count if retry_context else 'Unknown'}\n"
|
||||
error_message += f"Status: Task failed permanently\n\n"
|
||||
error_message += "Please check the connection and try again, or contact support if the issue persists."
|
||||
error_message += f"Status: Task failed permanently"
|
||||
|
||||
# Create workflow message for user
|
||||
message_data = {
|
||||
|
|
@ -1170,7 +1048,8 @@ class HandlingTasks:
|
|||
processingTime=createdAction.get("processingTime"),
|
||||
timestamp=float(createdAction.get("timestamp", get_utc_timestamp())),
|
||||
result=createdAction.get("result"),
|
||||
resultDocuments=createdAction.get("resultDocuments", [])
|
||||
resultDocuments=createdAction.get("resultDocuments", []),
|
||||
userMessage=createdAction.get("userMessage")
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -1241,20 +1120,6 @@ class HandlingTasks:
|
|||
# Log action results
|
||||
logger.info(f"Action completed successfully")
|
||||
|
||||
# Create database log entry for action completion
|
||||
if total_actions is not None:
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"✅ Task {task_num} - Action {action_num}/{total_actions} completed",
|
||||
"type": "success"
|
||||
})
|
||||
else:
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"✅ Task {task_num} - Action {action_num}/? completed",
|
||||
"type": "success"
|
||||
})
|
||||
|
||||
if created_documents:
|
||||
logger.info(f"Output documents ({len(created_documents)}):")
|
||||
for i, doc in enumerate(created_documents):
|
||||
|
|
@ -1276,19 +1141,12 @@ class HandlingTasks:
|
|||
await self.createActionMessage(action, result, workflow, result_label, [], task_step, task_index)
|
||||
|
||||
# Create database log entry for action failure
|
||||
if total_actions is not None:
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"❌ Task {task_num} - Action {action_num}/{total_actions} failed: {result.error}",
|
||||
"type": "error"
|
||||
})
|
||||
else:
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"❌ Task {task_num} - Action {action_num}/? failed: {result.error}",
|
||||
"type": "error"
|
||||
})
|
||||
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"❌ **Task {task_num}**\n\n❌ **Action {action_num}/{total_actions}** failed: {result.error}",
|
||||
"type": "error"
|
||||
})
|
||||
|
||||
# Log action summary
|
||||
logger.info(f"=== TASK {task_num} ACTION {action_num} COMPLETED ===")
|
||||
|
||||
|
|
@ -1336,89 +1194,25 @@ class HandlingTasks:
|
|||
|
||||
# Create a more meaningful message that includes task context
|
||||
task_objective = task_step.objective if task_step else 'Unknown task'
|
||||
|
||||
|
||||
# Add comprehensive workflow context
|
||||
current_round = workflow_context.get('currentRound', 0)
|
||||
current_task = workflow_context.get('currentTask', 0)
|
||||
total_tasks = workflow_stats.get('totalTasks', 0)
|
||||
current_action = workflow_context.get('currentAction', 0)
|
||||
total_actions = workflow_stats.get('totalActions', 0)
|
||||
|
||||
# Build a user-friendly message based on success/failure
|
||||
if result.success:
|
||||
if created_documents and len(created_documents) > 0:
|
||||
doc_names = [doc.fileName for doc in created_documents[:3]]
|
||||
if len(created_documents) > 3:
|
||||
doc_names.append(f"... and {len(created_documents) - 3} more")
|
||||
|
||||
# Enhanced message with workflow context
|
||||
message_text = f"✅ **Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} Completed**\n\n"
|
||||
message_text += f"**Objective:** {task_objective}\n\n"
|
||||
message_text += f"**Generated {len(created_documents)} document(s):** {', '.join(doc_names)}\n\n"
|
||||
message_text += f"**Result Label:** {result_label}\n"
|
||||
|
||||
# Add comprehensive workflow context
|
||||
current_round = workflow_context.get('currentRound', 0)
|
||||
current_task = workflow_context.get('currentTask', 0)
|
||||
total_tasks = workflow_stats.get('totalTasks', 0)
|
||||
current_action = workflow_context.get('currentAction', 0)
|
||||
total_actions = workflow_stats.get('totalActions', 0)
|
||||
|
||||
message_text += f"**Workflow Context:**\n"
|
||||
message_text += f"- Round: {current_round}\n"
|
||||
if total_tasks > 0:
|
||||
message_text += f"- Task: {current_task}/{total_tasks}\n"
|
||||
else:
|
||||
message_text += f"- Task: {current_task}\n"
|
||||
if total_actions > 0:
|
||||
message_text += f"- Action: {current_action}/{total_actions}\n"
|
||||
else:
|
||||
message_text += f"- Action: {current_action}\n"
|
||||
message_text += f"- Status: {workflow_stats.get('workflowStatus', 'unknown')}"
|
||||
else:
|
||||
message_text = f"✅ **Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} Completed**\n\n"
|
||||
message_text += f"**Objective:** {task_objective}\n\n"
|
||||
message_text += "**Action executed successfully**\n\n"
|
||||
message_text += f"**Result Label:** {result_label}\n"
|
||||
|
||||
# Add comprehensive workflow context
|
||||
current_round = workflow_context.get('currentRound', 0)
|
||||
current_task = workflow_context.get('currentTask', 0)
|
||||
total_tasks = workflow_stats.get('totalTasks', 0)
|
||||
current_action = workflow_context.get('currentAction', 0)
|
||||
total_actions = workflow_stats.get('totalActions', 0)
|
||||
|
||||
message_text += f"**Workflow Context:**\n"
|
||||
message_text += f"- Round: {current_round}\n"
|
||||
if total_tasks > 0:
|
||||
message_text += f"- Task: {current_task}/{total_tasks}\n"
|
||||
else:
|
||||
message_text += f"- Task: {current_task}\n"
|
||||
if total_actions > 0:
|
||||
message_text += f"- Action: {current_action}/{total_actions}\n"
|
||||
else:
|
||||
message_text += f"- Action: {current_action}\n"
|
||||
message_text += f"- Status: {workflow_stats.get('workflowStats', 'unknown')}"
|
||||
message_text = f"**Action {current_action}/{total_actions} ({action.execMethod}.{action.execAction})**\n\n"
|
||||
message_text += f"✅ {task_objective}\n\n"
|
||||
else:
|
||||
# ⚠️ FAILURE MESSAGE - Show error details to user
|
||||
error_details = result.error if result.error else "Unknown error occurred"
|
||||
message_text = f"❌ **Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} Failed**\n\n"
|
||||
message_text += f"**Objective:** {task_objective}\n\n"
|
||||
message_text += f"**Error:** {error_details}\n\n"
|
||||
message_text += f"**Result Label:** {result_label}\n"
|
||||
|
||||
# Add comprehensive workflow context
|
||||
current_round = workflow_context.get('currentRound', 0)
|
||||
current_task = workflow_context.get('currentTask', 0)
|
||||
total_tasks = workflow_stats.get('totalTasks', 0)
|
||||
current_action = workflow_context.get('currentAction', 0)
|
||||
total_actions = workflow_stats.get('totalActions', 0)
|
||||
|
||||
message_text += f"**Workflow Context:**\n"
|
||||
message_text += f"- Round: {current_round}\n"
|
||||
if total_tasks > 0:
|
||||
message_text += f"- Task: {current_task}/{total_tasks}\n"
|
||||
else:
|
||||
message_text += f"- Task: {current_task}\n"
|
||||
if total_actions > 0:
|
||||
message_text += f"- Action: {current_action}/{total_actions}\n"
|
||||
message_text += f"- Action: {current_action}\n"
|
||||
message_text += f"- Status: {workflow_stats.get('workflowStatus', 'unknown')}\n\n"
|
||||
message_text += "Please check the connection and try again."
|
||||
|
||||
message_text = f"**Action {current_action}/{total_actions} ({action.execMethod}.{action.execAction})**\n\n"
|
||||
message_text += f"❌ {task_objective}\n\n"
|
||||
message_text += f"{error_details}\n\n"
|
||||
|
||||
message_data = {
|
||||
"workflowId": workflow.id,
|
||||
"role": "assistant",
|
||||
|
|
@ -1432,19 +1226,12 @@ class HandlingTasks:
|
|||
"documentsLabel": result_label,
|
||||
"documents": created_documents,
|
||||
# Add workflow context fields - extract from result_label to match document reference
|
||||
"roundNumber": workflow_context.get('currentRound', 0),
|
||||
"taskNumber": task_index,
|
||||
"actionNumber": self._extractActionNumberFromLabel(result_label) if result_label else workflow_context.get('currentAction', 0),
|
||||
"roundNumber": current_round,
|
||||
"taskNumber": current_task,
|
||||
"actionNumber": current_action,
|
||||
"actionProgress": "success" if result.success else "fail"
|
||||
}
|
||||
|
||||
# Add user-friendly message if available
|
||||
if action.userMessage:
|
||||
if result.success:
|
||||
message_data["message"] += f"\n\n💬 {action.userMessage}"
|
||||
else:
|
||||
message_data["message"] += f"\n\n💬 Action was intended to: {action.userMessage}"
|
||||
|
||||
# Add debugging for error messages
|
||||
if not result.success:
|
||||
logger.info(f"Creating ERROR message: {message_text}")
|
||||
|
|
|
|||
|
|
@ -1,3 +0,0 @@
|
|||
|
||||
|
||||
|
||||
|
|
@ -20,13 +20,13 @@ def createTaskPlanningPrompt(context: TaskContext, service) -> str:
|
|||
user_request = context.task_step.objective if context.task_step else 'No request specified'
|
||||
|
||||
# Extract available documents from context - use Pydantic model directly
|
||||
available_documents = context.available_documents or []
|
||||
available_documents = context.available_documents or "No documents available"
|
||||
|
||||
return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages.
|
||||
|
||||
USER REQUEST: {user_request}
|
||||
|
||||
AVAILABLE DOCUMENTS: {', '.join(available_documents)}
|
||||
AVAILABLE DOCUMENTS: {available_documents}
|
||||
|
||||
INSTRUCTIONS:
|
||||
1. Analyze the user request and available documents
|
||||
|
|
@ -34,8 +34,8 @@ INSTRUCTIONS:
|
|||
3. Focus on business outcomes, not technical operations
|
||||
4. Each task should produce meaningful, usable outputs
|
||||
5. Ensure proper handover between tasks using result labels
|
||||
6. Generate user-friendly messages for each task in the user's language ({user_language})
|
||||
7. Detect the language of the user request and include it in languageUserDetected
|
||||
6. Detect the language of the user request and include it in languageUserDetected
|
||||
7. Generate user-friendly messages for each task in the user's request language
|
||||
8. Return a JSON object with the exact structure shown below
|
||||
|
||||
TASK GROUPING PRINCIPLES:
|
||||
|
|
@ -63,15 +63,15 @@ TASK PLANNING PRINCIPLES:
|
|||
- Keep tasks at a meaningful level of abstraction
|
||||
- Each task should produce results that can be used by subsequent tasks
|
||||
- Ensure clear dependencies and handovers between tasks
|
||||
- Provide clear, actionable user messages in the user's language ({user_language})
|
||||
- Provide clear, actionable user messages in the user's request language
|
||||
- Group related activities to minimize task fragmentation
|
||||
- Only create multiple tasks when dealing with truly different, independent objectives
|
||||
|
||||
REQUIRED JSON STRUCTURE:
|
||||
{{
|
||||
"overview": "Brief description of the overall plan",
|
||||
"userMessage": "User-friendly message explaining the task plan in {user_language}",
|
||||
"languageUserDetected": "en", // Language code detected from user request (en, de, fr, it, es, etc.)
|
||||
"userMessage": "User-friendly message explaining the task plan in user's request language",
|
||||
"tasks": [
|
||||
{{
|
||||
"id": "task_1",
|
||||
|
|
@ -79,7 +79,7 @@ REQUIRED JSON STRUCTURE:
|
|||
"dependencies": ["task_0"], // IDs of tasks that must complete first
|
||||
"success_criteria": ["criteria1", "criteria2"],
|
||||
"estimated_complexity": "low|medium|high",
|
||||
"userMessage": "User-friendly message explaining what this task will accomplish in {user_language}"
|
||||
"userMessage": "User-friendly message explaining what this task will accomplish in user's request language"
|
||||
}}
|
||||
]
|
||||
}}
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ from modules.interfaces.interfaceChatModel import ActionResult
|
|||
from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects
|
||||
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
|
||||
from modules.chat.documents.documentExtraction import DocumentExtraction
|
||||
from modules.chat.documents.documentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
|
||||
from modules.chat.methodBase import MethodBase
|
||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||
import uuid
|
||||
|
|
@ -111,165 +112,9 @@ class ServiceCenter:
|
|||
except Exception as e:
|
||||
logger.error(f"Error discovering methods: {str(e)}")
|
||||
|
||||
def detectContentTypeFromData(self, fileData: bytes, fileName: str) -> str:
|
||||
"""
|
||||
Detect content type from file data and fileName.
|
||||
This method makes the MIME type detection function accessible through the service center.
|
||||
|
||||
Args:
|
||||
fileData: Raw file data as bytes
|
||||
fileName: Name of the file
|
||||
|
||||
Returns:
|
||||
str: Detected MIME type
|
||||
"""
|
||||
try:
|
||||
# Check file extension first
|
||||
ext = os.path.splitext(fileName)[1].lower()
|
||||
if ext:
|
||||
# Map common extensions to MIME types
|
||||
extToMime = {
|
||||
'.txt': 'text/plain',
|
||||
'.md': 'text/markdown',
|
||||
'.csv': 'text/csv',
|
||||
'.json': 'application/json',
|
||||
'.xml': 'application/xml',
|
||||
'.js': 'application/javascript',
|
||||
'.py': 'application/x-python',
|
||||
'.svg': 'image/svg+xml',
|
||||
'.jpg': 'image/jpeg',
|
||||
'.jpeg': 'image/jpeg',
|
||||
'.png': 'image/png',
|
||||
'.gif': 'image/gif',
|
||||
'.bmp': 'image/bmp',
|
||||
'.webp': 'image/webp',
|
||||
'.pdf': 'application/pdf',
|
||||
'.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'.doc': 'application/msword',
|
||||
'.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'.xls': 'application/vnd.ms-excel',
|
||||
'.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
'.ppt': 'application/vnd.ms-powerpoint',
|
||||
'.html': 'text/html',
|
||||
'.htm': 'text/html',
|
||||
'.css': 'text/css',
|
||||
'.zip': 'application/zip',
|
||||
'.rar': 'application/x-rar-compressed',
|
||||
'.7z': 'application/x-7z-compressed',
|
||||
'.tar': 'application/x-tar',
|
||||
'.gz': 'application/gzip'
|
||||
}
|
||||
if ext in extToMime:
|
||||
return extToMime[ext]
|
||||
|
||||
# Try to detect from content
|
||||
if fileData.startswith(b'%PDF'):
|
||||
return 'application/pdf'
|
||||
elif fileData.startswith(b'PK\x03\x04'):
|
||||
# ZIP-based formats (docx, xlsx, pptx)
|
||||
return 'application/zip'
|
||||
elif fileData.startswith(b'<'):
|
||||
# XML-based formats
|
||||
try:
|
||||
text = fileData.decode('utf-8', errors='ignore')
|
||||
if '<svg' in text.lower():
|
||||
return 'image/svg+xml'
|
||||
elif '<html' in text.lower():
|
||||
return 'text/html'
|
||||
else:
|
||||
return 'application/xml'
|
||||
except:
|
||||
pass
|
||||
elif fileData.startswith(b'\x89PNG\r\n\x1a\n'):
|
||||
return 'image/png'
|
||||
elif fileData.startswith(b'\xff\xd8\xff'):
|
||||
return 'image/jpeg'
|
||||
elif fileData.startswith(b'GIF87a') or fileData.startswith(b'GIF89a'):
|
||||
return 'image/gif'
|
||||
elif fileData.startswith(b'BM'):
|
||||
return 'image/bmp'
|
||||
elif fileData.startswith(b'RIFF') and fileData[8:12] == b'WEBP':
|
||||
return 'image/webp'
|
||||
|
||||
return 'application/octet-stream'
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error detecting content type from data: {str(e)}")
|
||||
return 'application/octet-stream'
|
||||
|
||||
def getMimeTypeFromExtension(self, extension: str) -> str:
|
||||
"""
|
||||
Get MIME type based on file extension.
|
||||
This method consolidates MIME type detection from extension.
|
||||
|
||||
Args:
|
||||
extension: File extension (with or without dot)
|
||||
|
||||
Returns:
|
||||
str: MIME type for the extension
|
||||
"""
|
||||
# Normalize extension (remove dot if present)
|
||||
if extension.startswith('.'):
|
||||
extension = extension[1:]
|
||||
|
||||
# Map extensions to MIME types
|
||||
mime_types = {
|
||||
'txt': 'text/plain',
|
||||
'json': 'application/json',
|
||||
'xml': 'application/xml',
|
||||
'csv': 'text/csv',
|
||||
'html': 'text/html',
|
||||
'htm': 'text/html',
|
||||
'md': 'text/markdown',
|
||||
'py': 'text/x-python',
|
||||
'js': 'application/javascript',
|
||||
'css': 'text/css',
|
||||
'pdf': 'application/pdf',
|
||||
'doc': 'application/msword',
|
||||
'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'xls': 'application/vnd.ms-excel',
|
||||
'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'ppt': 'application/vnd.ms-powerpoint',
|
||||
'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
'svg': 'image/svg+xml',
|
||||
'jpg': 'image/jpeg',
|
||||
'jpeg': 'image/jpeg',
|
||||
'png': 'image/png',
|
||||
'gif': 'image/gif',
|
||||
'bmp': 'image/bmp',
|
||||
'webp': 'image/webp',
|
||||
'zip': 'application/zip',
|
||||
'rar': 'application/x-rar-compressed',
|
||||
'7z': 'application/x-7z-compressed',
|
||||
'tar': 'application/x-tar',
|
||||
'gz': 'application/gzip'
|
||||
}
|
||||
return mime_types.get(extension.lower(), 'application/octet-stream')
|
||||
|
||||
def getFileExtension(self, fileName: str) -> str:
|
||||
"""
|
||||
Extract file extension from fileName.
|
||||
|
||||
Args:
|
||||
fileName: Name of the file
|
||||
|
||||
Returns:
|
||||
str: File extension (without dot)
|
||||
"""
|
||||
if '.' in fileName:
|
||||
return fileName.split('.')[-1].lower()
|
||||
return "txt" # Default to text
|
||||
|
||||
def getFileExtension(self, fileName):
|
||||
"""
|
||||
Extract file extension from fileName (without dot, lowercased).
|
||||
Returns empty string if no extension is found.
|
||||
"""
|
||||
if '.' in fileName:
|
||||
return fileName.rsplit('.', 1)[-1].lower()
|
||||
return ''
|
||||
|
||||
# ===== Functions =====
|
||||
# ===== Functions for Prompts: Context =====
|
||||
|
||||
def getMethodsList(self) -> List[str]:
|
||||
"""Get list of available methods with their signatures in the required format"""
|
||||
|
|
@ -283,48 +128,122 @@ class ServiceCenter:
|
|||
methodList.append(signature)
|
||||
return methodList
|
||||
|
||||
async def summarizeChat(self, messages: List[ChatMessage]) -> str:
|
||||
"""
|
||||
Summarize chat messages from last to first message with status="first"
|
||||
|
||||
def generateDocumentLabel(self, document: ChatDocument, message: ChatMessage) -> str:
|
||||
"""Generate new document label: round+task+action+filename.extension"""
|
||||
Args:
|
||||
messages: List of chat messages to summarize
|
||||
|
||||
Returns:
|
||||
str: Summary of the chat in user's language
|
||||
"""
|
||||
try:
|
||||
# Get workflow context from message
|
||||
round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1
|
||||
task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0
|
||||
action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0
|
||||
# Get messages from last to first, stopping at first message with status="first"
|
||||
relevantMessages = []
|
||||
for msg in reversed(messages):
|
||||
relevantMessages.append(msg)
|
||||
if msg.status == "first":
|
||||
break
|
||||
|
||||
# Get file extension from document's fileName property
|
||||
try:
|
||||
file_extension = self.getFileExtension(document.fileName)
|
||||
filename = document.fileName
|
||||
except Exception as e:
|
||||
# Try to diagnose and recover the issue
|
||||
diagnosis = self.diagnoseDocumentAccess(document)
|
||||
logger.error(f"Critical error: Cannot access document fileName for document {document.id}. Diagnosis: {diagnosis}")
|
||||
|
||||
# Attempt recovery
|
||||
if self.recoverDocumentAccess(document):
|
||||
try:
|
||||
file_extension = self.getFileExtension(document.fileName)
|
||||
filename = document.fileName
|
||||
logger.info(f"Document access recovered for {document.id}")
|
||||
except Exception as recovery_error:
|
||||
logger.error(f"Recovery failed for document {document.id}: {str(recovery_error)}")
|
||||
raise RuntimeError(f"Document {document.id} is permanently inaccessible after recovery attempt: {str(recovery_error)}")
|
||||
else:
|
||||
# Recovery failed - don't continue with invalid data
|
||||
raise RuntimeError(f"Document {document.id} is inaccessible and recovery failed. Diagnosis: {diagnosis}")
|
||||
# Create prompt for AI
|
||||
prompt = f"""You are an AI assistant providing a summary of a chat conversation.
|
||||
Please respond in '{self.user.language}' language.
|
||||
|
||||
Chat History:
|
||||
{chr(10).join(f"- {msg.message}" for msg in reversed(relevantMessages))}
|
||||
|
||||
Instructions:
|
||||
1. Summarize the conversation's key points and outcomes
|
||||
2. Be concise but informative
|
||||
3. Use a professional but friendly tone
|
||||
4. Focus on important decisions and next steps if any
|
||||
|
||||
Please provide a comprehensive summary of this conversation."""
|
||||
|
||||
# Construct label: round1_task2_action3_filename.ext
|
||||
if file_extension:
|
||||
label = f"round{round_num}_task{task_num}_action{action_num}_{filename}"
|
||||
else:
|
||||
label = f"round{round_num}_task{task_num}_action{action_num}_{filename}"
|
||||
# Get summary using AI
|
||||
return await self.callAiTextBasic(prompt)
|
||||
|
||||
return label
|
||||
except Exception as e:
|
||||
logger.error(f"Critical error generating document label for document {document.id}: {str(e)}")
|
||||
# Re-raise the error to prevent workflow from continuing with invalid data
|
||||
raise
|
||||
logger.error(f"Error summarizing chat: {str(e)}")
|
||||
return f"Error summarizing chat: {str(e)}"
|
||||
|
||||
# ===== Functions for Prompts + Actions: Document References generation and resolution =====
|
||||
|
||||
def getEnhancedDocumentContext(self) -> str:
|
||||
"""Get enhanced document context formatted for action planning prompts with proper docList and docItem references"""
|
||||
try:
|
||||
document_list = self.getDocumentReferenceList()
|
||||
|
||||
# Build technical context string for AI action planning
|
||||
context = "AVAILABLE DOCUMENTS:\n\n"
|
||||
|
||||
# Process chat exchanges (current round)
|
||||
if document_list["chat"]:
|
||||
context += "CURRENT ROUND DOCUMENTS:\n"
|
||||
for exchange in document_list["chat"]:
|
||||
# Generate docList reference for the exchange (using message ID and label)
|
||||
# Find the message that corresponds to this exchange
|
||||
message_id = None
|
||||
for message in self.workflow.messages:
|
||||
if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange.documentsLabel:
|
||||
message_id = message.id
|
||||
break
|
||||
|
||||
if message_id:
|
||||
doc_list_ref = f"docList:{message_id}:{exchange.documentsLabel}"
|
||||
else:
|
||||
# Fallback to label-only format if message ID not found
|
||||
doc_list_ref = f"docList:{exchange.documentsLabel}"
|
||||
|
||||
logger.debug(f"Using document label for action planning: {exchange.documentsLabel} (message_id: {message_id})")
|
||||
context += f"- {doc_list_ref} contains:\n"
|
||||
# Generate docItem references for each document in the list
|
||||
for doc_ref in exchange.documents:
|
||||
if doc_ref.startswith("docItem:"):
|
||||
context += f" - {doc_ref}\n"
|
||||
else:
|
||||
# Convert to proper docItem format if needed
|
||||
context += f" - docItem:{doc_ref}\n"
|
||||
context += "\n"
|
||||
|
||||
# Process history exchanges (previous rounds)
|
||||
if document_list["history"]:
|
||||
context += "WORKFLOW HISTORY DOCUMENTS:\n"
|
||||
for exchange in document_list["history"]:
|
||||
# Generate docList reference for the exchange (using message ID and label)
|
||||
# Find the message that corresponds to this exchange
|
||||
message_id = None
|
||||
for message in self.workflow.messages:
|
||||
if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange.documentsLabel:
|
||||
message_id = message.id
|
||||
break
|
||||
|
||||
if message_id:
|
||||
doc_list_ref = f"docList:{message_id}:{exchange.documentsLabel}"
|
||||
else:
|
||||
# Fallback to label-only format if message ID not found
|
||||
doc_list_ref = f"docList:{exchange.documentsLabel}"
|
||||
|
||||
logger.debug(f"Using history document label for action planning: {exchange.documentsLabel} (message_id: {message_id})")
|
||||
context += f"- {doc_list_ref} contains:\n"
|
||||
# Generate docItem references for each document in the list
|
||||
for doc_ref in exchange.documents:
|
||||
if doc_ref.startswith("docItem:"):
|
||||
context += f" - {doc_ref}\n"
|
||||
else:
|
||||
# Convert to proper docItem format if needed
|
||||
context += f" - docItem:{doc_ref}\n"
|
||||
context += "\n"
|
||||
|
||||
if not document_list["chat"] and not document_list["history"]:
|
||||
context += "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n"
|
||||
|
||||
return context
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating enhanced document context: {str(e)}")
|
||||
return "NO DOCUMENTS AVAILABLE - Error generating document context."
|
||||
|
||||
def getDocumentReferenceList(self) -> Dict[str, List[DocumentExchange]]:
|
||||
"""Get list of document exchanges with new labeling format, sorted by recency"""
|
||||
|
|
@ -336,7 +255,7 @@ class ServiceCenter:
|
|||
|
||||
# Refresh file attributes for all documents
|
||||
if all_documents:
|
||||
self.refreshDocumentFileAttributes(all_documents)
|
||||
self._refreshDocumentFileAttributes(all_documents)
|
||||
|
||||
chat_exchanges = []
|
||||
history_exchanges = []
|
||||
|
|
@ -350,29 +269,30 @@ class ServiceCenter:
|
|||
doc_exchange = None
|
||||
if message.documents:
|
||||
if message.actionId and message.documentsLabel:
|
||||
# Use new document label format
|
||||
# Validate that we use the same label as in the message
|
||||
validated_label = self._validateDocumentLabelConsistency(message)
|
||||
|
||||
# Use the message's actual documentsLabel
|
||||
doc_refs = []
|
||||
for doc in message.documents:
|
||||
doc_ref = self.getDocumentReferenceFromChatDocument(doc, message)
|
||||
doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
|
||||
doc_refs.append(doc_ref)
|
||||
|
||||
doc_exchange = DocumentExchange(
|
||||
documentsLabel=message.documentsLabel,
|
||||
doc_exchange = DocumentExchange(
|
||||
documentsLabel=validated_label,
|
||||
documents=doc_refs
|
||||
)
|
||||
)
|
||||
else:
|
||||
# Generate new labels for documents without explicit labels
|
||||
doc_refs = []
|
||||
for doc in message.documents:
|
||||
doc_ref = self.getDocumentReferenceFromChatDocument(doc, message)
|
||||
doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
|
||||
doc_refs.append(doc_ref)
|
||||
|
||||
if doc_refs:
|
||||
# Create a label based on message context
|
||||
round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1
|
||||
task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0
|
||||
action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0
|
||||
context_label = f"round{round_num}_task{task_num}_action{action_num}_context"
|
||||
context_prefix = self._generateWorkflowContextPrefix(message)
|
||||
context_label = f"{context_prefix}_context"
|
||||
|
||||
doc_exchange = DocumentExchange(
|
||||
documentsLabel=context_label,
|
||||
|
|
@ -400,7 +320,38 @@ class ServiceCenter:
|
|||
"chat": chat_exchanges,
|
||||
"history": history_exchanges
|
||||
}
|
||||
|
||||
|
||||
def _refreshDocumentFileAttributes(self, documents: List[ChatDocument]) -> None:
|
||||
"""Update file attributes (fileName, fileSize, mimeType) for documents"""
|
||||
for doc in documents:
|
||||
try:
|
||||
file_item = self.interfaceComponent.getFile(doc.fileId)
|
||||
if file_item:
|
||||
doc.fileName = file_item.fileName
|
||||
doc.fileSize = file_item.fileSize
|
||||
doc.mimeType = file_item.mimeType
|
||||
else:
|
||||
logger.warning(f"File not found for document {doc.id}, fileId: {doc.fileId}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error refreshing file attributes for document {doc.id}: {e}")
|
||||
|
||||
def _generateWorkflowContextPrefix(self, message: ChatMessage) -> str:
|
||||
"""Generate workflow context prefix: round{num}_task{num}_action{num}"""
|
||||
round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1
|
||||
task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0
|
||||
action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0
|
||||
return f"round{round_num}_task{task_num}_action{action_num}"
|
||||
|
||||
def _getDocumentReferenceFromChatDocument(self, document: ChatDocument, message: ChatMessage) -> str:
|
||||
"""Get document reference using document ID and filename."""
|
||||
try:
|
||||
# Use document ID and filename for simple reference
|
||||
return f"docItem:{document.id}:{document.fileName}"
|
||||
except Exception as e:
|
||||
logger.error(f"Critical error creating document reference for document {document.id}: {str(e)}")
|
||||
# Re-raise the error to prevent workflow from continuing with invalid data
|
||||
raise
|
||||
|
||||
def _getMessageSequenceForExchange(self, exchange: DocumentExchange) -> int:
|
||||
"""Get message sequence number for sorting exchanges by recency"""
|
||||
try:
|
||||
|
|
@ -432,54 +383,15 @@ class ServiceCenter:
|
|||
logger.error(f"Error getting message sequence for exchange: {str(e)}")
|
||||
return 0
|
||||
|
||||
def getEnhancedDocumentContext(self) -> str:
|
||||
"""Get enhanced document context formatted for action planning prompts with proper docList and docItem references"""
|
||||
try:
|
||||
document_list = self.getDocumentReferenceList()
|
||||
def _validateDocumentLabelConsistency(self, message) -> str:
|
||||
"""Validate that the document label used for references matches the message's actual label"""
|
||||
if not hasattr(message, 'documentsLabel') or not message.documentsLabel:
|
||||
logger.debug(f"Message {message.id} has no documentsLabel, returning None")
|
||||
return None
|
||||
|
||||
# Build technical context string for AI action planning
|
||||
context = "AVAILABLE DOCUMENTS:\n\n"
|
||||
|
||||
# Process chat exchanges (current round)
|
||||
if document_list["chat"]:
|
||||
context += "CURRENT ROUND DOCUMENTS:\n"
|
||||
for exchange in document_list["chat"]:
|
||||
# Generate docList reference for the exchange (using message ID)
|
||||
doc_list_ref = f"docList:{exchange.documentsLabel}"
|
||||
context += f"- {doc_list_ref} contains:\n"
|
||||
# Generate docItem references for each document in the list
|
||||
for doc_ref in exchange.documents:
|
||||
if doc_ref.startswith("docItem:"):
|
||||
context += f" - {doc_ref}\n"
|
||||
else:
|
||||
# Convert to proper docItem format if needed
|
||||
context += f" - docItem:{doc_ref}\n"
|
||||
context += "\n"
|
||||
|
||||
# Process history exchanges (previous rounds)
|
||||
if document_list["history"]:
|
||||
context += "WORKFLOW HISTORY DOCUMENTS:\n"
|
||||
for exchange in document_list["history"]:
|
||||
# Generate docList reference for the exchange (using message ID)
|
||||
doc_list_ref = f"docList:{exchange.documentsLabel}"
|
||||
context += f"- {doc_list_ref} contains:\n"
|
||||
# Generate docItem references for each document in the list
|
||||
for doc_ref in exchange.documents:
|
||||
if doc_ref.startswith("docItem:"):
|
||||
context += f" - {doc_ref}\n"
|
||||
else:
|
||||
# Convert to proper docItem format if needed
|
||||
context += f" - docItem:{doc_ref}\n"
|
||||
context += "\n"
|
||||
|
||||
if not document_list["chat"] and not document_list["history"]:
|
||||
context += "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n"
|
||||
|
||||
return context
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating enhanced document context: {str(e)}")
|
||||
return "NO DOCUMENTS AVAILABLE - Error generating document context."
|
||||
# Simply return the message's actual documentsLabel - no correction, just validation
|
||||
logger.debug(f"Using message's documentsLabel for references: '{message.documentsLabel}'")
|
||||
return message.documentsLabel
|
||||
|
||||
def _extractDocumentInfoFromReference(self, doc_ref: str) -> Dict[str, str]:
|
||||
"""Extract document information from reference string"""
|
||||
|
|
@ -533,27 +445,6 @@ class ServiceCenter:
|
|||
logger.error(f"Error extracting document info from reference: {str(e)}")
|
||||
return None
|
||||
|
||||
def getDocumentReferenceFromChatDocument(self, document: ChatDocument, message: ChatMessage) -> str:
|
||||
"""Get document reference using document ID and filename."""
|
||||
try:
|
||||
# Use document ID and filename for simple reference
|
||||
return f"docItem:{document.id}:{document.fileName}"
|
||||
except Exception as e:
|
||||
logger.error(f"Critical error creating document reference for document {document.id}: {str(e)}")
|
||||
# Re-raise the error to prevent workflow from continuing with invalid data
|
||||
raise
|
||||
|
||||
def getDocumentListReferenceFromChatMessage(self, message: ChatMessage) -> str:
|
||||
"""Get document list reference using message ID and label."""
|
||||
try:
|
||||
# Use message ID and documentsLabel for document list reference
|
||||
label = getattr(message, 'documentsLabel', f"message_{message.id}")
|
||||
return f"docList:{message.id}:{label}"
|
||||
except Exception as e:
|
||||
logger.error(f"Critical error creating document list reference for message {message.id}: {str(e)}")
|
||||
# Re-raise the error to prevent workflow from continuing with invalid data
|
||||
raise
|
||||
|
||||
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
|
||||
"""Get ChatDocuments from a list of document references using all three formats."""
|
||||
try:
|
||||
|
|
@ -569,19 +460,56 @@ class ServiceCenter:
|
|||
if message.documents:
|
||||
for doc in message.documents:
|
||||
if doc.id == doc_id:
|
||||
doc_name = getattr(doc, 'fileName', 'unknown')
|
||||
logger.debug(f"Found docItem reference {doc_ref}: {doc_name}")
|
||||
all_documents.append(doc)
|
||||
break
|
||||
elif doc_ref.startswith("docList:"):
|
||||
# docList:<messageId>:<label> - extract message ID and find document list
|
||||
# docList:<messageId>:<label> or docList:<label> - extract message ID and find document list
|
||||
parts = doc_ref.split(':')
|
||||
if len(parts) >= 2:
|
||||
if len(parts) >= 3:
|
||||
# Format: docList:<messageId>:<label>
|
||||
message_id = parts[1]
|
||||
label = parts[2]
|
||||
# Find the message by ID and get all its documents
|
||||
for message in self.workflow.messages:
|
||||
if str(message.id) == message_id:
|
||||
if message.documents:
|
||||
doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
|
||||
logger.debug(f"Found docList reference {doc_ref}: {len(message.documents)} documents - {doc_names}")
|
||||
all_documents.extend(message.documents)
|
||||
else:
|
||||
logger.debug(f"Found docList reference {doc_ref} but message has no documents")
|
||||
break
|
||||
elif len(parts) >= 2:
|
||||
# Format: docList:<label> - find message by documentsLabel
|
||||
label = parts[1]
|
||||
logger.debug(f"Looking for message with documentsLabel: {label}")
|
||||
# Find messages with matching documentsLabel
|
||||
matching_messages = []
|
||||
for message in self.workflow.messages:
|
||||
# Check both attribute and raw data for documentsLabel
|
||||
msg_label = getattr(message, 'documentsLabel', None)
|
||||
if msg_label == label:
|
||||
matching_messages.append(message)
|
||||
logger.debug(f"Found message {message.id} with matching documentsLabel: {msg_label}")
|
||||
else:
|
||||
# Debug: show what labels we're comparing
|
||||
logger.debug(f"Message {message.id} has documentsLabel: '{msg_label}' (looking for: '{label}')")
|
||||
|
||||
if matching_messages:
|
||||
# Use the newest message (highest publishedAt)
|
||||
matching_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
|
||||
newest_message = matching_messages[0]
|
||||
|
||||
if newest_message.documents:
|
||||
doc_names = [doc.fileName for doc in newest_message.documents if hasattr(doc, 'fileName')]
|
||||
logger.debug(f"Found docList reference {doc_ref}: {len(newest_message.documents)} documents - {doc_names}")
|
||||
all_documents.extend(newest_message.documents)
|
||||
else:
|
||||
logger.debug(f"Found docList reference {doc_ref} but message has no documents")
|
||||
else:
|
||||
logger.debug(f"No messages found with documentsLabel: {label}")
|
||||
else:
|
||||
# Direct label reference (round1_task2_action3_contextinfo)
|
||||
# Search for messages with matching documentsLabel to find the actual documents
|
||||
|
|
@ -619,8 +547,9 @@ class ServiceCenter:
|
|||
logger.debug(f"Newest message has {len(newest_message.documents) if newest_message.documents else 0} documents")
|
||||
|
||||
if newest_message.documents:
|
||||
doc_names = [doc.fileName for doc in newest_message.documents if hasattr(doc, 'fileName')]
|
||||
logger.debug(f"Added {len(newest_message.documents)} documents from newest message {newest_message.id}: {doc_names}")
|
||||
all_documents.extend(newest_message.documents)
|
||||
logger.debug(f"Added {len(newest_message.documents)} documents from newest message {newest_message.id}")
|
||||
else:
|
||||
logger.debug(f"No documents found in newest message {newest_message.id}")
|
||||
else:
|
||||
|
|
@ -641,8 +570,9 @@ class ServiceCenter:
|
|||
|
||||
logger.debug(f"Using fallback message {newest_fallback.id} with documentsLabel: {getattr(newest_fallback, 'documentsLabel', 'unknown')}")
|
||||
if newest_fallback.documents:
|
||||
doc_names = [doc.fileName for doc in newest_fallback.documents if hasattr(doc, 'fileName')]
|
||||
logger.debug(f"Added {len(newest_fallback.documents)} documents from fallback message {newest_fallback.id}: {doc_names}")
|
||||
all_documents.extend(newest_fallback.documents)
|
||||
logger.debug(f"Added {len(newest_fallback.documents)} documents from fallback message {newest_fallback.id}")
|
||||
else:
|
||||
logger.debug(f"No documents found in fallback message {newest_fallback.id}")
|
||||
else:
|
||||
|
|
@ -654,6 +584,8 @@ class ServiceCenter:
|
|||
logger.error(f"Error getting documents from document list: {str(e)}")
|
||||
return []
|
||||
|
||||
# ===== Functions for Prompts + Actions: Connection References generation and resolution =====
|
||||
|
||||
def getConnectionReferenceList(self) -> List[str]:
|
||||
"""Get list of all UserConnection objects as references with enhanced state information"""
|
||||
connections = []
|
||||
|
|
@ -750,46 +682,8 @@ class ServiceCenter:
|
|||
logger.error(f"Error parsing connection reference: {str(e)}")
|
||||
return None
|
||||
|
||||
async def summarizeChat(self, messages: List[ChatMessage]) -> str:
|
||||
"""
|
||||
Summarize chat messages from last to first message with status="first"
|
||||
|
||||
Args:
|
||||
messages: List of chat messages to summarize
|
||||
# ===== Functions for Actions: AI calls =====
|
||||
|
||||
Returns:
|
||||
str: Summary of the chat in user's language
|
||||
"""
|
||||
try:
|
||||
# Get messages from last to first, stopping at first message with status="first"
|
||||
relevantMessages = []
|
||||
for msg in reversed(messages):
|
||||
relevantMessages.append(msg)
|
||||
if msg.status == "first":
|
||||
break
|
||||
|
||||
# Create prompt for AI
|
||||
prompt = f"""You are an AI assistant providing a summary of a chat conversation.
|
||||
Please respond in '{self.user.language}' language.
|
||||
|
||||
Chat History:
|
||||
{chr(10).join(f"- {msg.message}" for msg in reversed(relevantMessages))}
|
||||
|
||||
Instructions:
|
||||
1. Summarize the conversation's key points and outcomes
|
||||
2. Be concise but informative
|
||||
3. Use a professional but friendly tone
|
||||
4. Focus on important decisions and next steps if any
|
||||
|
||||
Please provide a comprehensive summary of this conversation."""
|
||||
|
||||
# Get summary using AI
|
||||
return await self.callAiTextBasic(prompt)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error summarizing chat: {str(e)}")
|
||||
return f"Error summarizing chat: {str(e)}"
|
||||
|
||||
async def callAiTextAdvanced(self, prompt: str, context: str = None) -> str:
|
||||
"""Advanced text processing using Anthropic, with fallback to OpenAI basic if advanced fails."""
|
||||
max_retries = 3
|
||||
|
|
@ -882,6 +776,8 @@ Please provide a comprehensive summary of this conversation."""
|
|||
|
||||
return response
|
||||
|
||||
# ===== Functions for Actions: Data management =====
|
||||
|
||||
def getFileInfo(self, fileId: str) -> Dict[str, Any]:
|
||||
"""Get file information"""
|
||||
file_item = self.interfaceComponent.getFile(fileId)
|
||||
|
|
@ -920,11 +816,11 @@ Please provide a comprehensive summary of this conversation."""
|
|||
mimeType = document.mimeType
|
||||
except Exception as e:
|
||||
# Try to diagnose and recover the issue
|
||||
diagnosis = self.diagnoseDocumentAccess(document)
|
||||
diagnosis = self._diagnoseDocumentAccess(document)
|
||||
logger.error(f"Critical error: Cannot access document properties for document {document.id}. Diagnosis: {diagnosis}")
|
||||
|
||||
# Attempt recovery
|
||||
if self.recoverDocumentAccess(document):
|
||||
if self._recoverDocumentAccess(document):
|
||||
try:
|
||||
fileName = document.fileName
|
||||
mimeType = document.mimeType
|
||||
|
|
@ -954,9 +850,78 @@ Please provide a comprehensive summary of this conversation."""
|
|||
except Exception as e:
|
||||
logger.error(f"Error extracting from document: {str(e)}")
|
||||
raise
|
||||
|
||||
def createFile(self, fileName: str, mimeType: str, content: str, base64encoded: bool = False) -> str:
|
||||
"""Create new file and return its ID"""
|
||||
|
||||
def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
    """
    Diagnose document access issues and provide recovery information.

    Collects what can be determined about why a document's file-backed
    properties are inaccessible: whether a component interface is attached
    to the document, and whether ``self.interfaceComponent.getFile`` returns
    the referenced file.

    Args:
        document: Document whose access problem is being investigated.

    Returns:
        Dict with the keys ``document_id``, ``file_id``,
        ``has_component_interface``, ``component_interface_type``,
        ``file_exists``, ``file_info`` and ``error_details``. Every exit path
        returns this full key set; ``error_details`` stays ``None`` only when
        the file was found.
    """
    # Pre-populate every key so the failure path returns the same schema as
    # the success path (callers can index any key without a KeyError).
    diagnosis = {
        'document_id': 'unknown',
        'file_id': 'unknown',
        'has_component_interface': False,
        'component_interface_type': None,
        'file_exists': False,
        'file_info': None,
        'error_details': None
    }

    try:
        diagnosis['document_id'] = document.id
        diagnosis['file_id'] = document.fileId

        component_interface = document._componentInterface
        diagnosis['has_component_interface'] = component_interface is not None
        diagnosis['component_interface_type'] = type(component_interface).__name__ if component_interface else None

        # Check if component interface is set
        if not component_interface:
            diagnosis['error_details'] = "Component interface not set - document cannot access file system"
            return diagnosis

        # Try to access the file directly
        try:
            file_info = self.interfaceComponent.getFile(document.fileId)
            if file_info:
                diagnosis['file_exists'] = True
                diagnosis['file_info'] = {
                    'fileName': getattr(file_info, 'fileName', 'N/A'),
                    'fileSize': getattr(file_info, 'fileSize', 'N/A'),
                    'mimeType': getattr(file_info, 'mimeType', 'N/A')
                }
            else:
                diagnosis['error_details'] = f"File with ID {document.fileId} not found in component interface"
        except Exception as e:
            diagnosis['error_details'] = f"Error accessing file {document.fileId}: {str(e)}"

    except Exception as e:
        # Anything unexpected (e.g. the document lacks an attribute) is
        # reported in the result instead of being raised.
        diagnosis['error_details'] = f"Error during diagnosis: {str(e)}"

    return diagnosis
|
||||
|
||||
def _recoverDocumentAccess(self, document: ChatDocument) -> bool:
    """
    Attempt to recover document access by re-setting the component interface.

    Re-attaches ``self.interfaceComponent`` to the document and then reads
    ``document.fileName`` as a probe.

    Args:
        document: Document whose access should be restored.

    Returns:
        bool: True if ``fileName`` could be read after re-attaching the
        interface, False otherwise. This method does not raise for a broken
        document; failures are logged.
    """
    # Resolve the id once and defensively. The handlers below must not raise
    # themselves, otherwise a broken document turns the bool result into an
    # exception.
    try:
        document_id = document.id
    except Exception:
        document_id = 'unknown'

    try:
        logger.info(f"Attempting to recover document access for document {document_id}")

        # Re-set the component interface
        document.setComponentInterface(self.interfaceComponent)
    except Exception as e:
        logger.error(f"Error during document access recovery for {document_id}: {str(e)}")
        return False

    # Test if we can now access the fileName
    try:
        test_fileName = document.fileName
    except Exception as e:
        logger.error(f"Document access recovery failed for {document_id}: {str(e)}")
        return False

    logger.info(f"Document access recovered for {document_id} -> {test_fileName}")
    return True
|
||||
|
||||
def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True) -> ChatDocument:
|
||||
"""Create document with file in one step - handles file creation internally"""
|
||||
# Convert content to bytes based on base64 flag
|
||||
if base64encoded:
|
||||
import base64
|
||||
|
|
@ -974,27 +939,16 @@ Please provide a comprehensive summary of this conversation."""
|
|||
# Then store the file data
|
||||
self.interfaceComponent.createFileData(file_item.id, content_bytes)
|
||||
|
||||
return file_item.id
|
||||
|
||||
def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, existing_file_id: str = None) -> ChatDocument:
|
||||
"""Create document AND file from file data object created by AI call"""
|
||||
# Use existing file ID if provided, otherwise create new file
|
||||
if existing_file_id:
|
||||
file_id = existing_file_id
|
||||
else:
|
||||
# First create the file and get its ID
|
||||
file_id = self.createFile(fileName, mimeType, content, base64encoded)
|
||||
|
||||
# Get file info to copy attributes
|
||||
file_info = self.getFileInfo(file_id)
|
||||
file_info = self.getFileInfo(file_item.id)
|
||||
if not file_info:
|
||||
logger.error(f"Could not get file info for fileId: {file_id}")
|
||||
raise ValueError(f"File info not found for fileId: {file_id}")
|
||||
logger.error(f"Could not get file info for fileId: {file_item.id}")
|
||||
raise ValueError(f"File info not found for fileId: {file_item.id}")
|
||||
|
||||
# Create document with all file attributes copied
|
||||
document = ChatDocument(
|
||||
id=str(uuid.uuid4()),
|
||||
fileId=file_id,
|
||||
fileId=file_item.id,
|
||||
fileName=file_info.get("fileName", fileName),
|
||||
fileSize=file_info.get("size", 0),
|
||||
mimeType=file_info.get("mimeType", mimeType)
|
||||
|
|
@ -1002,6 +956,8 @@ Please provide a comprehensive summary of this conversation."""
|
|||
|
||||
return document
|
||||
|
||||
# ===== Internal public helper functions =====
|
||||
|
||||
def updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None:
|
||||
"""
|
||||
Centralized function to update workflow statistics in database and running workflow.
|
||||
|
|
@ -1051,24 +1007,40 @@ Please provide a comprehensive summary of this conversation."""
|
|||
logger.error(f"Error calculating object size: {str(e)}")
|
||||
return 0
|
||||
|
||||
def getAvailableDocuments(self, workflow) -> List[str]:
|
||||
def getAvailableDocuments(self, workflow) -> str:
|
||||
"""
|
||||
Get list of available document fileNames from workflow with new labeling format.
|
||||
Get simple description of available documents for task planning.
|
||||
|
||||
Args:
|
||||
workflow: ChatWorkflow object
|
||||
|
||||
Returns:
|
||||
List[str]: List of document labels in new format
|
||||
str: Simple description of document availability
|
||||
"""
|
||||
documents = []
|
||||
total_documents = 0
|
||||
document_types = set()
|
||||
|
||||
for message in workflow.messages:
|
||||
for doc in message.documents:
|
||||
# Generate new label format
|
||||
label = self.generateDocumentLabel(doc, message)
|
||||
documents.append(label)
|
||||
return documents
|
||||
if message.documents:
|
||||
total_documents += len(message.documents)
|
||||
for doc in message.documents:
|
||||
try:
|
||||
file_extension = getFileExtension(doc.fileName)
|
||||
if file_extension:
|
||||
document_types.add(file_extension.upper())
|
||||
except:
|
||||
pass
|
||||
|
||||
if total_documents == 0:
|
||||
return "No documents available"
|
||||
elif len(document_types) == 0:
|
||||
return f"{total_documents} document(s) available"
|
||||
else:
|
||||
types_str = ", ".join(sorted(document_types))
|
||||
return f"{total_documents} document(s) available ({types_str} files)"
|
||||
|
||||
# ===== Functions for Manager: Execution Tools =====
|
||||
|
||||
async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""Execute a method action"""
|
||||
try:
|
||||
|
|
@ -1116,6 +1088,8 @@ Please provide a comprehensive summary of this conversation."""
|
|||
"""Set user language for the service center"""
|
||||
self.user.language = language
|
||||
|
||||
# ===== Functions for Manager: Workflow Tools =====
|
||||
|
||||
def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
|
||||
"""Set current workflow context for document generation and routing"""
|
||||
try:
|
||||
|
|
@ -1210,92 +1184,5 @@ Please provide a comprehensive summary of this conversation."""
|
|||
'workflowId': 'unknown'
|
||||
}
|
||||
|
||||
def refreshDocumentFileAttributes(self, documents: List[ChatDocument]) -> None:
    """
    Update file attributes (fileName, fileSize, mimeType) for documents.

    For each document the backing file is looked up through
    ``self.interfaceComponent.getFile`` and its attributes are copied onto
    the document in place. Failures are logged per document and do not stop
    the remaining documents from being processed.

    Args:
        documents: Documents to refresh; ``None`` or an empty list is a no-op.
    """
    for doc in documents or []:
        try:
            file_item = self.interfaceComponent.getFile(doc.fileId)
            if not file_item:
                logger.warning(f"File not found for document {doc.id}, fileId: {doc.fileId}")
                continue

            # Read all three attributes before assigning any of them, so a
            # file item lacking one attribute cannot leave the document
            # half-updated.
            file_name = file_item.fileName
            file_size = file_item.fileSize
            mime_type = file_item.mimeType

            doc.fileName = file_name
            doc.fileSize = file_size
            doc.mimeType = mime_type
        except Exception as e:
            # Defensive id lookup: an error inside this handler would abort
            # the whole loop instead of just this document.
            logger.error(f"Error refreshing file attributes for document {getattr(doc, 'id', 'unknown')}: {e}")
|
||||
|
||||
# Note: Workflow progress update methods have been moved to handlingTasks.py
|
||||
# where they belong since that's where the actual workflow execution happens
|
||||
# This avoids circular import issues between ServiceCenter and ChatInterface
|
||||
|
||||
def diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
    """
    Diagnose document access issues and provide recovery information.

    Collects what can be determined about why a document's file-backed
    properties are inaccessible: whether a component interface is attached
    to the document, and whether ``self.interfaceComponent.getFile`` returns
    the referenced file.

    Args:
        document: Document whose access problem is being investigated.

    Returns:
        Dict with the keys ``document_id``, ``file_id``,
        ``has_component_interface``, ``component_interface_type``,
        ``file_exists``, ``file_info`` and ``error_details``. Every exit path
        returns this full key set; ``error_details`` stays ``None`` only when
        the file was found.
    """
    # Pre-populate every key so the failure path returns the same schema as
    # the success path (callers can index any key without a KeyError).
    diagnosis = {
        'document_id': 'unknown',
        'file_id': 'unknown',
        'has_component_interface': False,
        'component_interface_type': None,
        'file_exists': False,
        'file_info': None,
        'error_details': None
    }

    try:
        diagnosis['document_id'] = document.id
        diagnosis['file_id'] = document.fileId

        component_interface = document._componentInterface
        diagnosis['has_component_interface'] = component_interface is not None
        diagnosis['component_interface_type'] = type(component_interface).__name__ if component_interface else None

        # Check if component interface is set
        if not component_interface:
            diagnosis['error_details'] = "Component interface not set - document cannot access file system"
            return diagnosis

        # Try to access the file directly
        try:
            file_info = self.interfaceComponent.getFile(document.fileId)
            if file_info:
                diagnosis['file_exists'] = True
                diagnosis['file_info'] = {
                    'fileName': getattr(file_info, 'fileName', 'N/A'),
                    'fileSize': getattr(file_info, 'fileSize', 'N/A'),
                    'mimeType': getattr(file_info, 'mimeType', 'N/A')
                }
            else:
                diagnosis['error_details'] = f"File with ID {document.fileId} not found in component interface"
        except Exception as e:
            diagnosis['error_details'] = f"Error accessing file {document.fileId}: {str(e)}"

    except Exception as e:
        # Anything unexpected (e.g. the document lacks an attribute) is
        # reported in the result instead of being raised.
        diagnosis['error_details'] = f"Error during diagnosis: {str(e)}"

    return diagnosis
|
||||
|
||||
def recoverDocumentAccess(self, document: ChatDocument) -> bool:
    """
    Attempt to recover document access by re-setting the component interface.

    Re-attaches ``self.interfaceComponent`` to the document and then reads
    ``document.fileName`` as a probe.

    Args:
        document: Document whose access should be restored.

    Returns:
        bool: True if ``fileName`` could be read after re-attaching the
        interface, False otherwise. This method does not raise for a broken
        document; failures are logged.
    """
    # Resolve the id once and defensively. The handlers below must not raise
    # themselves, otherwise a broken document turns the bool result into an
    # exception.
    try:
        document_id = document.id
    except Exception:
        document_id = 'unknown'

    try:
        logger.info(f"Attempting to recover document access for document {document_id}")

        # Re-set the component interface
        document.setComponentInterface(self.interfaceComponent)
    except Exception as e:
        logger.error(f"Error during document access recovery for {document_id}: {str(e)}")
        return False

    # Test if we can now access the fileName
    try:
        test_fileName = document.fileName
    except Exception as e:
        logger.error(f"Document access recovery failed for {document_id}: {str(e)}")
        return False

    logger.info(f"Document access recovered for {document_id} -> {test_fileName}")
    return True
|
||||
|
||||
# Create singleton instance
|
||||
serviceObject = None
|
||||
|
|
|
|||
|
|
@ -8,6 +8,10 @@ from modules.shared.configuration import APP_CONFIG
|
|||
# Configure logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ContextLengthExceededException(Exception):
    """Signals that a request was rejected because its context is longer than the model allows."""
|
||||
|
||||
def loadConfigData():
|
||||
"""Load configuration data for OpenAI connector"""
|
||||
return {
|
||||
|
|
@ -75,12 +79,29 @@ class AiOpenai:
|
|||
|
||||
if response.status_code != 200:
|
||||
logger.error(f"OpenAI API error: {response.status_code} - {response.text}")
|
||||
|
||||
# Check for context length exceeded error
|
||||
if response.status_code == 400:
|
||||
try:
|
||||
error_data = response.json()
|
||||
if (error_data.get("error", {}).get("code") == "context_length_exceeded" or
|
||||
"context length" in error_data.get("error", {}).get("message", "").lower()):
|
||||
# Raise a specific exception for context length issues
|
||||
raise ContextLengthExceededException(
|
||||
f"Context length exceeded: {error_data.get('error', {}).get('message', 'Unknown error')}"
|
||||
)
|
||||
except (ValueError, KeyError):
|
||||
pass # If we can't parse the error, fall through to generic error
|
||||
|
||||
raise HTTPException(status_code=500, detail="Error communicating with OpenAI API")
|
||||
|
||||
responseJson = response.json()
|
||||
content = responseJson["choices"][0]["message"]["content"]
|
||||
return content
|
||||
|
||||
except ContextLengthExceededException:
|
||||
# Re-raise context length exceptions without wrapping
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error calling OpenAI API: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Error calling OpenAI API: {str(e)}")
|
||||
|
|
|
|||
268
modules/connectors/connectorWebTavily.py
Normal file
268
modules/connectors/connectorWebTavily.py
Normal file
|
|
@ -0,0 +1,268 @@
|
|||
"""Tavily web search class."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from modules.interfaces.interfaceWebModel import (
|
||||
WebCrawlBase,
|
||||
WebCrawlDocumentData,
|
||||
WebCrawlRequest,
|
||||
WebCrawlResultItem,
|
||||
WebScrapeActionDocument,
|
||||
WebScrapeActionResult,
|
||||
WebScrapeBase,
|
||||
WebScrapeDocumentData,
|
||||
WebScrapeRequest,
|
||||
WebScrapeResultItem,
|
||||
WebSearchBase,
|
||||
WebSearchRequest,
|
||||
WebSearchActionResult,
|
||||
WebSearchActionDocument,
|
||||
WebSearchDocumentData,
|
||||
WebSearchResultItem,
|
||||
WebCrawlActionDocument,
|
||||
WebCrawlActionResult,
|
||||
get_web_search_min_results,
|
||||
get_web_search_max_results,
|
||||
)
|
||||
|
||||
# from modules.interfaces.interfaceChatModel import ActionResult, ActionDocument
|
||||
from tavily import AsyncTavilyClient
|
||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Configuration loading functions
|
||||
def get_web_crawl_timeout() -> int:
    """Return the ``Web_Crawl_TIMEOUT`` setting as an int.

    Falls back to ``"30"`` when the key is missing from ``APP_CONFIG``.
    """
    configured_timeout = APP_CONFIG.get("Web_Crawl_TIMEOUT", "30")
    return int(configured_timeout)
|
||||
|
||||
|
||||
def get_web_crawl_max_retries() -> int:
    """Return the ``Web_Crawl_MAX_RETRIES`` setting as an int.

    Falls back to ``"3"`` when the key is missing from ``APP_CONFIG``.
    """
    configured_retries = APP_CONFIG.get("Web_Crawl_MAX_RETRIES", "3")
    return int(configured_retries)
|
||||
|
||||
|
||||
def get_web_crawl_retry_delay() -> int:
    """Return the ``Web_Crawl_RETRY_DELAY`` setting as an int.

    Falls back to ``"2"`` when the key is missing from ``APP_CONFIG``.
    """
    configured_delay = APP_CONFIG.get("Web_Crawl_RETRY_DELAY", "2")
    return int(configured_delay)
|
||||
|
||||
|
||||
@dataclass
class TavilySearchResult:
    """One hit of a Tavily search, reduced to the fields this connector uses."""

    # Title of the hit, taken from the "title" field of the API result
    title: str
    # Address of the hit, taken from the "url" field of the API result
    url: str
|
||||
|
||||
|
||||
@dataclass
class TavilyCrawlResult:
    """Extracted content of one URL as returned by the Tavily extract call."""

    # Address the content was extracted from ("url" field of the API result)
    url: str
    # Extracted text ("raw_content" field of the API result)
    content: str
|
||||
|
||||
|
||||
@dataclass
class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
    """Tavily-backed implementation of the web search, crawl and scrape interfaces.

    The public methods (``search_urls``, ``crawl_urls``, ``scrape``) never
    raise: any failure is reported as an action result with ``success=False``
    and the error text. Instances are normally built through ``create()``.
    """

    # Async Tavily API client; set by create()
    client: AsyncTavilyClient = None

    @classmethod
    async def create(cls):
        """Create a connector using the ``Connector_WebTavily_API_KEY`` setting.

        Raises:
            ValueError: If no API key is configured.
        """
        api_key = APP_CONFIG.get("Connector_WebTavily_API_KEY")
        if not api_key:
            raise ValueError("Tavily API key not configured. Please set Connector_WebTavily_API_KEY in config.ini")
        return cls(client=AsyncTavilyClient(api_key=api_key))

    async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult:
        """Handles the web search request.

        Takes a query and returns a list of URLs.

        Args:
            request: Carries ``query`` and ``max_results``.

        Returns:
            A successful result holding one JSON document with all hits, or a
            failed result carrying the error text.
        """
        try:
            search_results = await self._search(request.query, request.max_results)
            return self._build_search_action_result(search_results, request.query)
        except Exception as e:
            return WebSearchActionResult(success=False, error=str(e))

    async def crawl_urls(self, request: WebCrawlRequest) -> WebCrawlActionResult:
        """Crawls the given URLs and returns the extracted text content.

        Args:
            request: Carries the ``urls`` to extract.

        Returns:
            A successful result holding one JSON document with all extracted
            pages, or a failed result carrying the error text.
        """
        try:
            crawl_results = await self._crawl(request.urls)
            return self._build_crawl_action_result(crawl_results, request.urls)
        except Exception as e:
            return WebCrawlActionResult(success=False, error=str(e))

    async def scrape(self, request: WebScrapeRequest) -> WebScrapeActionResult:
        """Turns a query into a list of URLs with extracted content.

        Runs a search for ``request.query`` and then extracts the content of
        every URL the search returned.

        Returns:
            A successful result holding one JSON document with all extracted
            pages, or a failed result carrying the error text.
        """
        try:
            search_results = await self._search(request.query, request.max_results)
            urls = [result.url for result in search_results]
            crawl_results = await self._crawl(urls)
            return self._build_scrape_action_result(crawl_results, request.query)
        except Exception as e:
            return WebScrapeActionResult(success=False, error=str(e))

    async def _search(self, query: str, max_results: int) -> list[TavilySearchResult]:
        """Calls the Tavily API to perform a web search.

        Raises:
            ValueError: If ``max_results`` is outside the configured range.
        """
        # Make sure max_results is within the allowed range
        min_results = get_web_search_min_results()
        max_allowed_results = get_web_search_max_results()
        if max_results < min_results or max_results > max_allowed_results:
            raise ValueError(f"max_results must be between {min_results} and {max_allowed_results}")

        # Perform actual API call
        response = await self.client.search(query=query, max_results=max_results)

        return [
            TavilySearchResult(title=result["title"], url=result["url"])
            for result in response["results"]
        ]

    def _build_search_action_result(
        self, search_results: list[TavilySearchResult], query: str = ""
    ) -> WebSearchActionResult:
        """Builds the ActionResult from the search results."""
        result_items = [
            WebSearchResultItem(title=result.title, url=result.url)
            for result in search_results
        ]

        # All hits go into a single document
        document_data = WebSearchDocumentData(
            query=query, results=result_items, total_count=len(result_items)
        )
        document = WebSearchActionDocument(
            documentName=f"web_search_results_{get_utc_timestamp()}.json",
            documentData=document_data,
            mimeType="application/json",
        )

        return WebSearchActionResult(
            success=True, documents=[document], resultLabel="web_search_results"
        )

    async def _crawl(self, urls: list) -> list[TavilyCrawlResult]:
        """Calls the Tavily API to extract text content from URLs with retry logic.

        Each attempt is limited by the configured crawl timeout; failed
        attempts are retried after the configured delay.

        Args:
            urls: URLs to extract. An empty list returns ``[]`` without
                contacting the API.

        Raises:
            Exception: When every attempt failed; the last underlying error
                is attached as the cause.
        """
        import asyncio

        # Nothing to extract (e.g. a search without hits): skip the API call
        # and the whole retry cycle.
        if not urls:
            return []

        # Guard against a negative setting, which would skip the loop and
        # make this method return None.
        max_retries = max(0, get_web_crawl_max_retries())
        retry_delay = get_web_crawl_retry_delay()
        timeout = get_web_crawl_timeout()
        total_attempts = max_retries + 1

        for attempt in range(1, total_attempts + 1):
            try:
                # Use asyncio.wait_for for timeout
                response = await asyncio.wait_for(
                    self.client.extract(urls=urls, extract_depth="advanced", format="text"),
                    timeout=timeout
                )
                return [
                    TavilyCrawlResult(url=result["url"], content=result["raw_content"])
                    for result in response["results"]
                ]
            except asyncio.TimeoutError as e:
                logger.warning(f"Crawl attempt {attempt} timed out after {timeout} seconds")
                if attempt == total_attempts:
                    raise Exception(f"Crawl failed after {total_attempts} attempts due to timeout") from e
            except Exception as e:
                logger.warning(f"Crawl attempt {attempt} failed: {str(e)}")
                if attempt == total_attempts:
                    raise Exception(f"Crawl failed after {total_attempts} attempts: {str(e)}") from e

            # Only reached when the attempt failed and retries remain
            logger.info(f"Retrying in {retry_delay} seconds...")
            await asyncio.sleep(retry_delay)

    def _build_crawl_action_result(
        self, crawl_results: list[TavilyCrawlResult], urls: list[str] = None
    ) -> WebCrawlActionResult:
        """Builds the ActionResult from the crawl results.

        Args:
            crawl_results: Extracted pages.
            urls: Requested URLs to record in the document; when empty or
                ``None`` the URLs of ``crawl_results`` are recorded instead.
        """
        result_items = [
            WebCrawlResultItem(url=result.url, content=result.content)
            for result in crawl_results
        ]

        # All pages go into a single document
        document_data = WebCrawlDocumentData(
            urls=urls or [result.url for result in crawl_results],
            results=result_items,
            total_count=len(result_items),
        )
        document = WebCrawlActionDocument(
            documentName=f"web_crawl_results_{get_utc_timestamp()}.json",
            documentData=document_data,
            mimeType="application/json",
        )

        return WebCrawlActionResult(
            success=True, documents=[document], resultLabel="web_crawl_results"
        )

    def _build_scrape_action_result(
        self, crawl_results: list[TavilyCrawlResult], query: str = ""
    ) -> WebScrapeActionResult:
        """Builds the ActionResult from the scrape results."""
        result_items = [
            WebScrapeResultItem(url=result.url, content=result.content)
            for result in crawl_results
        ]

        # All pages go into a single document
        document_data = WebScrapeDocumentData(
            query=query,
            results=result_items,
            total_count=len(result_items),
        )
        document = WebScrapeActionDocument(
            documentName=f"web_scrape_results_{get_utc_timestamp()}.json",
            documentData=document_data,
            mimeType="application/json",
        )

        return WebScrapeActionResult(
            success=True, documents=[document], resultLabel="web_scrape_results"
        )
|
||||
|
|
@ -1,20 +1,378 @@
|
|||
import logging
|
||||
from typing import Dict, Any, List, Union, Optional
|
||||
from modules.connectors.connectorAiOpenai import AiOpenai
|
||||
from modules.connectors.connectorAiOpenai import AiOpenai, ContextLengthExceededException
|
||||
from modules.connectors.connectorAiAnthropic import AiAnthropic
|
||||
from modules.chat.documents.documentExtraction import DocumentExtraction
|
||||
from modules.interfaces.interfaceChatModel import ChatDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# AI Model Registry with Performance Data
|
||||
# Registry of the models known to this module, keyed by an internal model id.
# Each entry names the connector that serves the model and carries the
# limits, prices and ratings stored for it.
AI_MODELS = {
    "openai_gpt4o": {
        "connector": "openai",
        "max_tokens": 128000,
        "cost_per_1k_tokens": 0.03,  # price for input tokens
        "cost_per_1k_tokens_output": 0.06,  # price for output tokens
        "speed_rating": 8,  # scale 1-10
        "quality_rating": 9,  # scale 1-10
        "supports_images": True,
        "supports_documents": True,
        "context_length": 128000,
        "model_name": "gpt-4o",
    },
    "openai_gpt35": {
        "connector": "openai",
        "max_tokens": 16000,
        "cost_per_1k_tokens": 0.0015,
        "cost_per_1k_tokens_output": 0.002,
        "speed_rating": 9,
        "quality_rating": 7,
        "supports_images": False,
        "supports_documents": True,
        "context_length": 16000,
        "model_name": "gpt-3.5-turbo",
    },
    "anthropic_claude": {
        "connector": "anthropic",
        "max_tokens": 200000,
        "cost_per_1k_tokens": 0.015,
        "cost_per_1k_tokens_output": 0.075,
        "speed_rating": 7,
        "quality_rating": 10,
        "supports_images": True,
        "supports_documents": True,
        "context_length": 200000,
        "model_name": "claude-3-sonnet-20240229",
    },
}
|
||||
|
||||
class AiCalls:
|
||||
"""Interface for AI service interactions"""
|
||||
"""Interface for AI service interactions with centralized call method"""
|
||||
|
||||
def __init__(self):
    """Instantiate the two provider connectors and the document extractor."""
    # The attribute names matter: model dispatch resolves the connector via
    # getattr(self, "<connector>Service").
    self.openaiService = AiOpenai()
    self.anthropicService = AiAnthropic()
    # Used to turn ChatDocument payloads into text before an AI call.
    self.document_extractor = DocumentExtraction()
|
||||
|
||||
async def callAi(
    self,
    prompt: str,
    documents: List[ChatDocument] = None,
    operation_type: str = "general",
    priority: str = "balanced",  # "speed", "quality", "cost", "balanced"
    compress_prompt: bool = True,
    compress_documents: bool = True,
    process_documents_individually: bool = False,
    max_cost: float = None,
    max_processing_time: int = None
) -> str:
    """
    Central AI call with model selection and content preparation.

    Args:
        prompt: The main prompt for the AI
        documents: Documents to extract and pass along as context
        operation_type: Kind of operation ("general", "document_analysis", "image_analysis", etc.)
        priority: Model selection priority ("speed", "quality", "cost", "balanced")
        compress_prompt: Whether the prompt may be compressed
        compress_documents: Whether document content may be compressed
        process_documents_individually: Forwarded to document processing
        max_cost: Maximum cost for the call
        max_processing_time: Maximum processing time in seconds

    Returns:
        The AI response as a string; on any failure a string of the form
        "Error: <message>" is returned instead of raising.
    """
    try:
        # 1. Turn the documents (if any) into one text block.
        if documents:
            document_content = await self._process_documents_for_ai(
                documents,
                operation_type,
                compress_documents,
                process_documents_individually
            )
        else:
            document_content = ""

        # 2. Choose the model from priority, content size and limits.
        model_key = self._select_optimal_model(
            prompt,
            document_content,
            priority,
            operation_type,
            max_cost,
            max_processing_time
        )

        # 3. Fit prompt and document text to the chosen model.
        final_prompt, final_content = await self._optimize_content_for_model(
            prompt,
            document_content,
            model_key,
            compress_prompt,
            compress_documents
        )

        # 4. Run the call; the callee handles failover between models.
        return await self._execute_ai_call_with_failover(
            model_key,
            final_prompt,
            final_content
        )
    except Exception as e:
        logger.error(f"Error in centralized AI call: {str(e)}")
        return f"Error: {str(e)}"
|
||||
|
||||
def _select_optimal_model(
    self,
    prompt: str,
    document_content: str,
    priority: str,
    operation_type: str,
    max_cost: float = None,
    max_processing_time: int = None
) -> str:
    """Return the AI_MODELS key that best fits the priority and the content.

    Models are first filtered by content size, optional cost limit and image
    support; the survivors are ranked according to ``priority``.

    Args:
        prompt: Prompt text that will be sent.
        document_content: Extracted document text ("" when there is none).
        priority: "speed", "quality" or "cost"; any other value selects the
            balanced weighting.
        operation_type: "image_analysis" excludes models without image support.
        max_cost: Upper bound for the estimated cost; ``None`` disables the
            check. A limit of 0 is honoured (it is not treated as "no limit").
        max_processing_time: Accepted for interface compatibility; not used
            by the selection logic.

    Returns:
        A key of AI_MODELS. When no model passes the filters, "openai_gpt35"
        is returned as the fallback.
    """
    # NOTE(review): the size is measured in UTF-8 bytes but compared against
    # context_length, which _estimate_cost treats as ~4 characters per token.
    # The filter is therefore conservative - confirm this is intended.
    total_content_size = len(prompt.encode('utf-8')) + len(document_content.encode('utf-8'))

    available_models = {}
    for model_name, model_info in AI_MODELS.items():
        # Keep 20% of the context free for the rest of the exchange.
        if total_content_size > model_info["context_length"] * 0.8:
            continue

        # Explicit None check so that a budget of 0 / 0.0 still applies.
        if max_cost is not None:
            estimated_cost = self._estimate_cost(model_info, total_content_size)
            if estimated_cost > max_cost:
                continue

        if operation_type == "image_analysis" and not model_info["supports_images"]:
            continue

        available_models[model_name] = model_info

    if not available_models:
        # Nothing passed the filters: fall back to the smallest model.
        return "openai_gpt35"

    if priority == "speed":
        return max(available_models, key=lambda name: available_models[name]["speed_rating"])
    if priority == "quality":
        return max(available_models, key=lambda name: available_models[name]["quality_rating"])
    if priority == "cost":
        return min(available_models, key=lambda name: available_models[name]["cost_per_1k_tokens"])

    # Balanced: 40% quality, 30% speed, 30% cost.
    def balanced_score(name):
        model_info = available_models[name]
        quality_score = model_info["quality_rating"] * 0.4
        speed_score = model_info["speed_rating"] * 0.3
        # Lower cost gives a higher score.
        # NOTE(review): this term is negative for prices above 0.01 per 1k
        # tokens, so it can outweigh the 1-10 ratings - confirm the scaling.
        cost_score = (10 - (model_info["cost_per_1k_tokens"] * 1000)) * 0.3
        return quality_score + speed_score + cost_score

    return max(available_models, key=balanced_score)
|
||||
|
||||
def _estimate_cost(self, model_info: Dict, content_size: int) -> float:
|
||||
"""Schätzt die Kosten für einen AI Call"""
|
||||
# Grobe Schätzung: 1 Token ≈ 4 Zeichen
|
||||
estimated_tokens = content_size / 4
|
||||
input_cost = (estimated_tokens / 1000) * model_info["cost_per_1k_tokens"]
|
||||
output_cost = (estimated_tokens / 1000) * model_info["cost_per_1k_tokens_output"] * 0.1 # 10% für Output
|
||||
return input_cost + output_cost
|
||||
|
||||
async def _process_documents_for_ai(
    self,
    documents: List[ChatDocument],
    operation_type: str,
    compress_documents: bool,
    process_individually: bool
) -> str:
    """Extract text from each document and join it into one context string.

    Every document contributes a "Document: <fileName>" section; sections are
    separated by a "---" line. A document whose extraction fails contributes
    an error placeholder instead of aborting the whole run. Documents that
    yield no non-blank content are left out.

    ``process_individually`` is accepted but not used by this implementation.
    """
    if not documents:
        return ""

    sections = []
    for doc in documents:
        try:
            extracted = await self.document_extractor.processFileData(
                doc.fileData,
                doc.fileName,
                doc.mimeType,
                prompt=f"Extract relevant content for {operation_type}",
                documentId=doc.id,
                enableAI=True
            )

            # Keep only items that carry non-blank data.
            parts = [
                item.data
                for item in extracted.contents
                if item.data and item.data.strip()
            ]
            if not parts:
                continue

            doc_text = "\n\n".join(parts)

            # Compress per document once it is larger than 10 KB.
            too_large = len(doc_text.encode('utf-8')) > 10000
            if compress_documents and too_large:
                doc_text = await self._compress_content(doc_text, 10000, "document")

            sections.append(f"Document: {doc.fileName}\n{doc_text}")
        except Exception as e:
            logger.warning(f"Error processing document {doc.fileName}: {str(e)}")
            sections.append(f"Document: {doc.fileName}\n[Error processing document: {str(e)}]")

    return "\n\n---\n\n".join(sections)
|
||||
|
||||
async def _optimize_content_for_model(
    self,
    prompt: str,
    document_content: str,
    model_name: str,
    compress_prompt: bool,
    compress_documents: bool
) -> tuple[str, str]:
    """Fit prompt and document text to the limits of ``model_name``.

    The prompt is compressed when it exceeds 2000 bytes; the document text is
    compressed when it exceeds 70% of the model's context length. Each step
    only runs when its flag is set.

    Returns:
        ``(prompt, document_content)`` after optional compression.
    """
    content_budget = AI_MODELS[model_name]["context_length"] * 0.7

    optimized_prompt = prompt
    if compress_prompt and len(prompt.encode('utf-8')) > 2000:
        optimized_prompt = await self._compress_content(prompt, 2000, "prompt")

    optimized_content = document_content
    if compress_documents and document_content:
        if len(document_content.encode('utf-8')) > content_budget:
            optimized_content = await self._compress_content(
                document_content,
                int(content_budget),
                "document"
            )

    return optimized_prompt, optimized_content
|
||||
|
||||
async def _compress_content(self, content: str, target_size: int, content_type: str) -> str:
    """Shrink ``content`` so that it fits into ``target_size`` UTF-8 bytes.

    Content that already fits is returned unchanged. Otherwise the
    "openai_gpt35" model is asked to condense it; if that call fails for any
    reason the text is truncated instead.

    Args:
        content: Text to shrink.
        target_size: Size limit, measured in UTF-8 bytes like the check below.
        content_type: Label inserted into the compression prompt
            (callers in this class pass "prompt" or "document").

    Returns:
        The original, AI-compressed or truncated text. The AI result is
        returned as-is and is not re-checked against ``target_size``.
    """
    if len(content.encode('utf-8')) <= target_size:
        return content

    try:
        # Ask the model to do the compression.
        compression_prompt = f"""
Komprimiere den folgenden {content_type} auf maximal {target_size} Zeichen,
behalte aber alle wichtigen Informationen bei:

{content}

Gib nur den komprimierten Inhalt zurück, ohne zusätzliche Erklärungen.
"""

        compression_model = "openai_gpt35"
        model_info = AI_MODELS[compression_model]
        connector = getattr(self, f"{model_info['connector']}Service")

        messages = [{"role": "user", "content": compression_prompt}]

        # The OpenAI connector returns the text directly; the other
        # connector returns a choices structure that must be unpacked.
        if model_info["connector"] == "openai":
            compressed = await connector.callAiBasic(messages)
        else:
            response = await connector.callAiBasic(messages)
            compressed = response["choices"][0]["message"]["content"]

        return compressed

    except Exception as e:
        logger.warning(f"AI compression failed, using truncation: {str(e)}")
        # Fallback: plain truncation. The limit is in bytes, so cut the
        # encoded form and leave room for the marker; a multi-byte character
        # split at the cut is dropped by errors="ignore".
        suffix = "... [truncated]"
        budget = max(target_size - len(suffix.encode('utf-8')), 0)
        head = content.encode('utf-8')[:budget].decode('utf-8', errors='ignore')
        return head + suffix
|
||||
|
||||
async def _execute_ai_call_with_failover(
    self,
    model_name: str,
    prompt: str,
    document_content: str,
    _attempted: Optional[set] = None,
    _compressed: bool = False
) -> str:
    """Run the AI call on ``model_name`` and fail over to other models.

    Failover rules:
      * Context length exceeded: retry on a model with a larger context; if
        there is none, compress prompt and documents once and retry on
        "openai_gpt35".
      * Any other error: retry once on "openai_gpt35".

    Each model is tried at most once per round and content is compressed at
    most once, so the retry chain always terminates. When no option is left
    the last exception is re-raised to the caller.

    Args:
        model_name: Key of AI_MODELS to try first.
        prompt: User prompt.
        document_content: Optional document context, sent as system message.
        _attempted: Internal - models already tried in the current round.
        _compressed: Internal - True once the compression retry has been used.

    Returns:
        The model's response text.
    """
    attempted = set(_attempted) if _attempted else set()
    attempted.add(model_name)

    try:
        model_info = AI_MODELS[model_name]
        connector = getattr(self, f"{model_info['connector']}Service")

        # Build the message list; document context goes first as system text.
        messages = []
        if document_content:
            messages.append({
                "role": "system",
                "content": f"Context from documents:\n{document_content}"
            })
        messages.append({
            "role": "user",
            "content": prompt
        })

        if model_info["connector"] == "openai":
            return await connector.callAiBasic(messages)
        # anthropic: response is a choices structure
        response = await connector.callAiBasic(messages)
        return response["choices"][0]["message"]["content"]

    except ContextLengthExceededException:
        logger.warning(f"Context length exceeded for {model_name}, trying fallback")
        fallback_model = self._find_fallback_model(model_name)
        if fallback_model and fallback_model not in attempted:
            return await self._execute_ai_call_with_failover(
                fallback_model, prompt, document_content,
                _attempted=attempted, _compressed=_compressed
            )
        if _compressed:
            # Already retried with compressed content: give up.
            raise
        # Last resort: compress further and start a fresh round.
        compressed_prompt = await self._compress_content(prompt, 1000, "prompt")
        compressed_content = await self._compress_content(document_content, 5000, "document")
        return await self._execute_ai_call_with_failover(
            "openai_gpt35", compressed_prompt, compressed_content,
            _compressed=True
        )

    except Exception as e:
        logger.warning(f"AI call failed with {model_name}: {e}")
        if "openai_gpt35" in attempted:
            # The general fallback model has already failed in this round;
            # retrying it again would recurse without end.
            raise
        return await self._execute_ai_call_with_failover(
            "openai_gpt35", prompt, document_content,
            _attempted=attempted, _compressed=_compressed
        )
|
||||
|
||||
def _find_fallback_model(self, current_model: str) -> Optional[str]:
    """Return the first registry model with a larger context than ``current_model``.

    Models are examined in AI_MODELS order; ``None`` means no larger model exists.
    """
    threshold = AI_MODELS[current_model]["context_length"]
    larger = (
        key
        for key, spec in AI_MODELS.items()
        if spec["context_length"] > threshold
    )
    return next(larger, None)
|
||||
|
||||
# Legacy methods
|
||||
|
||||
async def callAiTextBasic(self, prompt: str, context: Optional[str] = None) -> str:
|
||||
"""
|
||||
Basic text processing using OpenAI.
|
||||
Basic text processing - now uses centralized AI call method.
|
||||
|
||||
Args:
|
||||
prompt: The user prompt to process
|
||||
|
|
@ -23,86 +381,47 @@ class AiCalls:
|
|||
Returns:
|
||||
The AI response as text
|
||||
"""
|
||||
# Prepare messages in OpenAI format
|
||||
messages = []
|
||||
|
||||
# Add system message if context provided
|
||||
# Combine context with prompt if provided
|
||||
full_prompt = prompt
|
||||
if context:
|
||||
messages.append({
|
||||
"role": "system",
|
||||
"content": context
|
||||
})
|
||||
full_prompt = f"Context: {context}\n\nUser Request: {prompt}"
|
||||
|
||||
# Add user message
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": prompt
|
||||
})
|
||||
|
||||
# Add language instruction for user-facing responses
|
||||
if hasattr(self, 'userLanguage') and self.userLanguage:
|
||||
ltext = f"Please respond in '{self.userLanguage}' language."
|
||||
if messages and messages[0]["role"] == "system":
|
||||
if "language" not in messages[0]["content"].lower():
|
||||
messages[0]["content"] = f"{ltext} {messages[0]['content']}"
|
||||
else:
|
||||
messages.insert(0, {
|
||||
"role": "system",
|
||||
"content": ltext
|
||||
})
|
||||
|
||||
try:
|
||||
return await self.openaiService.callAiBasic(messages)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in OpenAI call: {str(e)}")
|
||||
return f"Error: {str(e)}"
|
||||
# Use centralized AI call with speed priority for basic calls
|
||||
return await self.callAi(
|
||||
prompt=full_prompt,
|
||||
priority="speed",
|
||||
compress_prompt=True,
|
||||
compress_documents=False
|
||||
)
|
||||
|
||||
async def callAiTextAdvanced(self, prompt: str, context: Optional[str] = None) -> str:
|
||||
async def callAiTextAdvanced(self, prompt: str, context: Optional[str] = None, _is_fallback: bool = False) -> str:
|
||||
"""
|
||||
Advanced text processing using Anthropic.
|
||||
Fallback to OpenAI if Anthropic is overloaded or rate-limited.
|
||||
Advanced text processing - now uses centralized AI call method.
|
||||
|
||||
Args:
|
||||
prompt: The user prompt to process
|
||||
context: Optional system context/prompt
|
||||
_is_fallback: Internal flag (kept for compatibility)
|
||||
|
||||
Returns:
|
||||
The AI response as text
|
||||
"""
|
||||
# For Anthropic, we need to handle system content differently
|
||||
# Anthropic expects system content in a top-level parameter, not as a message role
|
||||
try:
|
||||
# Create messages without system role for Anthropic
|
||||
anthropic_messages = []
|
||||
if hasattr(self, 'userLanguage') and self.userLanguage:
|
||||
ltext = f"Please respond in '{self.userLanguage}' language."
|
||||
if context:
|
||||
# Combine context and language instruction
|
||||
full_context = f"{ltext}\n\n{context}"
|
||||
else:
|
||||
full_context = ltext
|
||||
else:
|
||||
full_context = context
|
||||
|
||||
# Add user message
|
||||
anthropic_messages.append({
|
||||
"role": "user",
|
||||
"content": prompt
|
||||
})
|
||||
|
||||
# Call Anthropic - let the connector handle system content conversion
|
||||
if full_context:
|
||||
# Send context as part of the user message for Anthropic
|
||||
enhanced_prompt = f"Context:\n{full_context}\n\nUser Request:\n{prompt}"
|
||||
response = await self.anthropicService.callAiBasic([
|
||||
{"role": "user", "content": enhanced_prompt}
|
||||
])
|
||||
else:
|
||||
response = await self.anthropicService.callAiBasic(anthropic_messages)
|
||||
|
||||
return response["choices"][0]["message"]["content"]
|
||||
except Exception as e:
|
||||
err_str = str(e)
|
||||
logger.warning(f"[UI NOTICE] Advanced AI failed, falling back to Basic AI (OpenAI). Reason: {err_str}")
|
||||
# Fallback to OpenAI basic
|
||||
return await self.callAiTextBasic(prompt, context)
|
||||
# Combine context with prompt if provided
|
||||
full_prompt = prompt
|
||||
if context:
|
||||
full_prompt = f"Context: {context}\n\nUser Request: {prompt}"
|
||||
|
||||
# Use centralized AI call with quality priority for advanced calls
|
||||
return await self.callAi(
|
||||
prompt=full_prompt,
|
||||
priority="quality",
|
||||
compress_prompt=False,
|
||||
compress_documents=False
|
||||
)
|
||||
|
||||
async def callAiImageBasic(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str:
|
||||
"""
|
||||
Basic image processing using OpenAI.
|
||||
Basic image processing - now uses centralized AI call method.
|
||||
|
||||
Args:
|
||||
prompt: The prompt for image analysis
|
||||
|
|
@ -113,6 +432,8 @@ class AiCalls:
|
|||
The AI response as text
|
||||
"""
|
||||
try:
|
||||
# For image processing, use the original connector directly
|
||||
# as the centralized method doesn't handle images yet
|
||||
return await self.openaiService.callAiImage(prompt, imageData, mimeType)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in OpenAI image call: {str(e)}")
|
||||
|
|
@ -120,7 +441,7 @@ class AiCalls:
|
|||
|
||||
async def callAiImageAdvanced(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str:
|
||||
"""
|
||||
Advanced image processing using Anthropic.
|
||||
Advanced image processing - now uses centralized AI call method.
|
||||
|
||||
Args:
|
||||
prompt: The prompt for image analysis
|
||||
|
|
@ -131,8 +452,76 @@ class AiCalls:
|
|||
The AI response as text
|
||||
"""
|
||||
try:
|
||||
# For image processing, use the original connector directly
|
||||
# as the centralized method doesn't handle images yet
|
||||
return await self.anthropicService.callAiImage(prompt, imageData, mimeType)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in Anthropic image call: {str(e)}")
|
||||
return f"Error: {str(e)}"
|
||||
|
||||
# Convenience methods for common use cases
|
||||
|
||||
async def callAiForDocumentAnalysis(
    self,
    prompt: str,
    documents: List[ChatDocument],
    priority: str = "balanced"
) -> str:
    """Run callAi preset for document analysis (documents compressed, processed together)."""
    options = dict(
        prompt=prompt,
        documents=documents,
        operation_type="document_analysis",
        priority=priority,
        compress_documents=True,
        process_documents_individually=False,
    )
    return await self.callAi(**options)
|
||||
|
||||
async def callAiForReportGeneration(
    self,
    prompt: str,
    documents: List[ChatDocument],
    priority: str = "quality"
) -> str:
    """Run callAi preset for report generation (quality first, per-document flag set)."""
    options = dict(
        prompt=prompt,
        documents=documents,
        operation_type="report_generation",
        priority=priority,
        compress_documents=True,
        process_documents_individually=True,
    )
    return await self.callAi(**options)
|
||||
|
||||
async def callAiForEmailComposition(
    self,
    prompt: str,
    documents: List[ChatDocument] = None,
    priority: str = "speed"
) -> str:
    """Run callAi preset for email composition (speed first, prompt and documents compressed)."""
    options = dict(
        prompt=prompt,
        documents=documents,
        operation_type="email_composition",
        priority=priority,
        compress_prompt=True,
        compress_documents=True,
    )
    return await self.callAi(**options)
|
||||
|
||||
async def callAiForTaskPlanning(
    self,
    prompt: str,
    documents: List[ChatDocument] = None,
    priority: str = "balanced"
) -> str:
    """Run callAi preset for task planning (prompt kept verbatim, documents compressed)."""
    options = dict(
        prompt=prompt,
        documents=documents,
        operation_type="task_planning",
        priority=priority,
        compress_prompt=False,
        compress_documents=True,
    )
    return await self.callAi(**options)
|
||||
|
||||
|
|
|
|||
|
|
@ -732,7 +732,7 @@ class TaskContext(BaseModel, ModelMixin):
|
|||
workflow_id: Optional[str] = None
|
||||
|
||||
# Available resources
|
||||
available_documents: Optional[list[str]] = []
|
||||
available_documents: Optional[str] = "No documents available"
|
||||
available_connections: Optional[list[str]] = []
|
||||
|
||||
# Previous execution state
|
||||
|
|
@ -755,8 +755,8 @@ class TaskContext(BaseModel, ModelMixin):
|
|||
criteria_progress: Optional[dict] = None
|
||||
|
||||
def getDocumentReferences(self) -> List[str]:
|
||||
"""Get all available document references"""
|
||||
docs = self.available_documents or []
|
||||
"""Get all available document references from previous handover"""
|
||||
docs = []
|
||||
if self.previous_handover:
|
||||
for doc_exchange in self.previous_handover.inputDocuments:
|
||||
docs.extend(doc_exchange.documents)
|
||||
|
|
|
|||
140
modules/interfaces/interfaceWebModel.py
Normal file
140
modules/interfaces/interfaceWebModel.py
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
"""Base class for web classes."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from modules.interfaces.interfaceChatModel import ActionDocument, ActionResult
|
||||
from pydantic import BaseModel, Field, HttpUrl
|
||||
from typing import List
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
|
||||
# Configuration loading functions
|
||||
def get_web_search_max_query_length() -> int:
    """Return Web_Search_MAX_QUERY_LENGTH from the app configuration (default 400)."""
    configured = APP_CONFIG.get("Web_Search_MAX_QUERY_LENGTH", "400")
    return int(configured)
|
||||
|
||||
|
||||
def get_web_search_max_results() -> int:
    """Return Web_Search_MAX_RESULTS from the app configuration (default 20)."""
    configured = APP_CONFIG.get("Web_Search_MAX_RESULTS", "20")
    return int(configured)
|
||||
|
||||
|
||||
def get_web_search_min_results() -> int:
    """Return Web_Search_MIN_RESULTS from the app configuration (default 1)."""
    configured = APP_CONFIG.get("Web_Search_MIN_RESULTS", "1")
    return int(configured)
|
||||
|
||||
|
||||
# --- Web search ---
|
||||
|
||||
# query -> list of URLs
|
||||
|
||||
|
||||
class WebSearchRequest(BaseModel):
    # Input for a web search. The bounds are read from the Web_Search_*
    # configuration once, when this class body is evaluated.
    query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
    max_results: int = Field(ge=get_web_search_min_results(), le=get_web_search_max_results())
|
||||
|
||||
|
||||
class WebSearchResultItem(BaseModel):
    """Individual search result"""

    # Title and validated URL of one hit.
    title: str
    url: HttpUrl
|
||||
|
||||
|
||||
class WebSearchDocumentData(BaseModel):
    """Complete search results document"""

    # The query that was searched, under the same length limits as the request.
    query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
    results: List[WebSearchResultItem]
    total_count: int
|
||||
|
||||
|
||||
class WebSearchActionDocument(ActionDocument):
    # ActionDocument that declares documentData as the search result structure.
    documentData: WebSearchDocumentData
|
||||
|
||||
|
||||
class WebSearchActionResult(ActionResult):
    # ActionResult whose documents are search documents; empty list by default.
    documents: List[WebSearchActionDocument] = Field(default_factory=list)
|
||||
|
||||
|
||||
class WebSearchBase(ABC):
    # Abstract interface for search connectors: query -> list of URLs.
    @abstractmethod
    async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult:
        ...
|
||||
|
||||
|
||||
# --- Web crawl ---
|
||||
|
||||
# list of URLs -> list of extracted HTML content
|
||||
|
||||
|
||||
class WebCrawlRequest(BaseModel):
    # Input for a crawl: the URLs to fetch, validated as HTTP(S) URLs.
    urls: List[HttpUrl]
|
||||
|
||||
|
||||
class WebCrawlResultItem(BaseModel):
    """Individual crawl result"""

    # The crawled URL and the content extracted from it.
    url: HttpUrl
    content: str
|
||||
|
||||
|
||||
class WebCrawlDocumentData(BaseModel):
    """Complete crawl results document"""

    # URL list for the crawl, the per-URL results and a result count.
    urls: List[HttpUrl]
    results: List[WebCrawlResultItem]
    total_count: int
|
||||
|
||||
|
||||
class WebCrawlActionDocument(ActionDocument):
    # ActionDocument that declares documentData as the crawl result structure.
    documentData: WebCrawlDocumentData = Field(
        description="The data extracted from crawled URLs"
    )
|
||||
|
||||
|
||||
class WebCrawlActionResult(ActionResult):
    # ActionResult whose documents are crawl documents; empty list by default.
    documents: List[WebCrawlActionDocument] = Field(default_factory=list)
|
||||
|
||||
|
||||
class WebCrawlBase(ABC):
    # Abstract interface for crawl connectors: list of URLs -> extracted content.
    @abstractmethod
    async def crawl_urls(self, request: WebCrawlRequest) -> WebCrawlActionResult:
        ...
|
||||
|
||||
|
||||
# --- Web scrape ---
|
||||
|
||||
# scrape -> list of extracted text; combines web search and crawl in one step
|
||||
|
||||
|
||||
class WebScrapeRequest(BaseModel):
    # Input for a scrape (search + crawl in one step). Uses the same
    # Web_Search_* bounds as WebSearchRequest, read when the class is defined.
    query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
    max_results: int = Field(ge=get_web_search_min_results(), le=get_web_search_max_results())
|
||||
|
||||
|
||||
class WebScrapeResultItem(BaseModel):
    """Individual scrape result"""

    # The scraped URL and the content extracted from it.
    url: HttpUrl
    content: str
|
||||
|
||||
|
||||
class WebScrapeDocumentData(BaseModel):
    """Complete scrape results document"""

    # The query that was scraped for, the per-URL results and a result count.
    query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
    results: List[WebScrapeResultItem]
    total_count: int
|
||||
|
||||
|
||||
class WebScrapeActionDocument(ActionDocument):
    # ActionDocument that declares documentData as the scrape result structure.
    documentData: WebScrapeDocumentData = Field(
        description="The data extracted from scraped URLs"
    )
|
||||
|
||||
|
||||
class WebScrapeActionResult(ActionResult):
    # ActionResult whose documents are scrape documents; empty list by default.
    documents: List[WebScrapeActionDocument] = Field(default_factory=list)
|
||||
|
||||
|
||||
class WebScrapeBase(ABC):
    # Abstract interface for scrape connectors: search and crawl in one step.
    @abstractmethod
    async def scrape(self, request: WebScrapeRequest) -> WebScrapeActionResult:
        ...
|
||||
118
modules/interfaces/interfaceWebObjects.py
Normal file
118
modules/interfaces/interfaceWebObjects.py
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
from typing import Optional
|
||||
import json
|
||||
import csv
|
||||
import io
|
||||
from modules.interfaces.interfaceWebModel import (
|
||||
WebCrawlActionResult,
|
||||
WebSearchActionResult,
|
||||
WebSearchRequest,
|
||||
WebCrawlRequest,
|
||||
WebScrapeActionResult,
|
||||
WebScrapeRequest,
|
||||
WebCrawlDocumentData,
|
||||
WebScrapeDocumentData,
|
||||
WebSearchDocumentData,
|
||||
)
|
||||
|
||||
from dataclasses import dataclass
|
||||
from modules.connectors.connectorWebTavily import ConnectorTavily
|
||||
from modules.interfaces.interfaceChatModel import ActionDocument
|
||||
|
||||
|
||||
@dataclass(slots=True)
class WebInterface:
    """Entry point for web search, crawl and scrape operations.

    All three operations are delegated to the Tavily connector. The class
    also converts action results to JSON or CSV text and wraps such text in
    ActionDocument objects.
    """

    connectorWebTavily: ConnectorTavily

    def __post_init__(self) -> None:
        """Reject instances that were built without a connector."""
        if self.connectorWebTavily is None:
            raise TypeError(
                "connectorWebTavily must be provided. "
                "Use `await WebInterface.create()` or pass a ConnectorTavily."
            )

    @classmethod
    async def create(cls) -> "WebInterface":
        """Build an instance with a freshly created Tavily connector."""
        connectorWebTavily = await ConnectorTavily.create()

        # Instantiate via cls so that subclasses get an instance of their
        # own type rather than of the base class.
        return cls(connectorWebTavily=connectorWebTavily)

    async def search(
        self, web_search_request: WebSearchRequest
    ) -> WebSearchActionResult:
        """Search the web for the request's query and return the result URLs."""
        # NOTE: Add connectors here
        return await self.connectorWebTavily.search_urls(web_search_request)

    async def crawl(self, web_crawl_request: WebCrawlRequest) -> WebCrawlActionResult:
        """Fetch the content of the URLs listed in the request."""
        # NOTE: Add connectors here
        return await self.connectorWebTavily.crawl_urls(web_crawl_request)

    async def scrape(
        self, web_scrape_request: WebScrapeRequest
    ) -> WebScrapeActionResult:
        """Search and crawl in one step for the request's query."""
        # NOTE: Add connectors here
        return await self.connectorWebTavily.scrape(web_scrape_request)

    def convert_web_result_to_json(self, web_result) -> str:
        """Convert WebCrawlActionResult or WebScrapeActionResult to proper JSON format

        A failed or document-less result yields ``{"success": ..., "error": ...}``.
        Otherwise only the first document is serialised: its results (url and
        content), the total count, and either ``urls`` (crawl) or ``query``
        (scrape).
        """
        if not web_result.success or not web_result.documents:
            return json.dumps({"success": web_result.success, "error": web_result.error})

        # Only the first document is used.
        document_data = web_result.documents[0].documentData

        result_dict = {
            "success": web_result.success,
            "results": [
                {
                    # HttpUrl values are not JSON serialisable; use their text form.
                    "url": str(result.url),
                    "content": result.content
                }
                for result in document_data.results
            ],
            "total_count": document_data.total_count
        }

        # Add type-specific fields
        if hasattr(document_data, 'urls'):
            # WebCrawlDocumentData has urls field
            result_dict["urls"] = [str(url) for url in document_data.urls]
        elif hasattr(document_data, 'query'):
            # WebScrapeDocumentData has query field
            result_dict["query"] = document_data.query

        return json.dumps(result_dict, indent=2, ensure_ascii=False)

    def convert_web_search_result_to_csv(self, web_search_result: WebSearchActionResult) -> str:
        """Convert WebSearchActionResult to CSV format with url and title columns

        Uses ``;`` as delimiter. Returns an empty string for a failed or
        document-less result. Only the first document is written.
        """
        if not web_search_result.success or not web_search_result.documents:
            return ""

        output = io.StringIO()
        writer = csv.writer(output, delimiter=';')

        # Header row, then one row per search hit.
        writer.writerow(['url', 'title'])
        document_data = web_search_result.documents[0].documentData
        for result in document_data.results:
            writer.writerow([str(result.url), result.title])

        return output.getvalue()

    def create_json_action_document(self, json_content: str, document_name: str) -> ActionDocument:
        """Create an ActionDocument with JSON content"""
        return ActionDocument(
            documentName=document_name,
            documentData=json_content,
            mimeType="application/json"
        )

    def create_csv_action_document(self, csv_content: str, document_name: str) -> ActionDocument:
        """Create an ActionDocument with CSV content"""
        return ActionDocument(
            documentName=document_name,
            documentData=csv_content,
            mimeType="text/csv"
        )
|
||||
|
|
@ -441,6 +441,10 @@ class MethodDocument(MethodBase):
|
|||
if len(lines) > 2:
|
||||
formatted_content = '\n'.join(lines[1:-1])
|
||||
|
||||
# For HTML format, check if AI returned complete HTML document
|
||||
if extension == ".html" and (formatted_content.startswith('<!DOCTYPE') or formatted_content.startswith('<html')):
|
||||
return formatted_content
|
||||
|
||||
return formatted_content
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -643,7 +647,22 @@ class MethodDocument(MethodBase):
|
|||
raise Exception("AI report generation failed - AI is required for report generation")
|
||||
|
||||
# Clean up the AI response and ensure it's valid HTML
|
||||
if not aiReport.strip().startswith('<html'):
|
||||
aiReport = aiReport.strip()
|
||||
|
||||
# Strip fenced code blocks like ```html ... ``` if present
|
||||
if aiReport.startswith("```") and aiReport.endswith("```"):
|
||||
lines = aiReport.split('\n')
|
||||
if len(lines) >= 2:
|
||||
# remove first and last fence lines (language tag allowed on first)
|
||||
aiReport = '\n'.join(lines[1:-1]).strip()
|
||||
|
||||
# Check if AI response starts with DOCTYPE or html tag (complete HTML document)
|
||||
if aiReport.startswith('<!DOCTYPE') or aiReport.startswith('<html'):
|
||||
# AI returned complete HTML document, use it directly
|
||||
return aiReport
|
||||
else:
|
||||
# AI returned HTML content without document structure, wrap it
|
||||
|
||||
# Check if AI response already contains a title/header
|
||||
has_title = any(title.lower() in aiReport.lower() for title in [title, "outlook", "report", "status"])
|
||||
|
||||
|
|
@ -660,9 +679,6 @@ class MethodDocument(MethodBase):
|
|||
html.append(aiReport)
|
||||
html.append("</body></html>")
|
||||
return '\n'.join(html)
|
||||
else:
|
||||
# AI returned complete HTML, use it directly
|
||||
return aiReport
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating AI report: {str(e)}")
|
||||
|
|
|
|||
|
|
@ -731,8 +731,6 @@ class MethodOutlook(MethodBase):
|
|||
attachment_docs = self.service.getChatDocumentsFromDocumentList([attachment_ref])
|
||||
if attachment_docs:
|
||||
for doc in attachment_docs:
|
||||
|
||||
|
||||
# Get the actual file content using fileId
|
||||
file_id = getattr(doc, 'fileId', None)
|
||||
if file_id:
|
||||
|
|
@ -757,15 +755,15 @@ class MethodOutlook(MethodBase):
|
|||
"contentBytes": base64_content
|
||||
}
|
||||
message["attachments"].append(attachment)
|
||||
|
||||
|
||||
else:
|
||||
logger.warning(f"No content found for attachment: {doc.fileName}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error reading attachment file {doc.fileName}: {str(e)}")
|
||||
else:
|
||||
logger.warning(f"Attachment document has no fileId: {doc.fileName}")
|
||||
else:
|
||||
logger.warning(f"No attachment documents found for reference: {attachment_ref}")
|
||||
else:
|
||||
logger.warning(f"No attachment documents found for reference: {attachment_ref}")
|
||||
|
||||
# Create the draft message
|
||||
# First, get the Drafts folder ID to ensure the draft is created there
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -270,15 +270,6 @@ async def get_workflow_messages(
|
|||
# Get all messages
|
||||
allMessages = interfaceChat.getWorkflowMessages(workflowId)
|
||||
|
||||
# Debug logging: Log attributes for each message
|
||||
logger.debug(f"Retrieved {len(allMessages)} messages for workflow {workflowId}")
|
||||
for i, message in enumerate(allMessages):
|
||||
logger.debug(f"Message {i+1} (ID: {message.id}): {message}")
|
||||
logger.debug(f" - Type: {getattr(message, 'type', 'N/A')}")
|
||||
logger.debug(f" - Content: {getattr(message, 'content', 'N/A')[:100]}...")
|
||||
logger.debug(f" - PublishedAt: {getattr(message, 'publishedAt', 'N/A')}")
|
||||
logger.debug(f" - All attributes: {message.__dict__}")
|
||||
|
||||
# Apply selective data transfer if messageId is provided
|
||||
if messageId:
|
||||
# Find the index of the message with the given ID
|
||||
|
|
|
|||
|
|
@ -141,6 +141,12 @@ class WorkflowManager:
|
|||
self.chatManager.handlingTasks._checkWorkflowStopped()
|
||||
|
||||
# Create initial message using interface
|
||||
# Generate the correct documentsLabel that matches what getDocumentReferenceString will create
|
||||
round_num = workflow.currentRound
|
||||
task_num = 0
|
||||
action_num = 0
|
||||
context_label = f"round{round_num}_task{task_num}_action{action_num}_context"
|
||||
|
||||
messageData = {
|
||||
"workflowId": workflow.id,
|
||||
"role": "user",
|
||||
|
|
@ -148,7 +154,7 @@ class WorkflowManager:
|
|||
"status": "first",
|
||||
"sequenceNr": 1,
|
||||
"publishedAt": get_utc_timestamp(),
|
||||
"documentsLabel": "workflow_start",
|
||||
"documentsLabel": context_label,
|
||||
"documents": [],
|
||||
# Add workflow context fields
|
||||
"roundNumber": workflow.currentRound,
|
||||
|
|
@ -390,7 +396,7 @@ class WorkflowManager:
|
|||
summary_message = {
|
||||
"workflowId": workflow.id,
|
||||
"role": "assistant",
|
||||
"message": f"Workflow completed successfully. Completed {workflow_result.completed_tasks}/{workflow_result.total_tasks} tasks in {workflow_result.execution_time:.2f} seconds.",
|
||||
"message": f"Workflow completed successfully.",
|
||||
"status": "last",
|
||||
"sequenceNr": len(workflow.messages) + 1,
|
||||
"publishedAt": get_utc_timestamp(),
|
||||
|
|
|
|||
|
|
@ -2,8 +2,9 @@
|
|||
TODO
|
||||
|
||||
# System
|
||||
- Backend/UI fix Table Connections mit korrekten Token Infos, View jedesmal neu laden im formGeneric
|
||||
- Model reference diagram for all models: who uses whom? --> to see the basic building blocks
|
||||
- sharepoint to fix
|
||||
- document handling centralized
|
||||
- ai handling centralized
|
||||
- neutralizer to activate AND put back placeholders to the returned data
|
||||
|
||||
# Tests
|
||||
|
|
@ -20,7 +21,13 @@ TODO
|
|||
- check zusammenfassung von 10 dokumenten >10 MB
|
||||
- test case bewerbung
|
||||
|
||||
|
||||
# DOCUMENTATION
|
||||
Design principles
|
||||
- UI: Module classes for data management (CRUD tables & forms --> formGeneric)
|
||||
- Basic: All timestamps to be timezone aware; error-handling routines (Fehlerabfangroutinen)
|
||||
- Backend: All external components to attach over connectorXxx --> interfaceXxx --> our codebase
|
||||
- all model definitions in interfaceXxxModel
|
||||
- Action functions for AI: why use documentList and not just document as the input parameter? --> For full flexibility: the caller can pass either a list of documents or a documentList
|
||||
|
||||
********************
|
||||
|
||||
|
|
|
|||
|
|
@ -1,128 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Management Summary: Methoden-basierte Chat-Architektur</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
line-height: 1.6;
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
padding: 20px;
|
||||
color: #333;
|
||||
}
|
||||
h1 {
|
||||
color: #2c3e50;
|
||||
border-bottom: 2px solid #3498db;
|
||||
padding-bottom: 10px;
|
||||
}
|
||||
h2 {
|
||||
color: #2c3e50;
|
||||
margin-top: 30px;
|
||||
}
|
||||
.example {
|
||||
background-color: #f8f9fa;
|
||||
border-left: 4px solid #3498db;
|
||||
padding: 15px;
|
||||
margin: 20px 0;
|
||||
}
|
||||
.old-arch, .new-arch {
|
||||
margin: 15px 0;
|
||||
padding: 15px;
|
||||
border-radius: 5px;
|
||||
}
|
||||
.old-arch {
|
||||
background-color: #fff3cd;
|
||||
border: 1px solid #ffeeba;
|
||||
}
|
||||
.new-arch {
|
||||
background-color: #d4edda;
|
||||
border: 1px solid #c3e6cb;
|
||||
}
|
||||
.benefits {
|
||||
background-color: #e8f4f8;
|
||||
padding: 15px;
|
||||
border-radius: 5px;
|
||||
margin: 20px 0;
|
||||
}
|
||||
.benefits ul {
|
||||
margin: 10px 0;
|
||||
padding-left: 20px;
|
||||
}
|
||||
.benefits li {
|
||||
margin: 5px 0;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Management Summary: Methoden-basierte Chat-Architektur</h1>
|
||||
|
||||
<p>Die Umstellung von einer Agenten-basierten auf eine Methoden-basierte Chat-Architektur stellt einen fundamentalen Paradigmenwechsel dar. Während die Mehrheit der KI-Chat-Systeme weiterhin auf Agenten-Architekturen setzt, ermöglicht unser methoden-basierter Ansatz eine präzisere Kontrolle und effizientere Integration.</p>
|
||||
|
||||
<p>Der methoden-basierte Ansatz definiert klare, selbstbeschreibende Operationen mit festgelegten Parametern und Ergebnissen. Im Gegensatz zu Agenten, die als Blackbox-Operationen fungieren, bieten Methoden eine transparente, validierbare und vorhersehbare Ausführung. Diese Struktur ermöglicht eine präzise Fehlerbehandlung und Retry-Logik auf Aktions-Ebene, anstatt auf Agenten-Ebene.</p>
|
||||
|
||||
<p>Die Integration mit Benutzerdaten erfolgt direkt über definierte Authentifizierungspfade, was die Sicherheit erhöht und die Komplexität reduziert. Jede Methode ist selbstbeschreibend und enthält ihre eigenen Validierungsregeln, was die Wartbarkeit verbessert und die Entwicklung neuer Funktionen beschleunigt.</p>
|
||||
|
||||
<p>Der methoden-basierte Ansatz reduziert die KI-Abhängigkeit bei der Ausführung von Operationen, während die KI weiterhin für die Planung und Koordination der Methoden eingesetzt wird. Diese Trennung von Planung und Ausführung führt zu zuverlässigeren Ergebnissen und besserer Nachvollziehbarkeit.</p>
|
||||
|
||||
<p>Die Architektur ermöglicht eine präzise Dokumentation und Validierung jeder Operation, was in einer regulierten Umgebung von besonderem Wert ist. Die klare Struktur erleichtert die Integration neuer Dienste und die Erweiterung bestehender Funktionalitäten.</p>
|
||||
|
||||
<h2>Praktisches Beispiel: Dokumentenverarbeitung und E-Mail-Versand</h2>
|
||||
|
||||
<div class="example">
|
||||
<div class="old-arch">
|
||||
<strong>Alte Agenten-basierte Architektur:</strong><br>
|
||||
<pre>
|
||||
Benutzer: "Suche nach Verträgen im SharePoint und sende mir eine Zusammenfassung per E-Mail"
|
||||
|
||||
Agent SharePoint:
|
||||
- Sucht nach Verträgen
|
||||
- Extrahiert Inhalte
|
||||
- Speichert Ergebnisse
|
||||
|
||||
Agent Outlook:
|
||||
- Liest Ergebnisse
|
||||
- Erstellt E-Mail
|
||||
- Sendet E-Mail</pre>
|
||||
</div>
|
||||
|
||||
<div class="new-arch">
|
||||
<strong>Neue Methoden-basierte Architektur:</strong><br>
|
||||
<pre>
|
||||
Benutzer: "Suche nach Verträgen im SharePoint und sende mir eine Zusammenfassung per E-Mail"
|
||||
|
||||
Methoden-Katalog:
|
||||
1. SharePoint.searchDocuments
|
||||
- Parameter: {query: "Verträge", site: "valueon"}
|
||||
- Retry: 3x bei Netzwerkfehler
|
||||
- Auth: MSFT
|
||||
|
||||
2. Document.extractContent
|
||||
- Parameter: {documents: [...], sections: ["Zusammenfassung"]}
|
||||
- Retry: 2x bei Extraktionsfehler
|
||||
- Auth: LOCAL
|
||||
|
||||
3. Outlook.sendMail
|
||||
- Parameter: {to: ["user@example.com"], subject: "Vertragszusammenfassung"}
|
||||
- Retry: 1x bei SMTP-Fehler
|
||||
- Auth: MSFT</pre>
|
||||
</div>
|
||||
|
||||
<div class="benefits">
|
||||
<strong>Vorteile im Beispiel:</strong>
|
||||
<ul>
|
||||
<li>Jede Operation ist klar definiert und validierbar</li>
|
||||
<li>Retry-Logik ist spezifisch für jede Operation</li>
|
||||
<li>Authentifizierung ist explizit definiert</li>
|
||||
<li>Fehler können präzise zugeordnet werden</li>
|
||||
<li>Operationen können unabhängig voneinander getestet werden</li>
|
||||
<li>Neue Operationen können einfach hinzugefügt werden</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p>Die KI plant die Ausführung dieser Methoden, aber die eigentliche Ausführung erfolgt durch die definierten Methoden mit klaren Parametern und Ergebnissen. Dies führt zu einer zuverlässigeren und besser nachvollziehbaren Ausführung.</p>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,129 +0,0 @@
|
|||
|
||||
<!DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Management Summary: Methoden-basierte Chat-Architektur</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
line-height: 1.6;
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
padding: 20px;
|
||||
color: #333;
|
||||
}
|
||||
h1 {
|
||||
color: #2c3e50;
|
||||
border-bottom: 2px solid #3498db;
|
||||
padding-bottom: 10px;
|
||||
}
|
||||
h2 {
|
||||
color: #2c3e50;
|
||||
margin-top: 30px;
|
||||
}
|
||||
.example {
|
||||
background-color: #f8f9fa;
|
||||
border-left: 4px solid #3498db;
|
||||
padding: 15px;
|
||||
margin: 20px 0;
|
||||
}
|
||||
.old-arch, .new-arch {
|
||||
margin: 15px 0;
|
||||
padding: 15px;
|
||||
border-radius: 5px;
|
||||
}
|
||||
.old-arch {
|
||||
background-color: #fff3cd;
|
||||
border: 1px solid #ffeeba;
|
||||
}
|
||||
.new-arch {
|
||||
background-color: #d4edda;
|
||||
border: 1px solid #c3e6cb;
|
||||
}
|
||||
.benefits {
|
||||
background-color: #e8f4f8;
|
||||
padding: 15px;
|
||||
border-radius: 5px;
|
||||
margin: 20px 0;
|
||||
}
|
||||
.benefits ul {
|
||||
margin: 10px 0;
|
||||
padding-left: 20px;
|
||||
}
|
||||
.benefits li {
|
||||
margin: 5px 0;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Management Summary: Methoden-basierte Chat-Architektur</h1>
|
||||
|
||||
<p>Die Umstellung von einer Agenten-basierten auf eine Methoden-basierte Chat-Architektur stellt einen fundamentalen Paradigmenwechsel dar. Während die Mehrheit der KI-Chat-Systeme weiterhin auf Agenten-Architekturen setzt, ermöglicht unser methoden-basierter Ansatz eine präzisere Kontrolle und effizientere Integration.</p>
|
||||
|
||||
<p>Der methoden-basierte Ansatz definiert klare, selbstbeschreibende Operationen mit festgelegten Parametern und Ergebnissen. Im Gegensatz zu Agenten, die als Blackbox-Operationen fungieren, bieten Methoden eine transparente, validierbare und vorhersehbare Ausführung. Diese Struktur ermöglicht eine präzise Fehlerbehandlung und Retry-Logik auf Aktions-Ebene, anstatt auf Agenten-Ebene.</p>
|
||||
|
||||
<p>Die Integration mit Benutzerdaten erfolgt direkt über definierte Authentifizierungspfade, was die Sicherheit erhöht und die Komplexität reduziert. Jede Methode ist selbstbeschreibend und enthält ihre eigenen Validierungsregeln, was die Wartbarkeit verbessert und die Entwicklung neuer Funktionen beschleunigt.</p>
|
||||
|
||||
<p>Der methoden-basierte Ansatz reduziert die KI-Abhängigkeit bei der Ausführung von Operationen, während die KI weiterhin für die Planung und Koordination der Methoden eingesetzt wird. Diese Trennung von Planung und Ausführung führt zu zuverlässigeren Ergebnissen und besserer Nachvollziehbarkeit.</p>
|
||||
|
||||
<p>Die Architektur ermöglicht eine präzise Dokumentation und Validierung jeder Operation, was in einer regulierten Umgebung von besonderem Wert ist. Die klare Struktur erleichtert die Integration neuer Dienste und die Erweiterung bestehender Funktionalitäten.</p>
|
||||
|
||||
<h2>Praktisches Beispiel: Dokumentenverarbeitung und E-Mail-Versand</h2>
|
||||
|
||||
<div class="example">
|
||||
<div class="old-arch">
|
||||
<strong>Alte Agenten-basierte Architektur:</strong><br>
|
||||
<pre>
|
||||
Benutzer: "Suche nach Verträgen im SharePoint und sende mir eine Zusammenfassung per E-Mail"
|
||||
|
||||
Agent SharePoint:
|
||||
- Sucht nach Verträgen
|
||||
- Extrahiert Inhalte
|
||||
- Speichert Ergebnisse
|
||||
|
||||
Agent Outlook:
|
||||
- Liest Ergebnisse
|
||||
- Erstellt E-Mail
|
||||
- Sendet E-Mail</pre>
|
||||
</div>
|
||||
|
||||
<div class="new-arch">
|
||||
<strong>Neue Methoden-basierte Architektur:</strong><br>
|
||||
<pre>
|
||||
Benutzer: "Suche nach Verträgen im SharePoint und sende mir eine Zusammenfassung per E-Mail"
|
||||
|
||||
Methoden-Katalog:
|
||||
1. SharePoint.searchDocuments
|
||||
- Parameter: {query: "Verträge", site: "valueon"}
|
||||
- Retry: 3x bei Netzwerkfehler
|
||||
- Auth: MSFT
|
||||
|
||||
2. Document.extractContent
|
||||
- Parameter: {documents: [...], sections: ["Zusammenfassung"]}
|
||||
- Retry: 2x bei Extraktionsfehler
|
||||
- Auth: LOCAL
|
||||
|
||||
3. Outlook.sendMail
|
||||
- Parameter: {to: ["user@example.com"], subject: "Vertragszusammenfassung"}
|
||||
- Retry: 1x bei SMTP-Fehler
|
||||
- Auth: MSFT</pre>
|
||||
</div>
|
||||
|
||||
<div class="benefits">
|
||||
<strong>Vorteile im Beispiel:</strong>
|
||||
<ul>
|
||||
<li>Jede Operation ist klar definiert und validierbar</li>
|
||||
<li>Retry-Logik ist spezifisch für jede Operation</li>
|
||||
<li>Authentifizierung ist explizit definiert</li>
|
||||
<li>Fehler können präzise zugeordnet werden</li>
|
||||
<li>Operationen können unabhängig voneinander getestet werden</li>
|
||||
<li>Neue Operationen können einfach hinzugefügt werden</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p>Die KI plant die Ausführung dieser Methoden, aber die eigentliche Ausführung erfolgt durch die definierten Methoden mit klaren Parametern und Ergebnissen. Dies führt zu einer zuverlässigeren und besser nachvollziehbaren Ausführung.</p>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,999 +0,0 @@
|
|||
# Chat System Process Flow Specification
|
||||
|
||||
## 1. System Overview
|
||||
|
||||
### 1.1 Core Components
|
||||
- **WorkflowManager**: Orchestrates the overall workflow process
|
||||
- **ChatManager**: Manages chat interactions and task execution
|
||||
- **ServiceCenter**: Central state and context management
|
||||
- **AgentTask**: Core data object for task execution
|
||||
|
||||
### 1.2 ServiceCenter Structure
|
||||
```python
|
||||
from enum import Enum
|
||||
from typing import Dict, List, Optional, Any, Literal
|
||||
from datetime import datetime, UTC
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
class TaskStatus(str, Enum):
|
||||
PENDING = "pending"
|
||||
SUCCESS = "success"
|
||||
FAILED = "failed"
|
||||
RETRY = "retry"
|
||||
TIMEOUT = "timeout"
|
||||
ROLLBACK = "rollback"
|
||||
|
||||
class ActionStatus(str, Enum):
|
||||
PENDING = "pending"
|
||||
SUCCESS = "success"
|
||||
FAILED = "failed"
|
||||
RETRY = "retry"
|
||||
TIMEOUT = "timeout"
|
||||
SKIPPED = "skipped"
|
||||
DEPENDENCY_FAILED = "dependency_failed"
|
||||
|
||||
class AuthSource(str, Enum):
|
||||
LOCAL = "local"
|
||||
MSFT = "msft"
|
||||
GOOGLE = "google"
|
||||
# Add more auth sources as needed
|
||||
|
||||
class MethodParameter(BaseModel):
|
||||
"""Model for method parameters"""
|
||||
name: str
|
||||
type: str
|
||||
required: bool
|
||||
validation: Optional[callable] = None
|
||||
description: str
|
||||
|
||||
class ActionResult(BaseModel):
|
||||
"""Model for method results"""
|
||||
success: bool
|
||||
data: Dict[str, Any]
|
||||
metadata: Dict[str, Any]
|
||||
validation: List[str]
|
||||
|
||||
class MethodBase:
|
||||
"""Base class for all methods"""
|
||||
|
||||
def __init__(self, service):
|
||||
self.service = service
|
||||
self.name: str
|
||||
self.description: str
|
||||
self.auth_source: AuthSource = AuthSource.LOCAL # Default to local auth
|
||||
|
||||
@property
|
||||
def actions(self) -> Dict[str, Dict[str, Any]]:
|
||||
"""Available actions and their parameters"""
|
||||
raise NotImplementedError
|
||||
|
||||
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> ActionResult:
|
||||
"""Execute method action with authentication data"""
|
||||
raise NotImplementedError
|
||||
|
||||
async def validate_parameters(self, action: str, parameters: Dict[str, Any]) -> bool:
|
||||
"""Validate action parameters"""
|
||||
if action not in self.actions:
|
||||
return False
|
||||
|
||||
action_def = self.actions[action]
|
||||
required_params = {k for k, v in action_def['parameters'].items() if v['required']}
|
||||
return all(param in parameters for param in required_params)
|
||||
|
||||
async def rollback(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> None:
|
||||
"""Rollback action if needed"""
|
||||
pass
|
||||
|
||||
class Action(BaseModel):
|
||||
"""Action model with validation"""
|
||||
method: str
|
||||
action: str
|
||||
parameters: Dict[str, Any]
|
||||
retryCount: int = 0
|
||||
retryMax: int
|
||||
status: ActionStatus = ActionStatus.PENDING
|
||||
timeout: Optional[int] = None
|
||||
dependencies: List[str] = []
|
||||
rollback_on_failure: bool = False
|
||||
auth_source: Optional[AuthSource] = None # Auth source for this action
|
||||
|
||||
class Config:
|
||||
use_enum_values = True
|
||||
|
||||
class AgentTask(BaseModel):
|
||||
"""Task model with validation"""
|
||||
id: str
|
||||
workflowId: str
|
||||
status: TaskStatus = TaskStatus.PENDING
|
||||
userInput: str
|
||||
dataList: List[Dict[str, str]] # List of available connections
|
||||
actionList: List[Action]
|
||||
chatHistory: str
|
||||
taskHistory: str
|
||||
previousTaskFeedback: Optional[str]
|
||||
thisTaskFeedback: Optional[str]
|
||||
result: Optional[Dict[str, Any]]
|
||||
documentsInput: List[Dict]
|
||||
documentsOutput: List[Dict]
|
||||
startedAt: str
|
||||
finishedAt: Optional[str]
|
||||
error: Optional[str]
|
||||
dependencies: List[str] = []
|
||||
requiredOutputs: List[str] = []
|
||||
|
||||
class Config:
|
||||
use_enum_values = True
|
||||
|
||||
def get_auth_data(self, auth_source: AuthSource) -> Optional[Dict[str, Any]]:
|
||||
"""Get authentication data for the specified source"""
|
||||
return next(
|
||||
(conn for conn in self.dataList if conn.get('source') == auth_source),
|
||||
None
|
||||
)
|
||||
|
||||
def get_action_by_id(self, action_id: str) -> Optional[Action]:
|
||||
"""Get action by its ID (method:action)"""
|
||||
return next((a for a in self.actionList if f"{a.method}:{a.action}" == action_id), None)
|
||||
|
||||
def can_execute_action(self, action: Action) -> bool:
|
||||
"""Check if action can be executed based on dependencies and auth"""
|
||||
# Check dependencies
|
||||
if action.dependencies:
|
||||
if not all(
|
||||
self.get_action_by_id(dep).status == ActionStatus.SUCCESS
|
||||
for dep in action.dependencies
|
||||
):
|
||||
return False
|
||||
|
||||
# Check authentication
|
||||
if action.auth_source and action.auth_source != AuthSource.LOCAL:
|
||||
if not self.get_auth_data(action.auth_source):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def is_complete(self) -> bool:
|
||||
"""Check if all actions are complete"""
|
||||
return all(a.status in [ActionStatus.SUCCESS, ActionStatus.SKIPPED]
|
||||
for a in self.actionList)
|
||||
|
||||
def has_failed(self) -> bool:
|
||||
"""Check if any action has failed"""
|
||||
return any(a.status == ActionStatus.FAILED for a in self.actionList)
|
||||
|
||||
class ServiceCenter:
|
||||
"""Service center with improved state management"""
|
||||
|
||||
def __init__(self):
|
||||
self.state = {
|
||||
'status': TaskStatus.PENDING,
|
||||
'retryCount': 0,
|
||||
'retryMax': 3,
|
||||
'timeout': 300, # 5 minutes
|
||||
'lastError': None,
|
||||
'lastErrorTime': None
|
||||
}
|
||||
self.methods: Dict[str, MethodBase] = {}
|
||||
self.tasks: Dict[str, AgentTask] = {}
|
||||
self.promptManager = AIPromptManager()
|
||||
self.taskStateManager = TaskStateManager()
|
||||
self.documentProcessor = DocumentExtraction()
|
||||
|
||||
async def execute_task(self, task: AgentTask) -> None:
|
||||
"""Execute task with improved error handling and timeout"""
|
||||
try:
|
||||
# Check for timeout
|
||||
if (datetime.now(UTC) - datetime.fromisoformat(task.startedAt)).seconds > self.state['timeout']:
|
||||
task.status = TaskStatus.TIMEOUT
|
||||
return
|
||||
|
||||
# Execute actions
|
||||
for action in task.actionList:
|
||||
if not task.can_execute_action(action):
|
||||
if not task.get_auth_data(action.auth_source):
|
||||
action.status = ActionStatus.FAILED
|
||||
task.error = f"Missing authentication for {action.auth_source}"
|
||||
else:
|
||||
action.status = ActionStatus.DEPENDENCY_FAILED
|
||||
continue
|
||||
|
||||
try:
|
||||
# Get method
|
||||
method = self.methods.get(action.method)
|
||||
if not method:
|
||||
raise ValueError(f"Unknown method: {action.method}")
|
||||
|
||||
# Validate parameters
|
||||
if not await method.validate_parameters(action.action, action.parameters):
|
||||
raise ValueError(f"Invalid parameters for {action.method}:{action.action}")
|
||||
|
||||
# Get auth data if needed
|
||||
auth_data = None
|
||||
if action.auth_source and action.auth_source != AuthSource.LOCAL:
|
||||
auth_data = task.get_auth_data(action.auth_source)
|
||||
if not auth_data:
|
||||
raise ValueError(f"Missing authentication data for {action.auth_source}")
|
||||
|
||||
# Execute with timeout
|
||||
result = await asyncio.wait_for(
|
||||
method.execute(action.action, action.parameters, auth_data),
|
||||
timeout=action.timeout or 60
|
||||
)
|
||||
|
||||
if result.success:
|
||||
action.status = ActionStatus.SUCCESS
|
||||
else:
|
||||
if self._should_retry(result.data.get('error')):
|
||||
action.retryCount += 1
|
||||
if action.retryCount > action.retryMax:
|
||||
action.status = ActionStatus.FAILED
|
||||
if action.rollback_on_failure:
|
||||
await method.rollback(action.action, action.parameters, auth_data)
|
||||
else:
|
||||
action.status = ActionStatus.RETRY
|
||||
else:
|
||||
action.status = ActionStatus.FAILED
|
||||
if action.rollback_on_failure:
|
||||
await method.rollback(action.action, action.parameters, auth_data)
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
action.status = ActionStatus.TIMEOUT
|
||||
except Exception as e:
|
||||
action.status = ActionStatus.FAILED
|
||||
if action.rollback_on_failure:
|
||||
await method.rollback(action.action, action.parameters, auth_data)
|
||||
|
||||
# Update task status
|
||||
if task.has_failed():
|
||||
task.status = TaskStatus.FAILED
|
||||
elif task.is_complete():
|
||||
task.status = TaskStatus.SUCCESS
|
||||
task.finishedAt = datetime.now(UTC).isoformat()
|
||||
|
||||
except Exception as e:
|
||||
task.status = TaskStatus.FAILED
|
||||
task.error = str(e)
|
||||
|
||||
class AIPromptManager:
|
||||
"""Manages AI prompts and response validation"""
|
||||
|
||||
def generatePrompt(self, context: Dict[str, Any], examples: List[Dict]) -> str:
|
||||
"""Generate a context-aware prompt with few-shot examples"""
|
||||
prompt = (
|
||||
f"Task: {context['task']}\n"
|
||||
f"Document: {context['document']['name']} ({context['document']['type']})\n"
|
||||
"Examples:\n"
|
||||
)
|
||||
for ex in examples:
|
||||
prompt += f"- {ex['input']} => {ex['output']}\n"
|
||||
prompt += "Extract the most relevant information for the task above."
|
||||
return prompt
|
||||
|
||||
def validateResponse(self, response: str, schema: Dict) -> bool:
|
||||
"""Validate AI response against a schema"""
|
||||
import jsonschema
|
||||
try:
|
||||
jsonschema.validate(instance=response, schema=schema)
|
||||
return True
|
||||
except jsonschema.ValidationError:
|
||||
return False
|
||||
|
||||
class TaskStateManager:
|
||||
"""Manages task state and retry tracking"""
|
||||
|
||||
def __init__(self):
|
||||
self.taskStates = {}
|
||||
|
||||
def trackState(self, task: AgentTask):
|
||||
"""Track task state"""
|
||||
self.taskStates[task.id] = {
|
||||
"status": task.status,
|
||||
"retryState": getattr(task, "retryState", {}),
|
||||
"history": getattr(task, "history", [])
|
||||
}
|
||||
|
||||
def canRetry(self, task: AgentTask, method: str) -> bool:
|
||||
"""Check if task can be retried"""
|
||||
retryState = self.taskStates[task.id].get("retryState", {})
|
||||
return retryState.get(method, 0) < getattr(task, "retryMax", 3)
|
||||
|
||||
class DocumentContext(BaseModel):
|
||||
"""Model for document context"""
|
||||
id: str
|
||||
extractionHistory: List[Dict]
|
||||
relevantSections: List[str]
|
||||
processingStatus: Dict[str, str]
|
||||
|
||||
class DocumentExtraction:
|
||||
"""Processes documents with context awareness"""
|
||||
|
||||
def process_with_context(self, doc: Dict, context: DocumentContext) -> Dict:
|
||||
"""Process document with context"""
|
||||
extracted = {}
|
||||
for section in context.relevantSections:
|
||||
extracted[section] = doc.get(section)
|
||||
return extracted
|
||||
|
||||
def track_extraction(self, doc: Dict, extraction: Dict):
|
||||
"""Track document extraction"""
|
||||
if 'extractionHistory' not in doc:
|
||||
doc['extractionHistory'] = []
|
||||
doc['extractionHistory'].append(extraction)
|
||||
|
||||
class ErrorRecovery(BaseModel):
|
||||
"""Model for error recovery strategies"""
|
||||
strategy: str # e.g., "retry", "fallback", "skip"
|
||||
fallbackActions: List[str]
|
||||
contextPreservation: bool
|
||||
|
||||
```

### 1.3 Method-Based Module Structure
|
||||
```python
|
||||
# Example: methodSharepoint.py
|
||||
class MethodSharepoint:
|
||||
"""SharePoint method implementation"""
|
||||
|
||||
def __init__(self, service):
|
||||
self.service = service
|
||||
self.name = "sharepoint"
|
||||
self.description = "Search and process SharePoint documents"
|
||||
self.auth_source = AuthSource.MSFT # Requires Microsoft authentication
|
||||
|
||||
@property
|
||||
def actions(self) -> Dict[str, Dict[str, Any]]:
|
||||
"""Available actions and their parameters"""
|
||||
return {
|
||||
"search": {
|
||||
"description": "Search SharePoint documents",
|
||||
"retryMax": 3,
|
||||
"timeout": 30,
|
||||
"parameters": {
|
||||
"query": {"type": "string", "required": True},
|
||||
"site": {"type": "string", "required": False},
|
||||
"folder": {"type": "string", "required": False},
|
||||
"maxResults": {"type": "number", "required": False}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
||||
"""Execute SharePoint method"""
|
||||
if not auth_data:
|
||||
return {"success": False, "error": "Missing Microsoft authentication"}
|
||||
|
||||
if action == "search":
|
||||
return await self._searchDocuments(parameters, auth_data)
|
||||
return {"success": False, "error": f"Unknown action: {action}"}
|
||||
|
||||
async def _searchDocuments(self, parameters: Dict[str, Any], auth_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Search SharePoint documents"""
|
||||
# Implementation using existing SharePoint agent functionality
|
||||
pass
|
||||
|
||||
# Example: methodOutlook.py
|
||||
class MethodOutlook:
|
||||
"""Outlook method implementation"""
|
||||
|
||||
def __init__(self, service):
|
||||
self.service = service
|
||||
self.name = "outlook"
|
||||
self.description = "Handle Outlook email operations"
|
||||
|
||||
@property
|
||||
def actions(self) -> Dict[str, Dict[str, Any]]:
|
||||
"""Available actions and their parameters"""
|
||||
return {
|
||||
"readMails": {
|
||||
"description": "Read emails from specified folder",
|
||||
"retryMax": 2, # Action-specific retry limit
|
||||
"parameters": {
|
||||
"folder": {"type": "string", "required": False},
|
||||
"unreadOnly": {"type": "boolean", "required": False},
|
||||
"fromAddress": {"type": "string", "required": False},
|
||||
"maxResults": {"type": "number", "required": False}
|
||||
}
|
||||
},
|
||||
"sendMail": {
|
||||
"description": "Send an email",
|
||||
"retryMax": 1, # Action-specific retry limit
|
||||
"parameters": {
|
||||
"to": {"type": "array", "items": "string", "required": True},
|
||||
"subject": {"type": "string", "required": True},
|
||||
"body": {"type": "string", "required": True},
|
||||
"attachments": {"type": "array", "items": "FileRef", "required": False}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async def execute(self, action: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Execute Outlook method"""
|
||||
if action == "readMails":
|
||||
return await self._readMails(parameters)
|
||||
elif action == "sendMail":
|
||||
return await self._sendMail(parameters)
|
||||
return {"success": False, "error": f"Unknown action: {action}"}
|
||||
```
|
||||
|
||||
### 1.4 Key Data Objects
|
||||
```python
|
||||
class ChatWorkflow:
|
||||
id: str
|
||||
mandateId: str
|
||||
status: str
|
||||
name: Optional[str]
|
||||
currentRound: int
|
||||
lastActivity: str
|
||||
startedAt: str
|
||||
logs: List[ChatLog]
|
||||
messages: List[ChatMessage]
|
||||
stats: Optional[ChatStat]
|
||||
tasks: List[Task]
|
||||
|
||||
class AgentTask:
|
||||
id: str
|
||||
workflowId: str
|
||||
status: str # pending, success, failed, retry
|
||||
userInput: str # AI-processed summary
|
||||
dataList: List[Dict[str, str]] # User connections
|
||||
actionList: List[Dict[str, Any]] # Actions to execute, e.g.:
|
||||
# [
|
||||
# {
|
||||
# "method": "sharepoint",
|
||||
# "action": "search",
|
||||
# "parameters": {
|
||||
# "query": "offerings",
|
||||
# "site": "valueon"
|
||||
# },
|
||||
# "retryCount": 0,
|
||||
# "retryMax": 3,
|
||||
# "status": "pending" # pending, success, failed, retry
|
||||
# },
|
||||
# {
|
||||
# "method": "outlook",
|
||||
# "action": "sendMail",
|
||||
# "parameters": {
|
||||
# "to": ["user@example.com"],
|
||||
# "subject": "Offer Summary",
|
||||
# "body": "..."
|
||||
# },
|
||||
# "retryCount": 0,
|
||||
# "retryMax": 1,
|
||||
# "status": "pending"
|
||||
# }
|
||||
# ]
|
||||
chatHistory: str # Summary of previous messages
|
||||
taskHistory: str # Summary of previous tasks
|
||||
previousTaskFeedback: Optional[str]
|
||||
thisTaskFeedback: Optional[str]
|
||||
result: Optional[ChatMessage]
|
||||
documentsInput: List[Dict]
|
||||
documentsOutput: List[Dict]
|
||||
startedAt: str
|
||||
finishedAt: Optional[str]
|
||||
error: Optional[str]
|
||||
dependencies: List[str] = [] # Task dependencies
|
||||
requiredOutputs: List[str] = [] # Required outputs from dependencies
|
||||
```
|
||||
|
||||
## 2. Process Flow
|
||||
|
||||
### 2.1 Initialization Phase
|
||||
```mermaid
|
||||
graph TD
|
||||
A[User Input] --> B[WorkflowManager.workflowProcess]
|
||||
B --> C[ChatManager.initialize]
|
||||
C --> D[Create ServiceCenter]
|
||||
D --> E[Create Initial Task]
|
||||
```
|
||||
|
||||
1. **WorkflowManager.workflowProcess**
|
||||
- Receives user input and workflow
|
||||
- Initializes chat manager
|
||||
- Starts task processing loop
|
||||
|
||||
2. **ChatManager.initialize**
|
||||
- Creates ServiceCenter with all required components
|
||||
- Initializes service interfaces
|
||||
- Sets up task and state management
|
||||
|
||||
### 2.2 Task Creation Phase
|
||||
|
||||
1. **Create Initial Task**
|
||||
```python
|
||||
async def createInitialTask(self, userInput: UserInputRequest) -> AgentTask:
|
||||
# 1. Get available methods and their actions
|
||||
available_methods = self._getAvailableMethods()
|
||||
method_catalog = {
|
||||
method.name: {
|
||||
"description": method.description,
|
||||
"actions": method.actions
|
||||
}
|
||||
for method in available_methods
|
||||
}
|
||||
|
||||
# 2. Process user input with AI including document analysis
|
||||
processedInput = await self.service.model['callAiBasic'](
|
||||
f"""Analyze user request and documents:
|
||||
User Prompt: {userInput.prompt}
|
||||
Documents: {userInput.listFileId}
|
||||
|
||||
Available Methods:
|
||||
{json.dumps(method_catalog, indent=2)}
|
||||
|
||||
Please provide:
|
||||
1. Main objective
|
||||
2. Required actions (using available methods and their actions)
|
||||
3. Required data sources
|
||||
4. Document processing requirements
|
||||
5. Expected output format
|
||||
|
||||
Format your response as JSON:
|
||||
{{
|
||||
"objective": "string",
|
||||
"actions": [
|
||||
{{
|
||||
"method": "string",
|
||||
"action": "string",
|
||||
"parameters": {{
|
||||
"param1": "value1",
|
||||
"param2": "value2"
|
||||
}}
|
||||
}}
|
||||
],
|
||||
"dataSources": ["string"],
|
||||
"documentRequirements": ["string"],
|
||||
"outputFormat": "string"
|
||||
}}
|
||||
"""
|
||||
)
|
||||
|
||||
# 3. Create task with processed input and initialize action states
|
||||
actions = []
|
||||
for action in processedInput['actions']:
|
||||
method = next(m for m in available_methods if m.name == action['method'])
|
||||
action_info = method.actions[action['action']]
|
||||
actions.append({
|
||||
**action,
|
||||
"retryCount": 0,
|
||||
"retryMax": action_info['retryMax'],
|
||||
"status": "pending"
|
||||
})
|
||||
|
||||
task = AgentTask(
|
||||
workflowId=self.service.workflow.id,
|
||||
userInput=processedInput,
|
||||
dataList=self.service.context['dataConnections'],
|
||||
actionList=actions,
|
||||
chatHistory=await self.workflowSummarize(userInput),
|
||||
startedAt=datetime.now(UTC).isoformat()
|
||||
)
|
||||
|
||||
# 4. Store in service
|
||||
self.service.tasks['current'] = task
|
||||
return task
|
||||
```
|
||||
|
||||
### 2.3 Task Execution Phase
|
||||
|
||||
1. **Execute Task**
|
||||
```python
|
||||
async def executeTask(self, task: AgentTask) -> None:
|
||||
"""Execute task actions in sequence"""
|
||||
for action in task.actionList:
|
||||
if action['status'] == 'pending':
|
||||
try:
|
||||
# Get method instance
|
||||
method = self.service.methods[action['method']]
|
||||
|
||||
# Execute action
|
||||
result = await method.execute(
|
||||
action['action'],
|
||||
action['parameters']
|
||||
)
|
||||
|
||||
if result['success']:
|
||||
action['status'] = 'success'
|
||||
else:
|
||||
if self._shouldRetry(result['error']):
|
||||
action['retryCount'] += 1
|
||||
if action['retryCount'] > action['retryMax']:
|
||||
action['status'] = 'failed'
|
||||
task.status = 'failed'
|
||||
task.error = "Maximum retries exceeded"
|
||||
else:
|
||||
action['status'] = 'retry'
|
||||
task.status = 'retry'
|
||||
else:
|
||||
action['status'] = 'failed'
|
||||
task.status = 'failed'
|
||||
task.error = result['error']
|
||||
|
||||
except Exception as e:
|
||||
action['status'] = 'failed'
|
||||
task.status = 'failed'
|
||||
task.error = str(e)
|
||||
|
||||
# Update task status based on action status
|
||||
if action['status'] == 'failed':
|
||||
break
|
||||
|
||||
# Mark task as complete if all actions succeeded
|
||||
if all(a['status'] == 'success' for a in task.actionList):
|
||||
task.status = 'success'
|
||||
task.finishedAt = datetime.now(UTC).isoformat()
|
||||
```
|
||||
|
||||
### 2.4 Task Analysis Phase
|
||||
|
||||
1. **Define Next Task**
|
||||
```python
|
||||
async def defineNextTask(self, currentTask: AgentTask) -> Optional[AgentTask]:
|
||||
try:
|
||||
# 1. Analyze current task results using basic AI
|
||||
analysis = await self.service.model['callAiBasic'](
|
||||
f"""Analyze task results and determine next steps:
|
||||
Previous Feedback: {currentTask.previousTaskFeedback}
|
||||
Current Feedback: {currentTask.thisTaskFeedback}
|
||||
User Input: {currentTask.userInput}
|
||||
Current Documents: {currentTask.documentsOutput}
|
||||
|
||||
Please provide:
|
||||
1. Task completion status
|
||||
2. Next required actions
|
||||
3. Required documents
|
||||
4. Method recommendations
|
||||
|
||||
Format your response as JSON:
|
||||
{{
|
||||
"isComplete": boolean,
|
||||
"nextActions": ["string"],
|
||||
"requiredDocuments": ["string"],
|
||||
"recommendedMethods": ["string"]
|
||||
}}
|
||||
"""
|
||||
)
|
||||
|
||||
# 2. Parse and validate AI response
|
||||
analysis_data = json.loads(analysis)
|
||||
|
||||
# 3. Determine if next task needed
|
||||
if not analysis_data["isComplete"]:
|
||||
# 4. Create next task
|
||||
nextTask = self._createNextTask(currentTask, analysis_data)
|
||||
self.service.tasks['previous'] = currentTask
|
||||
self.service.tasks['current'] = nextTask
|
||||
return nextTask
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error defining next task: {str(e)}")
|
||||
return None
|
||||
```
|
||||
|
||||
## 3. Method Integration
|
||||
|
||||
### 3.1 Method Registration
|
||||
```python
|
||||
def _registerMethods(self):
|
||||
"""Register available methods in service center"""
|
||||
self.service.methods = {
|
||||
"sharepoint": MethodSharepoint(self.service),
|
||||
"outlook": MethodOutlook(self.service),
|
||||
"web": MethodWeb(self.service),
|
||||
"document": MethodDocument(self.service)
|
||||
}
|
||||
```
|
||||
|
||||
### 3.2 Method Execution
|
||||
```python
|
||||
async def _executeMethod(self, method: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Execute a method with parameters"""
|
||||
try:
|
||||
# Get method implementation
|
||||
method_impl = self.service.methods.get(method)
|
||||
if not method_impl:
|
||||
return {"success": False, "error": f"Unknown method: {method}"}
|
||||
|
||||
# Execute method
|
||||
return await method_impl.execute(parameters)
|
||||
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
```
|
||||
|
||||
## 4. Error Handling
|
||||
|
||||
### 4.1 Error Types
|
||||
1. **AI Errors**
|
||||
- Model unavailable
|
||||
- Invalid response
|
||||
- Timeout
|
||||
|
||||
2. **Method Errors**
|
||||
- Invalid method
|
||||
- Execution failure
|
||||
- Resource unavailable
|
||||
|
||||
3. **Task Errors**
|
||||
- Invalid state
|
||||
- Missing data
|
||||
- Timeout
|
||||
|
||||
### 4.2 Retry Logic
|
||||
```python
|
||||
def _shouldRetry(self, error: str) -> bool:
|
||||
"""Determine if error is retryable"""
|
||||
retryable_errors = [
|
||||
"AI down",
|
||||
"Document not found",
|
||||
"Content extraction failed"
|
||||
]
|
||||
return any(err in error for err in retryable_errors)
|
||||
|
||||
def _shouldCreateNextTask(self, analysis: Dict[str, Any]) -> bool:
|
||||
"""Determine if next task is needed based on AI analysis"""
|
||||
return not analysis.get("isComplete", True)
|
||||
```
|
||||
|
||||
## 5. AI Integration Points
|
||||
|
||||
### 5.1 User Input Processing
|
||||
```python
|
||||
async def _processUserInput(self, input: str, documents: List[str]) -> str:
|
||||
"""Process user input including document analysis"""
|
||||
context = {
|
||||
"task": "Process user input",
|
||||
"document": {"name": "User Input", "type": "text"}
|
||||
}
|
||||
examples = [
|
||||
{"input": "Search documents", "output": "Extract relevant information"}
|
||||
]
|
||||
prompt = self.service.promptManager.generatePrompt(context, examples)
|
||||
|
||||
return await self.service.model['callAiBasic'](
|
||||
f"""Analyze user request and documents:
|
||||
User Input: {input}
|
||||
Documents: {documents}
|
||||
|
||||
{prompt}
|
||||
|
||||
Please provide:
|
||||
1. Main objective
|
||||
2. Required actions
|
||||
3. Required data sources
|
||||
4. Document processing requirements
|
||||
5. Expected output format
|
||||
|
||||
Format your response as JSON:
|
||||
{{
|
||||
"objective": "string",
|
||||
"actions": ["string"],
|
||||
"dataSources": ["string"],
|
||||
"documentRequirements": ["string"],
|
||||
"outputFormat": "string"
|
||||
}}
|
||||
"""
|
||||
)
|
||||
```
|
||||
|
||||
### 5.2 Task Analysis
|
||||
```python
|
||||
async def _analyzeTaskResults(self, task: AgentTask) -> str:
|
||||
"""Analyze task results and determine next steps"""
|
||||
context = {
|
||||
"task": "Analyze task results",
|
||||
"document": {"name": "Task Results", "type": "json"}
|
||||
}
|
||||
examples = [
|
||||
{"input": "Task completed", "output": "Generate next steps"}
|
||||
]
|
||||
prompt = self.service.promptManager.generatePrompt(context, examples)
|
||||
|
||||
return await self.service.model['callAiBasic'](
|
||||
f"""Analyze task results and determine next steps:
|
||||
Task Input: {task.userInput}
|
||||
Previous Feedback: {task.previousTaskFeedback}
|
||||
Current Feedback: {task.thisTaskFeedback}
|
||||
Current Documents: {task.documentsOutput}
|
||||
|
||||
{prompt}
|
||||
|
||||
Please provide:
|
||||
1. Task completion status
|
||||
2. Next required actions
|
||||
3. Required documents
|
||||
4. Method recommendations
|
||||
|
||||
Format your response as JSON:
|
||||
{{
|
||||
"isComplete": boolean,
|
||||
"nextActions": ["string"],
|
||||
"requiredDocuments": ["string"],
|
||||
"recommendedMethods": ["string"]
|
||||
}}
|
||||
"""
|
||||
)
|
||||
```
|
||||
|
||||
### 5.3 Result Processing
|
||||
```python
|
||||
async def _processTaskResults(self, task: AgentTask) -> str:
|
||||
"""Process task results and generate feedback"""
|
||||
context = {
|
||||
"task": "Process task results",
|
||||
"document": {"name": "Task Results", "type": "json"}
|
||||
}
|
||||
examples = [
|
||||
{"input": "Task results", "output": "Generate summary"}
|
||||
]
|
||||
prompt = self.service.promptManager.generatePrompt(context, examples)
|
||||
|
||||
return await self.service.model['callAiBasic'](
|
||||
f"""Process task results and generate feedback:
|
||||
Task Input: {task.userInput}
|
||||
Method Results: {task.result}
|
||||
Generated Documents: {task.documentsOutput}
|
||||
|
||||
{prompt}
|
||||
|
||||
Please provide:
|
||||
1. Summary of completed actions
|
||||
2. Generated document descriptions
|
||||
3. Next steps or completion status
|
||||
|
||||
Format your response as JSON:
|
||||
{{
|
||||
"summary": "string",
|
||||
"documents": ["string"],
|
||||
"nextSteps": ["string"]
|
||||
}}
|
||||
"""
|
||||
)
|
||||
```
|
||||
|
||||
## 6. File Structure and Implementation Plan
|
||||
|
||||
### 6.1 File Structure
|
||||
```
|
||||
gateway/
|
||||
├── modules/
|
||||
│ ├── workflow/
|
||||
│ │ ├── managerWorkflow.py # Workflow management and state machine
|
||||
│ │ ├── managerChat.py # Chat management and AI response validation
|
||||
│ │ ├── managerPrompt.py # AI prompt generation and management
|
||||
│ │ ├── methodBase.py # Base method class with result validation
|
||||
│ │ └── documentExtraction.py # Document content extraction
|
||||
│ │
|
||||
│ ├── agents/ # To be refactored into methods
|
||||
│ │ ├── agentSharepoint.py → methods/methodSharepoint.py
|
||||
│ │ ├── agentOutlook.py → methods/methodOutlook.py
|
||||
│ │ ├── agentWebcrawler.py → methods/methodWeb.py
|
||||
│ │ ├── agentDocument.py → methods/methodDocument.py
|
||||
│ │ └── agentCoder.py → methods/methodCoder.py
|
||||
│ │
|
||||
│ ├── methods/ # New directory for method implementations
|
||||
│ │ ├── methodSharepoint.py # SharePoint operations
|
||||
│ │ ├── methodOutlook.py # Outlook operations
|
||||
│ │ ├── methodWeb.py # Web operations
|
||||
│ │ ├── methodDocument.py # Document operations
|
||||
│ │ ├── methodCoder.py # Code generation operations
|
||||
│ │ └── methodPowerpoint.py # PowerPoint operations
|
||||
│ │
|
||||
│ └── interfaces/
|
||||
│ ├── interfaceChatModel.py # Chat system models and enums
|
||||
│ └── interfaceAppModel.py # Application models including UserConnection
|
||||
```
|
||||
|
||||
### 6.2 Implementation Plan
|
||||
|
||||
#### Phase 1: Core Structure Setup
|
||||
1. **File Renaming and Organization**
|
||||
- Rename manager files to follow `manager*.py` pattern
|
||||
- Move document processor to `documentExtraction.py`
|
||||
- Create new `methods` directory
|
||||
|
||||
2. **Model Updates**
|
||||
- Update `interfaceChatModel.py` with new enums and models
|
||||
- Integrate `UserConnection` from `interfaceAppModel.py`
|
||||
- Update validation logic in respective modules
|
||||
|
||||
#### Phase 2: Method Migration
|
||||
1. **Base Method Implementation**
|
||||
- Implement `methodBase.py` with core functionality
|
||||
- Add method result validation
|
||||
- Set up authentication handling
|
||||
|
||||
2. **Agent to Method Conversion**
|
||||
- Convert each agent to its method implementation
|
||||
- Migrate functionality while maintaining existing behavior
|
||||
- Add method-specific validation
|
||||
|
||||
3. **New Method Implementation**
|
||||
- Implement `methodPowerpoint.py`
|
||||
- Add PowerPoint-specific operations
|
||||
- Integrate with document processing
|
||||
|
||||
#### Phase 3: Manager Updates
|
||||
1. **Chat Manager Enhancement**
|
||||
- Integrate AI response validation
|
||||
- Update service center structure
|
||||
- Improve error handling
|
||||
|
||||
2. **Document Manager Integration**
|
||||
- Update document operations for new method structure
|
||||
- Enhance content extraction capabilities
|
||||
- Improve file handling
|
||||
|
||||
3. **Workflow Manager Updates**
|
||||
- Update state machine for method-based approach
|
||||
- Improve task management
|
||||
- Enhance error recovery
|
||||
|
||||
#### Phase 4: Testing and Validation
|
||||
1. **Unit Testing**
|
||||
- Test each method implementation
|
||||
- Validate error handling
|
||||
- Verify authentication flow
|
||||
|
||||
2. **Integration Testing**
|
||||
- Test method interactions
|
||||
- Validate document processing
|
||||
- Verify workflow execution
|
||||
|
||||
3. **Performance Testing**
|
||||
- Measure response times
|
||||
- Validate resource usage
|
||||
- Test concurrent operations
|
||||
|
||||
#### Phase 5: Documentation and Cleanup
|
||||
1. **Documentation**
|
||||
- Update API documentation
|
||||
- Document method implementations
|
||||
- Add usage examples
|
||||
|
||||
2. **Code Cleanup**
|
||||
- Remove deprecated code
|
||||
- Clean up old agent files
|
||||
- Optimize imports
|
||||
|
||||
3. **Final Review**
|
||||
- Code review
|
||||
- Security audit
|
||||
- Performance optimization
|
||||
|
||||
### 6.3 Migration Strategy
|
||||
1. **Incremental Migration**
|
||||
- Migrate one agent at a time
|
||||
- Maintain backward compatibility
|
||||
- Use feature flags for gradual rollout
|
||||
|
||||
2. **Testing Strategy**
|
||||
- Unit tests for each method
|
||||
- Integration tests for workflows
|
||||
- End-to-end tests for complete scenarios
|
||||
|
||||
3. **Rollback Plan**
|
||||
- Keep old agent implementations until stable
|
||||
- Maintain version control
|
||||
- Document rollback procedures
|
||||
|
||||
### 6.4 Success Criteria
|
||||
1. **Functionality**
|
||||
- All existing features working
|
||||
- New method-based structure operational
|
||||
- Improved error handling
|
||||
|
||||
2. **Performance**
|
||||
- Equal or better response times
|
||||
- Reduced resource usage
|
||||
- Improved scalability
|
||||
|
||||
3. **Maintainability**
|
||||
- Clear code structure
|
||||
- Comprehensive documentation
|
||||
- Easy to extend
|
||||
|
||||
4. **Security**
|
||||
- Proper authentication handling
|
||||
- Secure data processing
|
||||
- Access control implementation
|
||||
13
pytest.ini
Normal file
13
pytest.ini
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
[pytest]
testpaths = tests
# Built-in pytest option (7.0+): directories added to the head of sys.path,
# relative to the rootdir. Replaces `python_paths`, which was only understood
# by the retired pytest-pythonpath plugin.
pythonpath = .
python_files = test_*.py
python_classes = Test*
python_functions = test_*
log_file = logs/test_logs.log
log_file_level = INFO
log_file_format = %(asctime)s %(levelname)s %(message)s
log_file_date_format = %Y-%m-%d %H:%M:%S
# Only run non-expensive tests by default, verbose log, short traceback
# Use 'pytest -m ""' to run ALL tests.
addopts = -v --tb=short -m 'not expensive'
|
||||
|
|
@ -42,6 +42,7 @@ requests==2.31.0
|
|||
chardet>=5.0.0 # Für Zeichensatzerkennung bei Webinhalten
|
||||
aiohttp>=3.8.0 # Required for SharePoint operations (async HTTP)
|
||||
selenium>=4.15.0 # Required for web automation and JavaScript-heavy pages
|
||||
tavily-python==0.7.11 # Tavily SDK
|
||||
|
||||
## Image Processing
|
||||
Pillow>=10.0.0 # Für Bildverarbeitung (als PIL importiert)
|
||||
|
|
@ -67,3 +68,26 @@ PyPDF2>=3.0.0
|
|||
PyMuPDF>=1.20.0
|
||||
beautifulsoup4>=4.11.0
|
||||
chardet>=4.0.0 # For encoding detection
|
||||
|
||||
## Testing Dependencies
|
||||
pytest>=8.0.0
|
||||
pytest-asyncio>=0.21.0
|
||||
|
||||
## Missing Dependencies for IPython and other tools
|
||||
decorator>=5.0.0
|
||||
jedi>=0.16
|
||||
matplotlib-inline>=0.1.0
|
||||
stack-data>=0.1.0
|
||||
traitlets>=5.0.0
|
||||
docutils>=0.13.1
|
||||
markdown-it-py>=2.2.0
|
||||
keyring>=15.1
|
||||
pkginfo>=1.8.1
|
||||
|
||||
## Missing Dependencies for Panel 1.3.8
|
||||
bleach>=4.0.0
|
||||
bokeh>=3.2.0,<3.4.0
|
||||
linkify-it-py>=1.0.0
|
||||
mdit-py-plugins>=0.3.0
|
||||
pyviz-comms>=2.0.0
|
||||
xyzservices>=2021.09.1
|
||||
|
|
@ -1,855 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for DocumentExtraction class.
|
||||
Processes all files in d:/temp folder and stores extracted content in d:/temp/extracted.
|
||||
|
||||
Features:
|
||||
- Option to extract content WITH AI processing (default)
|
||||
- Option to extract content WITHOUT AI processing (content-only mode)
|
||||
- Supports all document types: text, images, PDFs, Office documents, etc.
|
||||
- Detailed logging and progress tracking
|
||||
- Separate output directories for AI vs content-only modes
|
||||
|
||||
Usage:
|
||||
- Interactive mode: python test_documentExtraction.py
|
||||
- Content-only mode: python test_documentExtraction.py --no-ai
|
||||
- Content-only mode: python test_documentExtraction.py --content-only
|
||||
- Specify custom input/output: python test_documentExtraction.py --input-dir /path/to/input --output-dir /path/to/output --no-ai
|
||||
"""
|
||||
|
||||
import os
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
from datetime import datetime, UTC
|
||||
|
||||
# Configure root logging for this script: every record is printed with a
# timestamp, its level and the message.
logging.basicConfig(
    level=logging.DEBUG,  # DEBUG so the logger.debug() diagnostics below are emitted
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# Module-level logger used throughout this script
logger = logging.getLogger(__name__)
|
||||
|
||||
# Filter out specific unwanted log messages
|
||||
class LogFilter(logging.Filter):
    """Suppress a fixed set of noisy log records."""

    def filter(self, record):
        """Return False for records that should be hidden, True for all others."""
        text = record.getMessage()

        # Workflow stats update errors
        is_stats_noise = "Workflow" in text and "not found for stats update" in text
        # HTTP request info messages for OpenAI calls
        is_request_noise = "HTTP Request:" in text and "POST https://api.openai.com" in text
        # HTTP response info messages
        is_response_noise = "HTTP/1.1 200 OK" in text

        return not (is_stats_noise or is_request_noise or is_response_noise)
|
||||
|
||||
# Apply the filter to the root logger and to each of its handlers.
# A filter attached to a logger is only consulted for records logged directly
# on that logger; records propagated from descendant loggers (this module's
# logger, HTTP client libraries, ...) bypass it. Attaching the same filter to
# the root handlers covers those propagated records as well.
root_logger = logging.getLogger()
_log_filter = LogFilter()
root_logger.addFilter(_log_filter)
for _handler in root_logger.handlers:
    _handler.addFilter(_log_filter)
|
||||
|
||||
def check_dependencies():
    """Check that the third-party packages used for extraction can be imported.

    Every package is probed (no short-circuit) so a single run reports the
    complete list of missing packages together with installation hints.

    Returns:
        bool: True when every package imports successfully, False otherwise.
    """
    import importlib  # local import: only this probe needs it

    # (module to import, pip package name, label used in the log messages)
    required_packages = (
        ("bs4", "beautifulsoup4", "beautifulsoup4"),
        ("PyPDF2", "PyPDF2", "PyPDF2"),
        ("fitz", "PyMuPDF", "PyMuPDF (fitz)"),
        ("docx", "python-docx", "python-docx"),
        ("openpyxl", "openpyxl", "openpyxl"),
        ("pptx", "python-pptx", "python-pptx"),
        ("PIL.Image", "Pillow", "Pillow (PIL)"),
    )

    missing_deps = []
    for module_name, package_name, label in required_packages:
        try:
            importlib.import_module(module_name)
        except ImportError:
            missing_deps.append(package_name)
            logger.error(f"{label} is missing")
        else:
            logger.info(f"{label} is available")

    if not missing_deps:
        logger.info("All required dependencies are available!")
        return True

    banner = "=" * 60
    logger.error("\n" + banner)
    logger.error("MISSING DEPENDENCIES DETECTED!")
    logger.error(banner)
    logger.error("The following packages are required but not installed:")
    for dep in missing_deps:
        logger.error(f" - {dep}")
    logger.error("\nTo install all dependencies, run:")
    logger.error("pip install -r requirements.txt")
    logger.error("\nOr install individual packages:")
    # The install hint is the same for every package, so no per-package branching
    for dep in missing_deps:
        logger.error(f" pip install {dep}")
    logger.error(banner)
    return False
|
||||
|
||||
def check_module_imports():
    """Verify that the project modules this script relies on can be imported.

    The directory three levels above this file is appended to sys.path before
    the imports are attempted.

    Returns:
        bool: True if every import succeeded, False otherwise (the failure
        is logged).
    """
    try:
        # Add the gateway directory to the path so we can import our modules
        gateway_root = os.path.join(os.path.dirname(__file__), '..', '..', '..')
        sys.path.append(gateway_root)

        from modules.chat.documents.documentExtraction import DocumentExtraction
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserConnection
        from modules.interfaces.interfaceChatModel import ChatWorkflow, TaskItem
    except ImportError as import_error:
        logger.error(f"Failed to import required modules: {import_error}")
        logger.error("Make sure you're running this script from the gateway directory")
        return False
    except Exception as unexpected_error:
        logger.error(f"Unexpected error importing modules: {unexpected_error}")
        return False
    else:
        logger.info("All required modules imported successfully")
        return True
|
||||
|
||||
def create_mock_service_center():
    """Build a ServiceCenter around a test user and a test workflow.

    Returns:
        The constructed ServiceCenter, or None if importing the project
        modules or constructing any of the objects fails (the error is logged).
    """
    try:
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
        from modules.interfaces.interfaceChatModel import (
            ChatWorkflow, TaskItem, TaskStatus, ChatLog, ChatMessage, ChatStat,
        )

        # Test user with all required fields
        user_fields = {
            "id": "test_user_001",
            "username": "testuser",
            "email": "test@example.com",
            "fullName": "Test User",
            "language": "en",
            "enabled": True,
            "privilege": UserPrivilege.USER,
            "authenticationAuthority": AuthAuthority.LOCAL,
            "mandateId": "test_mandate_001",
        }
        test_user = User(**user_fields)

        # Test workflow; start and last-activity share one timestamp
        now_iso = datetime.now(UTC).isoformat()
        workflow_fields = {
            "id": "test_workflow_001",
            "mandateId": "test_mandate_001",
            "status": "active",
            "name": "Test Document Extraction Workflow",
            "currentRound": 1,
            "lastActivity": now_iso,
            "startedAt": now_iso,
            "logs": [],
            "messages": [],
            "stats": None,
            "tasks": [],
        }
        test_workflow = ChatWorkflow(**workflow_fields)

        center = ServiceCenter(test_user, test_workflow)
        logger.info("ServiceCenter created successfully with proper objects")
        return center

    except Exception as error:
        logger.error(f"Failed to create ServiceCenter: {error}")
        return None
|
||||
|
||||
class DocumentExtractionTester:
|
||||
"""Test class for DocumentExtraction functionality."""
|
||||
|
||||
def __init__(self, input_dir: str = "d:/temp/test-extraction", output_dir: Optional[str] = None, enable_ai: bool = True):
    """
    Initialize the tester.

    Creates the output directory, logs the configuration and runs a small
    write/read probe in the output directory. Probe failures are logged but
    never abort construction.

    Args:
        input_dir: Directory containing files to process
        output_dir: Directory to store extracted content. When None it is
            derived from input_dir: "extracted" with AI enabled,
            "extracted-raw" otherwise.
        enable_ai: Whether to enable AI processing (default: True)
    """
    self.input_dir = Path(input_dir)

    # Auto-generate output directory if not specified
    if output_dir is None:
        default_name = "extracted" if enable_ai else "extracted-raw"
        self.output_dir = Path(input_dir) / default_name
    else:
        self.output_dir = Path(output_dir)

    self.extractor = None
    self.service_center = None
    self.enable_ai = enable_ai

    # No prompt is needed for content-only extraction
    self.prompt = (
        "Make a summary of each sentence for each page or chapter of the document"
        if enable_ai
        else None
    )

    # Track processing results for summary
    self.processing_results = []

    # Ensure output directory exists
    logger.info(f"Creating output directory: {self.output_dir}")
    self.output_dir.mkdir(parents=True, exist_ok=True)

    # Verify directory was created
    if self.output_dir.exists():
        logger.info(f"Output directory created/verified: {self.output_dir}")
        logger.info(f"Output directory absolute path: {self.output_dir.absolute()}")
    else:
        logger.error(f"Failed to create output directory: {self.output_dir}")

    # Log configuration
    logger.info(f"Configuration: AI processing = {'ENABLED' if self.enable_ai else 'DISABLED'}")
    logger.info(f"Input directory: {self.input_dir}")
    logger.info(f"Output directory: {self.output_dir}")

    # Test basic file writing capability
    test_file = self.output_dir / "test_write_capability.txt"
    try:
        logger.info(f"Testing file write capability to: {test_file}")
        logger.info(f"Absolute path: {test_file.absolute()}")

        with open(test_file, 'w', encoding='utf-8') as f:
            f.write("Test file to verify write capability")

        if test_file.exists():
            actual_size = test_file.stat().st_size
            logger.info(f"Basic file writing test passed: {test_file} (size: {actual_size} bytes)")

            # Test reading the file back
            with open(test_file, 'r', encoding='utf-8') as f:
                content = f.read()
            logger.info(f"File read test passed: content length = {len(content)}")
        else:
            logger.error(f"Basic file writing test failed: {test_file}")
    except Exception as e:
        logger.error(f"Basic file writing test failed with error: {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Always remove the probe file, also when the read-back step failed,
        # so it is not left behind in the output directory.
        try:
            if test_file.exists():
                test_file.unlink()
                logger.info("Test file cleaned up")
        except OSError as cleanup_error:
            logger.error(f"Could not remove test file {test_file}: {cleanup_error}")

    # Supported file extensions for content extraction
    self.supported_extensions = {
        # Text and data files
        '.txt', '.csv', '.json', '.xml', '.html', '.htm', '.svg',
        '.md', '.markdown', '.rst', '.log', '.ini', '.cfg', '.conf',

        # Programming languages
        '.js', '.ts', '.jsx', '.tsx', '.py', '.java', '.c', '.cpp', '.cc', '.cxx',
        '.h', '.hpp', '.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.scala',
        '.r', '.m', '.pl', '.sh', '.bash', '.zsh', '.fish', '.ps1', '.bat', '.cmd',
        '.vbs', '.lua', '.sql', '.dart', '.elm', '.clj', '.hs', '.fs', '.ml',

        # Web technologies
        '.css', '.scss', '.sass', '.less', '.vue', '.svelte', '.astro',

        # Configuration and build files
        '.yaml', '.yml', '.toml', '.env', '.gitignore', '.dockerfile', '.dockerignore',
        '.makefile', '.cmake', '.gradle', '.maven', '.pom', '.sln', '.vcxproj',
        '.csproj', '.fsproj', '.vbproj', '.xcodeproj', '.pbxproj',

        # Documentation and markup
        '.tex', '.bib', '.adoc', '.asciidoc', '.wiki', '.creole',

        # Images
        '.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff', '.ico',

        # Documents
        '.pdf', '.docx', '.xlsx', '.pptx', '.odt', '.ods', '.odp',

        # Legacy Office formats
        '.doc', '.xls', '.ppt',

        # Archives and binaries
        '.zip', '.tar', '.gz', '.7z', '.rar', '.exe', '.dll', '.so', '.dylib'
    }
|
||||
|
||||
def initialize_extractor(self):
    """Create the ServiceCenter and the DocumentExtraction instance built on it.

    Returns:
        bool: True when both objects were created, False otherwise (the
        failure is logged).
    """
    try:
        # The extractor needs a service center, so build that first
        center = create_mock_service_center()
        self.service_center = center
        if not center:
            logger.error("Failed to create ServiceCenter!")
            return False

        from modules.chat.documents.documentExtraction import DocumentExtraction

        self.extractor = DocumentExtraction(center)
        logger.info("DocumentExtraction initialized successfully with ServiceCenter")
        return True
    except Exception as error:
        logger.error(f"Failed to initialize DocumentExtraction: {error}")
        return False
|
||||
|
||||
def get_files_to_process(self) -> List[Path]:
    """Get list of files to process from input directory.

    If the input directory does not exist it is created and seeded with a
    small text file, so there is something to process. Only regular files
    whose lower-cased suffix is in self.supported_extensions are returned;
    subdirectories are not traversed.

    Returns:
        Paths of the supported files found directly in the input directory.
    """
    if not self.input_dir.exists():
        logger.error(f"Input directory {self.input_dir} does not exist!")
        logger.info("Creating input directory and adding a test file...")
        self.input_dir.mkdir(parents=True, exist_ok=True)

        # Create a test file if none exist
        test_file = self.input_dir / "test.txt"
        # Explicit UTF-8: the content contains non-ASCII characters, and the
        # platform default encoding (e.g. cp1252 on Windows) would write
        # bytes that are not valid UTF-8.
        with open(test_file, 'w', encoding='utf-8') as f:
            f.write("This is a test file for document extraction.\nIt contains multiple lines.\nAnd some special characters: äöüß")
        logger.info(f"Created test file: {test_file}")

    files = []
    all_files = list(self.input_dir.iterdir())
    logger.info(f"All files in directory: {[f.name for f in all_files]}")

    for file_path in all_files:
        if not file_path.is_file():
            continue
        logger.debug(f"Checking file: {file_path.name} (extension: {file_path.suffix})")
        if file_path.suffix.lower() in self.supported_extensions:
            files.append(file_path)
            logger.debug(f"Added file: {file_path.name}")
        else:
            logger.debug(f"Skipped file: {file_path.name} (unsupported extension)")

    logger.info(f"Found {len(files)} supported files to process")
    if files:
        logger.info(f"Files to process: {[f.name for f in files]}")
    return files
|
||||
|
||||
async def process_single_file(self, file_path: Path) -> bool:
    """
    Process a single file and write each extracted content item to its own text file.

    The outcome (status, output file names, total size) is appended to
    ``self.processing_results`` as a dict.

    Args:
        file_path: Path to the file to process

    Returns:
        True if content was extracted; False if the extractor is not
        initialized, no content was extracted, or an exception occurred
    """
    if not self.extractor:
        logger.error("DocumentExtraction not initialized!")
        return False

    try:
        logger.info(f"Processing file: {file_path.name}")

        # Read file data
        with open(file_path, 'rb') as f:
            file_data = f.read()

        logger.debug(f"File size: {len(file_data)} bytes")

        # Determine MIME type based on extension
        mime_type = self._get_mime_type(file_path.suffix)
        logger.debug(f"MIME type: {mime_type}")

        # Process the file with or without AI based on configuration
        extracted_content = await self.extractor.processFileData(
            fileData=file_data,
            fileName=file_path.name,
            mimeType=mime_type,
            base64Encoded=False,
            prompt=self.prompt,
            enableAI=self.enable_ai
        )

        logger.debug(f"Extracted {len(extracted_content.contents)} content items")

        # Debug: Show content details
        for i, content_item in enumerate(extracted_content.contents):
            logger.debug(f"Content item {i+1}: label='{content_item.label}', has_data={content_item.data is not None}, data_length={len(content_item.data) if content_item.data else 0}")

        # Special logging for JavaScript files
        if mime_type == "application/javascript":
            logger.debug(f"JavaScript file detected: {file_path.name}")
            logger.debug(f"Original file size: {len(file_data)} bytes")
            for i, content_item in enumerate(extracted_content.contents):
                if content_item.data:
                    content_size = len(content_item.data.encode('utf-8'))
                    logger.debug(f"JavaScript content item {i+1}: {content_size} bytes")
                    # Check if content was truncated
                    if content_size < len(file_data) * 0.9:  # If less than 90% of original
                        logger.warning(f"JavaScript content may be truncated: {content_size} bytes vs {len(file_data)} bytes original")

        # Track processing result
        result = {
            'fileName': file_path.name,
            'status': 'OK',
            'content_items': 0,
            'output_files': [],
            'total_content_size': 0
        }

        # Save each content item as a separate file
        if extracted_content.contents:
            for i, content_item in enumerate(extracted_content.contents):
                if not content_item.data:
                    logger.warning(f"Content item {i+1} has no data, skipping")
                    continue

                content_size = len(content_item.data.encode('utf-8'))
                result['total_content_size'] += content_size
                logger.debug(f"Content item {i+1}: {content_item.label}, size: {content_size} bytes")

                # The sequence number is always appended; a lone content item
                # has index 0 and therefore gets the suffix "1".
                output_fileName = f"{file_path.stem} - {content_item.label} {i+1}.txt"
                output_file = self.output_dir / output_fileName

                # Write only the raw extracted content
                logger.debug(f"Attempting to write to: {output_file}")
                try:
                    with open(output_file, 'w', encoding='utf-8') as f:
                        f.write(content_item.data)

                    # Verify file was created
                    if output_file.exists():
                        actual_size = output_file.stat().st_size
                        logger.info(f"File created successfully: {output_fileName} (expected: {content_size} bytes, actual: {actual_size} bytes)")
                    else:
                        logger.error(f"File was not created: {output_file}")

                    result['output_files'].append(output_fileName)
                    result['content_items'] += 1
                except Exception as write_error:
                    # A failed write is reported but does not abort the remaining items.
                    logger.error(f"Error writing file {output_fileName}: {write_error}")
                    import traceback
                    traceback.print_exc()
        else:
            logger.warning(f"No content extracted from {file_path.name}")
            result['status'] = 'FAIL'
            result['error'] = 'No content extracted'

        # Add result to tracking list
        self.processing_results.append(result)

        # Keep the return value in line with the recorded status so the
        # caller's success/failure counters match the per-file summary.
        if result['status'] != 'OK':
            return False

        logger.info(f"Successfully processed {file_path.name} - Total content: {result['total_content_size']} bytes")
        return True

    except Exception as e:
        error_msg = str(e)
        logger.error(f"Error processing {file_path.name}: {error_msg}")

        # Track failed result
        result = {
            'fileName': file_path.name,
            'status': 'FAIL',
            'content_items': 0,
            'output_files': [],
            'error': error_msg,
            'total_content_size': 0
        }
        self.processing_results.append(result)

        return False
|
||||
|
||||
def _get_mime_type(self, extension: str) -> str:
    """Return the MIME type for a file extension given with its leading dot.

    The lookup ignores case. Extensions that are not listed resolve to
    ``application/octet-stream``.
    """
    # Each entry pairs one MIME type with every extension that maps to it.
    extensions_by_mime = (
        # Text and data files
        ('text/plain', ('.txt', '.log', '.ini', '.cfg', '.conf', '.env',
                        '.gitignore', '.dockerignore', '.sln', '.xcodeproj',
                        '.pbxproj')),
        ('text/csv', ('.csv',)),
        ('application/json', ('.json',)),
        ('application/xml', ('.xml', '.pom', '.vcxproj', '.csproj',
                             '.fsproj', '.vbproj')),
        ('text/html', ('.html', '.htm')),
        ('image/svg+xml', ('.svg',)),
        ('text/markdown', ('.md', '.markdown')),
        ('text/x-rst', ('.rst',)),

        # Programming languages
        ('application/javascript', ('.js',)),
        ('application/typescript', ('.ts',)),
        ('text/jsx', ('.jsx',)),
        ('text/tsx', ('.tsx',)),
        ('text/x-python', ('.py',)),
        ('text/x-java-source', ('.java',)),
        ('text/x-c', ('.c', '.h')),
        ('text/x-c++src', ('.cpp', '.cc', '.cxx')),
        ('text/x-c++hdr', ('.hpp',)),
        ('text/x-csharp', ('.cs',)),
        ('application/x-httpd-php', ('.php',)),
        ('text/x-ruby', ('.rb',)),
        ('text/x-go', ('.go',)),
        ('text/x-rust', ('.rs',)),
        ('text/x-swift', ('.swift',)),
        ('text/x-kotlin', ('.kt',)),
        ('text/x-scala', ('.scala',)),
        ('text/x-r', ('.r',)),
        ('text/x-matlab', ('.m',)),
        ('text/x-perl', ('.pl',)),
        ('application/x-sh', ('.sh', '.bash', '.zsh', '.fish')),
        ('application/x-powershell', ('.ps1',)),
        ('application/x-msdos-program', ('.bat', '.cmd')),
        ('text/vbscript', ('.vbs',)),
        ('text/x-lua', ('.lua',)),
        ('application/sql', ('.sql',)),
        ('application/dart', ('.dart',)),
        ('text/x-elm', ('.elm',)),
        ('text/x-clojure', ('.clj',)),
        ('text/x-haskell', ('.hs',)),
        ('text/x-fsharp', ('.fs',)),
        ('text/x-ocaml', ('.ml',)),

        # Web technologies
        ('text/css', ('.css',)),
        ('text/x-scss', ('.scss',)),
        ('text/x-sass', ('.sass',)),
        ('text/x-less', ('.less',)),
        ('text/x-vue', ('.vue',)),
        ('text/x-svelte', ('.svelte',)),
        ('text/x-astro', ('.astro',)),

        # Configuration and build files
        ('application/x-yaml', ('.yaml', '.yml')),
        ('application/toml', ('.toml',)),
        ('text/x-dockerfile', ('.dockerfile',)),
        ('text/x-makefile', ('.makefile',)),
        ('text/x-cmake', ('.cmake',)),
        ('text/x-gradle', ('.gradle',)),
        ('text/x-maven', ('.maven',)),

        # Documentation and markup
        ('application/x-tex', ('.tex',)),
        ('text/x-bibtex', ('.bib',)),
        ('text/asciidoc', ('.adoc', '.asciidoc')),
        ('text/x-wiki', ('.wiki', '.creole')),

        # Images
        ('image/jpeg', ('.jpg', '.jpeg')),
        ('image/png', ('.png',)),
        ('image/gif', ('.gif',)),
        ('image/webp', ('.webp',)),
        ('image/bmp', ('.bmp',)),
        ('image/tiff', ('.tiff',)),
        ('image/x-icon', ('.ico',)),

        # Documents
        ('application/pdf', ('.pdf',)),
        ('application/vnd.openxmlformats-officedocument.wordprocessingml.document', ('.docx',)),
        ('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', ('.xlsx',)),
        ('application/vnd.openxmlformats-officedocument.presentationml.presentation', ('.pptx',)),
        ('application/vnd.oasis.opendocument.text', ('.odt',)),
        ('application/vnd.oasis.opendocument.spreadsheet', ('.ods',)),
        ('application/vnd.oasis.opendocument.presentation', ('.odp',)),

        # Legacy Office formats
        ('application/msword', ('.doc',)),
        ('application/vnd.ms-excel', ('.xls',)),
        ('application/vnd.ms-powerpoint', ('.ppt',)),

        # Archives and binaries (will be processed as binary)
        ('application/zip', ('.zip',)),
        ('application/x-tar', ('.tar',)),
        ('application/gzip', ('.gz',)),
        ('application/x-7z-compressed', ('.7z',)),
        ('application/vnd.rar', ('.rar',)),
        ('application/x-msdownload', ('.exe', '.dll')),
        ('application/x-sharedlib', ('.so',)),
        ('application/x-mach-binary', ('.dylib',)),
    )

    # Flatten the grouped table into an extension -> MIME type lookup.
    mime_by_extension = {
        suffix: mime
        for mime, suffixes in extensions_by_mime
        for suffix in suffixes
    }

    try:
        return mime_by_extension[extension.lower()]
    except KeyError:
        return 'application/octet-stream'
|
||||
|
||||
async def run_tests(self) -> None:
    """Extract every supported input file and log a summary of the outcomes.

    Stops early, after logging the reason, when the extractor cannot be
    initialized or no supported files are found.
    """
    wide_rule = "=" * 80
    thin_rule = "-" * 80

    mode = "WITH AI" if self.enable_ai else "CONTENT ONLY (No AI)"
    logger.info(f"Starting document extraction tests - {mode}")
    logger.info(f"Input directory: {self.input_dir}")
    logger.info(f"Output directory: {self.output_dir}")
    if not self.enable_ai:
        logger.info("AI processing: DISABLED - Raw content extraction only")
    else:
        logger.info(f"Processing prompt: {self.prompt}")

    # Nothing can be processed without a working extractor.
    if not self.initialize_extractor():
        logger.error("Cannot proceed without DocumentExtraction!")
        return

    files = self.get_files_to_process()
    if not files:
        logger.warning("No files found to process!")
        return

    # Process the files one by one, counting outcomes.
    successful = 0
    failed = 0
    total = len(files)

    logger.info(f"Starting to process {total} files...")
    for position, file_path in enumerate(files, start=1):
        logger.info(f"Processing file {position}/{total}: {file_path.name}")
        try:
            succeeded = await self.process_single_file(file_path)
            if succeeded:
                successful += 1
                logger.info(f"File {position} processed successfully")
            else:
                failed += 1
                logger.error(f"File {position} processing failed")
        except Exception as e:
            failed += 1
            logger.error(f"Exception processing file {position}: {e}")
            import traceback
            traceback.print_exc()

    # Overall summary
    mode = "WITH AI" if self.enable_ai else "CONTENT ONLY (No AI)"
    logger.info("\n" + wide_rule)
    logger.info(f"DETAILED TEST SUMMARY - {mode}")
    logger.info(wide_rule)
    logger.info(f"Total files processed: {total}")
    logger.info(f"Successful: {successful}")
    logger.info(f"Failed: {failed}")
    logger.info(f"Output directory: {self.output_dir}")
    logger.info("AI processing: ENABLED" if self.enable_ai else "AI processing: DISABLED")
    logger.info(wide_rule)

    # Per-document results
    logger.info("\nPROCESSING RESULTS:")
    logger.info(thin_rule)

    for result in self.processing_results:
        status_icon = "✅" if result['status'] == 'OK' else "❌"
        logger.info(f"{status_icon} {result['fileName']} - {result['status']}")

        if result['status'] != 'OK':
            error_msg = result.get('error', 'Unknown error')
            logger.info(f" └─ Error: {error_msg}")
        elif result['content_items'] == 1:
            logger.info(f" └─ Generated: {result['output_files'][0]} ({result['total_content_size']} bytes)")
        else:
            logger.info(f" └─ Generated {result['content_items']} files ({result['total_content_size']} total bytes):")
            for output_file in result['output_files']:
                logger.info(f" └─ {output_file}")

    logger.info(thin_rule)
    logger.info(wide_rule)
|
||||
|
||||
def parse_arguments():
    """Build the command line parser for the extraction test script and parse ``sys.argv``."""
    cli = argparse.ArgumentParser(description='Document Extraction Test Script')

    # (flags, keyword settings) for every supported option, in help order.
    option_specs = (
        (('--no-ai', '--content-only'),
         {'action': 'store_true',
          'help': 'Run in content-only mode without AI processing'}),
        (('--input-dir',),
         {'type': str,
          'default': 'd:/temp/test-extraction',
          'help': 'Input directory containing files to process (default: d:/temp/test-extraction)'}),
        (('--output-dir',),
         {'type': str,
          'help': 'Output directory for extracted content (auto-generated if not specified)'}),
        (('--verbose', '-v'),
         {'action': 'store_true',
          'help': 'Enable verbose logging'}),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()
|
||||
|
||||
async def main():
    """Main function to run the tests.

    Parses the command line, checks dependencies and module imports, picks the
    extraction mode (``--no-ai`` flag, otherwise an interactive prompt) and
    runs ``DocumentExtractionTester`` with the resulting settings.
    """
    # Parse command line arguments
    args = parse_arguments()

    # Set logging level based on verbosity
    logging.getLogger().setLevel(logging.DEBUG if args.verbose else logging.INFO)

    logger.info("DocumentExtraction Test Script")
    logger.info("=" * 50)
    logger.info(f"Source: {args.input_dir}")

    def resolve_output_dir(with_ai):
        """Return the explicit --output-dir if given, else the mode-specific default."""
        if args.output_dir:
            return args.output_dir
        folder = "extracted" if with_ai else "extracted-raw"
        return f"{args.input_dir}/{folder}"

    # Determine output directory
    output_dir = resolve_output_dir(not args.no_ai)

    logger.info(f"Output: {output_dir}")
    logger.info("=" * 50)

    # Check dependencies first
    if not check_dependencies():
        logger.error("Please install missing dependencies before running tests.")
        return

    # Check module imports
    if not check_module_imports():
        logger.error("Cannot import required modules. Please check your setup.")
        return

    # Determine mode based on command line arguments
    if args.no_ai:
        enable_ai = False
        logger.info("Running in CONTENT ONLY mode (no AI processing)")
    else:
        # Interactive mode: ask user for choice
        print("\n" + "=" * 50)
        print("SELECT EXTRACTION MODE:")
        print("=" * 50)
        print("1. With AI processing (default)")
        print("2. Content only (no AI processing)")
        print("=" * 50)

        try:
            choice = input("Enter your choice (1 or 2, default is 1): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Default to AI mode if input fails
            enable_ai = True
            logger.info("Defaulting to AI processing mode")
        else:
            if choice == "2":
                enable_ai = False
                logger.info("Selected: Content only mode (no AI processing)")
            else:
                enable_ai = True
                logger.info("Selected: AI processing mode")

        # Re-resolve for the chosen mode; an explicit --output-dir is kept
        # instead of being replaced by the mode default.
        output_dir = resolve_output_dir(enable_ai)

    # Run tests with selected mode
    tester = DocumentExtractionTester(
        input_dir=args.input_dir,
        output_dir=output_dir,
        enable_ai=enable_ai
    )
    await tester.run_tests()
|
||||
|
||||
if __name__ == "__main__":
    # main() itself chooses between the --no-ai flag and the interactive
    # prompt, so the entry point is the same with or without arguments.
    asyncio.run(main())
|
||||
|
||||
# Convenience function for easy content-only extraction
|
||||
async def extract_documents_content_only(input_folder: str, output_folder: str = None):
    """
    Run the extraction tester over a folder with AI processing switched off.

    Args:
        input_folder: Folder containing the documents to extract
        output_folder: Folder that receives the extracted content; defaults
            to ``<input_folder>/extracted-raw`` when omitted

    Returns:
        False when the dependency or module-import check fails, True once the
        tester run has finished

    Example:
        # Extract from d:/temp to d:/temp/extracted-raw
        asyncio.run(extract_documents_content_only("d:/temp"))

        # Extract from custom folders
        asyncio.run(extract_documents_content_only("c:/my_docs", "c:/my_docs/extracted"))
    """
    target_folder = f"{input_folder}/extracted-raw" if output_folder is None else output_folder

    logger.info(f"Running content-only extraction from {input_folder} to {target_folder}")

    # Preconditions are evaluated in order; the first failure aborts the run.
    preconditions = (
        (check_dependencies, "Missing dependencies. Please install required packages."),
        (check_module_imports, "Cannot import required modules. Please check your setup."),
    )
    for precondition, failure_message in preconditions:
        if not precondition():
            logger.error(failure_message)
            return False

    # Content-only run: AI processing is explicitly disabled.
    tester = DocumentExtractionTester(
        input_dir=input_folder,
        output_dir=target_folder,
        enable_ai=False
    )
    await tester.run_tests()
    return True
|
||||
|
||||
# Example usage (uncomment to use):
|
||||
# if __name__ == "__main__":
|
||||
# # For content-only extraction from d:/temp to d:/temp/extracted-raw
|
||||
# asyncio.run(extract_documents_content_only("d:/temp"))
|
||||
|
|
@ -1,189 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple test script for enhanced Excel processing functionality.
|
||||
This script tests the DocumentExtraction class with Excel files.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import asyncio
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
# Configure logging: root logger at DEBUG level, each record rendered as
# "<timestamp> - <level> - <message>"
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Add the gateway directory to the path: the directory three levels above this
# file is appended to sys.path (presumably so the `modules.*` imports used
# below resolve when the script is run directly; verify against the repo layout)
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..'))
|
||||
|
||||
def _log_content_items(contents, preview_length):
    """Log label, MIME type, size and a data preview for each extracted content item."""
    logger.info(f"Generated {len(contents)} content items:")

    for i, content_item in enumerate(contents):
        logger.info(f" Item {i+1}: {content_item.label}")
        logger.info(f" MIME type: {content_item.metadata.mimeType}")
        logger.info(f" Size: {content_item.metadata.size} bytes")
        if content_item.data:
            logger.info(f" Data preview: {content_item.data[:preview_length]}...")
        else:
            logger.info(" Data: None")


async def test_excel_processing():
    """Test Excel processing functionality.

    Builds a ServiceCenter from a mock user and workflow, then runs
    ``DocumentExtraction.processFileData`` with AI disabled on either the
    sample file ``d:/temp/test-extraction/test.xlsx`` (when it exists) or a
    small workbook generated in memory with openpyxl. Results and errors are
    logged; nothing is returned or asserted.
    """
    xlsx_mime_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    try:
        # Import required modules
        from modules.chat.documents.documentExtraction import DocumentExtraction
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
        from modules.interfaces.interfaceChatModel import ChatWorkflow
        from datetime import datetime, UTC

        logger.info("Testing Excel processing functionality...")

        # Create mock service center
        mock_user = User(
            id="test_user_001",
            username="testuser",
            email="test@example.com",
            fullName="Test User",
            language="en",
            enabled=True,
            privilege=UserPrivilege.USER,
            authenticationAuthority=AuthAuthority.LOCAL,
            mandateId="test_mandate_001"
        )

        current_time = datetime.now(UTC).isoformat()
        mock_workflow = ChatWorkflow(
            id="test_workflow_001",
            mandateId="test_mandate_001",
            status="active",
            name="Test Excel Processing Workflow",
            currentRound=1,
            lastActivity=current_time,
            startedAt=current_time,
            logs=[],
            messages=[],
            stats=None,
            tasks=[]
        )

        service_center = ServiceCenter(mock_user, mock_workflow)
        logger.info("ServiceCenter created successfully")

        # Create DocumentExtraction instance
        extractor = DocumentExtraction(service_center)
        logger.info("DocumentExtraction created successfully")

        # Test with a sample Excel file if available
        test_file_path = "d:/temp/test-extraction/test.xlsx"

        if os.path.exists(test_file_path):
            logger.info(f"Found test file: {test_file_path}")

            # Read the file
            with open(test_file_path, 'rb') as f:
                file_data = f.read()

            logger.info(f"File size: {len(file_data)} bytes")

            # Process the Excel file
            logger.info("Processing Excel file...")
            result = await extractor.processFileData(
                fileData=file_data,
                fileName="test.xlsx",
                mimeType=xlsx_mime_type,
                base64Encoded=False,
                prompt=None,
                enableAI=False
            )

            logger.info("Excel processing completed successfully!")
            _log_content_items(result.contents, 100)

        else:
            logger.info("No test Excel file found. Creating a simple test...")

            # Fall back to a workbook generated in memory with openpyxl
            try:
                from io import BytesIO
                from openpyxl import Workbook

                # Create a test workbook
                wb = Workbook()
                ws = wb.active
                ws.title = "Test Sheet"

                # Add some test data
                ws['A1'] = "Name"
                ws['B1'] = "Age"
                ws['C1'] = "City"
                ws['A2'] = "John Doe"
                ws['B2'] = 30
                ws['C2'] = "New York"
                ws['A3'] = "Jane Smith"
                ws['B3'] = 25
                ws['C3'] = "Los Angeles"

                # Test properties
                wb.properties.title = "Test Workbook"
                wb.properties.creator = "Test User"
                wb.properties.subject = "Test Subject"

                logger.info("Test workbook created successfully")
                logger.info(f" Title: {wb.properties.title}")
                logger.info(f" Creator: {wb.properties.creator}")
                logger.info(f" Subject: {wb.properties.subject}")
                logger.info(f" Sheets: {wb.sheetnames}")

                # Save to bytes
                buffer = BytesIO()
                wb.save(buffer)
                buffer.seek(0)
                file_data = buffer.getvalue()

                logger.info(f"Test workbook size: {len(file_data)} bytes")

                # Process with DocumentExtraction
                result = await extractor.processFileData(
                    fileData=file_data,
                    fileName="test_workbook.xlsx",
                    mimeType=xlsx_mime_type,
                    base64Encoded=False,
                    prompt=None,
                    enableAI=False
                )

                logger.info("Test workbook processing completed successfully!")
                _log_content_items(result.contents, 200)

            except ImportError as e:
                logger.error(f"openpyxl not available: {e}")
            except Exception as e:
                logger.error(f"Error testing Excel functionality: {e}")

        logger.info("Excel processing test completed!")

    except ImportError as e:
        logger.error(f"Failed to import required modules: {e}")
        logger.error("Make sure you're running this script from the gateway directory")
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    asyncio.run(test_excel_processing())
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for Outlook filter logic
|
||||
"""
|
||||
|
||||
def test_build_graph_filter():
    """Test the filter building logic.

    Runs a local copy of the ``_buildGraphFilter`` logic against a table of
    inputs and prints PASS/FAIL with expected and actual values for each.
    """
    search_prefixes = ('from:', 'to:', 'subject:', 'received:', 'hasattachment:')

    # Mock the _buildGraphFilter method
    def _buildGraphFilter(filter_text):
        if not filter_text:
            return {}

        filter_text = filter_text.strip()

        # Handle search queries (from:, to:, subject:, etc.) first: a query
        # such as "from:user@example.com" also looks like an e-mail address
        # and would otherwise be turned into an address filter.
        if filter_text.startswith(search_prefixes):
            return {"$search": f'"{filter_text}"'}

        # NOTE(review): filter_text is interpolated into the OData string
        # literals below without escaping single quotes; confirm how the
        # production method handles input such as "O'Brien".

        # Handle email address filters
        if '@' in filter_text and '.' in filter_text and ' ' not in filter_text:
            return {"$filter": f"from/fromAddress/address eq '{filter_text}'"}

        # Handle text content - search in subject
        return {"$filter": f"contains(subject,'{filter_text}')"}

    # Test cases: (input, expected query parameters)
    test_cases = [
        ("peter.muster@domain.com", {"$filter": "from/fromAddress/address eq 'peter.muster@domain.com'"}),
        ("from:user@example.com", {"$search": '"from:user@example.com"'}),
        ("subject:meeting", {"$search": '"subject:meeting"'}),
        ("project update", {"$filter": "contains(subject,'project update')"}),
        ("", {}),
        (" hello world ", {"$filter": "contains(subject,'hello world')"}),
    ]

    print("Testing Outlook filter logic:")
    print("=" * 50)

    for test_input, expected_output in test_cases:
        result = _buildGraphFilter(test_input)
        status = "✓ PASS" if result == expected_output else "✗ FAIL"
        print(f"{status} | Input: '{test_input}'")
        print(f" | Expected: {expected_output}")
        print(f" | Got: {result}")
        print()

    print("Test completed!")


if __name__ == "__main__":
    test_build_graph_filter()
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for fixed Outlook filter logic
|
||||
"""
|
||||
|
||||
def test_build_graph_filter():
    """Test the corrected filter building logic.

    Runs a local copy of the corrected ``_buildGraphFilter`` logic against a
    table of inputs, prints PASS/FAIL per case and a final pass/fail count.
    """
    search_prefixes = ('from:', 'to:', 'subject:', 'received:', 'hasattachment:')

    # Mock the corrected _buildGraphFilter method
    def _buildGraphFilter(filter_text):
        if not filter_text:
            return {}

        filter_text = filter_text.strip()

        # Handle search queries (from:, to:, subject:, etc.) - check this FIRST
        if filter_text.startswith(search_prefixes):
            return {"$search": f'"{filter_text}"'}

        # NOTE(review): filter_text is interpolated into the OData string
        # literals below without escaping single quotes; confirm how the
        # production method handles input such as "O'Brien".

        # Handle email address filters. Search queries have already returned
        # above, so no additional prefix check is needed here.
        if '@' in filter_text and '.' in filter_text and ' ' not in filter_text:
            return {"$filter": f"from/fromAddress/address eq '{filter_text}'"}

        # Handle text content - search in subject
        return {"$filter": f"contains(subject,'{filter_text}')"}

    # Test cases: (input, expected query parameters)
    test_cases = [
        ("peter.muster@domain.com", {"$filter": "from/fromAddress/address eq 'peter.muster@domain.com'"}),
        ("from:user@example.com", {"$search": '"from:user@example.com"'}),
        ("subject:meeting", {"$search": '"subject:meeting"'}),
        ("project update", {"$filter": "contains(subject,'project update')"}),
        ("", {}),
        (" hello world ", {"$filter": "contains(subject,'hello world')"}),
        # Additional edge cases
        ("to:manager@company.com", {"$search": '"to:manager@company.com"'}),
        ("received:today", {"$search": '"received:today"'}),
        ("hasattachment:true", {"$search": '"hasattachment:true"'}),
        ("user@domain.com", {"$filter": "from/fromAddress/address eq 'user@domain.com'"}),
        ("from:user@domain.com subject:budget", {"$search": '"from:user@domain.com subject:budget"'}),
    ]

    print("Testing FIXED Outlook filter logic:")
    print("=" * 50)

    passed = 0
    failed = 0

    for test_input, expected_output in test_cases:
        result = _buildGraphFilter(test_input)
        # Evaluate the comparison once and derive both label and counters from it.
        if result == expected_output:
            status = "✓ PASS"
            passed += 1
        else:
            status = "✗ FAIL"
            failed += 1

        print(f"{status} | Input: '{test_input}'")
        print(f" | Expected: {expected_output}")
        print(f" | Got: {result}")
        print()

    print(f"Test completed! {passed} passed, {failed} failed")

    if failed == 0:
        print("🎉 All tests passed!")
    else:
        print("❌ Some tests failed. Please check the logic.")


if __name__ == "__main__":
    test_build_graph_filter()
|
||||
|
|
@ -1,100 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for Pydantic compatibility module.
|
||||
This script tests the version-aware functionality for both Pydantic v1 and v2.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add the modules directory to the path: the `modules` folder next to this
# script is put first on sys.path so that the `shared.*` and `interfaces.*`
# imports used by the tests below are looked up there before anywhere else
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'modules'))
|
||||
|
||||
def test_compatibility_module():
    """Exercise the helpers exported by ``shared.pydanticCompat``.

    Returns:
        True when the import and every helper call succeed, False when any
        step raises (the error is printed).
    """
    try:
        from shared.pydanticCompat import (
            PYDANTIC_VERSION,
            create_private_field,
            create_model_config,
            model_to_dict,
            model_from_dict,
            get_version_info
        )

        print("✅ Successfully imported Pydantic compatibility module")
        print(f"📊 Pydantic version detected: {PYDANTIC_VERSION}")

        # Version information
        info = get_version_info()
        print(f"🔍 Version info: {info}")

        # Private field factory
        field = create_private_field(default="test")
        print(f"✅ Private field created: {type(field)}")

        # Model configuration factory
        model_config = create_model_config(validate_assignment=True)
        print(f"✅ Model config created: {type(model_config)}")
    except Exception as e:
        print(f"❌ Error testing compatibility module: {e}")
        return False

    return True
|
||||
|
||||
def test_chat_document_model():
    """Exercise the ``ChatDocument`` model from ``interfaces.interfaceChatModel``.

    Creates a document, sets a component interface, serializes it and runs
    the component-interface validation.

    Returns:
        True when every step succeeds, False when any step raises (the error
        and traceback are printed).
    """
    try:
        from interfaces.interfaceChatModel import ChatDocument

        print("✅ Successfully imported ChatDocument model")

        # Create a document
        document = ChatDocument(fileId="test-file-123")
        print(f"✅ ChatDocument created: {document.id}")

        # Attach a component interface
        document.setComponentInterface("mock_interface")
        print("✅ Component interface set")

        # Serialize
        serialized = document.to_dict()
        print(f"✅ Document serialized: {serialized}")

        # Validate
        interface_ok = document.validate_component_interface()
        print(f"✅ Component interface validation: {interface_ok}")
    except Exception as e:
        print(f"❌ Error testing ChatDocument model: {e}")
        import traceback
        traceback.print_exc()
        return False

    return True
|
||||
|
||||
def main():
    """Run both compatibility checks and return a process exit code.

    Returns:
        0 when both checks pass, 1 otherwise.
    """
    rule = "=" * 50

    print("🧪 Testing Pydantic Compatibility Module")
    print(rule)

    # Both checks always run, each followed by a blank line.
    compat_ok = test_compatibility_module()
    print()

    model_ok = test_chat_document_model()
    print()

    # Summary
    print(rule)
    if not (compat_ok and model_ok):
        print("💥 Some tests failed. Check the errors above.")
        return 1

    print("🎉 All tests passed! Pydantic compatibility is working correctly.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
1
tests/__init__.py
Normal file
1
tests/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
# noqa
|
||||
0
tests/connectors/__init__.py
Normal file
0
tests/connectors/__init__.py
Normal file
108
tests/connectors/test_connector_tavily.py
Normal file
108
tests/connectors/test_connector_tavily.py
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
"""Tests for Tavily web search."""
|
||||
|
||||
import pytest
|
||||
import logging
|
||||
|
||||
from modules.interfaces.interfaceChatModel import ActionResult
|
||||
from gateway.modules.interfaces.interfaceWebModel import (
|
||||
WebSearchRequest,
|
||||
WebCrawlRequest,
|
||||
WebScrapeRequest,
|
||||
)
|
||||
from gateway.modules.connectors.connectorWebTavily import ConnectorTavily
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.expensive
async def test_tavily_connector_search_test_live_api():
    """Run one URL search through the Tavily connector against the live API.

    Carries the ``expensive`` marker because it performs a real API call.
    The only assertion is that the connector returns an ``ActionResult``;
    the success flag, error, label and every returned document are logged
    for manual inspection rather than asserted.
    """
    logger.info("Testing Tavily connector search with live API calls")

    # Test request
    request = WebSearchRequest(query="How old is the Earth?", max_results=5)

    # Tavily instance
    connectorWebTavily = await ConnectorTavily.create()

    # Search test
    action_result = await connectorWebTavily.search_urls(request=request)

    # Check results
    assert isinstance(action_result, ActionResult)

    logger.info("=" * 20)
    logger.info(f"Action result success status: {action_result.success}")
    logger.info(f"Action result error: {action_result.error}")
    logger.info(f"Action result label: {action_result.resultLabel}")

    logger.info("Documents:")
    for doc in action_result.documents:
        logger.info("-" * 10)
        logger.info(f" - Document Name: {doc.documentName}")
        logger.info(f" - Document Mime Type: {doc.mimeType}")
        logger.info(f" - Document Data: {doc.documentData}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.expensive
async def test_tavily_connector_crawl_test_live_api():
    """Crawl two fixed URLs through the Tavily connector against the live API.

    Carries the ``expensive`` marker because it performs a real API call.
    Only the type of the returned object is asserted; the result fields and
    each document are written to the log for manual review.
    """
    logger.info("Testing Tavily connector crawl with live API calls")

    # Build the crawl request for the two target pages.
    crawl_request = WebCrawlRequest(
        urls=["https://en.wikipedia.org/wiki/Earth", "https://valueon.ch"]
    )

    # Obtain a connector and run the crawl.
    connector = await ConnectorTavily.create()
    result = await connector.crawl_urls(request=crawl_request)

    # The connector must always hand back an ActionResult.
    assert isinstance(result, ActionResult)

    logger.info("=" * 20)
    logger.info(f"Action result success status: {result.success}")
    logger.info(f"Action result error: {result.error}")
    logger.info(f"Action result label: {result.resultLabel}")

    logger.info("Documents:")
    for document in result.documents:
        logger.info("-" * 10)
        logger.info(f" - Document Name: {document.documentName}")
        logger.info(f" - Document Mime Type: {document.mimeType}")
        logger.info(f" - Document Data: {document.documentData}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.expensive
async def test_tavily_connector_scrape_test_live_api():
    """Scrape by query through the Tavily connector against the live API.

    Carries the ``expensive`` marker because it performs a real API call.
    Only the type of the returned object is asserted; the result fields and
    each document are written to the log for manual review.
    """
    logger.info("Testing Tavily connector scrape with live API calls")

    # Query-based scrape request, limited to three results.
    scrape_request = WebScrapeRequest(query="How old is the Earth?", max_results=3)

    # Obtain a connector and run the scrape.
    connector = await ConnectorTavily.create()
    result = await connector.scrape(request=scrape_request)

    # The connector must always hand back an ActionResult.
    assert isinstance(result, ActionResult)

    logger.info("=" * 20)
    logger.info(f"Action result success status: {result.success}")
    logger.info(f"Action result error: {result.error}")
    logger.info(f"Action result label: {result.resultLabel}")

    logger.info("Documents:")
    for document in result.documents:
        logger.info("-" * 10)
        logger.info(f" - Document Name: {document.documentName}")
        logger.info(f" - Document Mime Type: {document.mimeType}")
        logger.info(f" - Document Data: {document.documentData}")
|
||||
0
tests/fixtures/__init__.py
vendored
Normal file
0
tests/fixtures/__init__.py
vendored
Normal file
71
tests/fixtures/tavily_responses.py
vendored
Normal file
71
tests/fixtures/tavily_responses.py
vendored
Normal file
File diff suppressed because one or more lines are too long
0
tests/methods/__init__.py
Normal file
0
tests/methods/__init__.py
Normal file
248
tests/methods/test_method_web.py
Normal file
248
tests/methods/test_method_web.py
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
"""Tests for method web.py"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch
|
||||
from modules.methods.methodWeb import MethodWeb
|
||||
from tests.fixtures.tavily_responses import (
|
||||
RESPONSE_SEARCH_HOW_OLD_IS_EARTH_NO_ANSWER,
|
||||
RESPONSE_EXTRACT_HOW_OLD_IS_EARTH_NO_ANSWER,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.expensive
async def test_method_web_search_live():
    """Run MethodWeb.search without any mocking (live API call).

    Asserts that the action reports success and returns at least one
    document, then logs the result fields and every document.
    """
    logger.info("=" * 50)
    logger.info("==> Test: Method Web Search Live")

    web_method = MethodWeb(serviceCenter=None)

    # Issue the real search request.
    search_params = {"query": "How old is the earth", "maxResults": 5}
    outcome = await web_method.search(search_params)

    # The search must succeed and yield documents.
    assert outcome.success
    assert len(outcome.documents) > 0

    logger.info(f"Action result success status: {outcome.success}")
    logger.info(f"Action result error: {outcome.error}")
    logger.info(f"Action result label: {outcome.resultLabel}")

    logger.info("Documents:")
    for document in outcome.documents:
        logger.info(f" - Document Name: {document.documentName}")
        logger.info(f" --> Document Mime Type: {document.mimeType}")
        logger.info(f" --> Document Data: {document.documentData}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_method_web_search_dummy():
    """Run MethodWeb.search with the Tavily search call patched out.

    ``tavily.AsyncTavilyClient.search`` is replaced by a mock returning a
    canned fixture response. The test asserts that the patched call happened
    exactly once, that the action reports success, and that at least one
    document comes back; the result is then logged.
    """
    logger.info("=" * 50)
    logger.info("==> Test: Method Web Search Dummy")

    web_method = MethodWeb(serviceCenter=None)
    search_params = {"query": "How old is the earth", "maxResults": 5}

    # Substitute the canned fixture for the Tavily search response.
    tavily_search_patch = patch(
        "tavily.AsyncTavilyClient.search",
        return_value=RESPONSE_SEARCH_HOW_OLD_IS_EARTH_NO_ANSWER,
    )
    with tavily_search_patch as mocked_search:
        outcome = await web_method.search(search_params)
        mocked_search.assert_called_once()

    # The search must succeed and yield documents.
    assert outcome.success
    assert len(outcome.documents) > 0

    logger.info(f"Action result success status: {outcome.success}")
    logger.info(f"Action result error: {outcome.error}")
    logger.info(f"Action result label: {outcome.resultLabel}")

    logger.info("Documents:")
    for document in outcome.documents:
        logger.info(f" - Document Name: {document.documentName}")
        logger.info(f" --> Document Mime Type: {document.mimeType}")
        logger.info(f" --> Document Data: {document.documentData}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.expensive
async def test_method_web_crawl_live():
    """Run MethodWeb.crawl with a mocked service but no Tavily mocking.

    The ``service`` attribute of the method object is patched so that the
    document lookup returns one stub document and the file read returns a
    JSON payload listing two URLs. The Tavily client itself is not patched.
    The test asserts success and at least one returned document, then logs
    the result.
    """
    logger.info("=" * 50)
    logger.info("==> Test: Method Web Crawl Live")

    web_method = MethodWeb(serviceCenter=None)

    # JSON payload that stands in for stored search results.
    target_urls = (
        "https://en.wikipedia.org/wiki/Age_of_Earth",
        "https://www.planetary.org/articles/how-old-is-the-earth",
    )
    payload = {"documentData": {"results": [{"url": url} for url in target_urls]}}
    encoded_payload = json.dumps(payload).encode("utf-8")

    # Minimal stand-in exposing only fileId and fileName.
    stub_document = type(
        "MockDoc",
        (),
        {"fileId": "test-file-id", "fileName": "test-search-results.json"},
    )()

    # Swap the method's service for a mock that serves the stub data.
    with patch.object(web_method, "service") as service_mock:
        service_mock.getChatDocumentsFromDocumentList.return_value = [stub_document]
        service_mock.getFileData.return_value = encoded_payload

        outcome = await web_method.crawl({"documentList": "test-document-list-ref"})

    # The crawl must succeed and yield documents.
    assert outcome.success
    assert len(outcome.documents) > 0

    logger.info(f"Action result success status: {outcome.success}")
    logger.info(f"Action result error: {outcome.error}")
    logger.info(f"Action result label: {outcome.resultLabel}")

    logger.info("Documents:")
    for document in outcome.documents:
        logger.info(f" - Document Name: {document.documentName}")
        logger.info(f" --> Document Mime Type: {document.mimeType}")
        logger.info(f" --> Document Data: {document.documentData}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_method_web_crawl_dummy():
    """Run MethodWeb.crawl with both the service and Tavily extract mocked.

    The ``service`` attribute of the method object is patched to return one
    stub document and a JSON payload listing two URLs, and
    ``tavily.AsyncTavilyClient.extract`` is patched to return a canned
    fixture response. The test asserts that extract was called exactly once,
    that the action reports success, and that at least one document comes
    back; the result is then logged.
    """
    logger.info("=" * 50)
    logger.info("==> Test: Method Web Crawl Dummy")

    web_method = MethodWeb(serviceCenter=None)

    # JSON payload that stands in for stored search results.
    target_urls = (
        "https://en.wikipedia.org/wiki/Age_of_Earth",
        "https://www.planetary.org/articles/how-old-is-the-earth",
    )
    payload = {"documentData": {"results": [{"url": url} for url in target_urls]}}
    encoded_payload = json.dumps(payload).encode("utf-8")

    # Minimal stand-in exposing only fileId and fileName.
    stub_document = type(
        "MockDoc",
        (),
        {"fileId": "test-file-id", "fileName": "test-search-results.json"},
    )()

    # Patch the service first, then the Tavily extract call.
    with patch.object(web_method, "service") as service_mock:
        with patch(
            "tavily.AsyncTavilyClient.extract",
            return_value=RESPONSE_EXTRACT_HOW_OLD_IS_EARTH_NO_ANSWER,
        ) as mocked_extract:
            service_mock.getChatDocumentsFromDocumentList.return_value = [stub_document]
            service_mock.getFileData.return_value = encoded_payload

            outcome = await web_method.crawl({"documentList": "test-document-list-ref"})
            mocked_extract.assert_called_once()

    # The crawl must succeed and yield documents.
    assert outcome.success
    assert len(outcome.documents) > 0

    logger.info(f"Action result success status: {outcome.success}")
    logger.info(f"Action result error: {outcome.error}")
    logger.info(f"Action result label: {outcome.resultLabel}")

    logger.info("Documents:")
    for document in outcome.documents:
        logger.info(f" - Document Name: {document.documentName}")
        logger.info(f" --> Document Mime Type: {document.mimeType}")
        logger.info(f" --> Document Data: {document.documentData}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.expensive
async def test_method_web_scrape_live():
    """Run MethodWeb.scrape without any mocking (live API call).

    Asserts that the action reports success and returns at least one
    document, then logs the result fields and every document.
    """
    logger.info("=" * 50)
    logger.info("==> Test: Method Web Scrape Live")

    web_method = MethodWeb(serviceCenter=None)

    # Issue the real scrape request.
    scrape_params = {"query": "How old is the earth", "maxResults": 3}
    outcome = await web_method.scrape(scrape_params)

    # The scrape must succeed and yield documents.
    assert outcome.success
    assert len(outcome.documents) > 0

    logger.info(f"Action result success status: {outcome.success}")
    logger.info(f"Action result error: {outcome.error}")
    logger.info(f"Action result label: {outcome.resultLabel}")

    logger.info("Documents:")
    for document in outcome.documents:
        logger.info(f" - Document Name: {document.documentName}")
        logger.info(f" --> Document Mime Type: {document.mimeType}")
        logger.info(f" --> Document Data: {document.documentData}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_method_web_scrape_dummy():
    """Run MethodWeb.scrape with Tavily search and extract both mocked.

    ``tavily.AsyncTavilyClient.search`` and ``.extract`` are patched to
    return canned fixture responses. The test asserts that each patched call
    happened exactly once, that the action reports success, and that at
    least one document comes back; the result is then logged.
    """
    logger.info("=" * 50)
    logger.info("==> Test: Method Web Scrape Dummy")

    web_method = MethodWeb(serviceCenter=None)
    scrape_params = {"query": "How old is the earth", "maxResults": 3}

    # Patch search first, then extract, with the canned fixtures.
    with patch(
        "tavily.AsyncTavilyClient.search",
        return_value=RESPONSE_SEARCH_HOW_OLD_IS_EARTH_NO_ANSWER,
    ) as mocked_search:
        with patch(
            "tavily.AsyncTavilyClient.extract",
            return_value=RESPONSE_EXTRACT_HOW_OLD_IS_EARTH_NO_ANSWER,
        ) as mocked_extract:
            outcome = await web_method.scrape(scrape_params)
            mocked_search.assert_called_once()
            mocked_extract.assert_called_once()

    # The scrape must succeed and yield documents.
    assert outcome.success
    assert len(outcome.documents) > 0

    logger.info(f"Action result success status: {outcome.success}")
    logger.info(f"Action result error: {outcome.error}")
    logger.info(f"Action result label: {outcome.resultLabel}")

    logger.info("Documents:")
    for document in outcome.documents:
        logger.info(f" - Document Name: {document.documentName}")
        logger.info(f" --> Document Mime Type: {document.mimeType}")
        logger.info(f" --> Document Data: {document.documentData}")
|
||||
Loading…
Reference in a new issue