Merge pull request #25 from valueonag/int

Int
This commit is contained in:
ValueOn AG 2025-09-04 01:53:00 +02:00 committed by GitHub
commit 460bb70ef7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
38 changed files with 3055 additions and 4529 deletions

View file

@ -36,20 +36,6 @@ Security_LOCK_DURATION_MINUTES = 30
# Content Neutralization configuration # Content Neutralization configuration
Content_Neutralization_ENABLED = False Content_Neutralization_ENABLED = False
# Agent Webcrawler configuration
Agent_Webcrawler_SERPAPI_ENGINE = google
Agent_Webcrawler_SERPAPI_APIKEY = 7304bd34bca767aa52dd3233297e30a9edc0abc57871f702b3f8238b9d3ee7bc
Agent_Webcrawler_SERPAPI_MAX_URLS = 3
Agent_Webcrawler_SERPAPI_MAX_SEARCH_KEYWORDS = 3
Agent_Webcrawler_SERPAPI_MAX_SEARCH_RESULTS = 5
Agent_Webcrawler_SERPAPI_TIMEOUT = 10
Agent_Webcrawler_SERPAPI_USER_AGENT = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36
# Agent Coder configuration
Agent_Coder_INSTALL_TIMEOUT = 180
Agent_Coder_EXECUTION_TIMEOUT = 60
Agent_Coder_EXECUTION_RETRY = 5
# Agent Mail configuration # Agent Mail configuration
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_CLIENT_SECRET = Kxf8Q~2lJIteZ~JaI32kMf1lfaWKATqxXiNiFbzV Service_MSFT_CLIENT_SECRET = Kxf8Q~2lJIteZ~JaI32kMf1lfaWKATqxXiNiFbzV
@ -58,3 +44,16 @@ Service_MSFT_TENANT_ID = common
# Google Service configuration # Google Service configuration
Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_CLIENT_SECRET = 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 Service_GOOGLE_CLIENT_SECRET = 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
# Tavily Web Search configuration
Connector_WebTavily_API_KEY = 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
# Web Search configuration
Web_Search_MAX_QUERY_LENGTH = 400
Web_Search_MAX_RESULTS = 20
Web_Search_MIN_RESULTS = 1
# Web Crawl configuration
Web_Crawl_TIMEOUT = 30
Web_Crawl_MAX_RETRIES = 3
Web_Crawl_RETRY_DELAY = 2

View file

@ -341,7 +341,7 @@ class DocumentExtraction:
# Use documentUtility for mime type # Use documentUtility for mime type
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter) mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
return [ContentItem( return [ContentItem(
label="main", label="main",
data=content, data=content,
@ -360,7 +360,7 @@ class DocumentExtraction:
"""Process CSV document with robust encoding detection""" """Process CSV document with robust encoding detection"""
try: try:
content = self._robustTextDecode(fileData, fileName) content = self._robustTextDecode(fileData, fileName)
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter) mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
return [ContentItem( return [ContentItem(
label="main", label="main",
data=content, data=content,
@ -380,7 +380,7 @@ class DocumentExtraction:
try: try:
content = self._robustTextDecode(fileData, fileName) content = self._robustTextDecode(fileData, fileName)
jsonData = json.loads(content) jsonData = json.loads(content)
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter) mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
return [ContentItem( return [ContentItem(
label="main", label="main",
data=content, data=content,
@ -399,7 +399,7 @@ class DocumentExtraction:
"""Process XML document with robust encoding detection""" """Process XML document with robust encoding detection"""
try: try:
content = self._robustTextDecode(fileData, fileName) content = self._robustTextDecode(fileData, fileName)
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter) mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
return [ContentItem( return [ContentItem(
label="main", label="main",
data=content, data=content,
@ -418,7 +418,7 @@ class DocumentExtraction:
"""Process HTML document with robust encoding detection""" """Process HTML document with robust encoding detection"""
try: try:
content = self._robustTextDecode(fileData, fileName) content = self._robustTextDecode(fileData, fileName)
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter) mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
return [ContentItem( return [ContentItem(
label="main", label="main",
data=content, data=content,
@ -512,7 +512,7 @@ class DocumentExtraction:
# Combine all meaningful content # Combine all meaningful content
final_content = "\n".join(meaningful_content) final_content = "\n".join(meaningful_content)
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter) mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
return [ContentItem( return [ContentItem(
label="svg_content", label="svg_content",
data=final_content, data=final_content,

View file

@ -98,26 +98,12 @@ class DocumentGenerator:
logger.info(f"Document {document_name} has content: {len(content)} characters") logger.info(f"Document {document_name} has content: {len(content)} characters")
# Create file in system # Create document with file in one step
file_id = self.service.createFile(
fileName=document_name,
mimeType=mime_type,
content=content,
base64encoded=False
)
if not file_id:
logger.error(f"Failed to create file for document {document_name}")
continue
logger.info(f"Created file with ID: {file_id}")
# Create document object using existing file ID
document = self.service.createDocument( document = self.service.createDocument(
fileName=document_name, fileName=document_name,
mimeType=mime_type, mimeType=mime_type,
content=content, content=content,
base64encoded=False, base64encoded=False
existing_file_id=file_id
) )
if document: if document:
# Set workflow context on the document if possible # Set workflow context on the document if possible

View file

@ -1,51 +1,160 @@
import json import json
import logging import logging
import os
from typing import Any, Dict from typing import Any, Dict
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def getFileExtension(fileName: str) -> str: def getFileExtension(fileName: str) -> str:
"""Extract file extension from fileName""" """Extract file extension from fileName (without dot, lowercased)."""
if '.' in fileName: if '.' in fileName:
return fileName.rsplit('.', 1)[-1].lower() return fileName.rsplit('.', 1)[-1].lower()
return '' return ''
def getMimeTypeFromExtension(extension: str, service=None) -> str: def getMimeTypeFromExtension(extension: str) -> str:
"""Get MIME type based on file extension. Optionally use a service for mapping.""" """
if service: Get MIME type based on file extension.
return service.getMimeTypeFromExtension(extension) This method consolidates MIME type detection from extension.
# Fallback mapping
mapping = { Args:
extension: File extension (with or without dot)
Returns:
str: MIME type for the extension
"""
# Normalize extension (remove dot if present)
if extension.startswith('.'):
extension = extension[1:]
# Map extensions to MIME types
mime_types = {
'txt': 'text/plain', 'txt': 'text/plain',
'md': 'text/markdown',
'html': 'text/html',
'css': 'text/css',
'js': 'application/javascript',
'json': 'application/json', 'json': 'application/json',
'csv': 'text/csv',
'xml': 'application/xml', 'xml': 'application/xml',
'csv': 'text/csv',
'html': 'text/html',
'htm': 'text/html',
'md': 'text/markdown',
'py': 'text/x-python', 'py': 'text/x-python',
'js': 'application/javascript',
'css': 'text/css',
'pdf': 'application/pdf', 'pdf': 'application/pdf',
'doc': 'application/msword',
'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'xls': 'application/vnd.ms-excel',
'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'png': 'image/png', 'ppt': 'application/vnd.ms-powerpoint',
'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'svg': 'image/svg+xml',
'jpg': 'image/jpeg', 'jpg': 'image/jpeg',
'jpeg': 'image/jpeg', 'jpeg': 'image/jpeg',
'png': 'image/png',
'gif': 'image/gif', 'gif': 'image/gif',
'svg': 'image/svg+xml', 'bmp': 'image/bmp',
'webp': 'image/webp',
'zip': 'application/zip',
'rar': 'application/x-rar-compressed',
'7z': 'application/x-7z-compressed',
'tar': 'application/x-tar',
'gz': 'application/gzip'
} }
return mapping.get(extension.lower(), 'application/octet-stream') return mime_types.get(extension.lower(), 'application/octet-stream')
# Extension (lowercased, including the leading dot) -> MIME type.
# Built once at import time instead of on every call.
# NOTE(review): '.py' maps to 'application/x-python' here, while
# getMimeTypeFromExtension in this module maps 'py' to 'text/x-python'.
# Left unchanged because callers may depend on it - confirm which is intended.
_EXTENSION_MIME_TYPES = {
    '.txt': 'text/plain',
    '.md': 'text/markdown',
    '.csv': 'text/csv',
    '.json': 'application/json',
    '.xml': 'application/xml',
    '.js': 'application/javascript',
    '.py': 'application/x-python',
    '.svg': 'image/svg+xml',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.png': 'image/png',
    '.gif': 'image/gif',
    '.bmp': 'image/bmp',
    '.webp': 'image/webp',
    '.pdf': 'application/pdf',
    '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    '.doc': 'application/msword',
    '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    '.xls': 'application/vnd.ms-excel',
    '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    '.ppt': 'application/vnd.ms-powerpoint',
    '.html': 'text/html',
    '.htm': 'text/html',
    '.css': 'text/css',
    '.zip': 'application/zip',
    '.rar': 'application/x-rar-compressed',
    '.7z': 'application/x-7z-compressed',
    '.tar': 'application/x-tar',
    '.gz': 'application/gzip',
}

# Magic-number prefixes of binary formats, matched against the raw bytes.
# 'PK\x03\x04' is the generic ZIP header; ZIP-based office formats
# (docx, xlsx, pptx) are reported as plain ZIP because the container is
# not inspected.
_MAGIC_MIME_TYPES = (
    (b'%PDF', 'application/pdf'),
    (b'PK\x03\x04', 'application/zip'),
    (b'\x89PNG\r\n\x1a\n', 'image/png'),
    (b'\xff\xd8\xff', 'image/jpeg'),
    (b'GIF87a', 'image/gif'),
    (b'GIF89a', 'image/gif'),
    (b'BM', 'image/bmp'),
)

_UTF8_BOM = b'\xef\xbb\xbf'
_DEFAULT_MIME_TYPE = 'application/octet-stream'


def _looksLikeMarkup(fileData: bytes) -> bool:
    """Return True if the data starts with '<', ignoring a UTF-8 BOM and leading whitespace."""
    # Only a short prefix is inspected so large payloads are not copied.
    probe = fileData[:256]
    if probe.startswith(_UTF8_BOM):
        probe = probe[len(_UTF8_BOM):]
    return probe.lstrip().startswith(b'<')


def _sniffMarkupType(fileData: bytes) -> str:
    """Classify '<'-prefixed data as SVG, HTML or generic XML."""
    # errors='ignore' means decode() cannot raise, so no exception handling
    # is needed here. Lower-case once and reuse for both tag checks.
    text = fileData.decode('utf-8', errors='ignore').lower()
    if '<svg' in text:
        return 'image/svg+xml'
    if '<html' in text:
        return 'text/html'
    return 'application/xml'


def detectContentTypeFromData(fileData: bytes, fileName: str) -> str:
    """
    Detect the MIME type of a file from its name and, failing that, its content.

    The file extension is checked first against a fixed table. If the
    extension is missing or unknown, the leading bytes are compared with
    known magic numbers (PDF, ZIP, PNG, JPEG, GIF, BMP, WEBP), and data that
    starts with '<' is classified as SVG, HTML or generic XML.

    Args:
        fileData: Raw file data as bytes. May be empty or None, in which case
            only the extension is considered.
        fileName: Name of the file. May be empty or None, in which case only
            the content is considered.

    Returns:
        str: Detected MIME type, or 'application/octet-stream' when nothing
        matches or detection fails unexpectedly.
    """
    try:
        # A missing file name must not abort detection: fall through to the
        # content-based checks instead.
        ext = os.path.splitext(fileName or '')[1].lower()
        mimeType = _EXTENSION_MIME_TYPES.get(ext)
        if mimeType is not None:
            return mimeType

        if not fileData:
            return _DEFAULT_MIME_TYPE

        for signature, signatureType in _MAGIC_MIME_TYPES:
            if fileData.startswith(signature):
                return signatureType

        # WEBP is a RIFF container whose form type sits at bytes 8..11.
        if fileData.startswith(b'RIFF') and fileData[8:12] == b'WEBP':
            return 'image/webp'

        if _looksLikeMarkup(fileData):
            return _sniffMarkupType(fileData)

        return _DEFAULT_MIME_TYPE
    except Exception as e:
        # Best-effort boundary: detection must never raise to the caller.
        logger.error(f"Error detecting content type from data: {str(e)}")
        return _DEFAULT_MIME_TYPE
def detectMimeTypeFromData(file_bytes: bytes, fileName: str, service=None) -> str: def detectMimeTypeFromData(file_bytes: bytes, fileName: str, service=None) -> str:
"""Detect MIME type from file bytes and fileName using a service if provided.""" """Detect MIME type from file bytes and fileName using a service if provided."""
try: try:
if service: if service and hasattr(service, 'detectContentTypeFromData'):
detected = service.detectContentTypeFromData(file_bytes, fileName) detected = service.detectContentTypeFromData(file_bytes, fileName)
if detected and detected != 'application/octet-stream': if detected and detected != 'application/octet-stream':
return detected return detected
# Fallback: guess from extension # Fallback: use our consolidated function
ext = getFileExtension(fileName) return detectContentTypeFromData(file_bytes, fileName)
return getMimeTypeFromExtension(ext, service)
except Exception as e: except Exception as e:
logger.warning(f"Error in MIME type detection for {fileName}: {str(e)}") logger.warning(f"Error in MIME type detection for {fileName}: {str(e)}")
return 'application/octet-stream' return 'application/octet-stream'

View file

@ -108,7 +108,7 @@ class HandlingTasks:
# Log the full task planning prompt being sent to AI for debugging # Log the full task planning prompt being sent to AI for debugging
logger.info("=== TASK PLANNING PROMPT SENT TO AI ===") logger.info("=== TASK PLANNING PROMPT SENT TO AI ===")
logger.info(f"User Input: {userInput}") logger.info(f"User Input: {userInput}")
logger.info(f"Available Documents: {len(available_docs) if available_docs else 0}") logger.info(f"Available Documents: {available_docs}")
logger.info("=== FULL TASK PLANNING PROMPT ===") logger.info("=== FULL TASK PLANNING PROMPT ===")
logger.info(task_planning_prompt) logger.info(task_planning_prompt)
logger.info("=== END TASK PLANNING PROMPT ===") logger.info("=== END TASK PLANNING PROMPT ===")
@ -192,7 +192,8 @@ class HandlingTasks:
task_plan = TaskPlan( task_plan = TaskPlan(
overview=task_plan_dict.get('overview', ''), overview=task_plan_dict.get('overview', ''),
tasks=tasks tasks=tasks,
userMessage=task_plan_dict.get('userMessage', '')
) )
# Set workflow totals for progress tracking # Set workflow totals for progress tracking
@ -217,24 +218,19 @@ class HandlingTasks:
"""Create a chat message containing the task plan with user-friendly messages""" """Create a chat message containing the task plan with user-friendly messages"""
try: try:
# Build task plan summary # Build task plan summary
task_summary = f"📋 **Task Plan Generated**\n\n" task_summary = f"📋 **Task Plan**\n\n"
task_summary += f"**Overview:** {task_plan.overview}\n\n"
task_summary += f"**Total Tasks:** {len(task_plan.tasks)}\n\n"
# Add each task with its user message
for i, task in enumerate(task_plan.tasks):
task_summary += f"**Task {i+1}:** {task.objective}\n"
if task.userMessage:
task_summary += f" 💬 {task.userMessage}\n"
if task.success_criteria:
criteria_str = ', '.join(task.success_criteria)
task_summary += f" ✅ Success Criteria: {criteria_str}\n"
task_summary += "\n"
# Get overall user message from task plan if available # Get overall user message from task plan if available
overall_message = task_plan.userMessage overall_message = task_plan.userMessage
if overall_message: if overall_message:
task_summary += f"**Plan Summary:** {overall_message}\n\n" task_summary += f"{overall_message}\n\n"
# Add each task with its user message
for i, task in enumerate(task_plan.tasks):
if task.userMessage:
task_summary += f"💬 {task.userMessage}\n"
task_summary += "\n"
# Create workflow message # Create workflow message
message_data = { message_data = {
@ -269,76 +265,6 @@ class HandlingTasks:
except Exception as e: except Exception as e:
logger.error(f"Error creating task plan message: {str(e)}") logger.error(f"Error creating task plan message: {str(e)}")
async def createDocumentContextMessage(self, documents: List, workflow):
"""Create a chat message with document context and workflow labeling"""
try:
# Get current workflow context and stats
workflow_context = self.service.getWorkflowContext()
workflow_stats = self.service.getWorkflowStats()
# Create a simple document context message without AI dependency
message_text = f"📄 **Document Context**\n\n"
message_text += f"**Total Documents:** {len(documents)}\n\n"
# Add workflow context information
current_round = workflow_context.get('currentRound', 0)
current_task = workflow_context.get('currentTask', 0)
total_tasks = workflow_stats.get('totalTasks', 0)
current_action = workflow_context.get('currentAction', 0)
total_actions = workflow_stats.get('totalActions', 0)
message_text += f"**Workflow Context:**\n"
message_text += f"- Round: {current_round}\n"
if total_tasks > 0:
message_text += f"- Task: {current_task}/{total_tasks}\n"
else:
message_text += f"- Task: {current_task}\n"
if total_actions > 0:
message_text += f"- Action: {current_action}/{total_actions}\n"
else:
message_text += f"- Action: {current_action}\n"
message_text += f"- Status: {workflow_stats.get('workflowStatus', 'unknown')}\n\n"
# Add document list
if documents:
message_text += "**Available Documents:**\n"
for i, doc in enumerate(documents[:5]): # Show first 5 documents
message_text += f"- {doc.fileName if hasattr(doc, 'fileName') else f'Document {i+1}'}\n"
if len(documents) > 5:
message_text += f"- ... and {len(documents) - 5} more documents\n"
message_text += "\n"
message_text += "Document context information is available for processing."
# Create workflow message
message_data = {
"workflowId": workflow.id,
"role": "assistant",
"message": message_text,
"status": "step",
"sequenceNr": len(workflow.messages) + 1,
"publishedAt": get_utc_timestamp(),
"documentsLabel": "document_context",
"documents": [], # Empty documents for context message
# Add workflow context fields
"roundNumber": workflow_context.get('currentRound', 0),
"taskNumber": workflow_context.get('currentTask', 0),
"actionNumber": workflow_context.get('currentAction', 0),
# Add progress status
"taskProgress": "pending",
"actionProgress": "pending"
}
message = self.chatInterface.createWorkflowMessage(message_data)
if message:
workflow.messages.append(message)
logger.info(f"Document context message created with {len(documents)} documents")
else:
logger.error("Failed to create document context message")
except Exception as e:
logger.error(f"Error creating document context message: {str(e)}")
async def generateTaskActions(self, task_step, workflow, previous_results=None, enhanced_context=None) -> List[TaskAction]: async def generateTaskActions(self, task_step, workflow, previous_results=None, enhanced_context=None) -> List[TaskAction]:
"""Generate actions for a given task step.""" """Generate actions for a given task step."""
try: try:
@ -386,12 +312,8 @@ class HandlingTasks:
# Log available resources for debugging # Log available resources for debugging
logger.info("=== AVAILABLE RESOURCES FOR ACTION GENERATION ===") logger.info("=== AVAILABLE RESOURCES FOR ACTION GENERATION ===")
logger.info(f"Available Documents: {len(available_docs) if available_docs else 0}") logger.info(f"Available Documents: {available_docs}")
if available_docs: # Note: available_docs is now a string description, not a list
for i, doc in enumerate(available_docs[:5]): # Show first 5
logger.info(f" Doc {i+1}: {doc}")
if len(available_docs) > 5:
logger.info(f" ... and {len(available_docs) - 5} more documents")
logger.info(f"Available Connections: {len(available_connections) if available_connections else 0}") logger.info(f"Available Connections: {len(available_connections) if available_connections else 0}")
if available_connections: if available_connections:
for i, conn in enumerate(available_connections[:5]): # Show first 5 for i, conn in enumerate(available_connections[:5]): # Show first 5
@ -450,7 +372,7 @@ class HandlingTasks:
logger.info(f"Task Step ID: {action_context.task_step.id if action_context.task_step else 'None'}") logger.info(f"Task Step ID: {action_context.task_step.id if action_context.task_step else 'None'}")
logger.info(f"Task Step Objective: {action_context.task_step.objective if action_context.task_step else 'None'}") logger.info(f"Task Step Objective: {action_context.task_step.objective if action_context.task_step else 'None'}")
logger.info(f"Workflow ID: {action_context.workflow_id}") logger.info(f"Workflow ID: {action_context.workflow_id}")
logger.info(f"Available Documents Count: {len(action_context.available_documents) if action_context.available_documents else 0}") logger.info(f"Available Documents: {action_context.available_documents or 'No documents available'}")
logger.info(f"Available Connections Count: {len(action_context.available_connections) if action_context.available_connections else 0}") logger.info(f"Available Connections Count: {len(action_context.available_connections) if action_context.available_connections else 0}")
logger.info(f"Previous Results Count: {len(action_context.previous_results) if action_context.previous_results else 0}") logger.info(f"Previous Results Count: {len(action_context.previous_results) if action_context.previous_results else 0}")
logger.info(f"Retry Count: {action_context.retry_count}") logger.info(f"Retry Count: {action_context.retry_count}")
@ -546,25 +468,13 @@ class HandlingTasks:
# Create database log entry for task start in format expected by frontend # Create database log entry for task start in format expected by frontend
if task_index is not None: if task_index is not None:
if total_tasks is not None:
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"Executing task {task_index}/{total_tasks}",
"type": "info"
})
else:
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"Executing task {task_index}/?",
"type": "info"
})
# Create a task start message for the user # Create a task start message for the user
task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index) task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index)
task_start_message = { task_start_message = {
"workflowId": workflow.id, "workflowId": workflow.id,
"role": "assistant", "role": "assistant",
"message": f"🚀 Starting Task {task_progress}\n\nObjective: {task_step.objective}", "message": f"🚀 **Task {task_progress}**",
"status": "step", "status": "step",
"sequenceNr": len(workflow.messages) + 1, "sequenceNr": len(workflow.messages) + 1,
"publishedAt": get_utc_timestamp(), "publishedAt": get_utc_timestamp(),
@ -617,11 +527,6 @@ class HandlingTasks:
logger.error("No actions defined for task step, aborting task execution") logger.error("No actions defined for task step, aborting task execution")
break break
# Create document context message if documents are available
available_docs = self.service.getAvailableDocuments(workflow)
if available_docs:
await self.createDocumentContextMessage(available_docs, workflow)
action_results = [] action_results = []
for action_idx, action in enumerate(actions): for action_idx, action in enumerate(actions):
# Check workflow status before each action execution # Check workflow status before each action execution
@ -639,18 +544,11 @@ class HandlingTasks:
# Log action start in format expected by frontend # Log action start in format expected by frontend
logger.info(f"Task {task_index} - Starting action {action_number}/{total_actions}") logger.info(f"Task {task_index} - Starting action {action_number}/{total_actions}")
# Create database log entry for action start
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"Task {task_index} - Starting action {action_number}/{total_actions}",
"type": "info"
})
# Create an action start message for the user # Create an action start message for the user
action_start_message = { action_start_message = {
"workflowId": workflow.id, "workflowId": workflow.id,
"role": "assistant", "role": "assistant",
"message": f"Task {task_index} - Action {action_number}/{total_actions}\n\nMethod: {action.execMethod}.{action.execAction}", "message": f"⚡ **Action {action_number}/{total_actions}** (Method {action.execMethod}.{action.execAction})",
"status": "step", "status": "step",
"sequenceNr": len(workflow.messages) + 1, "sequenceNr": len(workflow.messages) + 1,
"publishedAt": get_utc_timestamp(), "publishedAt": get_utc_timestamp(),
@ -694,34 +592,19 @@ class HandlingTasks:
if success: if success:
logger.info(f"=== TASK {task_index or '?'} COMPLETED SUCCESSFULLY: {task_step.objective} ===") logger.info(f"=== TASK {task_index or '?'} COMPLETED SUCCESSFULLY: {task_step.objective} ===")
# Create database log entry for task completion
if total_tasks is not None:
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"🎯 Task {task_index}/{total_tasks} completed",
"type": "success"
})
else:
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"🎯 Task {task_index}/? completed",
"type": "success"
})
# Create a task completion message for the user # Create a task completion message for the user
task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index) task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index)
# Enhanced completion message with criteria details # Enhanced completion message with criteria details
completion_message = f"🎯 Task {task_progress} Completed Successfully!\n\nObjective: {task_step.objective}\n\nFeedback: {feedback or 'Task completed successfully'}" completion_message = f"🎯 **Task {task_progress}**\n\n{feedback or 'Task completed successfully'}"
# Add criteria status if available # Add criteria status if available
if hasattr(review_result, 'met_criteria') and review_result.met_criteria: if hasattr(review_result, 'met_criteria') and review_result.met_criteria:
completion_message += f"\n\n✅ **Success Criteria Met:**\n"
for criterion in review_result.met_criteria: for criterion in review_result.met_criteria:
completion_message += f"{criterion}\n" completion_message += f"\n{criterion}"
if hasattr(review_result, 'quality_score'): if hasattr(review_result, 'quality_score'):
completion_message += f"\n📊 **Quality Score:** {review_result.quality_score}/10" completion_message += f"\n📊 Score {review_result.quality_score}/10"
task_completion_message = { task_completion_message = {
"workflowId": workflow.id, "workflowId": workflow.id,
@ -740,10 +623,6 @@ class HandlingTasks:
"taskProgress": "success" "taskProgress": "success"
} }
# Add user-friendly message if available
if task_step.userMessage:
task_completion_message["message"] += f"\n\n💬 {task_step.userMessage}"
message = self.chatInterface.createWorkflowMessage(task_completion_message) message = self.chatInterface.createWorkflowMessage(task_completion_message)
if message: if message:
workflow.messages.append(message) workflow.messages.append(message)
@ -824,7 +703,7 @@ class HandlingTasks:
retry_message = { retry_message = {
"workflowId": workflow.id, "workflowId": workflow.id,
"role": "assistant", "role": "assistant",
"message": f"🔄 Task {task_index} requires retry: {review_result.improvements}", "message": f"🔄 **Task {task_index}** needs retry: {review_result.improvements}",
"status": "step", "status": "step",
"sequenceNr": len(workflow.messages) + 1, "sequenceNr": len(workflow.messages) + 1,
"publishedAt": get_utc_timestamp(), "publishedAt": get_utc_timestamp(),
@ -843,19 +722,19 @@ class HandlingTasks:
continue continue
else: else:
logger.error(f"=== TASK {task_index or '?'} FAILED: {task_step.objective} after {attempt+1} attempts ===") logger.error(f"=== TASK {task_index or '?'} FAILED: {task_step.objective} after {attempt+1} attempts ===")
task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index)
# Create user-facing error message for task failure # Create user-facing error message for task failure
error_message = f"❌ Task {task_index or '?'} - '{task_step.objective}' failed after {attempt+1} attempts\n\n" error_message = f"**Task {task_progress}**\n\n'{task_step.objective}' {attempt+1}x failed\n\n"
error_message += f"Objective: {task_step.objective}\n\n"
# Add specific error details if available # Add specific error details if available
if review_result and hasattr(review_result, 'reason') and review_result.reason: if review_result and hasattr(review_result, 'reason') and review_result.reason:
error_message += f"Reason: {review_result.reason}\n\n" error_message += f"{review_result.reason}\n\n"
# Add criteria progress information if available # Add criteria progress information if available
if retry_context and hasattr(retry_context, 'criteria_progress'): if retry_context and hasattr(retry_context, 'criteria_progress'):
progress = retry_context.criteria_progress progress = retry_context.criteria_progress
error_message += f"📊 **Progress Summary:**\n" error_message += f"📊 **Details**\n"
if progress.get('met_criteria'): if progress.get('met_criteria'):
error_message += f"✅ Met criteria: {', '.join(progress['met_criteria'])}\n" error_message += f"✅ Met criteria: {', '.join(progress['met_criteria'])}\n"
if progress.get('unmet_criteria'): if progress.get('unmet_criteria'):
@ -908,19 +787,18 @@ class HandlingTasks:
logger.error(f"=== TASK {task_index or '?'} FAILED AFTER ALL RETRIES: {task_step.objective} ===") logger.error(f"=== TASK {task_index or '?'} FAILED AFTER ALL RETRIES: {task_step.objective} ===")
# Create user-facing error message for task failure # Create user-facing error message for task failure
error_message = f"Task {task_index or '?'} - '{task_step.objective}' failed after all retries\n\n" error_message = f"**Task {task_index or '?'}**\n\n '{task_step.objective}' failed after all retries\n\n"
error_message += f"Objective: {task_step.objective}\n\n" error_message += f"{task_step.objective}\n\n"
# Add specific error details if available # Add specific error details if available
if retry_context and hasattr(retry_context, 'previous_review_result') and retry_context.previous_review_result: if retry_context and hasattr(retry_context, 'previous_review_result') and retry_context.previous_review_result:
reason = retry_context.previous_review_result.get('reason', '') reason = retry_context.previous_review_result.get('reason', '')
if reason and reason != "Task failed after all retries.": if reason and reason != "Task failed after all retries.":
error_message += f"Reason: {reason}\n\n" error_message += f"{reason}\n\n"
# Add retry information # Add retry information
error_message += f"Retries attempted: {retry_context.retry_count if retry_context else 'Unknown'}\n" error_message += f"Retries attempted: {retry_context.retry_count if retry_context else 'Unknown'}\n"
error_message += f"Status: Task failed permanently\n\n" error_message += f"Status: Task failed permanently"
error_message += "Please check the connection and try again, or contact support if the issue persists."
# Create workflow message for user # Create workflow message for user
message_data = { message_data = {
@ -1170,7 +1048,8 @@ class HandlingTasks:
processingTime=createdAction.get("processingTime"), processingTime=createdAction.get("processingTime"),
timestamp=float(createdAction.get("timestamp", get_utc_timestamp())), timestamp=float(createdAction.get("timestamp", get_utc_timestamp())),
result=createdAction.get("result"), result=createdAction.get("result"),
resultDocuments=createdAction.get("resultDocuments", []) resultDocuments=createdAction.get("resultDocuments", []),
userMessage=createdAction.get("userMessage")
) )
except Exception as e: except Exception as e:
@ -1241,20 +1120,6 @@ class HandlingTasks:
# Log action results # Log action results
logger.info(f"Action completed successfully") logger.info(f"Action completed successfully")
# Create database log entry for action completion
if total_actions is not None:
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"✅ Task {task_num} - Action {action_num}/{total_actions} completed",
"type": "success"
})
else:
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"✅ Task {task_num} - Action {action_num}/? completed",
"type": "success"
})
if created_documents: if created_documents:
logger.info(f"Output documents ({len(created_documents)}):") logger.info(f"Output documents ({len(created_documents)}):")
for i, doc in enumerate(created_documents): for i, doc in enumerate(created_documents):
@ -1276,19 +1141,12 @@ class HandlingTasks:
await self.createActionMessage(action, result, workflow, result_label, [], task_step, task_index) await self.createActionMessage(action, result, workflow, result_label, [], task_step, task_index)
# Create database log entry for action failure # Create database log entry for action failure
if total_actions is not None: self.chatInterface.createWorkflowLog({
self.chatInterface.createWorkflowLog({ "workflowId": workflow.id,
"workflowId": workflow.id, "message": f"❌ **Task {task_num}**\n\n❌ **Action {action_num}/{total_actions}** failed: {result.error}",
"message": f"❌ Task {task_num} - Action {action_num}/{total_actions} failed: {result.error}", "type": "error"
"type": "error" })
})
else:
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"❌ Task {task_num} - Action {action_num}/? failed: {result.error}",
"type": "error"
})
# Log action summary # Log action summary
logger.info(f"=== TASK {task_num} ACTION {action_num} COMPLETED ===") logger.info(f"=== TASK {task_num} ACTION {action_num} COMPLETED ===")
@ -1336,89 +1194,25 @@ class HandlingTasks:
# Create a more meaningful message that includes task context # Create a more meaningful message that includes task context
task_objective = task_step.objective if task_step else 'Unknown task' task_objective = task_step.objective if task_step else 'Unknown task'
# Add comprehensive workflow context
current_round = workflow_context.get('currentRound', 0)
current_task = workflow_context.get('currentTask', 0)
total_tasks = workflow_stats.get('totalTasks', 0)
current_action = workflow_context.get('currentAction', 0)
total_actions = workflow_stats.get('totalActions', 0)
# Build a user-friendly message based on success/failure # Build a user-friendly message based on success/failure
if result.success: if result.success:
if created_documents and len(created_documents) > 0: message_text = f"**Action {current_action}/{total_actions} ({action.execMethod}.{action.execAction})**\n\n"
doc_names = [doc.fileName for doc in created_documents[:3]] message_text += f"{task_objective}\n\n"
if len(created_documents) > 3:
doc_names.append(f"... and {len(created_documents) - 3} more")
# Enhanced message with workflow context
message_text = f"✅ **Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} Completed**\n\n"
message_text += f"**Objective:** {task_objective}\n\n"
message_text += f"**Generated {len(created_documents)} document(s):** {', '.join(doc_names)}\n\n"
message_text += f"**Result Label:** {result_label}\n"
# Add comprehensive workflow context
current_round = workflow_context.get('currentRound', 0)
current_task = workflow_context.get('currentTask', 0)
total_tasks = workflow_stats.get('totalTasks', 0)
current_action = workflow_context.get('currentAction', 0)
total_actions = workflow_stats.get('totalActions', 0)
message_text += f"**Workflow Context:**\n"
message_text += f"- Round: {current_round}\n"
if total_tasks > 0:
message_text += f"- Task: {current_task}/{total_tasks}\n"
else:
message_text += f"- Task: {current_task}\n"
if total_actions > 0:
message_text += f"- Action: {current_action}/{total_actions}\n"
else:
message_text += f"- Action: {current_action}\n"
message_text += f"- Status: {workflow_stats.get('workflowStatus', 'unknown')}"
else:
message_text = f"✅ **Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} Completed**\n\n"
message_text += f"**Objective:** {task_objective}\n\n"
message_text += "**Action executed successfully**\n\n"
message_text += f"**Result Label:** {result_label}\n"
# Add comprehensive workflow context
current_round = workflow_context.get('currentRound', 0)
current_task = workflow_context.get('currentTask', 0)
total_tasks = workflow_stats.get('totalTasks', 0)
current_action = workflow_context.get('currentAction', 0)
total_actions = workflow_stats.get('totalActions', 0)
message_text += f"**Workflow Context:**\n"
message_text += f"- Round: {current_round}\n"
if total_tasks > 0:
message_text += f"- Task: {current_task}/{total_tasks}\n"
else:
message_text += f"- Task: {current_task}\n"
if total_actions > 0:
message_text += f"- Action: {current_action}/{total_actions}\n"
else:
message_text += f"- Action: {current_action}\n"
message_text += f"- Status: {workflow_stats.get('workflowStats', 'unknown')}"
else: else:
# ⚠️ FAILURE MESSAGE - Show error details to user # ⚠️ FAILURE MESSAGE - Show error details to user
error_details = result.error if result.error else "Unknown error occurred" error_details = result.error if result.error else "Unknown error occurred"
message_text = f"❌ **Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} Failed**\n\n" message_text = f"**Action {current_action}/{total_actions} ({action.execMethod}.{action.execAction})**\n\n"
message_text += f"**Objective:** {task_objective}\n\n" message_text += f"{task_objective}\n\n"
message_text += f"**Error:** {error_details}\n\n" message_text += f"{error_details}\n\n"
message_text += f"**Result Label:** {result_label}\n"
# Add comprehensive workflow context
current_round = workflow_context.get('currentRound', 0)
current_task = workflow_context.get('currentTask', 0)
total_tasks = workflow_stats.get('totalTasks', 0)
current_action = workflow_context.get('currentAction', 0)
total_actions = workflow_stats.get('totalActions', 0)
message_text += f"**Workflow Context:**\n"
message_text += f"- Round: {current_round}\n"
if total_tasks > 0:
message_text += f"- Task: {current_task}/{total_tasks}\n"
else:
message_text += f"- Task: {current_task}\n"
if total_actions > 0:
message_text += f"- Action: {current_action}/{total_actions}\n"
message_text += f"- Action: {current_action}\n"
message_text += f"- Status: {workflow_stats.get('workflowStatus', 'unknown')}\n\n"
message_text += "Please check the connection and try again."
message_data = { message_data = {
"workflowId": workflow.id, "workflowId": workflow.id,
"role": "assistant", "role": "assistant",
@ -1432,19 +1226,12 @@ class HandlingTasks:
"documentsLabel": result_label, "documentsLabel": result_label,
"documents": created_documents, "documents": created_documents,
# Add workflow context fields - extract from result_label to match document reference # Add workflow context fields - extract from result_label to match document reference
"roundNumber": workflow_context.get('currentRound', 0), "roundNumber": current_round,
"taskNumber": task_index, "taskNumber": current_task,
"actionNumber": self._extractActionNumberFromLabel(result_label) if result_label else workflow_context.get('currentAction', 0), "actionNumber": current_action,
"actionProgress": "success" if result.success else "fail" "actionProgress": "success" if result.success else "fail"
} }
# Add user-friendly message if available
if action.userMessage:
if result.success:
message_data["message"] += f"\n\n💬 {action.userMessage}"
else:
message_data["message"] += f"\n\n💬 Action was intended to: {action.userMessage}"
# Add debugging for error messages # Add debugging for error messages
if not result.success: if not result.success:
logger.info(f"Creating ERROR message: {message_text}") logger.info(f"Creating ERROR message: {message_text}")

View file

@ -1,3 +0,0 @@

View file

@ -20,13 +20,13 @@ def createTaskPlanningPrompt(context: TaskContext, service) -> str:
user_request = context.task_step.objective if context.task_step else 'No request specified' user_request = context.task_step.objective if context.task_step else 'No request specified'
# Extract available documents from context - use Pydantic model directly # Extract available documents from context - use Pydantic model directly
available_documents = context.available_documents or [] available_documents = context.available_documents or "No documents available"
return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages. return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages.
USER REQUEST: {user_request} USER REQUEST: {user_request}
AVAILABLE DOCUMENTS: {', '.join(available_documents)} AVAILABLE DOCUMENTS: {available_documents}
INSTRUCTIONS: INSTRUCTIONS:
1. Analyze the user request and available documents 1. Analyze the user request and available documents
@ -34,8 +34,8 @@ INSTRUCTIONS:
3. Focus on business outcomes, not technical operations 3. Focus on business outcomes, not technical operations
4. Each task should produce meaningful, usable outputs 4. Each task should produce meaningful, usable outputs
5. Ensure proper handover between tasks using result labels 5. Ensure proper handover between tasks using result labels
6. Generate user-friendly messages for each task in the user's language ({user_language}) 6. Detect the language of the user request and include it in languageUserDetected
7. Detect the language of the user request and include it in languageUserDetected 7. Generate user-friendly messages for each task in the user's request language
8. Return a JSON object with the exact structure shown below 8. Return a JSON object with the exact structure shown below
TASK GROUPING PRINCIPLES: TASK GROUPING PRINCIPLES:
@ -63,15 +63,15 @@ TASK PLANNING PRINCIPLES:
- Keep tasks at a meaningful level of abstraction - Keep tasks at a meaningful level of abstraction
- Each task should produce results that can be used by subsequent tasks - Each task should produce results that can be used by subsequent tasks
- Ensure clear dependencies and handovers between tasks - Ensure clear dependencies and handovers between tasks
- Provide clear, actionable user messages in the user's language ({user_language}) - Provide clear, actionable user messages in the user's request language
- Group related activities to minimize task fragmentation - Group related activities to minimize task fragmentation
- Only create multiple tasks when dealing with truly different, independent objectives - Only create multiple tasks when dealing with truly different, independent objectives
REQUIRED JSON STRUCTURE: REQUIRED JSON STRUCTURE:
{{ {{
"overview": "Brief description of the overall plan", "overview": "Brief description of the overall plan",
"userMessage": "User-friendly message explaining the task plan in {user_language}",
"languageUserDetected": "en", // Language code detected from user request (en, de, fr, it, es, etc.) "languageUserDetected": "en", // Language code detected from user request (en, de, fr, it, es, etc.)
"userMessage": "User-friendly message explaining the task plan in user's request language",
"tasks": [ "tasks": [
{{ {{
"id": "task_1", "id": "task_1",
@ -79,7 +79,7 @@ REQUIRED JSON STRUCTURE:
"dependencies": ["task_0"], // IDs of tasks that must complete first "dependencies": ["task_0"], // IDs of tasks that must complete first
"success_criteria": ["criteria1", "criteria2"], "success_criteria": ["criteria1", "criteria2"],
"estimated_complexity": "low|medium|high", "estimated_complexity": "low|medium|high",
"userMessage": "User-friendly message explaining what this task will accomplish in {user_language}" "userMessage": "User-friendly message explaining what this task will accomplish in user's request language"
}} }}
] ]
}} }}

View file

@ -14,6 +14,7 @@ from modules.interfaces.interfaceChatModel import ActionResult
from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
from modules.chat.documents.documentExtraction import DocumentExtraction from modules.chat.documents.documentExtraction import DocumentExtraction
from modules.chat.documents.documentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
from modules.chat.methodBase import MethodBase from modules.chat.methodBase import MethodBase
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import get_utc_timestamp
import uuid import uuid
@ -111,165 +112,9 @@ class ServiceCenter:
except Exception as e: except Exception as e:
logger.error(f"Error discovering methods: {str(e)}") logger.error(f"Error discovering methods: {str(e)}")
def detectContentTypeFromData(self, fileData: bytes, fileName: str) -> str:
    """
    Detect the MIME type of a file from its name and, failing that, its content.

    The file extension is consulted first. Only when the name has no extension,
    or an extension that is not in the table, are the leading bytes of
    fileData compared against known file signatures.

    Args:
        fileData: Raw file data as bytes
        fileName: Name of the file

    Returns:
        str: Detected MIME type; 'application/octet-stream' when nothing
        matches or when detection raises.
    """
    try:
        # Check file extension first
        ext = os.path.splitext(fileName)[1].lower()
        if ext:
            # Map common extensions to MIME types
            extToMime = {
                '.txt': 'text/plain',
                '.md': 'text/markdown',
                '.csv': 'text/csv',
                '.json': 'application/json',
                '.xml': 'application/xml',
                '.js': 'application/javascript',
                '.py': 'application/x-python',
                '.svg': 'image/svg+xml',
                '.jpg': 'image/jpeg',
                '.jpeg': 'image/jpeg',
                '.png': 'image/png',
                '.gif': 'image/gif',
                '.bmp': 'image/bmp',
                '.webp': 'image/webp',
                '.pdf': 'application/pdf',
                '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                '.doc': 'application/msword',
                '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                '.xls': 'application/vnd.ms-excel',
                '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
                '.ppt': 'application/vnd.ms-powerpoint',
                '.html': 'text/html',
                '.htm': 'text/html',
                '.css': 'text/css',
                '.zip': 'application/zip',
                '.rar': 'application/x-rar-compressed',
                '.7z': 'application/x-7z-compressed',
                '.tar': 'application/x-tar',
                '.gz': 'application/gzip',
            }
            if ext in extToMime:
                return extToMime[ext]

        # Try to detect from content
        if fileData.startswith(b'%PDF'):
            return 'application/pdf'
        if fileData.startswith(b'PK\x03\x04'):
            # ZIP-based formats (docx, xlsx, pptx)
            return 'application/zip'
        if fileData.startswith(b'<'):
            # Markup: decide by whichever root-like tag appears FIRST, so an
            # HTML page that merely embeds an inline <svg> stays text/html.
            # errors='ignore' means the decode itself cannot raise.
            text = fileData.decode('utf-8', errors='ignore').lower()
            svgAt = text.find('<svg')
            htmlAt = text.find('<html')
            if htmlAt != -1 and (svgAt == -1 or htmlAt < svgAt):
                return 'text/html'
            if svgAt != -1:
                return 'image/svg+xml'
            return 'application/xml'
        if fileData.startswith(b'\x89PNG\r\n\x1a\n'):
            return 'image/png'
        if fileData.startswith(b'\xff\xd8\xff'):
            return 'image/jpeg'
        if fileData.startswith((b'GIF87a', b'GIF89a')):
            return 'image/gif'
        if fileData.startswith(b'BM'):
            return 'image/bmp'
        if fileData.startswith(b'RIFF') and fileData[8:12] == b'WEBP':
            return 'image/webp'

        return 'application/octet-stream'
    except Exception as e:
        # Best-effort by design: a detection failure must not break the caller
        logger.error(f"Error detecting content type from data: {str(e)}")
        return 'application/octet-stream'
def getMimeTypeFromExtension(self, extension: str) -> str:
    """
    Get MIME type based on file extension.

    Args:
        extension: File extension (with or without leading dot, any case).
            None or an empty string is treated as "unknown".

    Returns:
        str: MIME type for the extension, or 'application/octet-stream'
        when the extension is missing or not in the table.
    """
    # A missing extension is "unknown", not an error
    if not extension:
        return 'application/octet-stream'

    # Normalize extension (remove dot if present)
    if extension.startswith('.'):
        extension = extension[1:]

    # Map extensions to MIME types
    # NOTE(review): detectContentTypeFromData maps '.py' to
    # 'application/x-python' while this table uses 'text/x-python' -
    # confirm which one callers expect before unifying the two tables.
    mime_types = {
        'txt': 'text/plain',
        'json': 'application/json',
        'xml': 'application/xml',
        'csv': 'text/csv',
        'html': 'text/html',
        'htm': 'text/html',
        'md': 'text/markdown',
        'py': 'text/x-python',
        'js': 'application/javascript',
        'css': 'text/css',
        'pdf': 'application/pdf',
        'doc': 'application/msword',
        'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'xls': 'application/vnd.ms-excel',
        'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'ppt': 'application/vnd.ms-powerpoint',
        'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        'svg': 'image/svg+xml',
        'jpg': 'image/jpeg',
        'jpeg': 'image/jpeg',
        'png': 'image/png',
        'gif': 'image/gif',
        'bmp': 'image/bmp',
        'webp': 'image/webp',
        'zip': 'application/zip',
        'rar': 'application/x-rar-compressed',
        '7z': 'application/x-7z-compressed',
        'tar': 'application/x-tar',
        'gz': 'application/gzip',
    }

    return mime_types.get(extension.lower(), 'application/octet-stream')
def getFileExtension(self, fileName: str) -> str: # ===== Functions for Prompts: Context =====
"""
Extract file extension from fileName.
Args:
fileName: Name of the file
Returns:
str: File extension (without dot)
"""
if '.' in fileName:
return fileName.split('.')[-1].lower()
return "txt" # Default to text
def getFileExtension(self, fileName):
    """
    Extract file extension from fileName (without dot, lowercased).

    Only the last path component is inspected, so a dot inside a directory
    name (e.g. 'my.dir/readme') is not mistaken for an extension.

    Args:
        fileName: File name or path; None/empty is accepted.

    Returns:
        str: The extension without the dot, lowercased, or an empty string
        if no extension is found.
    """
    if not fileName:
        return ''
    # Accept both '/' and '\\' separators before isolating the base name
    baseName = fileName.replace('\\', '/').rsplit('/', 1)[-1]
    if '.' in baseName:
        return baseName.rsplit('.', 1)[-1].lower()
    return ''
# ===== Functions =====
def getMethodsList(self) -> List[str]: def getMethodsList(self) -> List[str]:
"""Get list of available methods with their signatures in the required format""" """Get list of available methods with their signatures in the required format"""
@ -283,48 +128,122 @@ class ServiceCenter:
methodList.append(signature) methodList.append(signature)
return methodList return methodList
async def summarizeChat(self, messages: List[ChatMessage]) -> str:
"""
Summarize chat messages from last to first message with status="first"
def generateDocumentLabel(self, document: ChatDocument, message: ChatMessage) -> str: Args:
"""Generate new document label: round+task+action+filename.extension""" messages: List of chat messages to summarize
Returns:
str: Summary of the chat in user's language
"""
try: try:
# Get workflow context from message # Get messages from last to first, stopping at first message with status="first"
round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1 relevantMessages = []
task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0 for msg in reversed(messages):
action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0 relevantMessages.append(msg)
if msg.status == "first":
break
# Get file extension from document's fileName property # Create prompt for AI
try: prompt = f"""You are an AI assistant providing a summary of a chat conversation.
file_extension = self.getFileExtension(document.fileName) Please respond in '{self.user.language}' language.
filename = document.fileName
except Exception as e: Chat History:
# Try to diagnose and recover the issue {chr(10).join(f"- {msg.message}" for msg in reversed(relevantMessages))}
diagnosis = self.diagnoseDocumentAccess(document)
logger.error(f"Critical error: Cannot access document fileName for document {document.id}. Diagnosis: {diagnosis}") Instructions:
1. Summarize the conversation's key points and outcomes
# Attempt recovery 2. Be concise but informative
if self.recoverDocumentAccess(document): 3. Use a professional but friendly tone
try: 4. Focus on important decisions and next steps if any
file_extension = self.getFileExtension(document.fileName)
filename = document.fileName Please provide a comprehensive summary of this conversation."""
logger.info(f"Document access recovered for {document.id}")
except Exception as recovery_error:
logger.error(f"Recovery failed for document {document.id}: {str(recovery_error)}")
raise RuntimeError(f"Document {document.id} is permanently inaccessible after recovery attempt: {str(recovery_error)}")
else:
# Recovery failed - don't continue with invalid data
raise RuntimeError(f"Document {document.id} is inaccessible and recovery failed. Diagnosis: {diagnosis}")
# Construct label: round1_task2_action3_filename.ext # Get summary using AI
if file_extension: return await self.callAiTextBasic(prompt)
label = f"round{round_num}_task{task_num}_action{action_num}_{filename}"
else:
label = f"round{round_num}_task{task_num}_action{action_num}_{filename}"
return label
except Exception as e: except Exception as e:
logger.error(f"Critical error generating document label for document {document.id}: {str(e)}") logger.error(f"Error summarizing chat: {str(e)}")
# Re-raise the error to prevent workflow from continuing with invalid data return f"Error summarizing chat: {str(e)}"
raise
# ===== Functions for Prompts + Actions: Document References generation and resolution =====
def getEnhancedDocumentContext(self) -> str:
    """Get enhanced document context formatted for action planning prompts with proper docList and docItem references.

    Reads the exchanges returned by getDocumentReferenceList() and renders one
    section for the "chat" exchanges and one for the "history" exchanges.
    Each exchange becomes a 'docList:<messageId>:<label>' line (or
    'docList:<label>' when no workflow message carries that label), followed
    by one 'docItem:...' line per document reference.

    Returns:
        str: The rendered context. On any exception the error is logged and a
        fixed "NO DOCUMENTS AVAILABLE" error string is returned instead.
    """
    try:
        document_list = self.getDocumentReferenceList()
        chat_exchanges = document_list["chat"]
        history_exchanges = document_list["history"]

        # Build technical context string for AI action planning
        parts = ["AVAILABLE DOCUMENTS:\n\n"]

        if not chat_exchanges and not history_exchanges:
            parts.append("NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n")
            return "".join(parts)

        # Index the first message per documentsLabel once, instead of
        # rescanning every workflow message for every exchange.
        first_message_by_label = {}
        for message in self.workflow.messages:
            if hasattr(message, 'documentsLabel'):
                first_message_by_label.setdefault(message.documentsLabel, message)

        # (section header, exchanges, debug-log prefix) - both sections share
        # the same rendering and differ only in these three values.
        sections = (
            ("CURRENT ROUND DOCUMENTS:\n", chat_exchanges,
             "Using document label for action planning"),
            ("WORKFLOW HISTORY DOCUMENTS:\n", history_exchanges,
             "Using history document label for action planning"),
        )

        for header, exchanges, log_prefix in sections:
            if not exchanges:
                continue
            parts.append(header)
            for exchange in exchanges:
                label = exchange.documentsLabel
                owner = first_message_by_label.get(label)
                message_id = owner.id if owner is not None else None

                if message_id:
                    doc_list_ref = f"docList:{message_id}:{label}"
                else:
                    # Fallback to label-only format if message ID not found
                    doc_list_ref = f"docList:{label}"

                logger.debug(f"{log_prefix}: {label} (message_id: {message_id})")
                parts.append(f"- {doc_list_ref} contains:\n")

                # Generate docItem references for each document in the list
                for doc_ref in exchange.documents:
                    # Convert to proper docItem format if needed
                    item = doc_ref if doc_ref.startswith("docItem:") else f"docItem:{doc_ref}"
                    parts.append(f" - {item}\n")
                parts.append("\n")

        return "".join(parts)

    except Exception as e:
        logger.error(f"Error generating enhanced document context: {str(e)}")
        return "NO DOCUMENTS AVAILABLE - Error generating document context."
def getDocumentReferenceList(self) -> Dict[str, List[DocumentExchange]]: def getDocumentReferenceList(self) -> Dict[str, List[DocumentExchange]]:
"""Get list of document exchanges with new labeling format, sorted by recency""" """Get list of document exchanges with new labeling format, sorted by recency"""
@ -336,7 +255,7 @@ class ServiceCenter:
# Refresh file attributes for all documents # Refresh file attributes for all documents
if all_documents: if all_documents:
self.refreshDocumentFileAttributes(all_documents) self._refreshDocumentFileAttributes(all_documents)
chat_exchanges = [] chat_exchanges = []
history_exchanges = [] history_exchanges = []
@ -350,29 +269,30 @@ class ServiceCenter:
doc_exchange = None doc_exchange = None
if message.documents: if message.documents:
if message.actionId and message.documentsLabel: if message.actionId and message.documentsLabel:
# Use new document label format # Validate that we use the same label as in the message
validated_label = self._validateDocumentLabelConsistency(message)
# Use the message's actual documentsLabel
doc_refs = [] doc_refs = []
for doc in message.documents: for doc in message.documents:
doc_ref = self.getDocumentReferenceFromChatDocument(doc, message) doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
doc_refs.append(doc_ref) doc_refs.append(doc_ref)
doc_exchange = DocumentExchange( doc_exchange = DocumentExchange(
documentsLabel=message.documentsLabel, documentsLabel=validated_label,
documents=doc_refs documents=doc_refs
) )
else: else:
# Generate new labels for documents without explicit labels # Generate new labels for documents without explicit labels
doc_refs = [] doc_refs = []
for doc in message.documents: for doc in message.documents:
doc_ref = self.getDocumentReferenceFromChatDocument(doc, message) doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
doc_refs.append(doc_ref) doc_refs.append(doc_ref)
if doc_refs: if doc_refs:
# Create a label based on message context # Create a label based on message context
round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1 context_prefix = self._generateWorkflowContextPrefix(message)
task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0 context_label = f"{context_prefix}_context"
action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0
context_label = f"round{round_num}_task{task_num}_action{action_num}_context"
doc_exchange = DocumentExchange( doc_exchange = DocumentExchange(
documentsLabel=context_label, documentsLabel=context_label,
@ -400,7 +320,38 @@ class ServiceCenter:
"chat": chat_exchanges, "chat": chat_exchanges,
"history": history_exchanges "history": history_exchanges
} }
def _refreshDocumentFileAttributes(self, documents: List[ChatDocument]) -> None:
    """Copy fileName, fileSize and mimeType from the stored file onto each document.

    For every document the file is fetched via
    self.interfaceComponent.getFile(doc.fileId). A missing file is logged as
    a warning; any exception for one document is logged and does not stop the
    remaining documents from being processed.
    """
    for document in documents:
        try:
            stored_file = self.interfaceComponent.getFile(document.fileId)
            if not stored_file:
                logger.warning(f"File not found for document {document.id}, fileId: {document.fileId}")
                continue
            for attribute in ("fileName", "fileSize", "mimeType"):
                setattr(document, attribute, getattr(stored_file, attribute))
        except Exception as err:
            logger.error(f"Error refreshing file attributes for document {document.id}: {err}")
def _generateWorkflowContextPrefix(self, message: ChatMessage) -> str:
    """Build the 'round{num}_task{num}_action{num}' prefix for a message.

    Attributes missing from the message fall back to round 1, task 0 and
    action 0.
    """
    fallbacks = (('roundNumber', 1), ('taskNumber', 0), ('actionNumber', 0))
    round_num, task_num, action_num = (
        getattr(message, field, fallback) for field, fallback in fallbacks
    )
    return f"round{round_num}_task{task_num}_action{action_num}"
def _getDocumentReferenceFromChatDocument(self, document: ChatDocument, message: ChatMessage) -> str:
    """Get document reference using document ID and filename.

    Args:
        document: Document whose id and fileName form the reference.
        message: Not used by this implementation.

    Returns:
        str: Reference in the form 'docItem:<document.id>:<document.fileName>'.

    Raises:
        Exception: Whatever attribute access on the document raised; it is
        logged and re-raised unchanged.
    """
    try:
        # Use document ID and filename for simple reference
        return f"docItem:{document.id}:{document.fileName}"
    except Exception as e:
        # document.id may be the very access that failed, so read it
        # defensively; otherwise the log call itself raises and is lost.
        logger.error(f"Critical error creating document reference for document {getattr(document, 'id', None)}: {str(e)}")
        # Re-raise the error to prevent workflow from continuing with invalid data
        raise
def _getMessageSequenceForExchange(self, exchange: DocumentExchange) -> int: def _getMessageSequenceForExchange(self, exchange: DocumentExchange) -> int:
"""Get message sequence number for sorting exchanges by recency""" """Get message sequence number for sorting exchanges by recency"""
try: try:
@ -432,54 +383,15 @@ class ServiceCenter:
logger.error(f"Error getting message sequence for exchange: {str(e)}") logger.error(f"Error getting message sequence for exchange: {str(e)}")
return 0 return 0
def getEnhancedDocumentContext(self) -> str: def _validateDocumentLabelConsistency(self, message) -> str:
"""Get enhanced document context formatted for action planning prompts with proper docList and docItem references""" """Validate that the document label used for references matches the message's actual label"""
try: if not hasattr(message, 'documentsLabel') or not message.documentsLabel:
document_list = self.getDocumentReferenceList() logger.debug(f"Message {message.id} has no documentsLabel, returning None")
return None
# Build technical context string for AI action planning # Simply return the message's actual documentsLabel - no correction, just validation
context = "AVAILABLE DOCUMENTS:\n\n" logger.debug(f"Using message's documentsLabel for references: '{message.documentsLabel}'")
return message.documentsLabel
# Process chat exchanges (current round)
if document_list["chat"]:
context += "CURRENT ROUND DOCUMENTS:\n"
for exchange in document_list["chat"]:
# Generate docList reference for the exchange (using message ID)
doc_list_ref = f"docList:{exchange.documentsLabel}"
context += f"- {doc_list_ref} contains:\n"
# Generate docItem references for each document in the list
for doc_ref in exchange.documents:
if doc_ref.startswith("docItem:"):
context += f" - {doc_ref}\n"
else:
# Convert to proper docItem format if needed
context += f" - docItem:{doc_ref}\n"
context += "\n"
# Process history exchanges (previous rounds)
if document_list["history"]:
context += "WORKFLOW HISTORY DOCUMENTS:\n"
for exchange in document_list["history"]:
# Generate docList reference for the exchange (using message ID)
doc_list_ref = f"docList:{exchange.documentsLabel}"
context += f"- {doc_list_ref} contains:\n"
# Generate docItem references for each document in the list
for doc_ref in exchange.documents:
if doc_ref.startswith("docItem:"):
context += f" - {doc_ref}\n"
else:
# Convert to proper docItem format if needed
context += f" - docItem:{doc_ref}\n"
context += "\n"
if not document_list["chat"] and not document_list["history"]:
context += "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n"
return context
except Exception as e:
logger.error(f"Error generating enhanced document context: {str(e)}")
return "NO DOCUMENTS AVAILABLE - Error generating document context."
def _extractDocumentInfoFromReference(self, doc_ref: str) -> Dict[str, str]: def _extractDocumentInfoFromReference(self, doc_ref: str) -> Dict[str, str]:
"""Extract document information from reference string""" """Extract document information from reference string"""
@ -533,27 +445,6 @@ class ServiceCenter:
logger.error(f"Error extracting document info from reference: {str(e)}") logger.error(f"Error extracting document info from reference: {str(e)}")
return None return None
def getDocumentReferenceFromChatDocument(self, document: ChatDocument, message: ChatMessage) -> str:
    """Get document reference using document ID and filename.

    Args:
        document: Document whose id and fileName form the reference.
        message: Not used by this implementation.

    Returns:
        str: Reference in the form 'docItem:<document.id>:<document.fileName>'.

    Raises:
        Exception: Whatever attribute access on the document raised; it is
        logged and re-raised unchanged.
    """
    try:
        # Use document ID and filename for simple reference
        return f"docItem:{document.id}:{document.fileName}"
    except Exception as e:
        # document.id may be the very access that failed, so read it
        # defensively; otherwise the log call itself raises and is lost.
        logger.error(f"Critical error creating document reference for document {getattr(document, 'id', None)}: {str(e)}")
        # Re-raise the error to prevent workflow from continuing with invalid data
        raise
def getDocumentListReferenceFromChatMessage(self, message: ChatMessage) -> str:
    """Get document list reference using message ID and label.

    The label is the message's documentsLabel. When that attribute is
    missing, None or empty, 'message_<message.id>' is used instead so the
    reference never ends in a literal 'None'.

    Args:
        message: Message whose id and documentsLabel form the reference.

    Returns:
        str: Reference in the form 'docList:<message.id>:<label>'.

    Raises:
        Exception: Whatever attribute access on the message raised; it is
        logged and re-raised unchanged.
    """
    try:
        # Use message ID and documentsLabel for document list reference
        label = getattr(message, 'documentsLabel', None) or f"message_{message.id}"
        return f"docList:{message.id}:{label}"
    except Exception as e:
        # message.id may be the access that failed; read it defensively so
        # the log entry is still written.
        logger.error(f"Critical error creating document list reference for message {getattr(message, 'id', None)}: {str(e)}")
        # Re-raise the error to prevent workflow from continuing with invalid data
        raise
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]: def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
"""Get ChatDocuments from a list of document references using all three formats.""" """Get ChatDocuments from a list of document references using all three formats."""
try: try:
@ -569,19 +460,56 @@ class ServiceCenter:
if message.documents: if message.documents:
for doc in message.documents: for doc in message.documents:
if doc.id == doc_id: if doc.id == doc_id:
doc_name = getattr(doc, 'fileName', 'unknown')
logger.debug(f"Found docItem reference {doc_ref}: {doc_name}")
all_documents.append(doc) all_documents.append(doc)
break break
elif doc_ref.startswith("docList:"): elif doc_ref.startswith("docList:"):
# docList:<messageId>:<label> - extract message ID and find document list # docList:<messageId>:<label> or docList:<label> - extract message ID and find document list
parts = doc_ref.split(':') parts = doc_ref.split(':')
if len(parts) >= 2: if len(parts) >= 3:
# Format: docList:<messageId>:<label>
message_id = parts[1] message_id = parts[1]
label = parts[2]
# Find the message by ID and get all its documents # Find the message by ID and get all its documents
for message in self.workflow.messages: for message in self.workflow.messages:
if str(message.id) == message_id: if str(message.id) == message_id:
if message.documents: if message.documents:
doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
logger.debug(f"Found docList reference {doc_ref}: {len(message.documents)} documents - {doc_names}")
all_documents.extend(message.documents) all_documents.extend(message.documents)
else:
logger.debug(f"Found docList reference {doc_ref} but message has no documents")
break break
elif len(parts) >= 2:
# Format: docList:<label> - find message by documentsLabel
label = parts[1]
logger.debug(f"Looking for message with documentsLabel: {label}")
# Find messages with matching documentsLabel
matching_messages = []
for message in self.workflow.messages:
# Check both attribute and raw data for documentsLabel
msg_label = getattr(message, 'documentsLabel', None)
if msg_label == label:
matching_messages.append(message)
logger.debug(f"Found message {message.id} with matching documentsLabel: {msg_label}")
else:
# Debug: show what labels we're comparing
logger.debug(f"Message {message.id} has documentsLabel: '{msg_label}' (looking for: '{label}')")
if matching_messages:
# Use the newest message (highest publishedAt)
matching_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
newest_message = matching_messages[0]
if newest_message.documents:
doc_names = [doc.fileName for doc in newest_message.documents if hasattr(doc, 'fileName')]
logger.debug(f"Found docList reference {doc_ref}: {len(newest_message.documents)} documents - {doc_names}")
all_documents.extend(newest_message.documents)
else:
logger.debug(f"Found docList reference {doc_ref} but message has no documents")
else:
logger.debug(f"No messages found with documentsLabel: {label}")
else: else:
# Direct label reference (round1_task2_action3_contextinfo) # Direct label reference (round1_task2_action3_contextinfo)
# Search for messages with matching documentsLabel to find the actual documents # Search for messages with matching documentsLabel to find the actual documents
@ -619,8 +547,9 @@ class ServiceCenter:
logger.debug(f"Newest message has {len(newest_message.documents) if newest_message.documents else 0} documents") logger.debug(f"Newest message has {len(newest_message.documents) if newest_message.documents else 0} documents")
if newest_message.documents: if newest_message.documents:
doc_names = [doc.fileName for doc in newest_message.documents if hasattr(doc, 'fileName')]
logger.debug(f"Added {len(newest_message.documents)} documents from newest message {newest_message.id}: {doc_names}")
all_documents.extend(newest_message.documents) all_documents.extend(newest_message.documents)
logger.debug(f"Added {len(newest_message.documents)} documents from newest message {newest_message.id}")
else: else:
logger.debug(f"No documents found in newest message {newest_message.id}") logger.debug(f"No documents found in newest message {newest_message.id}")
else: else:
@ -641,8 +570,9 @@ class ServiceCenter:
logger.debug(f"Using fallback message {newest_fallback.id} with documentsLabel: {getattr(newest_fallback, 'documentsLabel', 'unknown')}") logger.debug(f"Using fallback message {newest_fallback.id} with documentsLabel: {getattr(newest_fallback, 'documentsLabel', 'unknown')}")
if newest_fallback.documents: if newest_fallback.documents:
doc_names = [doc.fileName for doc in newest_fallback.documents if hasattr(doc, 'fileName')]
logger.debug(f"Added {len(newest_fallback.documents)} documents from fallback message {newest_fallback.id}: {doc_names}")
all_documents.extend(newest_fallback.documents) all_documents.extend(newest_fallback.documents)
logger.debug(f"Added {len(newest_fallback.documents)} documents from fallback message {newest_fallback.id}")
else: else:
logger.debug(f"No documents found in fallback message {newest_fallback.id}") logger.debug(f"No documents found in fallback message {newest_fallback.id}")
else: else:
@ -654,6 +584,8 @@ class ServiceCenter:
logger.error(f"Error getting documents from document list: {str(e)}") logger.error(f"Error getting documents from document list: {str(e)}")
return [] return []
# ===== Functions for Prompts + Actions: Connection References generation and resolution =====
def getConnectionReferenceList(self) -> List[str]: def getConnectionReferenceList(self) -> List[str]:
"""Get list of all UserConnection objects as references with enhanced state information""" """Get list of all UserConnection objects as references with enhanced state information"""
connections = [] connections = []
@ -750,46 +682,8 @@ class ServiceCenter:
logger.error(f"Error parsing connection reference: {str(e)}") logger.error(f"Error parsing connection reference: {str(e)}")
return None return None
async def summarizeChat(self, messages: List[ChatMessage]) -> str: # ===== Functions for Actions: AI calls =====
    """
    Summarize the most recent part of a chat in the user's language.

    The messages are walked from last to first and collected up to and
    including the first message found whose status is "first"; the collected
    messages are then placed in chronological order into an AI prompt.

    Args:
        messages: List of chat messages to summarize

    Returns:
        str: The text returned by the AI call, or a text starting with
        "Error summarizing chat:" if anything in this method raises
    """
    try:
        # Get messages from last to first, stopping at first message with status="first"
        # (the message with status "first" itself is included)
        relevantMessages = []
        for msg in reversed(messages):
            relevantMessages.append(msg)
            if msg.status == "first":
                break
        # Create prompt for AI; relevantMessages is newest-first, so it is
        # reversed again to list the history oldest-first
        prompt = f"""You are an AI assistant providing a summary of a chat conversation.
Please respond in '{self.user.language}' language.
Chat History:
{chr(10).join(f"- {msg.message}" for msg in reversed(relevantMessages))}
Instructions:
1. Summarize the conversation's key points and outcomes
2. Be concise but informative
3. Use a professional but friendly tone
4. Focus on important decisions and next steps if any
Please provide a comprehensive summary of this conversation."""
        # Get summary using AI
        return await self.callAiTextBasic(prompt)
    except Exception as e:
        # Failures are reported as text to the caller rather than raised
        logger.error(f"Error summarizing chat: {str(e)}")
        return f"Error summarizing chat: {str(e)}"
async def callAiTextAdvanced(self, prompt: str, context: str = None) -> str: async def callAiTextAdvanced(self, prompt: str, context: str = None) -> str:
"""Advanced text processing using Anthropic, with fallback to OpenAI basic if advanced fails.""" """Advanced text processing using Anthropic, with fallback to OpenAI basic if advanced fails."""
max_retries = 3 max_retries = 3
@ -882,6 +776,8 @@ Please provide a comprehensive summary of this conversation."""
return response return response
# ===== Functions for Actions: Data management =====
def getFileInfo(self, fileId: str) -> Dict[str, Any]: def getFileInfo(self, fileId: str) -> Dict[str, Any]:
"""Get file information""" """Get file information"""
file_item = self.interfaceComponent.getFile(fileId) file_item = self.interfaceComponent.getFile(fileId)
@ -920,11 +816,11 @@ Please provide a comprehensive summary of this conversation."""
mimeType = document.mimeType mimeType = document.mimeType
except Exception as e: except Exception as e:
# Try to diagnose and recover the issue # Try to diagnose and recover the issue
diagnosis = self.diagnoseDocumentAccess(document) diagnosis = self._diagnoseDocumentAccess(document)
logger.error(f"Critical error: Cannot access document properties for document {document.id}. Diagnosis: {diagnosis}") logger.error(f"Critical error: Cannot access document properties for document {document.id}. Diagnosis: {diagnosis}")
# Attempt recovery # Attempt recovery
if self.recoverDocumentAccess(document): if self._recoverDocumentAccess(document):
try: try:
fileName = document.fileName fileName = document.fileName
mimeType = document.mimeType mimeType = document.mimeType
@ -954,9 +850,78 @@ Please provide a comprehensive summary of this conversation."""
except Exception as e: except Exception as e:
logger.error(f"Error extracting from document: {str(e)}") logger.error(f"Error extracting from document: {str(e)}")
raise raise
def createFile(self, fileName: str, mimeType: str, content: str, base64encoded: bool = False) -> str: def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
"""Create new file and return its ID""" """
Diagnose document access issues and provide recovery information.
This method helps identify why document properties are inaccessible.
"""
try:
diagnosis = {
'document_id': document.id,
'file_id': document.fileId,
'has_component_interface': document._componentInterface is not None,
'component_interface_type': type(document._componentInterface).__name__ if document._componentInterface else None,
'file_exists': False,
'file_info': None,
'error_details': None
}
# Check if component interface is set
if not document._componentInterface:
diagnosis['error_details'] = "Component interface not set - document cannot access file system"
return diagnosis
# Try to access the file directly
try:
file_info = self.interfaceComponent.getFile(document.fileId)
if file_info:
diagnosis['file_exists'] = True
diagnosis['file_info'] = {
'fileName': file_info.fileName if hasattr(file_info, 'fileName') else 'N/A',
'fileSize': file_info.fileSize if hasattr(file_info, 'fileSize') else 'N/A',
'mimeType': file_info.mimeType if hasattr(file_info, 'mimeType') else 'N/A'
}
else:
diagnosis['error_details'] = f"File with ID {document.fileId} not found in component interface"
except Exception as e:
diagnosis['error_details'] = f"Error accessing file {document.fileId}: {str(e)}"
return diagnosis
except Exception as e:
return {
'document_id': document.id if hasattr(document, 'id') else 'unknown',
'file_id': document.fileId if hasattr(document, 'fileId') else 'unknown',
'error_details': f"Error during diagnosis: {str(e)}"
}
def _recoverDocumentAccess(self, document: ChatDocument) -> bool:
    """
    Try to make a document's properties readable again.

    Re-attaches this service's component interface to the document and then
    reads ``document.fileName`` as a probe.

    Args:
        document: Document whose property access failed.

    Returns:
        bool: True when the probe read succeeded, False on any error.
    """
    recovered = False
    try:
        logger.info(f"Attempting to recover document access for document {document.id}")
        # Hand the document a fresh reference to the component interface
        document.setComponentInterface(self.interfaceComponent)
        try:
            # Probe: reading fileName is what failed for the caller
            test_fileName = document.fileName
            logger.info(f"Document access recovered for {document.id} -> {test_fileName}")
            recovered = True
        except Exception as e:
            logger.error(f"Document access recovery failed for {document.id}: {str(e)}")
    except Exception as e:
        logger.error(f"Error during document access recovery for {document.id}: {str(e)}")
    return recovered
def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True) -> ChatDocument:
"""Create document with file in one step - handles file creation internally"""
# Convert content to bytes based on base64 flag # Convert content to bytes based on base64 flag
if base64encoded: if base64encoded:
import base64 import base64
@ -974,27 +939,16 @@ Please provide a comprehensive summary of this conversation."""
# Then store the file data # Then store the file data
self.interfaceComponent.createFileData(file_item.id, content_bytes) self.interfaceComponent.createFileData(file_item.id, content_bytes)
return file_item.id
def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, existing_file_id: str = None) -> ChatDocument:
"""Create document AND file from file data object created by AI call"""
# Use existing file ID if provided, otherwise create new file
if existing_file_id:
file_id = existing_file_id
else:
# First create the file and get its ID
file_id = self.createFile(fileName, mimeType, content, base64encoded)
# Get file info to copy attributes # Get file info to copy attributes
file_info = self.getFileInfo(file_id) file_info = self.getFileInfo(file_item.id)
if not file_info: if not file_info:
logger.error(f"Could not get file info for fileId: {file_id}") logger.error(f"Could not get file info for fileId: {file_item.id}")
raise ValueError(f"File info not found for fileId: {file_id}") raise ValueError(f"File info not found for fileId: {file_item.id}")
# Create document with all file attributes copied # Create document with all file attributes copied
document = ChatDocument( document = ChatDocument(
id=str(uuid.uuid4()), id=str(uuid.uuid4()),
fileId=file_id, fileId=file_item.id,
fileName=file_info.get("fileName", fileName), fileName=file_info.get("fileName", fileName),
fileSize=file_info.get("size", 0), fileSize=file_info.get("size", 0),
mimeType=file_info.get("mimeType", mimeType) mimeType=file_info.get("mimeType", mimeType)
@ -1002,6 +956,8 @@ Please provide a comprehensive summary of this conversation."""
return document return document
# ===== Internal public helper functions =====
def updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None: def updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None:
""" """
Centralized function to update workflow statistics in database and running workflow. Centralized function to update workflow statistics in database and running workflow.
@ -1051,24 +1007,40 @@ Please provide a comprehensive summary of this conversation."""
logger.error(f"Error calculating object size: {str(e)}") logger.error(f"Error calculating object size: {str(e)}")
return 0 return 0
def getAvailableDocuments(self, workflow) -> List[str]: def getAvailableDocuments(self, workflow) -> str:
""" """
Get list of available document fileNames from workflow with new labeling format. Get simple description of available documents for task planning.
Args: Args:
workflow: ChatWorkflow object workflow: ChatWorkflow object
Returns: Returns:
List[str]: List of document labels in new format str: Simple description of document availability
""" """
documents = [] total_documents = 0
document_types = set()
for message in workflow.messages: for message in workflow.messages:
for doc in message.documents: if message.documents:
# Generate new label format total_documents += len(message.documents)
label = self.generateDocumentLabel(doc, message) for doc in message.documents:
documents.append(label) try:
return documents file_extension = getFileExtension(doc.fileName)
if file_extension:
document_types.add(file_extension.upper())
except:
pass
if total_documents == 0:
return "No documents available"
elif len(document_types) == 0:
return f"{total_documents} document(s) available"
else:
types_str = ", ".join(sorted(document_types))
return f"{total_documents} document(s) available ({types_str} files)"
# ===== Functions for Manager: Execution Tools =====
async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult: async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
"""Execute a method action""" """Execute a method action"""
try: try:
@ -1116,6 +1088,8 @@ Please provide a comprehensive summary of this conversation."""
"""Set user language for the service center""" """Set user language for the service center"""
self.user.language = language self.user.language = language
# ===== Functions for Manager: Workflow Tools =====
def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None): def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
"""Set current workflow context for document generation and routing""" """Set current workflow context for document generation and routing"""
try: try:
@ -1210,92 +1184,5 @@ Please provide a comprehensive summary of this conversation."""
'workflowId': 'unknown' 'workflowId': 'unknown'
} }
def refreshDocumentFileAttributes(self, documents: List[ChatDocument]) -> None:
    """Copy fileName, fileSize and mimeType from the stored file onto each document.

    Each document is handled independently: a missing file is logged as a
    warning and any error is logged, after which processing continues with
    the next document.

    Args:
        documents: Documents to update in place.
    """
    for doc in documents:
        try:
            file_item = self.interfaceComponent.getFile(doc.fileId)
            if not file_item:
                logger.warning(f"File not found for document {doc.id}, fileId: {doc.fileId}")
                continue
            doc.fileName = file_item.fileName
            doc.fileSize = file_item.fileSize
            doc.mimeType = file_item.mimeType
        except Exception as e:
            logger.error(f"Error refreshing file attributes for document {doc.id}: {e}")
# Note: Workflow progress update methods have been moved to handlingTasks.py
# where they belong since that's where the actual workflow execution happens
# This avoids circular import issues between ServiceCenter and ChatInterface
def diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
    """
    Collect diagnostic facts about why a document's properties are inaccessible.

    Args:
        document: Document to inspect.

    Returns:
        Dict[str, Any]: A report with the keys ``document_id``, ``file_id``,
        ``has_component_interface``, ``component_interface_type``,
        ``file_exists``, ``file_info`` and ``error_details``. If the diagnosis
        itself fails, a reduced report with only ``document_id``, ``file_id``
        and ``error_details`` is returned.
    """
    try:
        report = {
            'document_id': document.id,
            'file_id': document.fileId,
        }
        interface = document._componentInterface
        report['has_component_interface'] = interface is not None
        report['component_interface_type'] = type(interface).__name__ if interface else None
        report['file_exists'] = False
        report['file_info'] = None
        report['error_details'] = None

        # Without a component interface there is nothing further to check
        if not interface:
            report['error_details'] = "Component interface not set - document cannot access file system"
            return report

        # Look the file up through the service's own interface
        try:
            stored_file = self.interfaceComponent.getFile(document.fileId)
            if not stored_file:
                report['error_details'] = f"File with ID {document.fileId} not found in component interface"
            else:
                report['file_exists'] = True
                report['file_info'] = {
                    'fileName': getattr(stored_file, 'fileName', 'N/A'),
                    'fileSize': getattr(stored_file, 'fileSize', 'N/A'),
                    'mimeType': getattr(stored_file, 'mimeType', 'N/A')
                }
        except Exception as e:
            report['error_details'] = f"Error accessing file {document.fileId}: {str(e)}"
        return report
    except Exception as e:
        return {
            'document_id': getattr(document, 'id', 'unknown'),
            'file_id': getattr(document, 'fileId', 'unknown'),
            'error_details': f"Error during diagnosis: {str(e)}"
        }
def recoverDocumentAccess(self, document: ChatDocument) -> bool:
    """
    Try to make a document's properties readable again.

    Re-attaches this service's component interface to the document and then
    reads ``document.fileName`` as a probe.

    Args:
        document: Document whose property access failed.

    Returns:
        bool: True when the probe read succeeded, False on any error.
    """
    recovered = False
    try:
        logger.info(f"Attempting to recover document access for document {document.id}")
        # Hand the document a fresh reference to the component interface
        document.setComponentInterface(self.interfaceComponent)
        try:
            # Probe: reading fileName is what failed for the caller
            test_fileName = document.fileName
            logger.info(f"Document access recovered for {document.id} -> {test_fileName}")
            recovered = True
        except Exception as e:
            logger.error(f"Document access recovery failed for {document.id}: {str(e)}")
    except Exception as e:
        logger.error(f"Error during document access recovery for {document.id}: {str(e)}")
    return recovered
# Create singleton instance # Create singleton instance
serviceObject = None serviceObject = None

View file

@ -8,6 +8,10 @@ from modules.shared.configuration import APP_CONFIG
# Configure logger # Configure logger
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class ContextLengthExceededException(Exception):
    """Signals that a request exceeded the model's context length limit.

    Kept as its own type so callers can tell this condition apart from
    other API failures.
    """
def loadConfigData(): def loadConfigData():
"""Load configuration data for OpenAI connector""" """Load configuration data for OpenAI connector"""
return { return {
@ -75,12 +79,29 @@ class AiOpenai:
if response.status_code != 200: if response.status_code != 200:
logger.error(f"OpenAI API error: {response.status_code} - {response.text}") logger.error(f"OpenAI API error: {response.status_code} - {response.text}")
# Check for context length exceeded error
if response.status_code == 400:
try:
error_data = response.json()
if (error_data.get("error", {}).get("code") == "context_length_exceeded" or
"context length" in error_data.get("error", {}).get("message", "").lower()):
# Raise a specific exception for context length issues
raise ContextLengthExceededException(
f"Context length exceeded: {error_data.get('error', {}).get('message', 'Unknown error')}"
)
except (ValueError, KeyError):
pass # If we can't parse the error, fall through to generic error
raise HTTPException(status_code=500, detail="Error communicating with OpenAI API") raise HTTPException(status_code=500, detail="Error communicating with OpenAI API")
responseJson = response.json() responseJson = response.json()
content = responseJson["choices"][0]["message"]["content"] content = responseJson["choices"][0]["message"]["content"]
return content return content
except ContextLengthExceededException:
# Re-raise context length exceptions without wrapping
raise
except Exception as e: except Exception as e:
logger.error(f"Error calling OpenAI API: {str(e)}") logger.error(f"Error calling OpenAI API: {str(e)}")
raise HTTPException(status_code=500, detail=f"Error calling OpenAI API: {str(e)}") raise HTTPException(status_code=500, detail=f"Error calling OpenAI API: {str(e)}")

View file

@ -0,0 +1,268 @@
"""Tavily web search class."""
import logging
import os
from dataclasses import dataclass
from modules.interfaces.interfaceWebModel import (
WebCrawlBase,
WebCrawlDocumentData,
WebCrawlRequest,
WebCrawlResultItem,
WebScrapeActionDocument,
WebScrapeActionResult,
WebScrapeBase,
WebScrapeDocumentData,
WebScrapeRequest,
WebScrapeResultItem,
WebSearchBase,
WebSearchRequest,
WebSearchActionResult,
WebSearchActionDocument,
WebSearchDocumentData,
WebSearchResultItem,
WebCrawlActionDocument,
WebCrawlActionResult,
get_web_search_min_results,
get_web_search_max_results,
)
# from modules.interfaces.interfaceChatModel import ActionResult, ActionDocument
from tavily import AsyncTavilyClient
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
# Configuration loading functions
def get_web_crawl_timeout() -> int:
    """Return the ``Web_Crawl_TIMEOUT`` configuration value as an int (default 30)."""
    configured = APP_CONFIG.get("Web_Crawl_TIMEOUT", "30")
    return int(configured)
def get_web_crawl_max_retries() -> int:
    """Return the ``Web_Crawl_MAX_RETRIES`` configuration value as an int (default 3)."""
    configured = APP_CONFIG.get("Web_Crawl_MAX_RETRIES", "3")
    return int(configured)
def get_web_crawl_retry_delay() -> int:
    """Return the ``Web_Crawl_RETRY_DELAY`` configuration value as an int (default 2)."""
    configured = APP_CONFIG.get("Web_Crawl_RETRY_DELAY", "2")
    return int(configured)
@dataclass
class TavilySearchResult:
    """One hit of a Tavily web search, reduced to the fields this connector uses."""

    # Value of the hit's "title" entry in the search response
    title: str
    # Value of the hit's "url" entry in the search response
    url: str
@dataclass
class TavilyCrawlResult:
    """Content extracted by Tavily for one URL."""

    # Value of the result's "url" entry in the extract response
    url: str
    # Value of the result's "raw_content" entry in the extract response
    content: str
@dataclass
class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
    """Tavily-backed implementation of the web search, crawl and scrape interfaces.

    The public methods (``search_urls``, ``crawl_urls``, ``scrape``) catch every
    ``Exception``, log it and report it through a result object with
    ``success=False`` and the error text instead of raising.
    """

    # Async Tavily client; ``create`` builds one from the configured API key
    client: AsyncTavilyClient = None

    @classmethod
    async def create(cls):
        """Build a connector whose client uses the configured API key.

        Raises:
            ValueError: If ``Connector_WebTavily_API_KEY`` is missing or empty.
        """
        api_key = APP_CONFIG.get("Connector_WebTavily_API_KEY")
        if not api_key:
            raise ValueError("Tavily API key not configured. Please set Connector_WebTavily_API_KEY in config.ini")
        return cls(client=AsyncTavilyClient(api_key=api_key))

    async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult:
        """Handle a web search request.

        Takes the request's query and returns a result whose single document
        lists the found titles and URLs.
        """
        try:
            # Step 1: Search
            search_results = await self._search(request.query, request.max_results)
            # Step 2: Build ActionResult
            return self._build_search_action_result(search_results, request.query)
        except Exception as e:
            logger.error(f"Tavily web search failed: {str(e)}")
            return WebSearchActionResult(success=False, error=str(e))

    async def crawl_urls(self, request: WebCrawlRequest) -> WebCrawlActionResult:
        """Crawl the request's URLs and return the extracted text content."""
        try:
            # Step 1: Crawl
            crawl_results = await self._crawl(request.urls)
            # Step 2: Build ActionResult
            return self._build_crawl_action_result(crawl_results, request.urls)
        except Exception as e:
            logger.error(f"Tavily web crawl failed: {str(e)}")
            return WebCrawlActionResult(success=False, error=str(e))

    async def scrape(self, request: WebScrapeRequest) -> WebScrapeActionResult:
        """Turn a query into a list of URLs with their extracted content."""
        try:
            # Step 1: Search
            search_results = await self._search(request.query, request.max_results)
            # Step 2: Crawl the URLs the search produced
            urls = [result.url for result in search_results]
            crawl_results = await self._crawl(urls)
            # Step 3: Build ActionResult
            return self._build_scrape_action_result(crawl_results, request.query)
        except Exception as e:
            logger.error(f"Tavily web scrape failed: {str(e)}")
            return WebScrapeActionResult(success=False, error=str(e))

    async def _search(self, query: str, max_results: int) -> list[TavilySearchResult]:
        """Call the Tavily API to perform a web search.

        Raises:
            ValueError: If ``max_results`` lies outside the range given by
                ``get_web_search_min_results()`` and ``get_web_search_max_results()``.
        """
        # Make sure max_results is within the allowed range
        min_results = get_web_search_min_results()
        max_allowed_results = get_web_search_max_results()
        if max_results < min_results or max_results > max_allowed_results:
            raise ValueError(f"max_results must be between {min_results} and {max_allowed_results}")

        # Perform actual API call
        response = await self.client.search(query=query, max_results=max_results)
        return [
            TavilySearchResult(title=result["title"], url=result["url"])
            for result in response["results"]
        ]

    def _build_search_action_result(
        self, search_results: list[TavilySearchResult], query: str = ""
    ) -> WebSearchActionResult:
        """Build the successful ActionResult (one JSON document) from search results."""
        # Convert to result items
        result_items = [
            WebSearchResultItem(title=result.title, url=result.url)
            for result in search_results
        ]

        # Create document data with all results
        document_data = WebSearchDocumentData(
            query=query, results=result_items, total_count=len(result_items)
        )

        # Create single document
        document = WebSearchActionDocument(
            documentName=f"web_search_results_{get_utc_timestamp()}.json",
            documentData=document_data,
            mimeType="application/json",
        )

        return WebSearchActionResult(
            success=True, documents=[document], resultLabel="web_search_results"
        )

    async def _crawl(self, urls: list) -> list[TavilyCrawlResult]:
        """Call the Tavily API to extract text content from URLs, with retries.

        Each attempt is bounded by the configured timeout; between attempts the
        configured retry delay is awaited.

        Args:
            urls: URLs to extract. An empty list yields an empty result without
                contacting the API.

        Raises:
            Exception: When the last attempt times out or fails; the original
                error is attached as ``__cause__``.
        """
        import asyncio

        # Nothing to extract: skip the API round trip (and its retries) entirely
        if not urls:
            return []

        # Clamp so a negative configured value still yields one attempt instead of
        # skipping the loop and implicitly returning None
        max_retries = max(0, get_web_crawl_max_retries())
        retry_delay = get_web_crawl_retry_delay()
        timeout = get_web_crawl_timeout()
        total_attempts = max_retries + 1

        for attempt in range(1, total_attempts + 1):
            try:
                # Use asyncio.wait_for for timeout
                response = await asyncio.wait_for(
                    self.client.extract(urls=urls, extract_depth="advanced", format="text"),
                    timeout=timeout,
                )
                return [
                    TavilyCrawlResult(url=result["url"], content=result["raw_content"])
                    for result in response["results"]
                ]
            except asyncio.TimeoutError as e:
                logger.warning(f"Crawl attempt {attempt} timed out after {timeout} seconds")
                if attempt >= total_attempts:
                    raise Exception(f"Crawl failed after {total_attempts} attempts due to timeout") from e
            except Exception as e:
                logger.warning(f"Crawl attempt {attempt} failed: {str(e)}")
                if attempt >= total_attempts:
                    raise Exception(f"Crawl failed after {total_attempts} attempts: {str(e)}") from e

            # Only reached when the attempt failed and another one remains
            logger.info(f"Retrying in {retry_delay} seconds...")
            await asyncio.sleep(retry_delay)

    def _build_crawl_action_result(
        self, crawl_results: list[TavilyCrawlResult], urls: list[str] = None
    ) -> WebCrawlActionResult:
        """Build the successful ActionResult (one JSON document) from crawl results.

        When ``urls`` is empty or None, the URLs of the results are recorded instead.
        """
        # Convert to result items
        result_items = [
            WebCrawlResultItem(url=result.url, content=result.content)
            for result in crawl_results
        ]

        # Create document data with all results
        document_data = WebCrawlDocumentData(
            urls=urls or [result.url for result in crawl_results],
            results=result_items,
            total_count=len(result_items),
        )

        # Create single document
        document = WebCrawlActionDocument(
            documentName=f"web_crawl_results_{get_utc_timestamp()}.json",
            documentData=document_data,
            mimeType="application/json",
        )

        return WebCrawlActionResult(
            success=True, documents=[document], resultLabel="web_crawl_results"
        )

    def _build_scrape_action_result(
        self, crawl_results: list[TavilyCrawlResult], query: str = ""
    ) -> WebScrapeActionResult:
        """Build the successful ActionResult (one JSON document) from scrape results."""
        # Convert to result items
        result_items = [
            WebScrapeResultItem(url=result.url, content=result.content)
            for result in crawl_results
        ]

        # Create document data with all results
        document_data = WebScrapeDocumentData(
            query=query,
            results=result_items,
            total_count=len(result_items),
        )

        # Create single document
        document = WebScrapeActionDocument(
            documentName=f"web_scrape_results_{get_utc_timestamp()}.json",
            documentData=document_data,
            mimeType="application/json",
        )

        return WebScrapeActionResult(
            success=True, documents=[document], resultLabel="web_scrape_results"
        )

View file

@ -1,20 +1,378 @@
import logging import logging
from typing import Dict, Any, List, Union, Optional from typing import Dict, Any, List, Union, Optional
from modules.connectors.connectorAiOpenai import AiOpenai from modules.connectors.connectorAiOpenai import AiOpenai, ContextLengthExceededException
from modules.connectors.connectorAiAnthropic import AiAnthropic from modules.connectors.connectorAiAnthropic import AiAnthropic
from modules.chat.documents.documentExtraction import DocumentExtraction
from modules.interfaces.interfaceChatModel import ChatDocument
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# AI Model Registry with Performance Data
# Maps a registry key to the properties of one model:
#   connector                  - which connector serves the model ("openai" / "anthropic")
#   cost_per_1k_tokens         - input cost per 1k tokens (currency not stated here)
#   cost_per_1k_tokens_output  - output cost per 1k tokens
#   speed_rating / quality_rating - relative ratings on a 1-10 scale
#   supports_images / supports_documents - capability flags
#   model_name                 - model identifier string
# NOTE(review): max_tokens and context_length carry the same value in every
# entry - confirm whether max_tokens is meant to be the output limit instead.
AI_MODELS = {
    "openai_gpt4o": {
        "connector": "openai",
        "max_tokens": 128000,
        "cost_per_1k_tokens": 0.03,  # Input
        "cost_per_1k_tokens_output": 0.06,  # Output
        "speed_rating": 8,  # 1-10
        "quality_rating": 9,  # 1-10
        "supports_images": True,
        "supports_documents": True,
        "context_length": 128000,
        "model_name": "gpt-4o"
    },
    "openai_gpt35": {
        "connector": "openai",
        "max_tokens": 16000,
        "cost_per_1k_tokens": 0.0015,
        "cost_per_1k_tokens_output": 0.002,
        "speed_rating": 9,
        "quality_rating": 7,
        "supports_images": False,
        "supports_documents": True,
        "context_length": 16000,
        "model_name": "gpt-3.5-turbo"
    },
    "anthropic_claude": {
        "connector": "anthropic",
        "max_tokens": 200000,
        "cost_per_1k_tokens": 0.015,
        "cost_per_1k_tokens_output": 0.075,
        "speed_rating": 7,
        "quality_rating": 10,
        "supports_images": True,
        "supports_documents": True,
        "context_length": 200000,
        "model_name": "claude-3-sonnet-20240229"
    }
}
class AiCalls: class AiCalls:
"""Interface for AI service interactions""" """Interface for AI service interactions with centralized call method"""
def __init__(self): def __init__(self):
self.openaiService = AiOpenai() self.openaiService = AiOpenai()
self.anthropicService = AiAnthropic() self.anthropicService = AiAnthropic()
self.document_extractor = DocumentExtraction()
async def callAi(
    self,
    prompt: str,
    documents: List[ChatDocument] = None,
    operation_type: str = "general",
    priority: str = "balanced",  # "speed", "quality", "cost", "balanced"
    compress_prompt: bool = True,
    compress_documents: bool = True,
    process_documents_individually: bool = False,
    max_cost: float = None,
    max_processing_time: int = None
) -> str:
    """
    Central AI call: prepare documents, pick a model, fit the content, run the call.

    Args:
        prompt: The main prompt for the AI
        documents: Documents to process and pass along as context
        operation_type: Kind of operation ("general", "document_analysis", "image_analysis", etc.)
        priority: Model selection priority ("speed", "quality", "cost", "balanced")
        compress_prompt: Whether the prompt may be compressed
        compress_documents: Whether document content may be compressed
        process_documents_individually: Forwarded to the document processing step
        max_cost: Maximum cost for the call
        max_processing_time: Maximum processing time in seconds

    Returns:
        The AI response as a string. Any exception raised along the way is
        logged and returned as the string "Error: <message>" instead of
        being propagated.
    """
    try:
        # Step 1: turn the documents (if any) into one text block
        if documents:
            document_text = await self._process_documents_for_ai(
                documents,
                operation_type,
                compress_documents,
                process_documents_individually
            )
        else:
            document_text = ""

        # Step 2: choose the model from priority and content
        model_key = self._select_optimal_model(
            prompt,
            document_text,
            priority,
            operation_type,
            max_cost,
            max_processing_time
        )

        # Step 3: fit prompt and document text to the chosen model
        fitted = await self._optimize_content_for_model(
            prompt,
            document_text,
            model_key,
            compress_prompt,
            compress_documents
        )
        final_prompt, final_content = fitted

        # Step 4: run the call; failover is handled by the callee
        answer = await self._execute_ai_call_with_failover(
            model_key,
            final_prompt,
            final_content
        )
        return answer
    except Exception as e:
        logger.error(f"Error in centralized AI call: {str(e)}")
        return f"Error: {str(e)}"
def _select_optimal_model(
    self,
    prompt: str,
    document_content: str,
    priority: str,
    operation_type: str,
    max_cost: float = None,
    max_processing_time: int = None
) -> str:
    """
    Pick the model key from AI_MODELS that best matches priority and content.

    Args:
        prompt: The prompt text
        document_content: Already extracted document text (may be empty)
        priority: "speed", "quality" or "cost"; any other value means "balanced"
        operation_type: "image_analysis" restricts the choice to models with
            supports_images; other values do not filter
        max_cost: Optional upper bound for the estimated cost; None disables
            the check, 0 is honoured as a real limit
        max_processing_time: Accepted for interface compatibility; currently
            not used for the selection

    Returns:
        The key of the selected model, or "openai_gpt35" when no model
        passes the filters.
    """
    # Content size in UTF-8 bytes (compared against context_length as a rough proxy)
    total_content_size = len(prompt.encode('utf-8')) + len(document_content.encode('utf-8'))

    # Filter the usable models
    available_models = {}
    for model_name, model_info in AI_MODELS.items():
        # Keep 20% of the context free; content may use at most 80%
        if total_content_size > model_info["context_length"] * 0.8:
            continue
        # Cost limit: compare against None so that a budget of 0 is not ignored
        if max_cost is not None and self._estimate_cost(model_info, total_content_size) > max_cost:
            continue
        # Operation type compatibility
        if operation_type == "image_analysis" and not model_info["supports_images"]:
            continue
        available_models[model_name] = model_info

    if not available_models:
        # Fallback to the smallest model
        return "openai_gpt35"

    if priority == "speed":
        return max(available_models, key=lambda name: available_models[name]["speed_rating"])
    if priority == "quality":
        return max(available_models, key=lambda name: available_models[name]["quality_rating"])
    if priority == "cost":
        return min(available_models, key=lambda name: available_models[name]["cost_per_1k_tokens"])

    # Balanced: 40% quality, 30% speed, 30% cost. The cost is scaled relative
    # to the most expensive candidate so that the cost score stays on the same
    # 0-10 scale as the two ratings (most expensive -> 0, free -> 10).
    highest_cost = max(info["cost_per_1k_tokens"] for info in available_models.values())

    def balanced_score(name):
        info = available_models[name]
        relative_cost = info["cost_per_1k_tokens"] / highest_cost if highest_cost > 0 else 0.0
        cost_rating = 10 - 10 * relative_cost  # lower cost = higher score
        return info["quality_rating"] * 0.4 + info["speed_rating"] * 0.3 + cost_rating * 0.3

    return max(available_models, key=balanced_score)
def _estimate_cost(self, model_info: Dict, content_size: int) -> float:
    """
    Estimate the cost of one AI call.

    Args:
        model_info: Registry entry providing "cost_per_1k_tokens" and
            "cost_per_1k_tokens_output"
        content_size: Size of the input content

    Returns:
        Input cost plus output cost, where the output is assumed to be 10%
        of the input token count.
    """
    # Rough estimate: 1 token is about 4 characters
    kilo_tokens = content_size / 4 / 1000
    cost_in = kilo_tokens * model_info["cost_per_1k_tokens"]
    cost_out = kilo_tokens * model_info["cost_per_1k_tokens_output"] * 0.1  # 10% for output
    return cost_in + cost_out
async def _process_documents_for_ai(
    self,
    documents: List[ChatDocument],
    operation_type: str,
    compress_documents: bool,
    process_individually: bool
) -> str:
    """
    Extract the text of each document and join everything into one string.

    Args:
        documents: Documents to extract
        operation_type: Inserted into the extraction prompt
        compress_documents: Compress a document's text when it exceeds 10000 bytes
        process_individually: Not read by this method

    Returns:
        One "Document: <fileName>" section per document that produced content
        (or an error note when processing raised), separated by "---" lines;
        an empty string when there are no documents.
    """
    if not documents:
        return ""

    sections = []
    for document in documents:
        try:
            # Content extraction is delegated to the document extractor
            extraction = await self.document_extractor.processFileData(
                document.fileData,
                document.fileName,
                document.mimeType,
                prompt=f"Extract relevant content for {operation_type}",
                documentId=document.id,
                enableAI=True
            )

            # Keep only content items that carry non-blank data
            parts = [
                item.data
                for item in extraction.contents
                if item.data and item.data.strip()
            ]
            if not parts:
                continue

            merged = "\n\n".join(parts)
            # Compress on request once the text is larger than 10 KB
            if compress_documents and len(merged.encode('utf-8')) > 10000:
                merged = await self._compress_content(merged, 10000, "document")
            sections.append(f"Document: {document.fileName}\n{merged}")
        except Exception as e:
            logger.warning(f"Error processing document {document.fileName}: {str(e)}")
            sections.append(f"Document: {document.fileName}\n[Error processing document: {str(e)}]")

    return "\n\n---\n\n".join(sections)
async def _optimize_content_for_model(
    self,
    prompt: str,
    document_content: str,
    model_name: str,
    compress_prompt: bool,
    compress_documents: bool
) -> tuple[str, str]:
    """
    Fit prompt and document content to the chosen model.

    Args:
        prompt: The prompt text
        document_content: The extracted document text (may be empty)
        model_name: Key into AI_MODELS
        compress_prompt: Compress the prompt when it is larger than 2000 bytes
        compress_documents: Compress the document text when it is larger than
            70% of the model's context_length

    Returns:
        Tuple of (prompt, document content), each possibly compressed.
    """
    # 70% of the context length is the budget for document content
    content_budget = AI_MODELS[model_name]["context_length"] * 0.7

    # Prompt: 2 KB limit
    if compress_prompt and len(prompt.encode('utf-8')) > 2000:
        final_prompt = await self._compress_content(prompt, 2000, "prompt")
    else:
        final_prompt = prompt

    # Document content: only touched when present and over budget
    over_budget = (
        compress_documents
        and bool(document_content)
        and len(document_content.encode('utf-8')) > content_budget
    )
    if over_budget:
        final_content = await self._compress_content(
            document_content,
            int(content_budget),
            "document"
        )
    else:
        final_content = document_content

    return final_prompt, final_content
async def _compress_content(self, content: str, target_size: int, content_type: str) -> str:
    """
    Shrink content to at most target_size UTF-8 bytes.

    Content that already fits is returned unchanged. Otherwise the AI is asked
    for a compressed version; when that call fails, plain truncation is used.
    In both cases the returned text is guaranteed to fit into target_size
    bytes: an oversized AI answer is truncated as well, and the truncation
    marker is counted against the budget.

    Args:
        content: The text to compress
        target_size: Size budget in UTF-8 bytes
        content_type: Label inserted into the compression prompt
            (callers in this file pass "prompt" or "document")

    Returns:
        The original, compressed or truncated text.
    """
    def fit_to_budget(text: str) -> str:
        # Cut on the encoded form so multi-byte characters cannot overshoot
        # the byte budget; a split trailing character is dropped on decode.
        marker = "... [truncated]"
        encoded = text.encode('utf-8')
        if len(encoded) <= target_size:
            return text
        room = target_size - len(marker.encode('utf-8'))
        if room <= 0:
            # Budget too small for the marker: hard cut without it
            return encoded[:max(target_size, 0)].decode('utf-8', errors='ignore')
        return encoded[:room].decode('utf-8', errors='ignore') + marker

    if len(content.encode('utf-8')) <= target_size:
        return content

    try:
        # Use the AI for intelligent compression
        compression_prompt = f"""
Komprimiere den folgenden {content_type} auf maximal {target_size} Zeichen,
behalte aber alle wichtigen Informationen bei:
{content}
Gib nur den komprimierten Inhalt zurück, ohne zusätzliche Erklärungen.
"""
        # Use the fastest available model for compression
        compression_model = "openai_gpt35"
        model_info = AI_MODELS[compression_model]
        connector = getattr(self, f"{model_info['connector']}Service")
        messages = [{"role": "user", "content": compression_prompt}]
        if model_info["connector"] == "openai":
            compressed = await connector.callAiBasic(messages)
        else:
            response = await connector.callAiBasic(messages)
            compressed = response["choices"][0]["message"]["content"]
        # The model is only asked to respect the limit; enforce it here
        return fit_to_budget(compressed)
    except Exception as e:
        logger.warning(f"AI compression failed, using truncation: {str(e)}")
        # Fallback: simple truncation
        return fit_to_budget(content)
async def _execute_ai_call_with_failover(
    self,
    model_name: str,
    prompt: str,
    document_content: str,
    _attempted: Optional[set] = None,
    _compressed: bool = False
) -> str:
    """
    Run the AI call and fail over to other models when it does not succeed.

    Failover is bounded: every model is tried at most once per content
    version, and the compress-and-retry step happens at most once. When all
    options are exhausted the last exception is re-raised instead of
    recursing again.

    Args:
        model_name: Key into AI_MODELS of the model to try
        prompt: The user prompt
        document_content: Optional document context, sent as a system message
        _attempted: Internal - model keys already tried with this content
        _compressed: Internal - True once the content has been compressed as
            the last resort

    Returns:
        The response text.

    Raises:
        Exception: The last connector error once no further fallback is left.
    """
    attempted = set() if _attempted is None else _attempted
    attempted.add(model_name)

    try:
        model_info = AI_MODELS[model_name]
        connector = getattr(self, f"{model_info['connector']}Service")

        # Prepare the messages
        messages = []
        if document_content:
            messages.append({
                "role": "system",
                "content": f"Context from documents:\n{document_content}"
            })
        messages.append({
            "role": "user",
            "content": prompt
        })

        # Execute the call; the OpenAI connector's result is returned as-is,
        # any other connector's result is unpacked from a choices structure
        if model_info["connector"] == "openai":
            return await connector.callAiBasic(messages)
        response = await connector.callAiBasic(messages)
        return response["choices"][0]["message"]["content"]
    except ContextLengthExceededException:
        logger.warning(f"Context length exceeded for {model_name}, trying fallback")
        # Fallback to a model with a larger context that was not tried yet
        fallback_model = self._find_fallback_model(model_name)
        if fallback_model and fallback_model not in attempted:
            return await self._execute_ai_call_with_failover(
                fallback_model, prompt, document_content, attempted, _compressed
            )
        if _compressed:
            # Already compressed once and still too large: give up
            raise
        # Last resort: compress further and start over with a fresh attempt set
        compressed_prompt = await self._compress_content(prompt, 1000, "prompt")
        compressed_content = await self._compress_content(document_content, 5000, "document")
        return await self._execute_ai_call_with_failover(
            "openai_gpt35", compressed_prompt, compressed_content, None, True
        )
    except Exception as e:
        logger.warning(f"AI call failed with {model_name}: {e}")
        if "openai_gpt35" in attempted:
            # The general fallback model itself failed: stop instead of looping
            raise
        # General fallback
        return await self._execute_ai_call_with_failover(
            "openai_gpt35", prompt, document_content, attempted, _compressed
        )
def _find_fallback_model(self, current_model: str) -> Optional[str]:
    """
    Find a fallback model with a larger context than the current one.

    Args:
        current_model: Key into AI_MODELS of the model that was too small

    Returns:
        The first key in AI_MODELS (registry order) whose context_length is
        larger than that of current_model, or None when there is none.
    """
    baseline = AI_MODELS[current_model]["context_length"]
    larger_models = (
        candidate
        for candidate, details in AI_MODELS.items()
        if details["context_length"] > baseline
    )
    return next(larger_models, None)
# Legacy methods
async def callAiTextBasic(self, prompt: str, context: Optional[str] = None) -> str: async def callAiTextBasic(self, prompt: str, context: Optional[str] = None) -> str:
""" """
Basic text processing using OpenAI. Basic text processing - now uses centralized AI call method.
Args: Args:
prompt: The user prompt to process prompt: The user prompt to process
@ -23,86 +381,47 @@ class AiCalls:
Returns: Returns:
The AI response as text The AI response as text
""" """
# Prepare messages in OpenAI format # Combine context with prompt if provided
messages = [] full_prompt = prompt
# Add system message if context provided
if context: if context:
messages.append({ full_prompt = f"Context: {context}\n\nUser Request: {prompt}"
"role": "system",
"content": context
})
# Add user message # Use centralized AI call with speed priority for basic calls
messages.append({ return await self.callAi(
"role": "user", prompt=full_prompt,
"content": prompt priority="speed",
}) compress_prompt=True,
compress_documents=False
# Add language instruction for user-facing responses )
if hasattr(self, 'userLanguage') and self.userLanguage:
ltext = f"Please respond in '{self.userLanguage}' language."
if messages and messages[0]["role"] == "system":
if "language" not in messages[0]["content"].lower():
messages[0]["content"] = f"{ltext} {messages[0]['content']}"
else:
messages.insert(0, {
"role": "system",
"content": ltext
})
try:
return await self.openaiService.callAiBasic(messages)
except Exception as e:
logger.error(f"Error in OpenAI call: {str(e)}")
return f"Error: {str(e)}"
async def callAiTextAdvanced(self, prompt: str, context: Optional[str] = None) -> str: async def callAiTextAdvanced(self, prompt: str, context: Optional[str] = None, _is_fallback: bool = False) -> str:
""" """
Advanced text processing using Anthropic. Advanced text processing - now uses centralized AI call method.
Fallback to OpenAI if Anthropic is overloaded or rate-limited.
Args:
prompt: The user prompt to process
context: Optional system context/prompt
_is_fallback: Internal flag (kept for compatibility)
Returns:
The AI response as text
""" """
# For Anthropic, we need to handle system content differently # Combine context with prompt if provided
# Anthropic expects system content in a top-level parameter, not as a message role full_prompt = prompt
try: if context:
# Create messages without system role for Anthropic full_prompt = f"Context: {context}\n\nUser Request: {prompt}"
anthropic_messages = []
if hasattr(self, 'userLanguage') and self.userLanguage: # Use centralized AI call with quality priority for advanced calls
ltext = f"Please respond in '{self.userLanguage}' language." return await self.callAi(
if context: prompt=full_prompt,
# Combine context and language instruction priority="quality",
full_context = f"{ltext}\n\n{context}" compress_prompt=False,
else: compress_documents=False
full_context = ltext )
else:
full_context = context
# Add user message
anthropic_messages.append({
"role": "user",
"content": prompt
})
# Call Anthropic - let the connector handle system content conversion
if full_context:
# Send context as part of the user message for Anthropic
enhanced_prompt = f"Context:\n{full_context}\n\nUser Request:\n{prompt}"
response = await self.anthropicService.callAiBasic([
{"role": "user", "content": enhanced_prompt}
])
else:
response = await self.anthropicService.callAiBasic(anthropic_messages)
return response["choices"][0]["message"]["content"]
except Exception as e:
err_str = str(e)
logger.warning(f"[UI NOTICE] Advanced AI failed, falling back to Basic AI (OpenAI). Reason: {err_str}")
# Fallback to OpenAI basic
return await self.callAiTextBasic(prompt, context)
async def callAiImageBasic(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str: async def callAiImageBasic(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str:
""" """
Basic image processing using OpenAI. Basic image processing - now uses centralized AI call method.
Args: Args:
prompt: The prompt for image analysis prompt: The prompt for image analysis
@ -113,6 +432,8 @@ class AiCalls:
The AI response as text The AI response as text
""" """
try: try:
# For image processing, use the original connector directly
# as the centralized method doesn't handle images yet
return await self.openaiService.callAiImage(prompt, imageData, mimeType) return await self.openaiService.callAiImage(prompt, imageData, mimeType)
except Exception as e: except Exception as e:
logger.error(f"Error in OpenAI image call: {str(e)}") logger.error(f"Error in OpenAI image call: {str(e)}")
@ -120,7 +441,7 @@ class AiCalls:
async def callAiImageAdvanced(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str: async def callAiImageAdvanced(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str:
""" """
Advanced image processing using Anthropic. Advanced image processing - now uses centralized AI call method.
Args: Args:
prompt: The prompt for image analysis prompt: The prompt for image analysis
@ -131,8 +452,76 @@ class AiCalls:
The AI response as text The AI response as text
""" """
try: try:
# For image processing, use the original connector directly
# as the centralized method doesn't handle images yet
return await self.anthropicService.callAiImage(prompt, imageData, mimeType) return await self.anthropicService.callAiImage(prompt, imageData, mimeType)
except Exception as e: except Exception as e:
logger.error(f"Error in Anthropic image call: {str(e)}") logger.error(f"Error in Anthropic image call: {str(e)}")
return f"Error: {str(e)}" return f"Error: {str(e)}"
# Convenience methods for common use cases
async def callAiForDocumentAnalysis(
    self,
    prompt: str,
    documents: List[ChatDocument],
    priority: str = "balanced"
) -> str:
    """
    Run callAi preconfigured for document analysis.

    Documents are compressed and not processed individually; the operation
    type is "document_analysis".
    """
    analysis_options = {
        "operation_type": "document_analysis",
        "priority": priority,
        "compress_documents": True,
        "process_documents_individually": False,
    }
    return await self.callAi(prompt=prompt, documents=documents, **analysis_options)
async def callAiForReportGeneration(
    self,
    prompt: str,
    documents: List[ChatDocument],
    priority: str = "quality"
) -> str:
    """
    Run callAi preconfigured for report generation.

    Documents are compressed and flagged for individual processing; the
    operation type is "report_generation".
    """
    report_options = {
        "operation_type": "report_generation",
        "priority": priority,
        "compress_documents": True,
        "process_documents_individually": True,
    }
    return await self.callAi(prompt=prompt, documents=documents, **report_options)
async def callAiForEmailComposition(
    self,
    prompt: str,
    documents: List[ChatDocument] = None,
    priority: str = "speed"
) -> str:
    """
    Run callAi preconfigured for email composition.

    Both prompt and documents may be compressed; the operation type is
    "email_composition".
    """
    email_options = {
        "operation_type": "email_composition",
        "priority": priority,
        "compress_prompt": True,
        "compress_documents": True,
    }
    return await self.callAi(prompt=prompt, documents=documents, **email_options)
async def callAiForTaskPlanning(
    self,
    prompt: str,
    documents: List[ChatDocument] = None,
    priority: str = "balanced"
) -> str:
    """
    Run callAi preconfigured for task planning.

    The prompt is passed uncompressed while documents may be compressed; the
    operation type is "task_planning".
    """
    planning_options = {
        "operation_type": "task_planning",
        "priority": priority,
        "compress_prompt": False,
        "compress_documents": True,
    }
    return await self.callAi(prompt=prompt, documents=documents, **planning_options)

View file

@ -732,7 +732,7 @@ class TaskContext(BaseModel, ModelMixin):
workflow_id: Optional[str] = None workflow_id: Optional[str] = None
# Available resources # Available resources
available_documents: Optional[list[str]] = [] available_documents: Optional[str] = "No documents available"
available_connections: Optional[list[str]] = [] available_connections: Optional[list[str]] = []
# Previous execution state # Previous execution state
@ -755,8 +755,8 @@ class TaskContext(BaseModel, ModelMixin):
criteria_progress: Optional[dict] = None criteria_progress: Optional[dict] = None
def getDocumentReferences(self) -> List[str]: def getDocumentReferences(self) -> List[str]:
"""Get all available document references""" """Get all available document references from previous handover"""
docs = self.available_documents or [] docs = []
if self.previous_handover: if self.previous_handover:
for doc_exchange in self.previous_handover.inputDocuments: for doc_exchange in self.previous_handover.inputDocuments:
docs.extend(doc_exchange.documents) docs.extend(doc_exchange.documents)

View file

@ -0,0 +1,140 @@
"""Base class for web classes."""
from abc import ABC, abstractmethod
from modules.interfaces.interfaceChatModel import ActionDocument, ActionResult
from pydantic import BaseModel, Field, HttpUrl
from typing import List
from modules.shared.configuration import APP_CONFIG
# Configuration loading functions
def get_web_search_max_query_length() -> int:
    """Return the configured maximum query length (default 400) as an int."""
    configured = APP_CONFIG.get("Web_Search_MAX_QUERY_LENGTH", "400")
    return int(configured)
def get_web_search_max_results() -> int:
    """Return the configured maximum number of search results (default 20) as an int."""
    configured = APP_CONFIG.get("Web_Search_MAX_RESULTS", "20")
    return int(configured)
def get_web_search_min_results() -> int:
    """Return the configured minimum number of search results (default 1) as an int."""
    configured = APP_CONFIG.get("Web_Search_MIN_RESULTS", "1")
    return int(configured)
# --- Web search ---
# query -> list of URLs
class WebSearchRequest(BaseModel):
    """Input of a web search: the query and the number of results wanted.

    The limits passed to Field() come from the configuration getters and are
    evaluated once, when this class body is executed.
    """
    # Non-empty query, capped at the configured maximum query length
    query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
    # Required; bounded by the configured minimum and maximum result counts
    max_results: int = Field(ge=get_web_search_min_results(), le=get_web_search_max_results())
class WebSearchResultItem(BaseModel):
    """Individual search result: a title and a validated URL."""
    title: str
    url: HttpUrl
class WebSearchDocumentData(BaseModel):
    """Complete search results document: the query together with its result items."""
    # Same length constraints as the query of the search request
    query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
    results: List[WebSearchResultItem]
    # presumably the number of entries in results - not enforced by this model
    total_count: int
class WebSearchActionDocument(ActionDocument):
    """ActionDocument whose documentData is typed as WebSearchDocumentData."""
    documentData: WebSearchDocumentData
class WebSearchActionResult(ActionResult):
    """ActionResult whose documents are WebSearchActionDocument items."""
    # Defaults to a fresh empty list per instance
    documents: List[WebSearchActionDocument] = Field(default_factory=list)
class WebSearchBase(ABC):
    """Abstract interface for components that turn a search query into URLs."""
    # Implementations receive a WebSearchRequest and return a WebSearchActionResult
    @abstractmethod
    async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult: ...
# --- Web crawl ---
# list of URLs -> list of extracted HTML content
class WebCrawlRequest(BaseModel):
    """Input of a web crawl: the list of URLs to fetch."""
    urls: List[HttpUrl]
class WebCrawlResultItem(BaseModel):
    """Individual crawl result: a URL and the content obtained for it."""
    url: HttpUrl
    content: str
class WebCrawlDocumentData(BaseModel):
    """Complete crawl results document: the URLs together with their result items."""
    urls: List[HttpUrl]
    results: List[WebCrawlResultItem]
    # presumably the number of entries in results - not enforced by this model
    total_count: int
class WebCrawlActionDocument(ActionDocument):
    """ActionDocument whose documentData is typed as WebCrawlDocumentData."""
    documentData: WebCrawlDocumentData = Field(
        description="The data extracted from crawled URLs"
    )
class WebCrawlActionResult(ActionResult):
    """ActionResult whose documents are WebCrawlActionDocument items."""
    # Defaults to a fresh empty list per instance
    documents: List[WebCrawlActionDocument] = Field(default_factory=list)
class WebCrawlBase(ABC):
    """Abstract interface for components that fetch content for a list of URLs."""
    # Implementations receive a WebCrawlRequest and return a WebCrawlActionResult
    @abstractmethod
    async def crawl_urls(self, request: WebCrawlRequest) -> WebCrawlActionResult: ...
# --- Web scrape ---
# scrape -> list of extracted text; combines web search and crawl in one step
class WebScrapeRequest(BaseModel):
    """Input of a web scrape: the query and the number of results wanted.

    Carries the same fields and constraints as the web search request; the
    limits are evaluated once, when this class body is executed.
    """
    # Non-empty query, capped at the configured maximum query length
    query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
    # Required; bounded by the configured minimum and maximum result counts
    max_results: int = Field(ge=get_web_search_min_results(), le=get_web_search_max_results())
class WebScrapeResultItem(BaseModel):
    """Individual scrape result: a URL and the content obtained for it."""
    url: HttpUrl
    content: str
class WebScrapeDocumentData(BaseModel):
    """Complete scrape results document: the query together with its result items."""
    # Same length constraints as the query of the scrape request
    query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
    results: List[WebScrapeResultItem]
    # presumably the number of entries in results - not enforced by this model
    total_count: int
class WebScrapeActionDocument(ActionDocument):
    """ActionDocument whose documentData is typed as WebScrapeDocumentData."""
    documentData: WebScrapeDocumentData = Field(
        description="The data extracted from scraped URLs"
    )
class WebScrapeActionResult(ActionResult):
    """ActionResult whose documents are WebScrapeActionDocument items."""
    # Defaults to a fresh empty list per instance
    documents: List[WebScrapeActionDocument] = Field(default_factory=list)
class WebScrapeBase(ABC):
    """Abstract interface for components that search and extract content in one step."""
    # Implementations receive a WebScrapeRequest and return a WebScrapeActionResult
    @abstractmethod
    async def scrape(self, request: WebScrapeRequest) -> WebScrapeActionResult: ...

View file

@ -0,0 +1,118 @@
from typing import Optional
import json
import csv
import io
from modules.interfaces.interfaceWebModel import (
WebCrawlActionResult,
WebSearchActionResult,
WebSearchRequest,
WebCrawlRequest,
WebScrapeActionResult,
WebScrapeRequest,
WebCrawlDocumentData,
WebScrapeDocumentData,
WebSearchDocumentData,
)
from dataclasses import dataclass
from modules.connectors.connectorWebTavily import ConnectorTavily
from modules.interfaces.interfaceChatModel import ActionDocument
@dataclass(slots=True)
class WebInterface:
connectorWebTavily: ConnectorTavily
def __post_init__(self) -> None:
if self.connectorWebTavily is None:
raise TypeError(
"connectorWebTavily must be provided. "
"Use `await WebInterface.create()` or pass a ConnectorTavily."
)
@classmethod
async def create(cls) -> "WebInterface":
connectorWebTavily = await ConnectorTavily.create()
return WebInterface(connectorWebTavily=connectorWebTavily)
async def search(
self, web_search_request: WebSearchRequest
) -> WebSearchActionResult:
# NOTE: Add connectors here
return await self.connectorWebTavily.search_urls(web_search_request)
async def crawl(self, web_crawl_request: WebCrawlRequest) -> WebCrawlActionResult:
# NOTE: Add connectors here
return await self.connectorWebTavily.crawl_urls(web_crawl_request)
async def scrape(
self, web_scrape_request: WebScrapeRequest
) -> WebScrapeActionResult:
# NOTE: Add connectors here
return await self.connectorWebTavily.scrape(web_scrape_request)
def convert_web_result_to_json(self, web_result) -> str:
"""Convert WebCrawlActionResult or WebScrapeActionResult to proper JSON format"""
if not web_result.success or not web_result.documents:
return json.dumps({"success": web_result.success, "error": web_result.error})
# Extract the document data and convert to dict
document_data = web_result.documents[0].documentData
# Convert Pydantic model to dict
result_dict = {
"success": web_result.success,
"results": [
{
"url": str(result.url),
"content": result.content
}
for result in document_data.results
],
"total_count": document_data.total_count
}
# Add type-specific fields
if hasattr(document_data, 'urls'):
# WebCrawlDocumentData has urls field
result_dict["urls"] = [str(url) for url in document_data.urls]
elif hasattr(document_data, 'query'):
# WebScrapeDocumentData has query field
result_dict["query"] = document_data.query
return json.dumps(result_dict, indent=2, ensure_ascii=False)
def convert_web_search_result_to_csv(self, web_search_result: WebSearchActionResult) -> str:
"""Convert WebSearchActionResult to CSV format with url and title columns"""
if not web_search_result.success or not web_search_result.documents:
return ""
output = io.StringIO()
writer = csv.writer(output, delimiter=';')
# Write header
writer.writerow(['url', 'title'])
# Write data rows
document_data = web_search_result.documents[0].documentData
for result in document_data.results:
writer.writerow([str(result.url), result.title])
return output.getvalue()
def create_json_action_document(self, json_content: str, document_name: str) -> ActionDocument:
"""Create an ActionDocument with JSON content"""
return ActionDocument(
documentName=document_name,
documentData=json_content,
mimeType="application/json"
)
def create_csv_action_document(self, csv_content: str, document_name: str) -> ActionDocument:
"""Create an ActionDocument with CSV content"""
return ActionDocument(
documentName=document_name,
documentData=csv_content,
mimeType="text/csv"
)

View file

@ -441,6 +441,10 @@ class MethodDocument(MethodBase):
if len(lines) > 2: if len(lines) > 2:
formatted_content = '\n'.join(lines[1:-1]) formatted_content = '\n'.join(lines[1:-1])
# For HTML format, check if AI returned complete HTML document
if extension == ".html" and (formatted_content.startswith('<!DOCTYPE') or formatted_content.startswith('<html')):
return formatted_content
return formatted_content return formatted_content
except Exception as e: except Exception as e:
@ -643,7 +647,22 @@ class MethodDocument(MethodBase):
raise Exception("AI report generation failed - AI is required for report generation") raise Exception("AI report generation failed - AI is required for report generation")
# Clean up the AI response and ensure it's valid HTML # Clean up the AI response and ensure it's valid HTML
if not aiReport.strip().startswith('<html'): aiReport = aiReport.strip()
# Strip fenced code blocks like ```html ... ``` if present
if aiReport.startswith("```") and aiReport.endswith("```"):
lines = aiReport.split('\n')
if len(lines) >= 2:
# remove first and last fence lines (language tag allowed on first)
aiReport = '\n'.join(lines[1:-1]).strip()
# Check if AI response starts with DOCTYPE or html tag (complete HTML document)
if aiReport.startswith('<!DOCTYPE') or aiReport.startswith('<html'):
# AI returned complete HTML document, use it directly
return aiReport
else:
# AI returned HTML content without document structure, wrap it
# Check if AI response already contains a title/header # Check if AI response already contains a title/header
has_title = any(title.lower() in aiReport.lower() for title in [title, "outlook", "report", "status"]) has_title = any(title.lower() in aiReport.lower() for title in [title, "outlook", "report", "status"])
@ -660,9 +679,6 @@ class MethodDocument(MethodBase):
html.append(aiReport) html.append(aiReport)
html.append("</body></html>") html.append("</body></html>")
return '\n'.join(html) return '\n'.join(html)
else:
# AI returned complete HTML, use it directly
return aiReport
except Exception as e: except Exception as e:
logger.error(f"Error generating AI report: {str(e)}") logger.error(f"Error generating AI report: {str(e)}")

View file

@ -731,8 +731,6 @@ class MethodOutlook(MethodBase):
attachment_docs = self.service.getChatDocumentsFromDocumentList([attachment_ref]) attachment_docs = self.service.getChatDocumentsFromDocumentList([attachment_ref])
if attachment_docs: if attachment_docs:
for doc in attachment_docs: for doc in attachment_docs:
# Get the actual file content using fileId # Get the actual file content using fileId
file_id = getattr(doc, 'fileId', None) file_id = getattr(doc, 'fileId', None)
if file_id: if file_id:
@ -757,15 +755,15 @@ class MethodOutlook(MethodBase):
"contentBytes": base64_content "contentBytes": base64_content
} }
message["attachments"].append(attachment) message["attachments"].append(attachment)
else: else:
logger.warning(f"No content found for attachment: {doc.fileName}") logger.warning(f"No content found for attachment: {doc.fileName}")
except Exception as e: except Exception as e:
logger.error(f"Error reading attachment file {doc.fileName}: {str(e)}") logger.error(f"Error reading attachment file {doc.fileName}: {str(e)}")
else: else:
logger.warning(f"Attachment document has no fileId: {doc.fileName}") logger.warning(f"Attachment document has no fileId: {doc.fileName}")
else: else:
logger.warning(f"No attachment documents found for reference: {attachment_ref}") logger.warning(f"No attachment documents found for reference: {attachment_ref}")
# Create the draft message # Create the draft message
# First, get the Drafts folder ID to ensure the draft is created there # First, get the Drafts folder ID to ensure the draft is created there

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -270,15 +270,6 @@ async def get_workflow_messages(
# Get all messages # Get all messages
allMessages = interfaceChat.getWorkflowMessages(workflowId) allMessages = interfaceChat.getWorkflowMessages(workflowId)
# Debug logging: Log attributes for each message
logger.debug(f"Retrieved {len(allMessages)} messages for workflow {workflowId}")
for i, message in enumerate(allMessages):
logger.debug(f"Message {i+1} (ID: {message.id}): {message}")
logger.debug(f" - Type: {getattr(message, 'type', 'N/A')}")
logger.debug(f" - Content: {getattr(message, 'content', 'N/A')[:100]}...")
logger.debug(f" - PublishedAt: {getattr(message, 'publishedAt', 'N/A')}")
logger.debug(f" - All attributes: {message.__dict__}")
# Apply selective data transfer if messageId is provided # Apply selective data transfer if messageId is provided
if messageId: if messageId:
# Find the index of the message with the given ID # Find the index of the message with the given ID

View file

@ -141,6 +141,12 @@ class WorkflowManager:
self.chatManager.handlingTasks._checkWorkflowStopped() self.chatManager.handlingTasks._checkWorkflowStopped()
# Create initial message using interface # Create initial message using interface
# Generate the correct documentsLabel that matches what getDocumentReferenceString will create
round_num = workflow.currentRound
task_num = 0
action_num = 0
context_label = f"round{round_num}_task{task_num}_action{action_num}_context"
messageData = { messageData = {
"workflowId": workflow.id, "workflowId": workflow.id,
"role": "user", "role": "user",
@ -148,7 +154,7 @@ class WorkflowManager:
"status": "first", "status": "first",
"sequenceNr": 1, "sequenceNr": 1,
"publishedAt": get_utc_timestamp(), "publishedAt": get_utc_timestamp(),
"documentsLabel": "workflow_start", "documentsLabel": context_label,
"documents": [], "documents": [],
# Add workflow context fields # Add workflow context fields
"roundNumber": workflow.currentRound, "roundNumber": workflow.currentRound,
@ -390,7 +396,7 @@ class WorkflowManager:
summary_message = { summary_message = {
"workflowId": workflow.id, "workflowId": workflow.id,
"role": "assistant", "role": "assistant",
"message": f"Workflow completed successfully. Completed {workflow_result.completed_tasks}/{workflow_result.total_tasks} tasks in {workflow_result.execution_time:.2f} seconds.", "message": f"Workflow completed successfully.",
"status": "last", "status": "last",
"sequenceNr": len(workflow.messages) + 1, "sequenceNr": len(workflow.messages) + 1,
"publishedAt": get_utc_timestamp(), "publishedAt": get_utc_timestamp(),

View file

@ -2,8 +2,9 @@
TODO TODO
# System # System
- Backend/UI fix Table Connections mit korrekten Token Infos, View jedesmal neu laden im formGeneric - sharepoint to fix
- model reference diagram for all models. who uses who? --> to see the basic building blocks - document handling centralized
- ai handling centralized
- neutralizer to activate AND put back placeholders to the returned data - neutralizer to activate AND put back placeholders to the returned data
# Tests # Tests
@ -20,7 +21,13 @@ TODO
- check zusammenfassung von 10 dokumenten >10 MB - check zusammenfassung von 10 dokumenten >10 MB
- test case bewerbung - test case bewerbung
# DOCUMENTATION
Design principles
- UI: Module classes for data management (CRUD tables & forms --> formGeneric)
- Basic: All timestamps to be timezone aware fehlerabfangroutinen
- Backend: All external components are attached via connectorXxx --> interfaceXxx --> our codebase
- all model definitions in interfaceXxxModel
- Action functions for AI: why use documentList rather than a single document as the input parameter? --> For full flexibility: callers can pass either a list of documents or a documentList
******************** ********************

View file

@ -1,128 +0,0 @@
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Management Summary: Methoden-basierte Chat-Architektur</title>
<style>
body {
font-family: Arial, sans-serif;
line-height: 1.6;
max-width: 800px;
margin: 0 auto;
padding: 20px;
color: #333;
}
h1 {
color: #2c3e50;
border-bottom: 2px solid #3498db;
padding-bottom: 10px;
}
h2 {
color: #2c3e50;
margin-top: 30px;
}
.example {
background-color: #f8f9fa;
border-left: 4px solid #3498db;
padding: 15px;
margin: 20px 0;
}
.old-arch, .new-arch {
margin: 15px 0;
padding: 15px;
border-radius: 5px;
}
.old-arch {
background-color: #fff3cd;
border: 1px solid #ffeeba;
}
.new-arch {
background-color: #d4edda;
border: 1px solid #c3e6cb;
}
.benefits {
background-color: #e8f4f8;
padding: 15px;
border-radius: 5px;
margin: 20px 0;
}
.benefits ul {
margin: 10px 0;
padding-left: 20px;
}
.benefits li {
margin: 5px 0;
}
</style>
</head>
<body>
<h1>Management Summary: Methoden-basierte Chat-Architektur</h1>
<p>Die Umstellung von einer Agenten-basierten auf eine Methoden-basierte Chat-Architektur stellt einen fundamentalen Paradigmenwechsel dar. Während die Mehrheit der KI-Chat-Systeme weiterhin auf Agenten-Architekturen setzt, ermöglicht unser methoden-basierter Ansatz eine präzisere Kontrolle und effizientere Integration.</p>
<p>Der methoden-basierte Ansatz definiert klare, selbstbeschreibende Operationen mit festgelegten Parametern und Ergebnissen. Im Gegensatz zu Agenten, die als Blackbox-Operationen fungieren, bieten Methoden eine transparente, validierbare und vorhersehbare Ausführung. Diese Struktur ermöglicht eine präzise Fehlerbehandlung und Retry-Logik auf Aktions-Ebene, anstatt auf Agenten-Ebene.</p>
<p>Die Integration mit Benutzerdaten erfolgt direkt über definierte Authentifizierungspfade, was die Sicherheit erhöht und die Komplexität reduziert. Jede Methode ist selbstbeschreibend und enthält ihre eigenen Validierungsregeln, was die Wartbarkeit verbessert und die Entwicklung neuer Funktionen beschleunigt.</p>
<p>Der methoden-basierte Ansatz reduziert die KI-Abhängigkeit bei der Ausführung von Operationen, während die KI weiterhin für die Planung und Koordination der Methoden eingesetzt wird. Diese Trennung von Planung und Ausführung führt zu zuverlässigeren Ergebnissen und besserer Nachvollziehbarkeit.</p>
<p>Die Architektur ermöglicht eine präzise Dokumentation und Validierung jeder Operation, was in einer regulierten Umgebung von besonderem Wert ist. Die klare Struktur erleichtert die Integration neuer Dienste und die Erweiterung bestehender Funktionalitäten.</p>
<h2>Praktisches Beispiel: Dokumentenverarbeitung und E-Mail-Versand</h2>
<div class="example">
<div class="old-arch">
<strong>Alte Agenten-basierte Architektur:</strong><br>
<pre>
Benutzer: "Suche nach Verträgen im SharePoint und sende mir eine Zusammenfassung per E-Mail"
Agent SharePoint:
- Sucht nach Verträgen
- Extrahiert Inhalte
- Speichert Ergebnisse
Agent Outlook:
- Liest Ergebnisse
- Erstellt E-Mail
- Sendet E-Mail</pre>
</div>
<div class="new-arch">
<strong>Neue Methoden-basierte Architektur:</strong><br>
<pre>
Benutzer: "Suche nach Verträgen im SharePoint und sende mir eine Zusammenfassung per E-Mail"
Methoden-Katalog:
1. SharePoint.searchDocuments
- Parameter: {query: "Verträge", site: "valueon"}
- Retry: 3x bei Netzwerkfehler
- Auth: MSFT
2. Document.extractContent
- Parameter: {documents: [...], sections: ["Zusammenfassung"]}
- Retry: 2x bei Extraktionsfehler
- Auth: LOCAL
3. Outlook.sendMail
- Parameter: {to: ["user@example.com"], subject: "Vertragszusammenfassung"}
- Retry: 1x bei SMTP-Fehler
- Auth: MSFT</pre>
</div>
<div class="benefits">
<strong>Vorteile im Beispiel:</strong>
<ul>
<li>Jede Operation ist klar definiert und validierbar</li>
<li>Retry-Logik ist spezifisch für jede Operation</li>
<li>Authentifizierung ist explizit definiert</li>
<li>Fehler können präzise zugeordnet werden</li>
<li>Operationen können unabhängig voneinander getestet werden</li>
<li>Neue Operationen können einfach hinzugefügt werden</li>
</ul>
</div>
</div>
<p>Die KI plant die Ausführung dieser Methoden, aber die eigentliche Ausführung erfolgt durch die definierten Methoden mit klaren Parametern und Ergebnissen. Dies führt zu einer zuverlässigeren und besser nachvollziehbaren Ausführung.</p>
</body>
</html>

View file

@ -1,129 +0,0 @@
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Management Summary: Methoden-basierte Chat-Architektur</title>
<style>
body {
font-family: Arial, sans-serif;
line-height: 1.6;
max-width: 800px;
margin: 0 auto;
padding: 20px;
color: #333;
}
h1 {
color: #2c3e50;
border-bottom: 2px solid #3498db;
padding-bottom: 10px;
}
h2 {
color: #2c3e50;
margin-top: 30px;
}
.example {
background-color: #f8f9fa;
border-left: 4px solid #3498db;
padding: 15px;
margin: 20px 0;
}
.old-arch, .new-arch {
margin: 15px 0;
padding: 15px;
border-radius: 5px;
}
.old-arch {
background-color: #fff3cd;
border: 1px solid #ffeeba;
}
.new-arch {
background-color: #d4edda;
border: 1px solid #c3e6cb;
}
.benefits {
background-color: #e8f4f8;
padding: 15px;
border-radius: 5px;
margin: 20px 0;
}
.benefits ul {
margin: 10px 0;
padding-left: 20px;
}
.benefits li {
margin: 5px 0;
}
</style>
</head>
<body>
<h1>Management Summary: Methoden-basierte Chat-Architektur</h1>
<p>Die Umstellung von einer Agenten-basierten auf eine Methoden-basierte Chat-Architektur stellt einen fundamentalen Paradigmenwechsel dar. Während die Mehrheit der KI-Chat-Systeme weiterhin auf Agenten-Architekturen setzt, ermöglicht unser methoden-basierter Ansatz eine präzisere Kontrolle und effizientere Integration.</p>
<p>Der methoden-basierte Ansatz definiert klare, selbstbeschreibende Operationen mit festgelegten Parametern und Ergebnissen. Im Gegensatz zu Agenten, die als Blackbox-Operationen fungieren, bieten Methoden eine transparente, validierbare und vorhersehbare Ausführung. Diese Struktur ermöglicht eine präzise Fehlerbehandlung und Retry-Logik auf Aktions-Ebene, anstatt auf Agenten-Ebene.</p>
<p>Die Integration mit Benutzerdaten erfolgt direkt über definierte Authentifizierungspfade, was die Sicherheit erhöht und die Komplexität reduziert. Jede Methode ist selbstbeschreibend und enthält ihre eigenen Validierungsregeln, was die Wartbarkeit verbessert und die Entwicklung neuer Funktionen beschleunigt.</p>
<p>Der methoden-basierte Ansatz reduziert die KI-Abhängigkeit bei der Ausführung von Operationen, während die KI weiterhin für die Planung und Koordination der Methoden eingesetzt wird. Diese Trennung von Planung und Ausführung führt zu zuverlässigeren Ergebnissen und besserer Nachvollziehbarkeit.</p>
<p>Die Architektur ermöglicht eine präzise Dokumentation und Validierung jeder Operation, was in einer regulierten Umgebung von besonderem Wert ist. Die klare Struktur erleichtert die Integration neuer Dienste und die Erweiterung bestehender Funktionalitäten.</p>
<h2>Praktisches Beispiel: Dokumentenverarbeitung und E-Mail-Versand</h2>
<div class="example">
<div class="old-arch">
<strong>Alte Agenten-basierte Architektur:</strong><br>
<pre>
Benutzer: "Suche nach Verträgen im SharePoint und sende mir eine Zusammenfassung per E-Mail"
Agent SharePoint:
- Sucht nach Verträgen
- Extrahiert Inhalte
- Speichert Ergebnisse
Agent Outlook:
- Liest Ergebnisse
- Erstellt E-Mail
- Sendet E-Mail</pre>
</div>
<div class="new-arch">
<strong>Neue Methoden-basierte Architektur:</strong><br>
<pre>
Benutzer: "Suche nach Verträgen im SharePoint und sende mir eine Zusammenfassung per E-Mail"
Methoden-Katalog:
1. SharePoint.searchDocuments
- Parameter: {query: "Verträge", site: "valueon"}
- Retry: 3x bei Netzwerkfehler
- Auth: MSFT
2. Document.extractContent
- Parameter: {documents: [...], sections: ["Zusammenfassung"]}
- Retry: 2x bei Extraktionsfehler
- Auth: LOCAL
3. Outlook.sendMail
- Parameter: {to: ["user@example.com"], subject: "Vertragszusammenfassung"}
- Retry: 1x bei SMTP-Fehler
- Auth: MSFT</pre>
</div>
<div class="benefits">
<strong>Vorteile im Beispiel:</strong>
<ul>
<li>Jede Operation ist klar definiert und validierbar</li>
<li>Retry-Logik ist spezifisch für jede Operation</li>
<li>Authentifizierung ist explizit definiert</li>
<li>Fehler können präzise zugeordnet werden</li>
<li>Operationen können unabhängig voneinander getestet werden</li>
<li>Neue Operationen können einfach hinzugefügt werden</li>
</ul>
</div>
</div>
<p>Die KI plant die Ausführung dieser Methoden, aber die eigentliche Ausführung erfolgt durch die definierten Methoden mit klaren Parametern und Ergebnissen. Dies führt zu einer zuverlässigeren und besser nachvollziehbaren Ausführung.</p>
</body>
</html>

View file

@ -1,999 +0,0 @@
# Chat System Process Flow Specification
## 1. System Overview
### 1.1 Core Components
- **WorkflowManager**: Orchestrates the overall workflow process
- **ChatManager**: Manages chat interactions and task execution
- **ServiceCenter**: Central state and context management
- **AgentTask**: Core data object for task execution
### 1.2 Service Center Structure
```python
import asyncio
from datetime import datetime, UTC
from enum import Enum
from typing import Any, Callable, Dict, List, Literal, Optional

from pydantic import BaseModel, Field
class TaskStatus(str, Enum):
PENDING = "pending"
SUCCESS = "success"
FAILED = "failed"
RETRY = "retry"
TIMEOUT = "timeout"
ROLLBACK = "rollback"
class ActionStatus(str, Enum):
PENDING = "pending"
SUCCESS = "success"
FAILED = "failed"
RETRY = "retry"
TIMEOUT = "timeout"
SKIPPED = "skipped"
DEPENDENCY_FAILED = "dependency_failed"
class AuthSource(str, Enum):
LOCAL = "local"
MSFT = "msft"
GOOGLE = "google"
# Add more auth sources as needed
class MethodParameter(BaseModel):
    """Model for method parameters."""
    name: str
    type: str
    required: bool
    # `callable` is a builtin function, not a type, so it cannot be used as a
    # field annotation; `Callable[..., Any]` is the annotation for "any callable".
    validation: Optional[Callable[..., Any]] = None
    description: str
class ActionResult(BaseModel):
"""Model for method results"""
success: bool
data: Dict[str, Any]
metadata: Dict[str, Any]
validation: List[str]
class MethodBase:
    """Base class for all methods.

    Subclasses are expected to assign ``name`` and ``description`` and to
    override ``actions`` and ``execute``.
    """

    def __init__(self, service):
        self.service = service
        # Bare annotations only: these declare the attributes but do not
        # assign them, so subclasses must set both.
        self.name: str
        self.description: str
        self.auth_source: AuthSource = AuthSource.LOCAL  # Default to local auth

    @property
    def actions(self) -> Dict[str, Dict[str, Any]]:
        """Available actions and their parameters."""
        raise NotImplementedError

    async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> "ActionResult":
        """Execute method action with authentication data."""
        raise NotImplementedError

    async def validate_parameters(self, action: str, parameters: Dict[str, Any]) -> bool:
        """Check that every required parameter of ``action`` is supplied.

        Args:
            action: Key into ``self.actions``.
            parameters: Parameter values provided by the caller.

        Returns:
            False when the action is unknown or a required parameter is
            missing, True otherwise.
        """
        action_defs = self.actions
        if action not in action_defs:
            return False
        # An action without a "parameters" entry, or a parameter spec without
        # a "required" flag, is treated as "nothing required" instead of
        # failing validation with a KeyError.
        param_specs = action_defs[action].get('parameters', {})
        required_params = {
            param_name
            for param_name, spec in param_specs.items()
            if spec.get('required', False)
        }
        return all(param in parameters for param in required_params)

    async def rollback(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> None:
        """Rollback action if needed; the base implementation does nothing."""
        pass
class Action(BaseModel):
"""Action model with validation"""
method: str
action: str
parameters: Dict[str, Any]
retryCount: int = 0
retryMax: int
status: ActionStatus = ActionStatus.PENDING
timeout: Optional[int] = None
dependencies: List[str] = []
rollback_on_failure: bool = False
auth_source: Optional[AuthSource] = None # Auth source for this action
class Config:
use_enum_values = True
class AgentTask(BaseModel):
    """Task model with validation.

    Carries the user request, the available connections and the list of
    actions, plus helpers that decide whether an action may run and whether
    the task has finished or failed.
    """
    id: str
    workflowId: str
    status: TaskStatus = TaskStatus.PENDING
    userInput: str
    dataList: List[Dict[str, str]]  # List of available connections
    actionList: List[Action]
    chatHistory: str
    taskHistory: str
    previousTaskFeedback: Optional[str]
    thisTaskFeedback: Optional[str]
    result: Optional[Dict[str, Any]]
    documentsInput: List[Dict]
    documentsOutput: List[Dict]
    startedAt: str
    finishedAt: Optional[str]
    error: Optional[str]
    dependencies: List[str] = []
    requiredOutputs: List[str] = []

    class Config:
        use_enum_values = True

    def get_auth_data(self, auth_source: AuthSource) -> Optional[Dict[str, Any]]:
        """Return the first connection in ``dataList`` whose 'source' equals ``auth_source``, or None."""
        for conn in self.dataList:
            if conn.get('source') == auth_source:
                return conn
        return None

    def get_action_by_id(self, action_id: str) -> Optional[Action]:
        """Return the action whose "method:action" id equals ``action_id``, or None."""
        return next((a for a in self.actionList if f"{a.method}:{a.action}" == action_id), None)

    def can_execute_action(self, action: Action) -> bool:
        """Check if action can be executed based on dependencies and auth.

        Returns:
            False when any dependency is unknown or has not succeeded, or when
            a non-local auth source has no matching connection; True otherwise.
        """
        # Check dependencies. A dependency id that matches no action can never
        # be satisfied, so it blocks execution instead of raising
        # AttributeError on None.
        for dep in action.dependencies:
            dependency = self.get_action_by_id(dep)
            if dependency is None or dependency.status != ActionStatus.SUCCESS:
                return False
        # Check authentication
        if action.auth_source and action.auth_source != AuthSource.LOCAL:
            if not self.get_auth_data(action.auth_source):
                return False
        return True

    def is_complete(self) -> bool:
        """Check if all actions are complete (succeeded or skipped)."""
        return all(a.status in [ActionStatus.SUCCESS, ActionStatus.SKIPPED]
                   for a in self.actionList)

    def has_failed(self) -> bool:
        """Check if any action has failed."""
        return any(a.status == ActionStatus.FAILED for a in self.actionList)
class ServiceCenter:
    """Service center with improved state management.

    Holds the registered methods and tasks and runs the actions of a task in
    order, applying validation, authentication lookup, a per-action timeout,
    retry bookkeeping and optional rollback.
    """

    def __init__(self):
        self.state = {
            'status': TaskStatus.PENDING,
            'retryCount': 0,
            'retryMax': 3,
            'timeout': 300,  # 5 minutes
            'lastError': None,
            'lastErrorTime': None
        }
        self.methods: Dict[str, MethodBase] = {}
        self.tasks: Dict[str, AgentTask] = {}
        self.promptManager = AIPromptManager()
        self.taskStateManager = TaskStateManager()
        self.documentProcessor = DocumentExtraction()

    async def execute_task(self, task: AgentTask) -> None:
        """Execute task with improved error handling and timeout.

        Updates ``task.status``, ``task.error`` and ``task.finishedAt`` in
        place, and the ``status`` of every action it visits.
        """
        try:
            # Check for timeout. total_seconds() is required here:
            # timedelta.seconds is only the seconds component and wraps
            # around once a full day has elapsed.
            elapsed = datetime.now(UTC) - datetime.fromisoformat(task.startedAt)
            if elapsed.total_seconds() > self.state['timeout']:
                task.status = TaskStatus.TIMEOUT
                return

            # Execute actions
            for action in task.actionList:
                if not task.can_execute_action(action):
                    # Only report missing authentication when the action
                    # actually needs a non-local auth source; otherwise the
                    # block must come from an unmet dependency.
                    needs_auth = bool(action.auth_source) and action.auth_source != AuthSource.LOCAL
                    if needs_auth and not task.get_auth_data(action.auth_source):
                        action.status = ActionStatus.FAILED
                        task.error = f"Missing authentication for {action.auth_source}"
                    else:
                        action.status = ActionStatus.DEPENDENCY_FAILED
                    continue
                await self._execute_action(task, action)

            # Update task status
            if task.has_failed():
                task.status = TaskStatus.FAILED
            elif task.is_complete():
                task.status = TaskStatus.SUCCESS
            task.finishedAt = datetime.now(UTC).isoformat()
        except Exception as e:
            task.status = TaskStatus.FAILED
            task.error = str(e)

    async def _execute_action(self, task: AgentTask, action: Action) -> None:
        """Run one action and set its status from the outcome."""
        # Bound before the try block so the exception handler can always
        # refer to them, even when the failure happens during lookup.
        method = self.methods.get(action.method)
        auth_data = None
        try:
            if not method:
                raise ValueError(f"Unknown method: {action.method}")
            # Validate parameters
            if not await method.validate_parameters(action.action, action.parameters):
                raise ValueError(f"Invalid parameters for {action.method}:{action.action}")
            # Get auth data if needed
            if action.auth_source and action.auth_source != AuthSource.LOCAL:
                auth_data = task.get_auth_data(action.auth_source)
                if not auth_data:
                    raise ValueError(f"Missing authentication data for {action.auth_source}")
            # Execute with timeout
            result = await asyncio.wait_for(
                method.execute(action.action, action.parameters, auth_data),
                timeout=action.timeout or 60
            )
            if result.success:
                action.status = ActionStatus.SUCCESS
                return
            # NOTE(review): _should_retry is not defined in this class as
            # shown; it is assumed to be provided elsewhere - confirm.
            if self._should_retry(result.data.get('error')):
                action.retryCount += 1
                if action.retryCount <= action.retryMax:
                    action.status = ActionStatus.RETRY
                    return
            action.status = ActionStatus.FAILED
            await self._rollback_if_requested(method, action, auth_data)
        except asyncio.TimeoutError:
            action.status = ActionStatus.TIMEOUT
        except Exception as e:
            action.status = ActionStatus.FAILED
            # Keep the reason instead of silently discarding the exception.
            task.error = str(e)
            await self._rollback_if_requested(method, action, auth_data)

    async def _rollback_if_requested(self, method: Optional[MethodBase], action: Action, auth_data: Optional[Dict[str, Any]]) -> None:
        """Call the method's rollback when the action asks for it and the method exists."""
        if action.rollback_on_failure and method is not None:
            await method.rollback(action.action, action.parameters, auth_data)
class AIPromptManager:
    """Builds AI prompts and checks AI responses against a schema."""

    def generatePrompt(self, context: Dict[str, Any], examples: List[Dict]) -> str:
        """Assemble a prompt from the task, the document info and few-shot examples.

        Args:
            context: Must provide 'task' and a 'document' mapping with
                'name' and 'type'.
            examples: Items providing 'input' and 'output'.

        Returns:
            The prompt text: header lines, one line per example, and a
            closing instruction.
        """
        pieces = [f"Task: {context['task']}\n"]
        pieces.append(f"Document: {context['document']['name']} ({context['document']['type']})\n")
        pieces.append("Examples:\n")
        pieces.extend(f"- {ex['input']} => {ex['output']}\n" for ex in examples)
        pieces.append("Extract the most relevant information for the task above.")
        return "".join(pieces)

    def validateResponse(self, response: str, schema: Dict) -> bool:
        """Return True when ``response`` validates against ``schema``, False on a validation error."""
        import jsonschema
        try:
            jsonschema.validate(instance=response, schema=schema)
        except jsonschema.ValidationError:
            return False
        return True
class TaskStateManager:
    """Manages task state and retry tracking."""

    def __init__(self):
        # Maps task id -> snapshot dict written by trackState.
        self.taskStates = {}

    def trackState(self, task: "AgentTask"):
        """Store a snapshot of the task's status, retry state and history under its id."""
        self.taskStates[task.id] = {
            "status": task.status,
            "retryState": getattr(task, "retryState", {}),
            "history": getattr(task, "history", [])
        }

    def canRetry(self, task: "AgentTask", method: str) -> bool:
        """Check if task can be retried for ``method``.

        A task that was never tracked has no recorded retries, so it is
        treated as retryable instead of raising KeyError.

        Returns:
            True while the recorded retry count for ``method`` is below the
            task's ``retryMax`` (3 when the task has no such attribute).
        """
        tracked = self.taskStates.get(task.id, {})
        retryState = tracked.get("retryState", {})
        return retryState.get(method, 0) < getattr(task, "retryMax", 3)
class DocumentContext(BaseModel):
"""Model for document context"""
id: str
extractionHistory: List[Dict]
relevantSections: List[str]
processingStatus: Dict[str, str]
class DocumentExtraction:
    """Extracts context-relevant sections from documents and records extractions."""

    def process_with_context(self, doc: Dict, context: "DocumentContext") -> Dict:
        """Return a dict of each relevant section name to ``doc``'s value for it (None when absent)."""
        return {name: doc.get(name) for name in context.relevantSections}

    def track_extraction(self, doc: Dict, extraction: Dict):
        """Append ``extraction`` to ``doc['extractionHistory']``, creating the list on first use."""
        doc.setdefault('extractionHistory', []).append(extraction)
class ErrorRecovery(BaseModel):
"""Model for error recovery strategies"""
strategy: str # e.g., "retry", "fallback", "skip"
fallbackActions: List[str]
contextPreservation: bool
```
### 1.3 Method-Based Module Structure
```python
# Example: methodSharepoint.py
class MethodSharepoint:
"""SharePoint method implementation"""
def __init__(self, service):
self.service = service
self.name = "sharepoint"
self.description = "Search and process SharePoint documents"
self.auth_source = AuthSource.MSFT # Requires Microsoft authentication
@property
def actions(self) -> Dict[str, Dict[str, Any]]:
"""Available actions and their parameters"""
return {
"search": {
"description": "Search SharePoint documents",
"retryMax": 3,
"timeout": 30,
"parameters": {
"query": {"type": "string", "required": True},
"site": {"type": "string", "required": False},
"folder": {"type": "string", "required": False},
"maxResults": {"type": "number", "required": False}
}
}
}
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""Execute SharePoint method"""
if not auth_data:
return {"success": False, "error": "Missing Microsoft authentication"}
if action == "search":
return await self._searchDocuments(parameters, auth_data)
return {"success": False, "error": f"Unknown action: {action}"}
async def _searchDocuments(self, parameters: Dict[str, Any], auth_data: Dict[str, Any]) -> Dict[str, Any]:
"""Search SharePoint documents"""
# Implementation using existing SharePoint agent functionality
pass
# Example: methodOutlook.py
class MethodOutlook:
    """Outlook method implementation."""

    def __init__(self, service):
        self.service = service
        self.name = "outlook"
        self.description = "Handle Outlook email operations"

    @property
    def actions(self) -> Dict[str, Dict[str, Any]]:
        """Available actions and their parameters."""
        return {
            "readMails": {
                "description": "Read emails from specified folder",
                "retryMax": 2,  # Action-specific retry limit
                "parameters": {
                    "folder": {"type": "string", "required": False},
                    "unreadOnly": {"type": "boolean", "required": False},
                    "fromAddress": {"type": "string", "required": False},
                    "maxResults": {"type": "number", "required": False}
                }
            },
            "sendMail": {
                "description": "Send an email",
                "retryMax": 1,  # Action-specific retry limit
                "parameters": {
                    "to": {"type": "array", "items": "string", "required": True},
                    "subject": {"type": "string", "required": True},
                    "body": {"type": "string", "required": True},
                    "attachments": {"type": "array", "items": "FileRef", "required": False}
                }
            }
        }

    async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Execute Outlook method.

        Args:
            action: "readMails" or "sendMail".
            parameters: Parameters for the chosen action.
            auth_data: Accepted so the signature matches the three-argument
                call convention used for methods; it is not forwarded because
                the private handlers are not shown taking it.

        Returns:
            The handler's result, or a failure dict for an unknown action.
        """
        if action == "readMails":
            return await self._readMails(parameters)
        if action == "sendMail":
            return await self._sendMail(parameters)
        return {"success": False, "error": f"Unknown action: {action}"}
```
### 1.4 Key Data Objects
```python
class ChatWorkflow:
id: str
mandateId: str
status: str
name: Optional[str]
currentRound: int
lastActivity: str
startedAt: str
logs: List[ChatLog]
messages: List[ChatMessage]
stats: Optional[ChatStat]
tasks: List[Task]
class AgentTask:
id: str
workflowId: str
status: str # pending, success, failed, retry
userInput: str # AI-processed summary
dataList: List[Dict[str, str]] # User connections
actionList: List[Dict[str, Any]] # Actions to execute, e.g.:
# [
# {
# "method": "sharepoint",
# "action": "search",
# "parameters": {
# "query": "offerings",
# "site": "valueon"
# },
# "retryCount": 0,
# "retryMax": 3,
# "status": "pending" # pending, success, failed, retry
# },
# {
# "method": "outlook",
# "action": "sendMail",
# "parameters": {
# "to": ["user@example.com"],
# "subject": "Offer Summary",
# "body": "..."
# },
# "retryCount": 0,
# "retryMax": 1,
# "status": "pending"
# }
# ]
chatHistory: str # Summary of previous messages
taskHistory: str # Summary of previous tasks
previousTaskFeedback: Optional[str]
thisTaskFeedback: Optional[str]
result: Optional[ChatMessage]
documentsInput: List[Dict]
documentsOutput: List[Dict]
startedAt: str
finishedAt: Optional[str]
error: Optional[str]
dependencies: List[str] = [] # Task dependencies
requiredOutputs: List[str] = [] # Required outputs from dependencies
```
## 2. Process Flow
### 2.1 Initialization Phase
```mermaid
graph TD
A[User Input] --> B[WorkflowManager.workflowProcess]
B --> C[ChatManager.initialize]
C --> D[Create ServiceCenter]
D --> E[Create Initial Task]
```
1. **WorkflowManager.workflowProcess**
- Receives user input and workflow
- Initializes chat manager
- Starts task processing loop
2. **ChatManager.initialize**
- Creates ServiceCenter with all required components
- Initializes service interfaces
- Sets up task and state management
### 2.2 Task Creation Phase
1. **Create Initial Task**
```python
def createInitialTask(self, userInput: UserInputRequest) -> AgentTask:
# 1. Get available methods and their actions
available_methods = self._getAvailableMethods()
method_catalog = {
method.name: {
"description": method.description,
"actions": method.actions
}
for method in available_methods
}
# 2. Process user input with AI including document analysis
processedInput = await self.service.model['callAiBasic'](
f"""Analyze user request and documents:
User Prompt: {userInput.prompt}
Documents: {userInput.listFileId}
Available Methods:
{json.dumps(method_catalog, indent=2)}
Please provide:
1. Main objective
2. Required actions (using available methods and their actions)
3. Required data sources
4. Document processing requirements
5. Expected output format
Format your response as JSON:
{{
"objective": "string",
"actions": [
{{
"method": "string",
"action": "string",
"parameters": {{
"param1": "value1",
"param2": "value2"
}}
}}
],
"dataSources": ["string"],
"documentRequirements": ["string"],
"outputFormat": "string"
}}
"""
)
# 3. Create task with processed input and initialize action states
actions = []
for action in processedInput['actions']:
method = next(m for m in available_methods if m.name == action['method'])
action_info = method.actions[action['action']]
actions.append({
**action,
"retryCount": 0,
"retryMax": action_info['retryMax'],
"status": "pending"
})
task = AgentTask(
workflowId=self.service.workflow.id,
userInput=processedInput,
dataList=self.service.context['dataConnections'],
actionList=actions,
chatHistory=await self.workflowSummarize(userInput),
startedAt=datetime.now(UTC).isoformat()
)
# 4. Store in service
self.service.tasks['current'] = task
return task
```
### 2.3 Task Execution Phase
1. **Execute Task**
```python
async def executeTask(self, task: AgentTask) -> None:
"""Execute task actions in sequence"""
for action in task.actionList:
if action['status'] == 'pending':
try:
# Get method instance
method = self.service.methods[action['method']]
# Execute action
result = await method.execute(
action['action'],
action['parameters']
)
if result['success']:
action['status'] = 'success'
else:
if self._shouldRetry(result['error']):
action['retryCount'] += 1
if action['retryCount'] > action['retryMax']:
action['status'] = 'failed'
task.status = 'failed'
task.error = "Maximum retries exceeded"
else:
action['status'] = 'retry'
task.status = 'retry'
else:
action['status'] = 'failed'
task.status = 'failed'
task.error = result['error']
except Exception as e:
action['status'] = 'failed'
task.status = 'failed'
task.error = str(e)
# Update task status based on action status
if action['status'] == 'failed':
break
# Mark task as complete if all actions succeeded
if all(a['status'] == 'success' for a in task.actionList):
task.status = 'success'
task.finishedAt = datetime.now(UTC).isoformat()
```
### 2.4 Task Analysis Phase
1. **Define Next Task**
```python
async def defineNextTask(self, currentTask: "AgentTask") -> Optional["AgentTask"]:
    """Ask the AI whether the current task is complete and create a follow-up task if not.

    Declared ``async`` because the body awaits the AI call; ``await`` is a
    syntax error inside a plain ``def``.

    Args:
        currentTask: Task whose feedback, input and output documents are
            sent to the AI for analysis.

    Returns:
        The newly created next task, or None when the analysis reports
        completion or any error occurs (the error is logged).
    """
    try:
        # 1. Analyze current task results using basic AI
        analysis = await self.service.model['callAiBasic'](
            f"""Analyze task results and determine next steps:
Previous Feedback: {currentTask.previousTaskFeedback}
Current Feedback: {currentTask.thisTaskFeedback}
User Input: {currentTask.userInput}
Current Documents: {currentTask.documentsOutput}
Please provide:
1. Task completion status
2. Next required actions
3. Required documents
4. Method recommendations
Format your response as JSON:
{{
"isComplete": boolean,
"nextActions": ["string"],
"requiredDocuments": ["string"],
"recommendedMethods": ["string"]
}}
"""
        )
        # 2. Parse and validate AI response
        analysis_data = json.loads(analysis)
        # 3. Determine if next task needed
        if analysis_data["isComplete"]:
            return None
        # 4. Create next task and shift current -> previous
        nextTask = self._createNextTask(currentTask, analysis_data)
        self.service.tasks['previous'] = currentTask
        self.service.tasks['current'] = nextTask
        return nextTask
    except Exception as e:
        logger.error(f"Error defining next task: {str(e)}")
        return None
```
## 3. Method Integration
### 3.1 Method Registration
```python
def _registerMethods(self):
"""Register available methods in service center"""
self.service.methods = {
"sharepoint": MethodSharepoint(self.service),
"outlook": MethodOutlook(self.service),
"web": MethodWeb(self.service),
"document": MethodDocument(self.service)
}
```
### 3.2 Method Execution
```python
def _executeMethod(self, method: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
"""Execute a method with parameters"""
try:
# Get method implementation
method_impl = self.service.methods.get(method)
if not method_impl:
return {"success": False, "error": f"Unknown method: {method}"}
# Execute method
return await method_impl.execute(parameters)
except Exception as e:
return {"success": False, "error": str(e)}
```
## 4. Error Handling
### 4.1 Error Types
1. **AI Errors**
- Model unavailable
- Invalid response
- Timeout
2. **Method Errors**
- Invalid method
- Execution failure
- Resource unavailable
3. **Task Errors**
- Invalid state
- Missing data
- Timeout
### 4.2 Retry Logic
```python
def _shouldRetry(self, error: str) -> bool:
"""Determine if error is retryable"""
retryable_errors = [
"AI down",
"Document not found",
"Content extraction failed"
]
return any(err in error for err in retryable_errors)
def _shouldCreateNextTask(self, analysis: Dict[str, Any]) -> bool:
"""Determine if next task is needed based on AI analysis"""
return not analysis.get("isComplete", True)
```
## 5. AI Integration Points
### 5.1 User Input Processing
```python
async def _processUserInput(self, input: str, documents: List[str]) -> str:
"""Process user input including document analysis"""
context = {
"task": "Process user input",
"document": {"name": "User Input", "type": "text"}
}
examples = [
{"input": "Search documents", "output": "Extract relevant information"}
]
prompt = self.service.promptManager.generatePrompt(context, examples)
return await self.service.model['callAiBasic'](
f"""Analyze user request and documents:
User Input: {input}
Documents: {documents}
{prompt}
Please provide:
1. Main objective
2. Required actions
3. Required data sources
4. Document processing requirements
5. Expected output format
Format your response as JSON:
{{
"objective": "string",
"actions": ["string"],
"dataSources": ["string"],
"documentRequirements": ["string"],
"outputFormat": "string"
}}
"""
)
```
### 5.2 Task Analysis
```python
async def _analyzeTaskResults(self, task: AgentTask) -> str:
"""Analyze task results and determine next steps"""
context = {
"task": "Analyze task results",
"document": {"name": "Task Results", "type": "json"}
}
examples = [
{"input": "Task completed", "output": "Generate next steps"}
]
prompt = self.service.promptManager.generatePrompt(context, examples)
return await self.service.model['callAiBasic'](
f"""Analyze task results and determine next steps:
Task Input: {task.userInput}
Previous Feedback: {task.previousTaskFeedback}
Current Feedback: {task.thisTaskFeedback}
Current Documents: {task.documentsOutput}
{prompt}
Please provide:
1. Task completion status
2. Next required actions
3. Required documents
4. Method recommendations
Format your response as JSON:
{{
"isComplete": boolean,
"nextActions": ["string"],
"requiredDocuments": ["string"],
"recommendedMethods": ["string"]
}}
"""
)
```
### 5.3 Result Processing
```python
async def _processTaskResults(self, task: AgentTask) -> str:
    """Ask the basic AI model to summarise a task's results as feedback.

    Args:
        task: The task whose ``userInput``, ``result`` and
            ``documentsOutput`` are embedded in the prompt.

    Returns:
        Whatever the awaited ``callAiBasic`` call yields; the prompt asks
        the model for a JSON object with a summary, documents and next steps.
    """
    promptContext = {
        "task": "Process task results",
        "document": {"name": "Task Results", "type": "json"},
    }
    promptExamples = [
        {"input": "Task results", "output": "Generate summary"},
    ]
    prompt = self.service.promptManager.generatePrompt(promptContext, promptExamples)

    callAiBasic = self.service.model['callAiBasic']
    # Doubled braces keep the JSON template literal inside the f-string.
    request = f"""Process task results and generate feedback:
Task Input: {task.userInput}
Method Results: {task.result}
Generated Documents: {task.documentsOutput}
{prompt}
Please provide:
1. Summary of completed actions
2. Generated document descriptions
3. Next steps or completion status
Format your response as JSON:
{{
"summary": "string",
"documents": ["string"],
"nextSteps": ["string"]
}}
"""
    return await callAiBasic(request)
```
## 6. File Structure and Implementation Plan
### 6.1 File Structure
```
gateway/
├── modules/
│ ├── workflow/
│ │ ├── managerWorkflow.py # Workflow management and state machine
│ │ ├── managerChat.py # Chat management and AI response validation
│ │ ├── managerPrompt.py # AI prompt generation and management
│ │ ├── methodBase.py # Base method class with result validation
│ │ └── documentExtraction.py # Document content extraction
│ │
│ ├── agents/ # To be refactored into methods
│ │ ├── agentSharepoint.py → methods/methodSharepoint.py
│ │ ├── agentOutlook.py → methods/methodOutlook.py
│ │ ├── agentWebcrawler.py → methods/methodWeb.py
│ │ ├── agentDocument.py → methods/methodDocument.py
│ │ └── agentCoder.py → methods/methodCoder.py
│ │
│ ├── methods/ # New directory for method implementations
│ │ ├── methodSharepoint.py # SharePoint operations
│ │ ├── methodOutlook.py # Outlook operations
│ │ ├── methodWeb.py # Web operations
│ │ ├── methodDocument.py # Document operations
│ │ ├── methodCoder.py # Code generation operations
│ │ └── methodPowerpoint.py # PowerPoint operations
│ │
│ └── interfaces/
│ ├── interfaceChatModel.py # Chat system models and enums
│ └── interfaceAppModel.py # Application models including UserConnection
```
### 6.2 Implementation Plan
#### Phase 1: Core Structure Setup
1. **File Renaming and Organization**
- Rename manager files to follow `manager*.py` pattern
- Move document processor to `documentExtraction.py`
- Create new `methods` directory
2. **Model Updates**
- Update `interfaceChatModel.py` with new enums and models
- Integrate `UserConnection` from `interfaceAppModel.py`
- Update validation logic in respective modules
#### Phase 2: Method Migration
1. **Base Method Implementation**
- Implement `methodBase.py` with core functionality
- Add method result validation
- Set up authentication handling
2. **Agent to Method Conversion**
- Convert each agent to its method implementation
- Migrate functionality while maintaining existing behavior
- Add method-specific validation
3. **New Method Implementation**
- Implement `methodPowerpoint.py`
- Add PowerPoint-specific operations
- Integrate with document processing
#### Phase 3: Manager Updates
1. **Chat Manager Enhancement**
- Integrate AI response validation
- Update service center structure
- Improve error handling
2. **Document Manager Integration**
- Update document operations for new method structure
- Enhance content extraction capabilities
- Improve file handling
3. **Workflow Manager Updates**
- Update state machine for method-based approach
- Improve task management
- Enhance error recovery
#### Phase 4: Testing and Validation
1. **Unit Testing**
- Test each method implementation
- Validate error handling
- Verify authentication flow
2. **Integration Testing**
- Test method interactions
- Validate document processing
- Verify workflow execution
3. **Performance Testing**
- Measure response times
- Validate resource usage
- Test concurrent operations
#### Phase 5: Documentation and Cleanup
1. **Documentation**
- Update API documentation
- Document method implementations
- Add usage examples
2. **Code Cleanup**
- Remove deprecated code
- Clean up old agent files
- Optimize imports
3. **Final Review**
- Code review
- Security audit
- Performance optimization
### 6.3 Migration Strategy
1. **Incremental Migration**
- Migrate one agent at a time
- Maintain backward compatibility
- Use feature flags for gradual rollout
2. **Testing Strategy**
- Unit tests for each method
- Integration tests for workflows
- End-to-end tests for complete scenarios
3. **Rollback Plan**
- Keep old agent implementations until stable
- Maintain version control
- Document rollback procedures
### 6.4 Success Criteria
1. **Functionality**
- All existing features working
- New method-based structure operational
- Improved error handling
2. **Performance**
- Equal or better response times
- Reduced resource usage
- Improved scalability
3. **Maintainability**
- Clear code structure
- Comprehensive documentation
- Easy to extend
4. **Security**
- Proper authentication handling
- Secure data processing
- Access control implementation

13
pytest.ini Normal file
View file

@ -0,0 +1,13 @@
[pytest]
testpaths = tests
python_paths = .
python_files = test_*.py
python_classes = Test*
python_functions = test_*
log_file = logs/test_logs.log
log_file_level = INFO
log_file_format = %(asctime)s %(levelname)s %(message)s
log_file_date_format = %Y-%m-%d %H:%M:%S
# Only run non-expensive tests by default, verbose log, short traceback
# Use 'pytest -m ""' to run ALL tests.
addopts = -v --tb=short -m 'not expensive'

View file

@ -42,6 +42,7 @@ requests==2.31.0
chardet>=5.0.0 # Für Zeichensatzerkennung bei Webinhalten chardet>=5.0.0 # Für Zeichensatzerkennung bei Webinhalten
aiohttp>=3.8.0 # Required for SharePoint operations (async HTTP) aiohttp>=3.8.0 # Required for SharePoint operations (async HTTP)
selenium>=4.15.0 # Required for web automation and JavaScript-heavy pages selenium>=4.15.0 # Required for web automation and JavaScript-heavy pages
tavily-python==0.7.11 # Tavily SDK
## Image Processing ## Image Processing
Pillow>=10.0.0 # Für Bildverarbeitung (als PIL importiert) Pillow>=10.0.0 # Für Bildverarbeitung (als PIL importiert)
@ -67,3 +68,26 @@ PyPDF2>=3.0.0
PyMuPDF>=1.20.0 PyMuPDF>=1.20.0
beautifulsoup4>=4.11.0 beautifulsoup4>=4.11.0
chardet>=4.0.0 # For encoding detection chardet>=4.0.0 # For encoding detection
## Testing Dependencies
pytest>=8.0.0
pytest-asyncio>=0.21.0
## Missing Dependencies for IPython and other tools
decorator>=5.0.0
jedi>=0.16
matplotlib-inline>=0.1.0
stack-data>=0.1.0
traitlets>=5.0.0
docutils>=0.13.1
markdown-it-py>=2.2.0
keyring>=15.1
pkginfo>=1.8.1
## Missing Dependencies for Panel 1.3.8
bleach>=4.0.0
bokeh>=3.2.0,<3.4.0
linkify-it-py>=1.0.0
mdit-py-plugins>=0.3.0
pyviz-comms>=2.0.0
xyzservices>=2021.09.1

View file

@ -1,855 +0,0 @@
#!/usr/bin/env python3
"""
Test script for DocumentExtraction class.
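Processes all files in the input folder (default: d:/temp/test-extraction) and stores the extracted content in its "extracted" subfolder (AI mode) or "extracted-raw" subfolder (content-only mode).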
Features:
- Option to extract content WITH AI processing (default)
- Option to extract content WITHOUT AI processing (content-only mode)
- Supports all document types: text, images, PDFs, Office documents, etc.
- Detailed logging and progress tracking
- Separate output directories for AI vs content-only modes
Usage:
- Interactive mode: python test_documentExtraction.py
- Content-only mode: python test_documentExtraction.py --no-ai
- Content-only mode: python test_documentExtraction.py --content-only
- Specify custom input/output: python test_documentExtraction.py --input-dir /path/to/input --output-dir /path/to/output --no-ai
"""
import os
import asyncio
import logging
import sys
import argparse
from pathlib import Path
from typing import List, Optional
from datetime import datetime, UTC
# Configure logging
logging.basicConfig(
level=logging.DEBUG, # Changed from INFO to DEBUG
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Filter out specific unwanted log messages
class LogFilter(logging.Filter):
"""Filter to hide specific unwanted log messages."""
def filter(self, record):
# Hide workflow stats update errors
if "Workflow" in record.getMessage() and "not found for stats update" in record.getMessage():
return False
# Hide HTTP request info messages
if "HTTP Request:" in record.getMessage() and "POST https://api.openai.com" in record.getMessage():
return False
# Hide HTTP response info messages
if "HTTP/1.1 200 OK" in record.getMessage():
return False
return True
# Apply the filter to the root logger
root_logger = logging.getLogger()
root_logger.addFilter(LogFilter())
def check_dependencies():
    """Check that the third-party packages used for extraction are importable.

    Every package is probed (no early exit) so the log lists all missing
    ones together with installation instructions.

    Returns:
        True when every package imports, False when at least one is missing.
    """
    import importlib

    # (module to import, pip package name, label used in the log lines)
    required = (
        ("bs4", "beautifulsoup4", "beautifulsoup4"),
        ("PyPDF2", "PyPDF2", "PyPDF2"),
        ("fitz", "PyMuPDF", "PyMuPDF (fitz)"),
        ("docx", "python-docx", "python-docx"),
        ("openpyxl", "openpyxl", "openpyxl"),
        ("pptx", "python-pptx", "python-pptx"),
        ("PIL.Image", "Pillow", "Pillow (PIL)"),
    )

    missing_deps = []
    for module_name, package, label in required:
        try:
            importlib.import_module(module_name)
        except ImportError:
            missing_deps.append(package)
            logger.error(f"{label} is missing")
        else:
            logger.info(f"{label} is available")

    if not missing_deps:
        logger.info("All required dependencies are available!")
        return True

    logger.error("\n" + "="*60)
    logger.error("MISSING DEPENDENCIES DETECTED!")
    logger.error("="*60)
    logger.error("The following packages are required but not installed:")
    for dep in missing_deps:
        logger.error(f" - {dep}")
    logger.error("\nTo install all dependencies, run:")
    logger.error("pip install -r requirements.txt")
    logger.error("\nOr install individual packages:")
    # The install hint is the same for every package, so no per-package branching.
    for dep in missing_deps:
        logger.error(f" pip install {dep}")
    logger.error("="*60)
    return False
def check_module_imports():
    """Check that the project modules needed by this script can be imported.

    The directory three levels above this file is added to ``sys.path``
    first so the ``modules`` package can be resolved.

    Returns:
        True when all imports succeed, False otherwise (the failure is logged).
    """
    try:
        # Add the gateway directory to the path so we can import our modules.
        # Guarded so repeated calls do not pile up duplicate sys.path entries.
        gateway_root = os.path.join(os.path.dirname(__file__), '..', '..', '..')
        if gateway_root not in sys.path:
            sys.path.append(gateway_root)
        # The names are imported only to prove they resolve; they are not used here.
        from modules.chat.documents.documentExtraction import DocumentExtraction
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserConnection
        from modules.interfaces.interfaceChatModel import ChatWorkflow, TaskItem
    except ImportError as e:
        logger.error(f"Failed to import required modules: {e}")
        logger.error("Make sure you're running this script from the gateway directory")
        return False
    except Exception as e:
        logger.error(f"Unexpected error importing modules: {e}")
        return False
    logger.info("All required modules imported successfully")
    return True
def create_mock_service_center():
    """Build a ServiceCenter around a fixed test user and test workflow.

    Returns:
        The ServiceCenter instance, or None when the project modules cannot
        be imported or any of the objects cannot be constructed (the failure
        is logged with its traceback).
    """
    try:
        # Only the names actually used below are imported, so an unrelated
        # missing model name cannot make the setup fail.
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
        from modules.interfaces.interfaceChatModel import ChatWorkflow

        # Create proper user with all required fields
        mock_user = User(
            id="test_user_001",
            username="testuser",
            email="test@example.com",
            fullName="Test User",
            language="en",
            enabled=True,
            privilege=UserPrivilege.USER,
            authenticationAuthority=AuthAuthority.LOCAL,
            mandateId="test_mandate_001"
        )

        # Create proper workflow with all required fields
        current_time = datetime.now(UTC).isoformat()
        mock_workflow = ChatWorkflow(
            id="test_workflow_001",
            mandateId="test_mandate_001",
            status="active",
            name="Test Document Extraction Workflow",
            currentRound=1,
            lastActivity=current_time,
            startedAt=current_time,
            logs=[],
            messages=[],
            stats=None,
            tasks=[]
        )

        # Create service center
        service_center = ServiceCenter(mock_user, mock_workflow)
        logger.info("ServiceCenter created successfully with proper objects")
        return service_center
    except Exception as e:
        # Best-effort setup: callers test for None. Keep the traceback in the log.
        logger.error(f"Failed to create ServiceCenter: {e}", exc_info=True)
        return None
class DocumentExtractionTester:
    """Batch driver that runs DocumentExtraction over the files of one folder.

    Every content item extracted from a file is written to its own ``.txt``
    file in the output directory. One result record per input file is kept in
    ``processing_results`` and printed as a summary by ``run_tests``.
    """

    # Single source of truth: file extension -> MIME type. The set of
    # supported extensions is derived from these keys, so the two lists
    # cannot drift apart.
    _MIME_TYPES = {
        # Text and data files
        '.txt': 'text/plain',
        '.csv': 'text/csv',
        '.json': 'application/json',
        '.xml': 'application/xml',
        '.html': 'text/html',
        '.htm': 'text/html',
        '.svg': 'image/svg+xml',
        '.md': 'text/markdown',
        '.markdown': 'text/markdown',
        '.rst': 'text/x-rst',
        '.log': 'text/plain',
        '.ini': 'text/plain',
        '.cfg': 'text/plain',
        '.conf': 'text/plain',
        # Programming languages
        '.js': 'application/javascript',
        '.ts': 'application/typescript',
        '.jsx': 'text/jsx',
        '.tsx': 'text/tsx',
        '.py': 'text/x-python',
        '.java': 'text/x-java-source',
        '.c': 'text/x-c',
        '.cpp': 'text/x-c++src',
        '.cc': 'text/x-c++src',
        '.cxx': 'text/x-c++src',
        '.h': 'text/x-c',
        '.hpp': 'text/x-c++hdr',
        '.cs': 'text/x-csharp',
        '.php': 'application/x-httpd-php',
        '.rb': 'text/x-ruby',
        '.go': 'text/x-go',
        '.rs': 'text/x-rust',
        '.swift': 'text/x-swift',
        '.kt': 'text/x-kotlin',
        '.scala': 'text/x-scala',
        '.r': 'text/x-r',
        '.m': 'text/x-matlab',
        '.pl': 'text/x-perl',
        '.sh': 'application/x-sh',
        '.bash': 'application/x-sh',
        '.zsh': 'application/x-sh',
        '.fish': 'application/x-sh',
        '.ps1': 'application/x-powershell',
        '.bat': 'application/x-msdos-program',
        '.cmd': 'application/x-msdos-program',
        '.vbs': 'text/vbscript',
        '.lua': 'text/x-lua',
        '.sql': 'application/sql',
        '.dart': 'application/dart',
        '.elm': 'text/x-elm',
        '.clj': 'text/x-clojure',
        '.hs': 'text/x-haskell',
        '.fs': 'text/x-fsharp',
        '.ml': 'text/x-ocaml',
        # Web technologies
        '.css': 'text/css',
        '.scss': 'text/x-scss',
        '.sass': 'text/x-sass',
        '.less': 'text/x-less',
        '.vue': 'text/x-vue',
        '.svelte': 'text/x-svelte',
        '.astro': 'text/x-astro',
        # Configuration and build files
        '.yaml': 'application/x-yaml',
        '.yml': 'application/x-yaml',
        '.toml': 'application/toml',
        '.env': 'text/plain',
        '.gitignore': 'text/plain',
        '.dockerfile': 'text/x-dockerfile',
        '.dockerignore': 'text/plain',
        '.makefile': 'text/x-makefile',
        '.cmake': 'text/x-cmake',
        '.gradle': 'text/x-gradle',
        '.maven': 'text/x-maven',
        '.pom': 'application/xml',
        '.sln': 'text/plain',
        '.vcxproj': 'application/xml',
        '.csproj': 'application/xml',
        '.fsproj': 'application/xml',
        '.vbproj': 'application/xml',
        '.xcodeproj': 'text/plain',
        '.pbxproj': 'text/plain',
        # Documentation and markup
        '.tex': 'application/x-tex',
        '.bib': 'text/x-bibtex',
        '.adoc': 'text/asciidoc',
        '.asciidoc': 'text/asciidoc',
        '.wiki': 'text/x-wiki',
        '.creole': 'text/x-wiki',
        # Images
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.webp': 'image/webp',
        '.bmp': 'image/bmp',
        '.tiff': 'image/tiff',
        '.ico': 'image/x-icon',
        # Documents
        '.pdf': 'application/pdf',
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        '.odt': 'application/vnd.oasis.opendocument.text',
        '.ods': 'application/vnd.oasis.opendocument.spreadsheet',
        '.odp': 'application/vnd.oasis.opendocument.presentation',
        # Legacy Office formats
        '.doc': 'application/msword',
        '.xls': 'application/vnd.ms-excel',
        '.ppt': 'application/vnd.ms-powerpoint',
        # Archives and binaries (will be processed as binary)
        '.zip': 'application/zip',
        '.tar': 'application/x-tar',
        '.gz': 'application/gzip',
        '.7z': 'application/x-7z-compressed',
        '.rar': 'application/vnd.rar',
        '.exe': 'application/x-msdownload',
        '.dll': 'application/x-msdownload',
        '.so': 'application/x-sharedlib',
        '.dylib': 'application/x-mach-binary'
    }

    def __init__(self, input_dir: str = "d:/temp/test-extraction", output_dir: Optional[str] = None, enable_ai: bool = True):
        """
        Initialize the tester and make sure the output directory is usable.

        Args:
            input_dir: Directory containing files to process
            output_dir: Directory to store extracted content. When None, the
                "extracted" (AI) or "extracted-raw" (content-only) subfolder
                of input_dir is used.
            enable_ai: Whether to enable AI processing (default: True)
        """
        self.input_dir = Path(input_dir)

        # Auto-generate output directory if not specified
        if output_dir is None:
            self.output_dir = Path(input_dir) / ("extracted" if enable_ai else "extracted-raw")
        else:
            self.output_dir = Path(output_dir)

        self.extractor = None
        self.service_center = None
        self.enable_ai = enable_ai
        # No prompt is needed for content-only extraction
        self.prompt = (
            "Make a summary of each sentence for each page or chapter of the document"
            if enable_ai else None
        )

        # Track processing results for summary
        self.processing_results = []

        # Ensure output directory exists
        logger.info(f"Creating output directory: {self.output_dir}")
        self.output_dir.mkdir(parents=True, exist_ok=True)
        if self.output_dir.exists():
            logger.info(f"Output directory created/verified: {self.output_dir}")
            logger.info(f"Output directory absolute path: {self.output_dir.absolute()}")
        else:
            logger.error(f"Failed to create output directory: {self.output_dir}")

        # Log configuration
        logger.info(f"Configuration: AI processing = {'ENABLED' if self.enable_ai else 'DISABLED'}")
        logger.info(f"Input directory: {self.input_dir}")
        logger.info(f"Output directory: {self.output_dir}")

        self._verify_write_capability()

        # Supported file extensions for content extraction
        self.supported_extensions = set(self._MIME_TYPES)

    def _verify_write_capability(self) -> None:
        """Write, read back and delete a probe file in the output directory.

        Failures are logged only; they never abort construction.
        """
        test_file = self.output_dir / "test_write_capability.txt"
        try:
            logger.info(f"Testing file write capability to: {test_file}")
            logger.info(f"Absolute path: {test_file.absolute()}")
            with open(test_file, 'w', encoding='utf-8') as f:
                f.write("Test file to verify write capability")
            if not test_file.exists():
                logger.error(f"Basic file writing test failed: {test_file}")
                return
            actual_size = test_file.stat().st_size
            logger.info(f"Basic file writing test passed: {test_file} (size: {actual_size} bytes)")
            # Test reading the file back
            with open(test_file, 'r', encoding='utf-8') as f:
                content = f.read()
            logger.info(f"File read test passed: content length = {len(content)}")
            # Clean up test file
            test_file.unlink()
            logger.info("Test file cleaned up")
        except OSError as e:
            # logger.exception keeps the traceback in the log stream
            logger.exception(f"Basic file writing test failed with error: {e}")

    @staticmethod
    def _extension_of(file_path: Path) -> str:
        """Return the lower-cased extension used for the MIME/support lookup.

        ``Path.suffix`` is empty for dotfiles such as ``.gitignore`` or
        ``.env``, so for those the whole file name is used as the extension.
        """
        suffix = file_path.suffix
        if not suffix and file_path.name.startswith('.'):
            suffix = file_path.name
        return suffix.lower()

    def initialize_extractor(self):
        """Create the ServiceCenter and the DocumentExtraction instance.

        Returns:
            True when both objects were created, False otherwise.
        """
        try:
            # First create the service center
            self.service_center = create_mock_service_center()
            if not self.service_center:
                logger.error("Failed to create ServiceCenter!")
                return False
            # Now create DocumentExtraction with the service center
            from modules.chat.documents.documentExtraction import DocumentExtraction
            self.extractor = DocumentExtraction(self.service_center)
            logger.info("DocumentExtraction initialized successfully with ServiceCenter")
            return True
        except Exception as e:
            logger.error(f"Failed to initialize DocumentExtraction: {e}")
            return False

    def get_files_to_process(self) -> List[Path]:
        """Return the supported files found directly in the input directory.

        When the input directory does not exist it is created and seeded with
        a small ``test.txt`` so there is something to process.
        """
        if not self.input_dir.exists():
            logger.error(f"Input directory {self.input_dir} does not exist!")
            logger.info("Creating input directory and adding a test file...")
            self.input_dir.mkdir(parents=True, exist_ok=True)
            # Create a test file if none exist. The sample text contains
            # non-ASCII characters, so the encoding must be explicit rather
            # than the platform default.
            test_file = self.input_dir / "test.txt"
            with open(test_file, 'w', encoding='utf-8') as f:
                f.write("This is a test file for document extraction.\nIt contains multiple lines.\nAnd some special characters: äöüß")
            logger.info(f"Created test file: {test_file}")

        files = []
        all_files = list(self.input_dir.iterdir())
        logger.info(f"All files in directory: {[f.name for f in all_files]}")
        for file_path in all_files:
            if not file_path.is_file():
                continue
            extension = self._extension_of(file_path)
            logger.debug(f"Checking file: {file_path.name} (extension: {extension})")
            if extension in self.supported_extensions:
                files.append(file_path)
                logger.debug(f"Added file: {file_path.name}")
            else:
                logger.debug(f"Skipped file: {file_path.name} (unsupported extension)")

        logger.info(f"Found {len(files)} supported files to process")
        if files:
            logger.info(f"Files to process: {[f.name for f in files]}")
        return files

    async def process_single_file(self, file_path: Path) -> bool:
        """
        Process a single file and write each extracted content item to disk.

        A result record for the file is always appended to
        ``processing_results``.

        Args:
            file_path: Path to the file to process

        Returns:
            True if content was extracted and every item was written,
            False otherwise
        """
        if not self.extractor:
            logger.error("DocumentExtraction not initialized!")
            return False

        result = {
            'fileName': file_path.name,
            'status': 'OK',
            'content_items': 0,
            'output_files': [],
            'total_content_size': 0
        }
        try:
            logger.info(f"Processing file: {file_path.name}")

            # Read file data
            with open(file_path, 'rb') as f:
                file_data = f.read()
            logger.debug(f"File size: {len(file_data)} bytes")

            # Determine MIME type based on extension
            mime_type = self._get_mime_type(self._extension_of(file_path))
            logger.debug(f"MIME type: {mime_type}")

            # Process the file with or without AI based on configuration
            extracted_content = await self.extractor.processFileData(
                fileData=file_data,
                fileName=file_path.name,
                mimeType=mime_type,
                base64Encoded=False,
                prompt=self.prompt,
                enableAI=self.enable_ai
            )
            contents = extracted_content.contents
            logger.debug(f"Extracted {len(contents)} content items")

            # Debug: Show content details
            for i, content_item in enumerate(contents):
                logger.debug(f"Content item {i+1}: label='{content_item.label}', has_data={content_item.data is not None}, data_length={len(content_item.data) if content_item.data else 0}")

            # Special logging for JavaScript files
            if mime_type == "application/javascript":
                logger.debug(f"JavaScript file detected: {file_path.name}")
                logger.debug(f"Original file size: {len(file_data)} bytes")
                for i, content_item in enumerate(contents):
                    if content_item.data:
                        content_size = len(content_item.data.encode('utf-8'))
                        logger.debug(f"JavaScript content item {i+1}: {content_size} bytes")
                        # Check if content was truncated
                        if content_size < len(file_data) * 0.9:  # If less than 90% of original
                            logger.warning(f"JavaScript content may be truncated: {content_size} bytes vs {len(file_data)} bytes original")

            # Save each content item as a separate file
            if contents:
                for i, content_item in enumerate(contents):
                    if not content_item.data:
                        logger.warning(f"Content item {i+1} has no data, skipping")
                        continue
                    content_size = len(content_item.data.encode('utf-8'))
                    result['total_content_size'] += content_size
                    logger.debug(f"Content item {i+1}: {content_item.label}, size: {content_size} bytes")

                    # The sequence number is 1 for a single item as well, so
                    # one naming rule covers both cases.
                    output_fileName = f"{file_path.stem} - {content_item.label} {i+1}.txt"
                    output_file = self.output_dir / output_fileName

                    # Write only the raw extracted content
                    logger.debug(f"Attempting to write to: {output_file}")
                    try:
                        with open(output_file, 'w', encoding='utf-8') as f:
                            f.write(content_item.data)
                        # Verify file was created
                        if output_file.exists():
                            actual_size = output_file.stat().st_size
                            logger.info(f"File created successfully: {output_fileName} (expected: {content_size} bytes, actual: {actual_size} bytes)")
                        else:
                            logger.error(f"File was not created: {output_file}")
                        result['output_files'].append(output_fileName)
                        result['content_items'] += 1
                    except Exception as write_error:
                        # Keep going with the remaining items, but do not
                        # report the file as successfully processed.
                        logger.exception(f"Error writing file {output_fileName}: {write_error}")
                        result['status'] = 'FAIL'
                        result['error'] = f"Error writing file {output_fileName}: {write_error}"
            else:
                logger.warning(f"No content extracted from {file_path.name}")
                result['status'] = 'FAIL'
                result['error'] = 'No content extracted'
        except Exception as e:
            error_msg = str(e)
            logger.error(f"Error processing {file_path.name}: {error_msg}")
            # Track failed result
            self.processing_results.append({
                'fileName': file_path.name,
                'status': 'FAIL',
                'content_items': 0,
                'output_files': [],
                'error': error_msg,
                'total_content_size': 0
            })
            return False

        # Add result to tracking list
        self.processing_results.append(result)
        if result['status'] != 'OK':
            logger.error(f"Failed to process {file_path.name}: {result['error']}")
            return False
        logger.info(f"Successfully processed {file_path.name} - Total content: {result['total_content_size']} bytes")
        return True

    def _get_mime_type(self, extension: str) -> str:
        """Get MIME type based on file extension.

        The lookup is case-insensitive; unknown extensions map to
        ``application/octet-stream``.
        """
        return self._MIME_TYPES.get(extension.lower(), 'application/octet-stream')

    async def run_tests(self) -> None:
        """Run the extraction over all supported input files and log a summary."""
        mode = "WITH AI" if self.enable_ai else "CONTENT ONLY (No AI)"
        logger.info(f"Starting document extraction tests - {mode}")
        logger.info(f"Input directory: {self.input_dir}")
        logger.info(f"Output directory: {self.output_dir}")
        if self.enable_ai:
            logger.info(f"Processing prompt: {self.prompt}")
        else:
            logger.info("AI processing: DISABLED - Raw content extraction only")

        # Initialize the extractor
        if not self.initialize_extractor():
            logger.error("Cannot proceed without DocumentExtraction!")
            return

        # Get files to process
        files = self.get_files_to_process()
        if not files:
            logger.warning("No files found to process!")
            return

        # Process each file
        successful = 0
        failed = 0
        logger.info(f"Starting to process {len(files)} files...")
        for i, file_path in enumerate(files):
            logger.info(f"Processing file {i+1}/{len(files)}: {file_path.name}")
            try:
                if await self.process_single_file(file_path):
                    successful += 1
                    logger.info(f"File {i+1} processed successfully")
                else:
                    failed += 1
                    logger.error(f"File {i+1} processing failed")
            except Exception as e:
                failed += 1
                logger.exception(f"Exception processing file {i+1}: {e}")

        # Print detailed summary
        logger.info("\n" + "=" * 80)
        logger.info(f"DETAILED TEST SUMMARY - {mode}")
        logger.info("=" * 80)
        logger.info(f"Total files processed: {len(files)}")
        logger.info(f"Successful: {successful}")
        logger.info(f"Failed: {failed}")
        logger.info(f"Output directory: {self.output_dir}")
        if self.enable_ai:
            logger.info("AI processing: ENABLED")
        else:
            logger.info("AI processing: DISABLED")
        logger.info("=" * 80)

        # List all processed documents with results
        logger.info("\nPROCESSING RESULTS:")
        logger.info("-" * 80)
        for result in self.processing_results:
            # No status icon: it was an empty string for both outcomes, and
            # the status text already says OK/FAIL.
            logger.info(f"{result['fileName']} - {result['status']}")
            if result['status'] == 'OK':
                if result['content_items'] == 1:
                    logger.info(f" └─ Generated: {result['output_files'][0]} ({result['total_content_size']} bytes)")
                else:
                    logger.info(f" └─ Generated {result['content_items']} files ({result['total_content_size']} total bytes):")
                    for output_file in result['output_files']:
                        logger.info(f" └─ {output_file}")
            else:
                error_msg = result.get('error', 'Unknown error')
                logger.info(f" └─ Error: {error_msg}")
            logger.info("-" * 80)
        logger.info("=" * 80)
def parse_arguments():
    """Build the command-line interface and return the parsed options.

    Returns:
        The argparse namespace with ``no_ai``, ``input_dir``, ``output_dir``
        and ``verbose``.
    """
    cli = argparse.ArgumentParser(description='Document Extraction Test Script')
    # (flag names, add_argument settings), registered in this order
    option_specs = (
        (('--no-ai', '--content-only'), {
            'action': 'store_true',
            'help': 'Run in content-only mode without AI processing',
        }),
        (('--input-dir',), {
            'type': str,
            'default': 'd:/temp/test-extraction',
            'help': 'Input directory containing files to process (default: d:/temp/test-extraction)',
        }),
        (('--output-dir',), {
            'type': str,
            'help': 'Output directory for extracted content (auto-generated if not specified)',
        }),
        (('--verbose', '-v'), {
            'action': 'store_true',
            'help': 'Enable verbose logging',
        }),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    return cli.parse_args()
async def main():
    """Parse the command line, pick the extraction mode and run the tester.

    ``--no-ai`` selects content-only mode directly; otherwise the user is
    asked on stdin, with AI mode as the default (also used when stdin is
    closed or the prompt is interrupted). An explicit ``--output-dir`` is
    always honoured; the ``extracted`` / ``extracted-raw`` subfolder of the
    input directory is only used when none was given.
    """
    # Parse command line arguments
    args = parse_arguments()

    # Set logging level based on verbosity
    logging.getLogger().setLevel(logging.DEBUG if args.verbose else logging.INFO)

    logger.info("DocumentExtraction Test Script")
    logger.info("=" * 50)
    logger.info(f"Source: {args.input_dir}")
    logger.info("=" * 50)

    # Check dependencies first
    if not check_dependencies():
        logger.error("Please install missing dependencies before running tests.")
        return

    # Check module imports
    if not check_module_imports():
        logger.error("Cannot import required modules. Please check your setup.")
        return

    # Determine mode based on command line arguments
    if args.no_ai:
        enable_ai = False
        logger.info("Running in CONTENT ONLY mode (no AI processing)")
    else:
        # Interactive mode: ask user for choice
        print("\n" + "=" * 50)
        print("SELECT EXTRACTION MODE:")
        print("=" * 50)
        print("1. With AI processing (default)")
        print("2. Content only (no AI processing)")
        print("=" * 50)
        try:
            choice = input("Enter your choice (1 or 2, default is 1): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Default to AI mode if input fails
            enable_ai = True
            logger.info("Defaulting to AI processing mode")
        else:
            if choice == "2":
                enable_ai = False
                logger.info("Selected: Content only mode (no AI processing)")
            else:
                enable_ai = True
                logger.info("Selected: AI processing mode")

    # Resolve the output directory only now that the mode is known, and
    # never override a directory the user passed explicitly.
    if args.output_dir:
        output_dir = args.output_dir
    elif enable_ai:
        output_dir = f"{args.input_dir}/extracted"
    else:
        output_dir = f"{args.input_dir}/extracted-raw"
    logger.info(f"Output: {output_dir}")

    # Run tests with selected mode
    tester = DocumentExtractionTester(
        input_dir=args.input_dir,
        output_dir=output_dir,
        enable_ai=enable_ai
    )
    await tester.run_tests()
if __name__ == "__main__":
    # main() handles both cases itself: it parses sys.argv and falls back to
    # the interactive mode prompt when --no-ai was not given, so there is
    # nothing to branch on here.
    asyncio.run(main())
# Convenience function for easy content-only extraction
async def extract_documents_content_only(input_folder: str, output_folder: str = None):
    """
    Run a document extraction pass with AI processing switched off.

    Args:
        input_folder: Folder holding the documents to extract.
        output_folder: Folder that receives the extracted content. When
            omitted, "<input_folder>/extracted-raw" is used.

    Returns:
        False when the dependency check or the module import check fails
        (the reason is logged); True once the tester run has finished.

    Example:
        # Extract from d:/temp to d:/temp/extracted-raw
        asyncio.run(extract_documents_content_only("d:/temp"))
        # Extract from custom folders
        asyncio.run(extract_documents_content_only("c:/my_docs", "c:/my_docs/extracted"))
    """
    if output_folder is None:
        output_folder = f"{input_folder}/extracted-raw"
    logger.info(f"Running content-only extraction from {input_folder} to {output_folder}")

    # Pre-flight checks, evaluated in order; the first failure aborts the run
    # so the later check is not executed.
    preflight = (
        (check_dependencies, "Missing dependencies. Please install required packages."),
        (check_module_imports, "Cannot import required modules. Please check your setup."),
    )
    for check, failure_message in preflight:
        if not check():
            logger.error(failure_message)
            return False

    # enable_ai=False is what makes this a content-only extraction.
    content_only_tester = DocumentExtractionTester(
        input_dir=input_folder,
        output_dir=output_folder,
        enable_ai=False
    )
    await content_only_tester.run_tests()
    return True
# Example usage (uncomment to use):
# if __name__ == "__main__":
# # For content-only extraction from d:/temp to d:/temp/extracted-raw
# asyncio.run(extract_documents_content_only("d:/temp"))

View file

@ -1,189 +0,0 @@
#!/usr/bin/env python3
"""
Simple test script for enhanced Excel processing functionality.
This script tests the DocumentExtraction class with Excel files.
"""
import os
import sys
import asyncio
import logging
from pathlib import Path
# Configure logging
logging.basicConfig(
level=logging.DEBUG,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Add the gateway directory to the path
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..'))
def _log_extraction_result(result, preview_chars):
    """Log one line per detail for every content item in an extraction result.

    Args:
        result: Object returned by DocumentExtraction.processFileData; only
            its ``contents`` sequence is read here.
        preview_chars: Number of leading characters of each item's data to
            include in the preview line.
    """
    logger.info(f"Generated {len(result.contents)} content items:")
    for position, content_item in enumerate(result.contents, start=1):
        logger.info(f" Item {position}: {content_item.label}")
        logger.info(f" MIME type: {content_item.metadata.mimeType}")
        logger.info(f" Size: {content_item.metadata.size} bytes")
        if content_item.data:
            logger.info(f" Data preview: {content_item.data[:preview_chars]}...")
        else:
            logger.info(" Data: None")


async def test_excel_processing():
    """Test Excel processing functionality.

    Builds a mock user and workflow, creates a DocumentExtraction on top of a
    ServiceCenter, and processes an .xlsx file with AI disabled. If the fixed
    sample file does not exist, a small workbook is generated in memory with
    openpyxl and processed instead. All failures are logged rather than
    raised, so the coroutine always returns None.
    """
    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    try:
        # Project imports stay inside the function: they only resolve after
        # the module-level sys.path adjustment and failures must be reported
        # through the ImportError handler below.
        from modules.chat.documents.documentExtraction import DocumentExtraction
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
        from modules.interfaces.interfaceChatModel import ChatWorkflow
        from datetime import datetime, UTC

        logger.info("Testing Excel processing functionality...")

        # Create mock service center
        mock_user = User(
            id="test_user_001",
            username="testuser",
            email="test@example.com",
            fullName="Test User",
            language="en",
            enabled=True,
            privilege=UserPrivilege.USER,
            authenticationAuthority=AuthAuthority.LOCAL,
            mandateId="test_mandate_001"
        )
        current_time = datetime.now(UTC).isoformat()
        mock_workflow = ChatWorkflow(
            id="test_workflow_001",
            mandateId="test_mandate_001",
            status="active",
            name="Test Excel Processing Workflow",
            currentRound=1,
            lastActivity=current_time,
            startedAt=current_time,
            logs=[],
            messages=[],
            stats=None,
            tasks=[]
        )
        service_center = ServiceCenter(mock_user, mock_workflow)
        logger.info("ServiceCenter created successfully")

        # Create DocumentExtraction instance
        extractor = DocumentExtraction(service_center)
        logger.info("DocumentExtraction created successfully")

        # Test with a sample Excel file if available
        test_file_path = "d:/temp/test-extraction/test.xlsx"
        if os.path.exists(test_file_path):
            logger.info(f"Found test file: {test_file_path}")
            with open(test_file_path, 'rb') as f:
                file_data = f.read()
            logger.info(f"File size: {len(file_data)} bytes")

            logger.info("Processing Excel file...")
            result = await extractor.processFileData(
                fileData=file_data,
                fileName="test.xlsx",
                mimeType=xlsx_mime,
                base64Encoded=False,
                prompt=None,
                enableAI=False
            )
            logger.info("Excel processing completed successfully!")
            _log_extraction_result(result, preview_chars=100)
        else:
            logger.info("No test Excel file found. Creating a simple test...")
            try:
                # Importing Workbook is enough to detect a missing openpyxl.
                from openpyxl import Workbook
                from io import BytesIO

                # Create a test workbook
                wb = Workbook()
                ws = wb.active
                ws.title = "Test Sheet"

                # Add some test data
                ws['A1'] = "Name"
                ws['B1'] = "Age"
                ws['C1'] = "City"
                ws['A2'] = "John Doe"
                ws['B2'] = 30
                ws['C2'] = "New York"
                ws['A3'] = "Jane Smith"
                ws['B3'] = 25
                ws['C3'] = "Los Angeles"

                # Test properties
                wb.properties.title = "Test Workbook"
                wb.properties.creator = "Test User"
                wb.properties.subject = "Test Subject"
                logger.info("Test workbook created successfully")
                logger.info(f" Title: {wb.properties.title}")
                logger.info(f" Creator: {wb.properties.creator}")
                logger.info(f" Subject: {wb.properties.subject}")
                logger.info(f" Sheets: {wb.sheetnames}")

                # Serialize the workbook in memory; getvalue() returns the
                # whole buffer regardless of the stream position.
                buffer = BytesIO()
                wb.save(buffer)
                file_data = buffer.getvalue()
                logger.info(f"Test workbook size: {len(file_data)} bytes")

                # Process with DocumentExtraction
                result = await extractor.processFileData(
                    fileData=file_data,
                    fileName="test_workbook.xlsx",
                    mimeType=xlsx_mime,
                    base64Encoded=False,
                    prompt=None,
                    enableAI=False
                )
                logger.info("Test workbook processing completed successfully!")
                _log_extraction_result(result, preview_chars=200)
            except ImportError as e:
                logger.error(f"openpyxl not available: {e}")
            except Exception as e:
                logger.error(f"Error testing Excel functionality: {e}")

        logger.info("Excel processing test completed!")
    except ImportError as e:
        logger.error(f"Failed to import required modules: {e}")
        logger.error("Make sure you're running this script from the gateway directory")
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    asyncio.run(test_excel_processing())

View file

@ -1,51 +0,0 @@
#!/usr/bin/env python3
"""
Test script for Outlook filter logic
"""
def test_build_graph_filter():
    """Test the filter building logic.

    Runs a local stand-in for ``_buildGraphFilter`` against a table of
    (input, expected query parameters) pairs and prints a PASS/FAIL line for
    each case. Nothing is returned and nothing is raised on a mismatch.
    """
    search_prefixes = ('from:', 'to:', 'subject:', 'received:', 'hasattachment:')

    # Mock the _buildGraphFilter method
    def _buildGraphFilter(filter_text):
        if not filter_text:
            return {}
        filter_text = filter_text.strip()
        # Search queries must be recognised BEFORE the e-mail heuristic:
        # "from:user@example.com" also contains '@' and '.', and would
        # otherwise be turned into an address filter instead of a $search.
        if filter_text.startswith(search_prefixes):
            return {"$search": f'"{filter_text}"'}
        # Handle bare email address filters
        if '@' in filter_text and '.' in filter_text and ' ' not in filter_text:
            return {"$filter": f"from/fromAddress/address eq '{filter_text}'"}
        # Handle text content - search in subject
        return {"$filter": f"contains(subject,'{filter_text}')"}

    # Test cases
    test_cases = [
        ("peter.muster@domain.com", {"$filter": "from/fromAddress/address eq 'peter.muster@domain.com'"}),
        ("from:user@example.com", {"$search": '"from:user@example.com"'}),
        ("subject:meeting", {"$search": '"subject:meeting"'}),
        ("project update", {"$filter": "contains(subject,'project update')"}),
        ("", {}),
        (" hello world ", {"$filter": "contains(subject,'hello world')"}),
    ]

    print("Testing Outlook filter logic:")
    print("=" * 50)
    for test_input, expected_output in test_cases:
        result = _buildGraphFilter(test_input)
        status = "✓ PASS" if result == expected_output else "✗ FAIL"
        print(f"{status} | Input: '{test_input}'")
        print(f" | Expected: {expected_output}")
        print(f" | Got: {result}")
        print()
    print("Test completed!")


if __name__ == "__main__":
    test_build_graph_filter()

View file

@ -1,70 +0,0 @@
#!/usr/bin/env python3
"""
Test script for fixed Outlook filter logic
"""
def test_build_graph_filter():
    """Exercise the corrected filter-building rules and print a tally.

    Each case pairs a raw filter string with the query parameters the
    stand-in ``_buildGraphFilter`` is expected to produce. A PASS/FAIL block
    is printed per case, followed by the pass/fail counts and a verdict line.
    """
    search_prefixes = ('from:', 'to:', 'subject:', 'received:', 'hasattachment:')

    # Stand-in for the corrected _buildGraphFilter method
    def _buildGraphFilter(filter_text):
        if not filter_text:
            return {}
        text = filter_text.strip()
        # Search queries (from:, to:, subject:, ...) take precedence.
        if text.startswith(search_prefixes):
            return {"$search": f'"{text}"'}
        # Past this point the text cannot start with 'from:', so the plain
        # e-mail heuristic needs no further prefix guard.
        looks_like_address = '@' in text and '.' in text and ' ' not in text
        if looks_like_address:
            return {"$filter": f"from/fromAddress/address eq '{text}'"}
        # Anything else is matched against the subject.
        return {"$filter": f"contains(subject,'{text}')"}

    cases = [
        ("peter.muster@domain.com", {"$filter": "from/fromAddress/address eq 'peter.muster@domain.com'"}),
        ("from:user@example.com", {"$search": '"from:user@example.com"'}),
        ("subject:meeting", {"$search": '"subject:meeting"'}),
        ("project update", {"$filter": "contains(subject,'project update')"}),
        ("", {}),
        (" hello world ", {"$filter": "contains(subject,'hello world')"}),
        # Additional edge cases
        ("to:manager@company.com", {"$search": '"to:manager@company.com"'}),
        ("received:today", {"$search": '"received:today"'}),
        ("hasattachment:true", {"$search": '"hasattachment:true"'}),
        ("user@domain.com", {"$filter": "from/fromAddress/address eq 'user@domain.com'"}),
        ("from:user@domain.com subject:budget", {"$search": '"from:user@domain.com subject:budget"'}),
    ]

    print("Testing FIXED Outlook filter logic:")
    print("=" * 50)
    tally = {True: 0, False: 0}
    for raw, wanted in cases:
        got = _buildGraphFilter(raw)
        ok = got == wanted
        tally[ok] += 1
        if ok:
            status = "✓ PASS"
        else:
            status = "✗ FAIL"
        print(f"{status} | Input: '{raw}'")
        print(f" | Expected: {wanted}")
        print(f" | Got: {got}")
        print()

    passed, failed = tally[True], tally[False]
    print(f"Test completed! {passed} passed, {failed} failed")
    if failed:
        print("❌ Some tests failed. Please check the logic.")
    else:
        print("🎉 All tests passed!")


if __name__ == "__main__":
    test_build_graph_filter()

View file

@ -1,100 +0,0 @@
#!/usr/bin/env python3
"""
Test script for Pydantic compatibility module.
This script tests the version-aware functionality for both Pydantic v1 and v2.
"""
import sys
import os
# Add the modules directory to the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'modules'))
def test_compatibility_module():
    """Smoke-test the Pydantic compatibility helpers.

    Imports the public names of ``shared.pydanticCompat``, prints the
    detected version information, and creates one private field and one
    model config.

    Returns:
        True when every step ran without raising; False (after printing the
        error) when the import or any helper call failed.
    """
    try:
        # model_to_dict / model_from_dict are imported only to prove they exist.
        from shared.pydanticCompat import (
            PYDANTIC_VERSION,
            create_private_field,
            create_model_config,
            model_to_dict,
            model_from_dict,
            get_version_info,
        )
        print("✅ Successfully imported Pydantic compatibility module")
        print(f"📊 Pydantic version detected: {PYDANTIC_VERSION}")

        # Version info
        details = get_version_info()
        print(f"🔍 Version info: {details}")

        # Field creation
        field = create_private_field(default="test")
        print(f"✅ Private field created: {type(field)}")

        # Model config
        model_config = create_model_config(validate_assignment=True)
        print(f"✅ Model config created: {type(model_config)}")
    except Exception as exc:
        print(f"❌ Error testing compatibility module: {exc}")
        return False
    return True
def test_chat_document_model():
    """Smoke-test ChatDocument through the compatibility layer.

    Creates a document, attaches a placeholder component interface,
    serializes it with ``to_dict`` and runs ``validate_component_interface``,
    printing the outcome of each step.

    Returns:
        True when every step ran without raising; False (after printing the
        error and its traceback) otherwise.
    """
    try:
        from interfaces.interfaceChatModel import ChatDocument
        print("✅ Successfully imported ChatDocument model")

        # Create a document
        document = ChatDocument(fileId="test-file-123")
        print(f"✅ ChatDocument created: {document.id}")

        # Attach a component interface
        document.setComponentInterface("mock_interface")
        print("✅ Component interface set")

        # Serialization
        serialized = document.to_dict()
        print(f"✅ Document serialized: {serialized}")

        # Validation
        interface_ok = document.validate_component_interface()
        print(f"✅ Component interface validation: {interface_ok}")
    except Exception as exc:
        print(f"❌ Error testing ChatDocument model: {exc}")
        import traceback
        traceback.print_exc()
        return False
    return True
def main():
    """Run both checks and translate the outcome into a process exit code.

    Returns:
        0 when the compatibility-module check and the ChatDocument check
        both succeeded, 1 otherwise.
    """
    divider = "=" * 50
    print("🧪 Testing Pydantic Compatibility Module")
    print(divider)

    # Both checks always run, even if the first one fails.
    compat_ok = test_compatibility_module()
    print()
    model_ok = test_chat_document_model()
    print()

    # Summary
    print(divider)
    if not (compat_ok and model_ok):
        print("💥 Some tests failed. Check the errors above.")
        return 1
    print("🎉 All tests passed! Pydantic compatibility is working correctly.")
    return 0


if __name__ == "__main__":
    sys.exit(main())

1
tests/__init__.py Normal file
View file

@ -0,0 +1 @@
# noqa

View file

View file

@ -0,0 +1,108 @@
"""Tests for Tavily web search."""
import pytest
import logging
from modules.interfaces.interfaceChatModel import ActionResult
from gateway.modules.interfaces.interfaceWebModel import (
WebSearchRequest,
WebCrawlRequest,
WebScrapeRequest,
)
from gateway.modules.connectors.connectorWebTavily import ConnectorTavily
logger = logging.getLogger(__name__)
def _log_action_result(action_result):
    """Log the status fields and every document of an ActionResult.

    Shared by the live connector tests below so the three tests report
    their results in one identical format.
    """
    logger.info("=" * 20)
    logger.info(f"Action result success status: {action_result.success}")
    logger.info(f"Action result error: {action_result.error}")
    logger.info(f"Action result label: {action_result.resultLabel}")
    logger.info("Documents:")
    for doc in action_result.documents:
        logger.info("-" * 10)
        logger.info(f" - Document Name: {doc.documentName}")
        logger.info(f" - Document Mime Type: {doc.mimeType}")
        logger.info(f" - Document Data: {doc.documentData}")


@pytest.mark.asyncio
@pytest.mark.expensive
async def test_tavily_connector_search_test_live_api():
    """Run a URL search through the connector and check the result type."""
    logger.info("Testing Tavliy connector search with live API calls")

    # Test request
    request = WebSearchRequest(query="How old is the Earth?", max_results=5)

    # Tavily instance
    connector = await ConnectorTavily.create()

    # Search test
    action_result = await connector.search_urls(request=request)

    # Check results
    assert isinstance(action_result, ActionResult)
    _log_action_result(action_result)


@pytest.mark.asyncio
@pytest.mark.expensive
async def test_tavily_connector_crawl_test_live_api():
    """Crawl two fixed URLs through the connector and check the result type."""
    logger.info("Testing Tavily connector crawl with live API calls")

    # Test request
    urls = [
        "https://en.wikipedia.org/wiki/Earth",
        "https://valueon.ch",
    ]
    request = WebCrawlRequest(urls=urls)

    # Tavily instance
    connector = await ConnectorTavily.create()

    # Crawl test
    action_result = await connector.crawl_urls(request=request)

    # Check results
    assert isinstance(action_result, ActionResult)
    _log_action_result(action_result)


@pytest.mark.asyncio
@pytest.mark.expensive
async def test_tavily_connector_scrape_test_live_api():
    """Run a query-based scrape through the connector and check the result type."""
    logger.info("Testing Tavily connector scrape with live API calls")

    # Test request with query
    request = WebScrapeRequest(query="How old is the Earth?", max_results=3)

    # Tavily instance
    connector = await ConnectorTavily.create()

    # Scrape test
    action_result = await connector.scrape(request=request)

    # Check results
    assert isinstance(action_result, ActionResult)
    _log_action_result(action_result)

0
tests/fixtures/__init__.py vendored Normal file
View file

71
tests/fixtures/tavily_responses.py vendored Normal file

File diff suppressed because one or more lines are too long

View file

View file

@ -0,0 +1,248 @@
"""Tests for method web.py"""
import json
import logging
import pytest
from unittest.mock import patch
from modules.methods.methodWeb import MethodWeb
from tests.fixtures.tavily_responses import (
RESPONSE_SEARCH_HOW_OLD_IS_EARTH_NO_ANSWER,
RESPONSE_EXTRACT_HOW_OLD_IS_EARTH_NO_ANSWER,
)
logger = logging.getLogger(__name__)
def _check_and_log_result(action_result):
    """Assert that an action succeeded with documents, then log its content.

    The success assertion carries ``action_result.error`` as its message so
    a failing run shows why the action failed, not just that it did.
    """
    assert action_result.success, f"Action failed: {action_result.error}"
    assert len(action_result.documents) > 0
    logger.info(f"Action result success status: {action_result.success}")
    logger.info(f"Action result error: {action_result.error}")
    logger.info(f"Action result label: {action_result.resultLabel}")
    logger.info("Documents:")
    for doc in action_result.documents:
        logger.info(f" - Document Name: {doc.documentName}")
        logger.info(f" --> Document Mime Type: {doc.mimeType}")
        logger.info(f" --> Document Data: {doc.documentData}")


def _stub_search_results_document(mock_service):
    """Make the patched service hand out one JSON document listing two URLs.

    ``getChatDocumentsFromDocumentList`` returns a single stand-in document
    and ``getFileData`` returns the UTF-8 encoded search-results JSON that
    the crawl action is fed with in the crawl tests.
    """
    search_results_json = {
        "documentData": {
            "results": [
                {"url": "https://en.wikipedia.org/wiki/Age_of_Earth"},
                {"url": "https://www.planetary.org/articles/how-old-is-the-earth"},
            ]
        }
    }
    mock_service.getChatDocumentsFromDocumentList.return_value = [
        type("MockDoc", (), {"fileId": "test-file-id", "fileName": "test-search-results.json"})()
    ]
    mock_service.getFileData.return_value = json.dumps(search_results_json).encode(
        "utf-8"
    )


@pytest.mark.asyncio
@pytest.mark.expensive
async def test_method_web_search_live():
    """Tests method web search with live API calls."""
    logger.info("=" * 50)
    logger.info("==> Test: Method Web Search Live")
    method_web = MethodWeb(serviceCenter=None)

    # Actual request
    action_result = await method_web.search(
        {"query": "How old is the earth", "maxResults": 5}
    )

    # Evaluate results
    _check_and_log_result(action_result)


@pytest.mark.asyncio
async def test_method_web_search_dummy():
    """Tests method web search with dummy response data - no external API calls."""
    logger.info("=" * 50)
    logger.info("==> Test: Method Web Search Dummy")
    method_web = MethodWeb(serviceCenter=None)

    # Mock the Tavily API response
    with patch(
        "tavily.AsyncTavilyClient.search",
        return_value=RESPONSE_SEARCH_HOW_OLD_IS_EARTH_NO_ANSWER,
    ) as mock_client:
        action_result = await method_web.search(
            {"query": "How old is the earth", "maxResults": 5}
        )
        mock_client.assert_called_once()

    # Evaluate results
    _check_and_log_result(action_result)


@pytest.mark.asyncio
@pytest.mark.expensive
async def test_method_web_crawl_live():
    """Tests method web crawl with live API calls."""
    logger.info("=" * 50)
    logger.info("==> Test: Method Web Crawl Live")
    method_web = MethodWeb(serviceCenter=None)

    # Only the service center is mocked; the crawl itself is a live call.
    with patch.object(method_web, "service") as mock_service:
        _stub_search_results_document(mock_service)

        # Actual request
        action_result = await method_web.crawl({"documentList": "test-document-list-ref"})

    # Evaluate results
    _check_and_log_result(action_result)


@pytest.mark.asyncio
async def test_method_web_crawl_dummy():
    """Tests method web crawl with dummy response data - no external API calls."""
    logger.info("=" * 50)
    logger.info("==> Test: Method Web Crawl Dummy")
    method_web = MethodWeb(serviceCenter=None)

    # Mock both the service center and Tavily API
    with (
        patch.object(method_web, "service") as mock_service,
        patch(
            "tavily.AsyncTavilyClient.extract",
            return_value=RESPONSE_EXTRACT_HOW_OLD_IS_EARTH_NO_ANSWER,
        ) as mock_client,
    ):
        _stub_search_results_document(mock_service)
        action_result = await method_web.crawl({"documentList": "test-document-list-ref"})
        mock_client.assert_called_once()

    # Evaluate results
    _check_and_log_result(action_result)


@pytest.mark.asyncio
@pytest.mark.expensive
async def test_method_web_scrape_live():
    """Tests method web scrape with live API calls."""
    logger.info("=" * 50)
    logger.info("==> Test: Method Web Scrape Live")
    method_web = MethodWeb(serviceCenter=None)

    # Actual request
    action_result = await method_web.scrape(
        {"query": "How old is the earth", "maxResults": 3}
    )

    # Evaluate results
    _check_and_log_result(action_result)


@pytest.mark.asyncio
async def test_method_web_scrape_dummy():
    """Tests method web scrape with dummy response data - no external API calls."""
    logger.info("=" * 50)
    logger.info("==> Test: Method Web Scrape Dummy")
    method_web = MethodWeb(serviceCenter=None)

    # Mock both Tavily API responses (search + extract)
    with (
        patch(
            "tavily.AsyncTavilyClient.search",
            return_value=RESPONSE_SEARCH_HOW_OLD_IS_EARTH_NO_ANSWER,
        ) as mock_search,
        patch(
            "tavily.AsyncTavilyClient.extract",
            return_value=RESPONSE_EXTRACT_HOW_OLD_IS_EARTH_NO_ANSWER,
        ) as mock_extract,
    ):
        action_result = await method_web.scrape(
            {"query": "How old is the earth", "maxResults": 3}
        )
        mock_search.assert_called_once()
        mock_extract.assert_called_once()

    # Evaluate results
    _check_and_log_result(action_result)