sharepoint implemented

This commit is contained in:
parent 8726cd4fb8
commit 5e00b4bd07

24 changed files with 1024 additions and 4608 deletions
@@ -341,7 +341,7 @@ class DocumentExtraction:
# Use documentUtility for mime type
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
return [ContentItem(
label="main",
data=content,

@@ -360,7 +360,7 @@ class DocumentExtraction:
"""Process CSV document with robust encoding detection"""
try:
content = self._robustTextDecode(fileData, fileName)
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
return [ContentItem(
label="main",
data=content,

@@ -380,7 +380,7 @@ class DocumentExtraction:
try:
content = self._robustTextDecode(fileData, fileName)
jsonData = json.loads(content)
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
return [ContentItem(
label="main",
data=content,

@@ -399,7 +399,7 @@ class DocumentExtraction:
"""Process XML document with robust encoding detection"""
try:
content = self._robustTextDecode(fileData, fileName)
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
return [ContentItem(
label="main",
data=content,

@@ -418,7 +418,7 @@ class DocumentExtraction:
"""Process HTML document with robust encoding detection"""
try:
content = self._robustTextDecode(fileData, fileName)
mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
return [ContentItem(
label="main",
data=content,

@@ -512,7 +512,7 @@ class DocumentExtraction:
# Combine all meaningful content
final_content = "\n".join(meaningful_content)

mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
mime_type = getMimeTypeFromExtension(getFileExtension(fileName))
return [ContentItem(
label="svg_content",
data=final_content,

@@ -98,26 +98,12 @@ class DocumentGenerator:
logger.info(f"Document {document_name} has content: {len(content)} characters")

# Create file in system
file_id = self.service.createFile(
fileName=document_name,
mimeType=mime_type,
content=content,
base64encoded=False
)
if not file_id:
logger.error(f"Failed to create file for document {document_name}")
continue

logger.info(f"Created file with ID: {file_id}")

# Create document object using existing file ID
# Create document with file in one step
document = self.service.createDocument(
fileName=document_name,
mimeType=mime_type,
content=content,
base64encoded=False,
existing_file_id=file_id
base64encoded=False
)
if document:
# Set workflow context on the document if possible

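A minimal sketch of the simplified call path above, assuming the ServiceCenter.createDocument signature shown later in this diff; document_name, mime_type and content come from the generator loop and are only illustrative here:

```python
# One call now creates both the file and the ChatDocument;
# the separate createFile()/existing_file_id round-trip is gone.
document = self.service.createDocument(
    fileName=document_name,
    mimeType=mime_type,
    content=content,
    base64encoded=False,  # content is plain text at this point, not base64
)
if document:
    # Set workflow context on the document if possible (as in the hunk above)
    ...
```
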
@@ -1,51 +1,160 @@
import json
import logging
import os
from typing import Any, Dict

logger = logging.getLogger(__name__)

def getFileExtension(fileName: str) -> str:
"""Extract file extension from fileName"""
"""Extract file extension from fileName (without dot, lowercased)."""
if '.' in fileName:
return fileName.rsplit('.', 1)[-1].lower()
return ''

def getMimeTypeFromExtension(extension: str, service=None) -> str:
"""Get MIME type based on file extension. Optionally use a service for mapping."""
if service:
return service.getMimeTypeFromExtension(extension)
# Fallback mapping
mapping = {
def getMimeTypeFromExtension(extension: str) -> str:
"""
Get MIME type based on file extension.
This method consolidates MIME type detection from extension.

Args:
extension: File extension (with or without dot)

Returns:
str: MIME type for the extension
"""
# Normalize extension (remove dot if present)
if extension.startswith('.'):
extension = extension[1:]

# Map extensions to MIME types
mime_types = {
'txt': 'text/plain',
'md': 'text/markdown',
'html': 'text/html',
'css': 'text/css',
'js': 'application/javascript',
'json': 'application/json',
'csv': 'text/csv',
'xml': 'application/xml',
'csv': 'text/csv',
'html': 'text/html',
'htm': 'text/html',
'md': 'text/markdown',
'py': 'text/x-python',
'js': 'application/javascript',
'css': 'text/css',
'pdf': 'application/pdf',
'doc': 'application/msword',
'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'xls': 'application/vnd.ms-excel',
'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'png': 'image/png',
'ppt': 'application/vnd.ms-powerpoint',
'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'svg': 'image/svg+xml',
'jpg': 'image/jpeg',
'jpeg': 'image/jpeg',
'png': 'image/png',
'gif': 'image/gif',
'svg': 'image/svg+xml',
'bmp': 'image/bmp',
'webp': 'image/webp',
'zip': 'application/zip',
'rar': 'application/x-rar-compressed',
'7z': 'application/x-7z-compressed',
'tar': 'application/x-tar',
'gz': 'application/gzip'
}
return mapping.get(extension.lower(), 'application/octet-stream')
return mime_types.get(extension.lower(), 'application/octet-stream')

def detectContentTypeFromData(fileData: bytes, fileName: str) -> str:
"""
Detect content type from file data and fileName.
This method makes the MIME type detection function accessible through the service center.

Args:
fileData: Raw file data as bytes
fileName: Name of the file

Returns:
str: Detected MIME type
"""
try:
# Check file extension first
ext = os.path.splitext(fileName)[1].lower()
if ext:
# Map common extensions to MIME types
extToMime = {
'.txt': 'text/plain',
'.md': 'text/markdown',
'.csv': 'text/csv',
'.json': 'application/json',
'.xml': 'application/xml',
'.js': 'application/javascript',
'.py': 'application/x-python',
'.svg': 'image/svg+xml',
'.jpg': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.png': 'image/png',
'.gif': 'image/gif',
'.bmp': 'image/bmp',
'.webp': 'image/webp',
'.pdf': 'application/pdf',
'.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'.doc': 'application/msword',
'.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'.xls': 'application/vnd.ms-excel',
'.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'.ppt': 'application/vnd.ms-powerpoint',
'.html': 'text/html',
'.htm': 'text/html',
'.css': 'text/css',
'.zip': 'application/zip',
'.rar': 'application/x-rar-compressed',
'.7z': 'application/x-7z-compressed',
'.tar': 'application/x-tar',
'.gz': 'application/gzip'
}
if ext in extToMime:
return extToMime[ext]

# Try to detect from content
if fileData.startswith(b'%PDF'):
return 'application/pdf'
elif fileData.startswith(b'PK\x03\x04'):
# ZIP-based formats (docx, xlsx, pptx)
return 'application/zip'
elif fileData.startswith(b'<'):
# XML-based formats
try:
text = fileData.decode('utf-8', errors='ignore')
if '<svg' in text.lower():
return 'image/svg+xml'
elif '<html' in text.lower():
return 'text/html'
else:
return 'application/xml'
except:
pass
elif fileData.startswith(b'\x89PNG\r\n\x1a\n'):
return 'image/png'
elif fileData.startswith(b'\xff\xd8\xff'):
return 'image/jpeg'
elif fileData.startswith(b'GIF87a') or fileData.startswith(b'GIF89a'):
return 'image/gif'
elif fileData.startswith(b'BM'):
return 'image/bmp'
elif fileData.startswith(b'RIFF') and fileData[8:12] == b'WEBP':
return 'image/webp'

return 'application/octet-stream'

except Exception as e:
logger.error(f"Error detecting content type from data: {str(e)}")
return 'application/octet-stream'

def detectMimeTypeFromData(file_bytes: bytes, fileName: str, service=None) -> str:
"""Detect MIME type from file bytes and fileName using a service if provided."""
try:
if service:
if service and hasattr(service, 'detectContentTypeFromData'):
detected = service.detectContentTypeFromData(file_bytes, fileName)
if detected and detected != 'application/octet-stream':
return detected
# Fallback: guess from extension
ext = getFileExtension(fileName)
return getMimeTypeFromExtension(ext, service)
# Fallback: use our consolidated function
return detectContentTypeFromData(file_bytes, fileName)
except Exception as e:
logger.warning(f"Error in MIME type detection for {fileName}: {str(e)}")
return 'application/octet-stream'

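A small usage sketch of the consolidated documentUtility helpers above; the file name and byte string are made-up examples, only the function names and their behaviour come from this diff:

```python
from modules.chat.documents.documentUtility import (
    getFileExtension,
    getMimeTypeFromExtension,
    detectMimeTypeFromData,
)

# Extension-based lookup (hypothetical file name)
ext = getFileExtension("Report.PDF")          # -> "pdf"
mime = getMimeTypeFromExtension(ext)          # -> "application/pdf"

# Data-based detection checks the extension first, then falls back to
# magic-byte sniffing; unknown input ends up as application/octet-stream.
data = b"%PDF-1.7 ..."                        # hypothetical file bytes
mime = detectMimeTypeFromData(data, "Report.PDF")  # -> "application/pdf"
```
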
@@ -108,7 +108,7 @@ class HandlingTasks:
# Log the full task planning prompt being sent to AI for debugging
logger.info("=== TASK PLANNING PROMPT SENT TO AI ===")
logger.info(f"User Input: {userInput}")
logger.info(f"Available Documents: {len(available_docs) if available_docs else 0}")
logger.info(f"Available Documents: {available_docs}")
logger.info("=== FULL TASK PLANNING PROMPT ===")
logger.info(task_planning_prompt)
logger.info("=== END TASK PLANNING PROMPT ===")

@@ -312,12 +312,8 @@ class HandlingTasks:
# Log available resources for debugging
logger.info("=== AVAILABLE RESOURCES FOR ACTION GENERATION ===")
logger.info(f"Available Documents: {len(available_docs) if available_docs else 0}")
if available_docs:
for i, doc in enumerate(available_docs[:5]): # Show first 5
logger.info(f" Doc {i+1}: {doc}")
if len(available_docs) > 5:
logger.info(f" ... and {len(available_docs) - 5} more documents")
logger.info(f"Available Documents: {available_docs}")
# Note: available_docs is now a string description, not a list
logger.info(f"Available Connections: {len(available_connections) if available_connections else 0}")
if available_connections:
for i, conn in enumerate(available_connections[:5]): # Show first 5

@@ -376,7 +372,7 @@ class HandlingTasks:
logger.info(f"Task Step ID: {action_context.task_step.id if action_context.task_step else 'None'}")
logger.info(f"Task Step Objective: {action_context.task_step.objective if action_context.task_step else 'None'}")
logger.info(f"Workflow ID: {action_context.workflow_id}")
logger.info(f"Available Documents Count: {len(action_context.available_documents) if action_context.available_documents else 0}")
logger.info(f"Available Documents: {action_context.available_documents or 'No documents available'}")
logger.info(f"Available Connections Count: {len(action_context.available_connections) if action_context.available_connections else 0}")
logger.info(f"Previous Results Count: {len(action_context.previous_results) if action_context.previous_results else 0}")
logger.info(f"Retry Count: {action_context.retry_count}")

@@ -1,3 +0,0 @@

@@ -20,13 +20,13 @@ def createTaskPlanningPrompt(context: TaskContext, service) -> str:
user_request = context.task_step.objective if context.task_step else 'No request specified'

# Extract available documents from context - use Pydantic model directly
available_documents = context.available_documents or []
available_documents = context.available_documents or "No documents available"

return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages.

USER REQUEST: {user_request}

AVAILABLE DOCUMENTS: {', '.join(available_documents)}
AVAILABLE DOCUMENTS: {available_documents}

INSTRUCTIONS:
1. Analyze the user request and available documents

@@ -14,6 +14,7 @@ from modules.interfaces.interfaceChatModel import ActionResult
from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
from modules.chat.documents.documentExtraction import DocumentExtraction
from modules.chat.documents.documentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
from modules.chat.methodBase import MethodBase
from modules.shared.timezoneUtils import get_utc_timestamp
import uuid
@@ -111,165 +112,9 @@ class ServiceCenter:
|
|||
except Exception as e:
|
||||
logger.error(f"Error discovering methods: {str(e)}")
|
||||
|
||||
def detectContentTypeFromData(self, fileData: bytes, fileName: str) -> str:
|
||||
"""
|
||||
Detect content type from file data and fileName.
|
||||
This method makes the MIME type detection function accessible through the service center.
|
||||
|
||||
Args:
|
||||
fileData: Raw file data as bytes
|
||||
fileName: Name of the file
|
||||
|
||||
Returns:
|
||||
str: Detected MIME type
|
||||
"""
|
||||
try:
|
||||
# Check file extension first
|
||||
ext = os.path.splitext(fileName)[1].lower()
|
||||
if ext:
|
||||
# Map common extensions to MIME types
|
||||
extToMime = {
|
||||
'.txt': 'text/plain',
|
||||
'.md': 'text/markdown',
|
||||
'.csv': 'text/csv',
|
||||
'.json': 'application/json',
|
||||
'.xml': 'application/xml',
|
||||
'.js': 'application/javascript',
|
||||
'.py': 'application/x-python',
|
||||
'.svg': 'image/svg+xml',
|
||||
'.jpg': 'image/jpeg',
|
||||
'.jpeg': 'image/jpeg',
|
||||
'.png': 'image/png',
|
||||
'.gif': 'image/gif',
|
||||
'.bmp': 'image/bmp',
|
||||
'.webp': 'image/webp',
|
||||
'.pdf': 'application/pdf',
|
||||
'.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'.doc': 'application/msword',
|
||||
'.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'.xls': 'application/vnd.ms-excel',
|
||||
'.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
'.ppt': 'application/vnd.ms-powerpoint',
|
||||
'.html': 'text/html',
|
||||
'.htm': 'text/html',
|
||||
'.css': 'text/css',
|
||||
'.zip': 'application/zip',
|
||||
'.rar': 'application/x-rar-compressed',
|
||||
'.7z': 'application/x-7z-compressed',
|
||||
'.tar': 'application/x-tar',
|
||||
'.gz': 'application/gzip'
|
||||
}
|
||||
if ext in extToMime:
|
||||
return extToMime[ext]
|
||||
|
||||
# Try to detect from content
|
||||
if fileData.startswith(b'%PDF'):
|
||||
return 'application/pdf'
|
||||
elif fileData.startswith(b'PK\x03\x04'):
|
||||
# ZIP-based formats (docx, xlsx, pptx)
|
||||
return 'application/zip'
|
||||
elif fileData.startswith(b'<'):
|
||||
# XML-based formats
|
||||
try:
|
||||
text = fileData.decode('utf-8', errors='ignore')
|
||||
if '<svg' in text.lower():
|
||||
return 'image/svg+xml'
|
||||
elif '<html' in text.lower():
|
||||
return 'text/html'
|
||||
else:
|
||||
return 'application/xml'
|
||||
except:
|
||||
pass
|
||||
elif fileData.startswith(b'\x89PNG\r\n\x1a\n'):
|
||||
return 'image/png'
|
||||
elif fileData.startswith(b'\xff\xd8\xff'):
|
||||
return 'image/jpeg'
|
||||
elif fileData.startswith(b'GIF87a') or fileData.startswith(b'GIF89a'):
|
||||
return 'image/gif'
|
||||
elif fileData.startswith(b'BM'):
|
||||
return 'image/bmp'
|
||||
elif fileData.startswith(b'RIFF') and fileData[8:12] == b'WEBP':
|
||||
return 'image/webp'
|
||||
|
||||
return 'application/octet-stream'
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error detecting content type from data: {str(e)}")
|
||||
return 'application/octet-stream'
|
||||
|
||||
def getMimeTypeFromExtension(self, extension: str) -> str:
|
||||
"""
|
||||
Get MIME type based on file extension.
|
||||
This method consolidates MIME type detection from extension.
|
||||
|
||||
Args:
|
||||
extension: File extension (with or without dot)
|
||||
|
||||
Returns:
|
||||
str: MIME type for the extension
|
||||
"""
|
||||
# Normalize extension (remove dot if present)
|
||||
if extension.startswith('.'):
|
||||
extension = extension[1:]
|
||||
|
||||
# Map extensions to MIME types
|
||||
mime_types = {
|
||||
'txt': 'text/plain',
|
||||
'json': 'application/json',
|
||||
'xml': 'application/xml',
|
||||
'csv': 'text/csv',
|
||||
'html': 'text/html',
|
||||
'htm': 'text/html',
|
||||
'md': 'text/markdown',
|
||||
'py': 'text/x-python',
|
||||
'js': 'application/javascript',
|
||||
'css': 'text/css',
|
||||
'pdf': 'application/pdf',
|
||||
'doc': 'application/msword',
|
||||
'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'xls': 'application/vnd.ms-excel',
|
||||
'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'ppt': 'application/vnd.ms-powerpoint',
|
||||
'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
'svg': 'image/svg+xml',
|
||||
'jpg': 'image/jpeg',
|
||||
'jpeg': 'image/jpeg',
|
||||
'png': 'image/png',
|
||||
'gif': 'image/gif',
|
||||
'bmp': 'image/bmp',
|
||||
'webp': 'image/webp',
|
||||
'zip': 'application/zip',
|
||||
'rar': 'application/x-rar-compressed',
|
||||
'7z': 'application/x-7z-compressed',
|
||||
'tar': 'application/x-tar',
|
||||
'gz': 'application/gzip'
|
||||
}
|
||||
return mime_types.get(extension.lower(), 'application/octet-stream')
|
||||
|
||||
def getFileExtension(self, fileName: str) -> str:
|
||||
"""
|
||||
Extract file extension from fileName.
|
||||
|
||||
Args:
|
||||
fileName: Name of the file
|
||||
|
||||
Returns:
|
||||
str: File extension (without dot)
|
||||
"""
|
||||
if '.' in fileName:
|
||||
return fileName.split('.')[-1].lower()
|
||||
return "txt" # Default to text
|
||||
|
||||
def getFileExtension(self, fileName):
|
||||
"""
|
||||
Extract file extension from fileName (without dot, lowercased).
|
||||
Returns empty string if no extension is found.
|
||||
"""
|
||||
if '.' in fileName:
|
||||
return fileName.rsplit('.', 1)[-1].lower()
|
||||
return ''
|
||||
|
||||
# ===== Functions =====
|
||||
# ===== Functions for Prompts: Context =====
|
||||
|
||||
def getMethodsList(self) -> List[str]:
|
||||
"""Get list of available methods with their signatures in the required format"""
|
||||
|
|
@@ -283,157 +128,48 @@ class ServiceCenter:
|
|||
methodList.append(signature)
|
||||
return methodList
|
||||
|
||||
def generateDocumentLabel(self, document: ChatDocument, message: ChatMessage) -> str:
|
||||
"""Generate new document label: round+task+action+filename.extension"""
|
||||
async def summarizeChat(self, messages: List[ChatMessage]) -> str:
|
||||
"""
|
||||
Summarize chat messages from last to first message with status="first"
|
||||
|
||||
Args:
|
||||
messages: List of chat messages to summarize
|
||||
|
||||
Returns:
|
||||
str: Summary of the chat in user's language
|
||||
"""
|
||||
try:
|
||||
# Get workflow context from message
|
||||
round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1
|
||||
task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0
|
||||
action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0
|
||||
# Get messages from last to first, stopping at first message with status="first"
|
||||
relevantMessages = []
|
||||
for msg in reversed(messages):
|
||||
relevantMessages.append(msg)
|
||||
if msg.status == "first":
|
||||
break
|
||||
|
||||
# Get file extension from document's fileName property
|
||||
try:
|
||||
file_extension = self.getFileExtension(document.fileName)
|
||||
filename = document.fileName
|
||||
except Exception as e:
|
||||
# Try to diagnose and recover the issue
|
||||
diagnosis = self.diagnoseDocumentAccess(document)
|
||||
logger.error(f"Critical error: Cannot access document fileName for document {document.id}. Diagnosis: {diagnosis}")
|
||||
|
||||
# Attempt recovery
|
||||
if self.recoverDocumentAccess(document):
|
||||
try:
|
||||
file_extension = self.getFileExtension(document.fileName)
|
||||
filename = document.fileName
|
||||
logger.info(f"Document access recovered for {document.id}")
|
||||
except Exception as recovery_error:
|
||||
logger.error(f"Recovery failed for document {document.id}: {str(recovery_error)}")
|
||||
raise RuntimeError(f"Document {document.id} is permanently inaccessible after recovery attempt: {str(recovery_error)}")
|
||||
else:
|
||||
# Recovery failed - don't continue with invalid data
|
||||
raise RuntimeError(f"Document {document.id} is inaccessible and recovery failed. Diagnosis: {diagnosis}")
|
||||
|
||||
# Construct label: round1_task2_action3_filename.ext
|
||||
if file_extension:
|
||||
label = f"round{round_num}_task{task_num}_action{action_num}_{filename}"
|
||||
else:
|
||||
label = f"round{round_num}_task{task_num}_action{action_num}_{filename}"
|
||||
|
||||
return label
|
||||
except Exception as e:
|
||||
logger.error(f"Critical error generating document label for document {document.id}: {str(e)}")
|
||||
# Re-raise the error to prevent workflow from continuing with invalid data
|
||||
raise
|
||||
# Create prompt for AI
|
||||
prompt = f"""You are an AI assistant providing a summary of a chat conversation.
|
||||
Please respond in '{self.user.language}' language.
|
||||
|
||||
def getDocumentReferenceList(self) -> Dict[str, List[DocumentExchange]]:
|
||||
"""Get list of document exchanges with new labeling format, sorted by recency"""
|
||||
# Collect all documents first and refresh their attributes
|
||||
all_documents = []
|
||||
for message in self.workflow.messages:
|
||||
if message.documents:
|
||||
all_documents.extend(message.documents)
|
||||
|
||||
# Refresh file attributes for all documents
|
||||
if all_documents:
|
||||
self.refreshDocumentFileAttributes(all_documents)
|
||||
|
||||
chat_exchanges = []
|
||||
history_exchanges = []
|
||||
|
||||
# Process messages in reverse order; "first" marks boundary
|
||||
in_current_round = True
|
||||
for message in reversed(self.workflow.messages):
|
||||
is_first = message.status == "first" if hasattr(message, 'status') else False
|
||||
Chat History:
|
||||
{chr(10).join(f"- {msg.message}" for msg in reversed(relevantMessages))}
|
||||
|
||||
Instructions:
|
||||
1. Summarize the conversation's key points and outcomes
|
||||
2. Be concise but informative
|
||||
3. Use a professional but friendly tone
|
||||
4. Focus on important decisions and next steps if any
|
||||
|
||||
Please provide a comprehensive summary of this conversation."""
|
||||
|
||||
# Build a DocumentExchange if message has documents
|
||||
doc_exchange = None
|
||||
if message.documents:
|
||||
if message.actionId and message.documentsLabel:
|
||||
# Validate that we use the same label as in the message
|
||||
validated_label = self._validateDocumentLabelConsistency(message)
|
||||
|
||||
# Use the message's actual documentsLabel
|
||||
doc_refs = []
|
||||
for doc in message.documents:
|
||||
doc_ref = self.getDocumentReferenceFromChatDocument(doc, message)
|
||||
doc_refs.append(doc_ref)
|
||||
|
||||
doc_exchange = DocumentExchange(
|
||||
documentsLabel=validated_label,
|
||||
documents=doc_refs
|
||||
)
|
||||
else:
|
||||
# Generate new labels for documents without explicit labels
|
||||
doc_refs = []
|
||||
for doc in message.documents:
|
||||
doc_ref = self.getDocumentReferenceFromChatDocument(doc, message)
|
||||
doc_refs.append(doc_ref)
|
||||
|
||||
if doc_refs:
|
||||
# Create a label based on message context
|
||||
round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1
|
||||
task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0
|
||||
action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0
|
||||
context_label = f"round{round_num}_task{task_num}_action{action_num}_context"
|
||||
|
||||
doc_exchange = DocumentExchange(
|
||||
documentsLabel=context_label,
|
||||
documents=doc_refs
|
||||
)
|
||||
# Get summary using AI
|
||||
return await self.callAiTextBasic(prompt)
|
||||
|
||||
# Append to appropriate container based on boundary
|
||||
if doc_exchange:
|
||||
if in_current_round:
|
||||
chat_exchanges.append(doc_exchange)
|
||||
else:
|
||||
history_exchanges.append(doc_exchange)
|
||||
|
||||
# Flip boundary after including the "first" message in chat
|
||||
if in_current_round and is_first:
|
||||
in_current_round = False
|
||||
|
||||
# Sort by recency: most recent first, then current round, then earlier rounds
|
||||
# Sort chat exchanges by message sequence number (most recent first)
|
||||
chat_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x), reverse=True)
|
||||
# Sort history exchanges by message sequence number (most recent first)
|
||||
history_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x), reverse=True)
|
||||
|
||||
return {
|
||||
"chat": chat_exchanges,
|
||||
"history": history_exchanges
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Error summarizing chat: {str(e)}")
|
||||
return f"Error summarizing chat: {str(e)}"
|
||||
|
||||
# ===== Functions for Prompts + Actions: Document References generation and resolution =====
|
||||
|
||||
def _getMessageSequenceForExchange(self, exchange: DocumentExchange) -> int:
|
||||
"""Get message sequence number for sorting exchanges by recency"""
|
||||
try:
|
||||
# Extract message ID from the first document reference
|
||||
if exchange.documents and len(exchange.documents) > 0:
|
||||
first_doc_ref = exchange.documents[0]
|
||||
if first_doc_ref.startswith("docItem:"):
|
||||
# docItem:<id>:<label> - extract ID
|
||||
parts = first_doc_ref.split(':')
|
||||
if len(parts) >= 2:
|
||||
doc_id = parts[1]
|
||||
# Find the message containing this document
|
||||
for message in self.workflow.messages:
|
||||
if message.documents:
|
||||
for doc in message.documents:
|
||||
if doc.id == doc_id:
|
||||
return message.sequenceNr if hasattr(message, 'sequenceNr') else 0
|
||||
elif first_doc_ref.startswith("docList:"):
|
||||
# docList:<message_id>:<label> - extract message ID
|
||||
parts = first_doc_ref.split(':')
|
||||
if len(parts) >= 2:
|
||||
message_id = parts[1]
|
||||
# Find the message by ID
|
||||
for message in self.workflow.messages:
|
||||
if str(message.id) == message_id:
|
||||
return message.sequenceNr if hasattr(message, 'sequenceNr') else 0
|
||||
return 0
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting message sequence for exchange: {str(e)}")
|
||||
return 0
|
||||
|
||||
def getEnhancedDocumentContext(self) -> str:
|
||||
"""Get enhanced document context formatted for action planning prompts with proper docList and docItem references"""
|
||||
try:
|
||||
|
|
@@ -509,6 +245,144 @@ class ServiceCenter:
|
|||
logger.error(f"Error generating enhanced document context: {str(e)}")
|
||||
return "NO DOCUMENTS AVAILABLE - Error generating document context."
|
||||
|
||||
def getDocumentReferenceList(self) -> Dict[str, List[DocumentExchange]]:
|
||||
"""Get list of document exchanges with new labeling format, sorted by recency"""
|
||||
# Collect all documents first and refresh their attributes
|
||||
all_documents = []
|
||||
for message in self.workflow.messages:
|
||||
if message.documents:
|
||||
all_documents.extend(message.documents)
|
||||
|
||||
# Refresh file attributes for all documents
|
||||
if all_documents:
|
||||
self._refreshDocumentFileAttributes(all_documents)
|
||||
|
||||
chat_exchanges = []
|
||||
history_exchanges = []
|
||||
|
||||
# Process messages in reverse order; "first" marks boundary
|
||||
in_current_round = True
|
||||
for message in reversed(self.workflow.messages):
|
||||
is_first = message.status == "first" if hasattr(message, 'status') else False
|
||||
|
||||
# Build a DocumentExchange if message has documents
|
||||
doc_exchange = None
|
||||
if message.documents:
|
||||
if message.actionId and message.documentsLabel:
|
||||
# Validate that we use the same label as in the message
|
||||
validated_label = self._validateDocumentLabelConsistency(message)
|
||||
|
||||
# Use the message's actual documentsLabel
|
||||
doc_refs = []
|
||||
for doc in message.documents:
|
||||
doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
|
||||
doc_refs.append(doc_ref)
|
||||
|
||||
doc_exchange = DocumentExchange(
|
||||
documentsLabel=validated_label,
|
||||
documents=doc_refs
|
||||
)
|
||||
else:
|
||||
# Generate new labels for documents without explicit labels
|
||||
doc_refs = []
|
||||
for doc in message.documents:
|
||||
doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
|
||||
doc_refs.append(doc_ref)
|
||||
|
||||
if doc_refs:
|
||||
# Create a label based on message context
|
||||
context_prefix = self._generateWorkflowContextPrefix(message)
|
||||
context_label = f"{context_prefix}_context"
|
||||
|
||||
doc_exchange = DocumentExchange(
|
||||
documentsLabel=context_label,
|
||||
documents=doc_refs
|
||||
)
|
||||
|
||||
# Append to appropriate container based on boundary
|
||||
if doc_exchange:
|
||||
if in_current_round:
|
||||
chat_exchanges.append(doc_exchange)
|
||||
else:
|
||||
history_exchanges.append(doc_exchange)
|
||||
|
||||
# Flip boundary after including the "first" message in chat
|
||||
if in_current_round and is_first:
|
||||
in_current_round = False
|
||||
|
||||
# Sort by recency: most recent first, then current round, then earlier rounds
|
||||
# Sort chat exchanges by message sequence number (most recent first)
|
||||
chat_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x), reverse=True)
|
||||
# Sort history exchanges by message sequence number (most recent first)
|
||||
history_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x), reverse=True)
|
||||
|
||||
return {
|
||||
"chat": chat_exchanges,
|
||||
"history": history_exchanges
|
||||
}
|
||||
|
||||
def _refreshDocumentFileAttributes(self, documents: List[ChatDocument]) -> None:
|
||||
"""Update file attributes (fileName, fileSize, mimeType) for documents"""
|
||||
for doc in documents:
|
||||
try:
|
||||
file_item = self.interfaceComponent.getFile(doc.fileId)
|
||||
if file_item:
|
||||
doc.fileName = file_item.fileName
|
||||
doc.fileSize = file_item.fileSize
|
||||
doc.mimeType = file_item.mimeType
|
||||
else:
|
||||
logger.warning(f"File not found for document {doc.id}, fileId: {doc.fileId}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error refreshing file attributes for document {doc.id}: {e}")
|
||||
|
||||
def _generateWorkflowContextPrefix(self, message: ChatMessage) -> str:
|
||||
"""Generate workflow context prefix: round{num}_task{num}_action{num}"""
|
||||
round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1
|
||||
task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0
|
||||
action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0
|
||||
return f"round{round_num}_task{task_num}_action{action_num}"
|
||||
|
||||
def _getDocumentReferenceFromChatDocument(self, document: ChatDocument, message: ChatMessage) -> str:
|
||||
"""Get document reference using document ID and filename."""
|
||||
try:
|
||||
# Use document ID and filename for simple reference
|
||||
return f"docItem:{document.id}:{document.fileName}"
|
||||
except Exception as e:
|
||||
logger.error(f"Critical error creating document reference for document {document.id}: {str(e)}")
|
||||
# Re-raise the error to prevent workflow from continuing with invalid data
|
||||
raise
|
||||
|
||||
def _getMessageSequenceForExchange(self, exchange: DocumentExchange) -> int:
|
||||
"""Get message sequence number for sorting exchanges by recency"""
|
||||
try:
|
||||
# Extract message ID from the first document reference
|
||||
if exchange.documents and len(exchange.documents) > 0:
|
||||
first_doc_ref = exchange.documents[0]
|
||||
if first_doc_ref.startswith("docItem:"):
|
||||
# docItem:<id>:<label> - extract ID
|
||||
parts = first_doc_ref.split(':')
|
||||
if len(parts) >= 2:
|
||||
doc_id = parts[1]
|
||||
# Find the message containing this document
|
||||
for message in self.workflow.messages:
|
||||
if message.documents:
|
||||
for doc in message.documents:
|
||||
if doc.id == doc_id:
|
||||
return message.sequenceNr if hasattr(message, 'sequenceNr') else 0
|
||||
elif first_doc_ref.startswith("docList:"):
|
||||
# docList:<message_id>:<label> - extract message ID
|
||||
parts = first_doc_ref.split(':')
|
||||
if len(parts) >= 2:
|
||||
message_id = parts[1]
|
||||
# Find the message by ID
|
||||
for message in self.workflow.messages:
|
||||
if str(message.id) == message_id:
|
||||
return message.sequenceNr if hasattr(message, 'sequenceNr') else 0
|
||||
return 0
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting message sequence for exchange: {str(e)}")
|
||||
return 0
|
||||
|
||||
def _validateDocumentLabelConsistency(self, message) -> str:
|
||||
"""Validate that the document label used for references matches the message's actual label"""
|
||||
if not hasattr(message, 'documentsLabel') or not message.documentsLabel:
|
||||
|
|
@@ -571,27 +445,6 @@ class ServiceCenter:
|
|||
logger.error(f"Error extracting document info from reference: {str(e)}")
|
||||
return None
|
||||
|
||||
def getDocumentReferenceFromChatDocument(self, document: ChatDocument, message: ChatMessage) -> str:
|
||||
"""Get document reference using document ID and filename."""
|
||||
try:
|
||||
# Use document ID and filename for simple reference
|
||||
return f"docItem:{document.id}:{document.fileName}"
|
||||
except Exception as e:
|
||||
logger.error(f"Critical error creating document reference for document {document.id}: {str(e)}")
|
||||
# Re-raise the error to prevent workflow from continuing with invalid data
|
||||
raise
|
||||
|
||||
def getDocumentListReferenceFromChatMessage(self, message: ChatMessage) -> str:
|
||||
"""Get document list reference using message ID and label."""
|
||||
try:
|
||||
# Use message ID and documentsLabel for document list reference
|
||||
label = getattr(message, 'documentsLabel', f"message_{message.id}")
|
||||
return f"docList:{message.id}:{label}"
|
||||
except Exception as e:
|
||||
logger.error(f"Critical error creating document list reference for message {message.id}: {str(e)}")
|
||||
# Re-raise the error to prevent workflow from continuing with invalid data
|
||||
raise
|
||||
|
||||
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
|
||||
"""Get ChatDocuments from a list of document references using all three formats."""
|
||||
try:
|
||||
|
|
@@ -731,6 +584,8 @@ class ServiceCenter:
|
|||
logger.error(f"Error getting documents from document list: {str(e)}")
|
||||
return []
|
||||
|
||||
# ===== Functions for Prompts + Actions: Connection References generation and resolution =====
|
||||
|
||||
def getConnectionReferenceList(self) -> List[str]:
|
||||
"""Get list of all UserConnection objects as references with enhanced state information"""
|
||||
connections = []
|
||||
|
|
@@ -827,46 +682,8 @@ class ServiceCenter:
|
|||
logger.error(f"Error parsing connection reference: {str(e)}")
|
||||
return None
|
||||
|
||||
async def summarizeChat(self, messages: List[ChatMessage]) -> str:
|
||||
"""
|
||||
Summarize chat messages from last to first message with status="first"
|
||||
|
||||
Args:
|
||||
messages: List of chat messages to summarize
|
||||
# ===== Functions for Actions: AI calls =====
|
||||
|
||||
Returns:
|
||||
str: Summary of the chat in user's language
|
||||
"""
|
||||
try:
|
||||
# Get messages from last to first, stopping at first message with status="first"
|
||||
relevantMessages = []
|
||||
for msg in reversed(messages):
|
||||
relevantMessages.append(msg)
|
||||
if msg.status == "first":
|
||||
break
|
||||
|
||||
# Create prompt for AI
|
||||
prompt = f"""You are an AI assistant providing a summary of a chat conversation.
|
||||
Please respond in '{self.user.language}' language.
|
||||
|
||||
Chat History:
|
||||
{chr(10).join(f"- {msg.message}" for msg in reversed(relevantMessages))}
|
||||
|
||||
Instructions:
|
||||
1. Summarize the conversation's key points and outcomes
|
||||
2. Be concise but informative
|
||||
3. Use a professional but friendly tone
|
||||
4. Focus on important decisions and next steps if any
|
||||
|
||||
Please provide a comprehensive summary of this conversation."""
|
||||
|
||||
# Get summary using AI
|
||||
return await self.callAiTextBasic(prompt)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error summarizing chat: {str(e)}")
|
||||
return f"Error summarizing chat: {str(e)}"
|
||||
|
||||
async def callAiTextAdvanced(self, prompt: str, context: str = None) -> str:
|
||||
"""Advanced text processing using Anthropic, with fallback to OpenAI basic if advanced fails."""
|
||||
max_retries = 3
|
||||
|
|
@@ -959,6 +776,8 @@ Please provide a comprehensive summary of this conversation."""
|
|||
|
||||
return response
|
||||
|
||||
# ===== Functions for Actions: Data management =====
|
||||
|
||||
def getFileInfo(self, fileId: str) -> Dict[str, Any]:
|
||||
"""Get file information"""
|
||||
file_item = self.interfaceComponent.getFile(fileId)
|
||||
|
|
@@ -997,11 +816,11 @@ Please provide a comprehensive summary of this conversation."""
|
|||
mimeType = document.mimeType
|
||||
except Exception as e:
|
||||
# Try to diagnose and recover the issue
|
||||
diagnosis = self.diagnoseDocumentAccess(document)
|
||||
diagnosis = self._diagnoseDocumentAccess(document)
|
||||
logger.error(f"Critical error: Cannot access document properties for document {document.id}. Diagnosis: {diagnosis}")
|
||||
|
||||
# Attempt recovery
|
||||
if self.recoverDocumentAccess(document):
|
||||
if self._recoverDocumentAccess(document):
|
||||
try:
|
||||
fileName = document.fileName
|
||||
mimeType = document.mimeType
|
||||
|
|
@@ -1031,9 +850,78 @@ Please provide a comprehensive summary of this conversation."""
|
|||
except Exception as e:
|
||||
logger.error(f"Error extracting from document: {str(e)}")
|
||||
raise
|
||||
|
||||
def createFile(self, fileName: str, mimeType: str, content: str, base64encoded: bool = False) -> str:
|
||||
"""Create new file and return its ID"""
|
||||
|
||||
def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
|
||||
"""
|
||||
Diagnose document access issues and provide recovery information.
|
||||
This method helps identify why document properties are inaccessible.
|
||||
"""
|
||||
try:
|
||||
diagnosis = {
|
||||
'document_id': document.id,
|
||||
'file_id': document.fileId,
|
||||
'has_component_interface': document._componentInterface is not None,
|
||||
'component_interface_type': type(document._componentInterface).__name__ if document._componentInterface else None,
|
||||
'file_exists': False,
|
||||
'file_info': None,
|
||||
'error_details': None
|
||||
}
|
||||
|
||||
# Check if component interface is set
|
||||
if not document._componentInterface:
|
||||
diagnosis['error_details'] = "Component interface not set - document cannot access file system"
|
||||
return diagnosis
|
||||
|
||||
# Try to access the file directly
|
||||
try:
|
||||
file_info = self.interfaceComponent.getFile(document.fileId)
|
||||
if file_info:
|
||||
diagnosis['file_exists'] = True
|
||||
diagnosis['file_info'] = {
|
||||
'fileName': file_info.fileName if hasattr(file_info, 'fileName') else 'N/A',
|
||||
'fileSize': file_info.fileSize if hasattr(file_info, 'fileSize') else 'N/A',
|
||||
'mimeType': file_info.mimeType if hasattr(file_info, 'mimeType') else 'N/A'
|
||||
}
|
||||
else:
|
||||
diagnosis['error_details'] = f"File with ID {document.fileId} not found in component interface"
|
||||
except Exception as e:
|
||||
diagnosis['error_details'] = f"Error accessing file {document.fileId}: {str(e)}"
|
||||
|
||||
return diagnosis
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
'document_id': document.id if hasattr(document, 'id') else 'unknown',
|
||||
'file_id': document.fileId if hasattr(document, 'fileId') else 'unknown',
|
||||
'error_details': f"Error during diagnosis: {str(e)}"
|
||||
}
|
||||
|
||||
def _recoverDocumentAccess(self, document: ChatDocument) -> bool:
|
||||
"""
|
||||
Attempt to recover document access by re-setting the component interface.
|
||||
Returns True if recovery was successful.
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Attempting to recover document access for document {document.id}")
|
||||
|
||||
# Re-set the component interface
|
||||
document.setComponentInterface(self.interfaceComponent)
|
||||
|
||||
# Test if we can now access the fileName
|
||||
try:
|
||||
test_fileName = document.fileName
|
||||
logger.info(f"Document access recovered for {document.id} -> {test_fileName}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Document access recovery failed for {document.id}: {str(e)}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during document access recovery for {document.id}: {str(e)}")
|
||||
return False
|
||||
|
||||
def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True) -> ChatDocument:
|
||||
"""Create document with file in one step - handles file creation internally"""
|
||||
# Convert content to bytes based on base64 flag
|
||||
if base64encoded:
|
||||
import base64
|
||||
|
|
@@ -1051,27 +939,16 @@ Please provide a comprehensive summary of this conversation."""
|
|||
# Then store the file data
|
||||
self.interfaceComponent.createFileData(file_item.id, content_bytes)
|
||||
|
||||
return file_item.id
|
||||
|
||||
def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, existing_file_id: str = None) -> ChatDocument:
|
||||
"""Create document AND file from file data object created by AI call"""
|
||||
# Use existing file ID if provided, otherwise create new file
|
||||
if existing_file_id:
|
||||
file_id = existing_file_id
|
||||
else:
|
||||
# First create the file and get its ID
|
||||
file_id = self.createFile(fileName, mimeType, content, base64encoded)
|
||||
|
||||
# Get file info to copy attributes
|
||||
file_info = self.getFileInfo(file_id)
|
||||
file_info = self.getFileInfo(file_item.id)
|
||||
if not file_info:
|
||||
logger.error(f"Could not get file info for fileId: {file_id}")
|
||||
raise ValueError(f"File info not found for fileId: {file_id}")
|
||||
logger.error(f"Could not get file info for fileId: {file_item.id}")
|
||||
raise ValueError(f"File info not found for fileId: {file_item.id}")
|
||||
|
||||
# Create document with all file attributes copied
|
||||
document = ChatDocument(
|
||||
id=str(uuid.uuid4()),
|
||||
fileId=file_id,
|
||||
fileId=file_item.id,
|
||||
fileName=file_info.get("fileName", fileName),
|
||||
fileSize=file_info.get("size", 0),
|
||||
mimeType=file_info.get("mimeType", mimeType)
|
||||
|
|
@@ -1079,6 +956,8 @@ Please provide a comprehensive summary of this conversation."""
|
|||
|
||||
return document
|
||||
|
||||
# ===== Internal public helper functions =====
|
||||
|
||||
def updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None:
|
||||
"""
|
||||
Centralized function to update workflow statistics in database and running workflow.
|
||||
|
|
@@ -1128,24 +1007,40 @@ Please provide a comprehensive summary of this conversation."""
|
|||
logger.error(f"Error calculating object size: {str(e)}")
|
||||
return 0
|
||||
|
||||
def getAvailableDocuments(self, workflow) -> List[str]:
|
||||
def getAvailableDocuments(self, workflow) -> str:
|
||||
"""
|
||||
Get list of available document fileNames from workflow with new labeling format.
|
||||
Get simple description of available documents for task planning.
|
||||
|
||||
Args:
|
||||
workflow: ChatWorkflow object
|
||||
|
||||
Returns:
|
||||
List[str]: List of document labels in new format
|
||||
str: Simple description of document availability
|
||||
"""
|
||||
documents = []
|
||||
total_documents = 0
|
||||
document_types = set()
|
||||
|
||||
for message in workflow.messages:
|
||||
for doc in message.documents:
|
||||
# Generate new label format
|
||||
label = self.generateDocumentLabel(doc, message)
|
||||
documents.append(label)
|
||||
return documents
|
||||
if message.documents:
|
||||
total_documents += len(message.documents)
|
||||
for doc in message.documents:
|
||||
try:
|
||||
file_extension = getFileExtension(doc.fileName)
|
||||
if file_extension:
|
||||
document_types.add(file_extension.upper())
|
||||
except:
|
||||
pass
|
||||
|
||||
if total_documents == 0:
|
||||
return "No documents available"
|
||||
elif len(document_types) == 0:
|
||||
return f"{total_documents} document(s) available"
|
||||
else:
|
||||
types_str = ", ".join(sorted(document_types))
|
||||
return f"{total_documents} document(s) available ({types_str} files)"
|
||||
|
||||
# ===== Functions for Manager: Execution Tools =====
|
||||
|
||||
async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""Execute a method action"""
|
||||
try:
|
||||
|
|
@@ -1193,6 +1088,8 @@ Please provide a comprehensive summary of this conversation."""
|
|||
"""Set user language for the service center"""
|
||||
self.user.language = language
|
||||
|
||||
# ===== Functions for Manager: Workflow Tools =====
|
||||
|
||||
def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
|
||||
"""Set current workflow context for document generation and routing"""
|
||||
try:
|
||||
|
|
@@ -1287,88 +1184,5 @@ Please provide a comprehensive summary of this conversation."""
|
|||
'workflowId': 'unknown'
|
||||
}
|
||||
|
||||
def refreshDocumentFileAttributes(self, documents: List[ChatDocument]) -> None:
|
||||
"""Update file attributes (fileName, fileSize, mimeType) for documents"""
|
||||
for doc in documents:
|
||||
try:
|
||||
file_item = self.interfaceComponent.getFile(doc.fileId)
|
||||
if file_item:
|
||||
doc.fileName = file_item.fileName
|
||||
doc.fileSize = file_item.fileSize
|
||||
doc.mimeType = file_item.mimeType
|
||||
else:
|
||||
logger.warning(f"File not found for document {doc.id}, fileId: {doc.fileId}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error refreshing file attributes for document {doc.id}: {e}")
|
||||
|
||||
def diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
|
||||
"""
|
||||
Diagnose document access issues and provide recovery information.
|
||||
This method helps identify why document properties are inaccessible.
|
||||
"""
|
||||
try:
|
||||
diagnosis = {
|
||||
'document_id': document.id,
|
||||
'file_id': document.fileId,
|
||||
'has_component_interface': document._componentInterface is not None,
|
||||
'component_interface_type': type(document._componentInterface).__name__ if document._componentInterface else None,
|
||||
'file_exists': False,
|
||||
'file_info': None,
|
||||
'error_details': None
|
||||
}
|
||||
|
||||
# Check if component interface is set
|
||||
if not document._componentInterface:
|
||||
diagnosis['error_details'] = "Component interface not set - document cannot access file system"
|
||||
return diagnosis
|
||||
|
||||
# Try to access the file directly
|
||||
try:
|
||||
file_info = self.interfaceComponent.getFile(document.fileId)
|
||||
if file_info:
|
||||
diagnosis['file_exists'] = True
|
||||
diagnosis['file_info'] = {
|
||||
'fileName': file_info.fileName if hasattr(file_info, 'fileName') else 'N/A',
|
||||
'fileSize': file_info.fileSize if hasattr(file_info, 'fileSize') else 'N/A',
|
||||
'mimeType': file_info.mimeType if hasattr(file_info, 'mimeType') else 'N/A'
|
||||
}
|
||||
else:
|
||||
diagnosis['error_details'] = f"File with ID {document.fileId} not found in component interface"
|
||||
except Exception as e:
|
||||
diagnosis['error_details'] = f"Error accessing file {document.fileId}: {str(e)}"
|
||||
|
||||
return diagnosis
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
'document_id': document.id if hasattr(document, 'id') else 'unknown',
|
||||
'file_id': document.fileId if hasattr(document, 'fileId') else 'unknown',
|
||||
'error_details': f"Error during diagnosis: {str(e)}"
|
||||
}
|
||||
|
||||
def recoverDocumentAccess(self, document: ChatDocument) -> bool:
|
||||
"""
|
||||
Attempt to recover document access by re-setting the component interface.
|
||||
Returns True if recovery was successful.
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Attempting to recover document access for document {document.id}")
|
||||
|
||||
# Re-set the component interface
|
||||
document.setComponentInterface(self.interfaceComponent)
|
||||
|
||||
# Test if we can now access the fileName
|
||||
try:
|
||||
test_fileName = document.fileName
|
||||
logger.info(f"Document access recovered for {document.id} -> {test_fileName}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Document access recovery failed for {document.id}: {str(e)}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during document access recovery for {document.id}: {str(e)}")
|
||||
return False
|
||||
|
||||
# Create singleton instance
|
||||
serviceObject = None
|
||||
|
|
|
|||
|
|
@@ -732,7 +732,7 @@ class TaskContext(BaseModel, ModelMixin):
workflow_id: Optional[str] = None

# Available resources
available_documents: Optional[list[str]] = []
available_documents: Optional[str] = "No documents available"
available_connections: Optional[list[str]] = []

# Previous execution state

@@ -755,8 +755,8 @@ class TaskContext(BaseModel, ModelMixin):
criteria_progress: Optional[dict] = None

def getDocumentReferences(self) -> List[str]:
"""Get all available document references"""
docs = self.available_documents or []
"""Get all available document references from previous handover"""
docs = []
if self.previous_handover:
for doc_exchange in self.previous_handover.inputDocuments:
docs.extend(doc_exchange.documents)

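The same change surfaces here: available_documents is now a plain description string rather than a list of labels, so the planning prompt interpolates it directly. A short before/after sketch, using an example value in the format produced by getAvailableDocuments earlier in this diff:

```python
# Before: a list of document labels, joined into the prompt
#   AVAILABLE DOCUMENTS: {', '.join(available_documents)}
# After: a ready-made summary string, used as-is
available_documents = context.available_documents or "No documents available"
prompt_line = f"AVAILABLE DOCUMENTS: {available_documents}"
# e.g. "AVAILABLE DOCUMENTS: 2 document(s) available (CSV, PDF files)"
```
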
@@ -731,8 +731,6 @@ class MethodOutlook(MethodBase):
attachment_docs = self.service.getChatDocumentsFromDocumentList([attachment_ref])
if attachment_docs:
for doc in attachment_docs:

# Get the actual file content using fileId
file_id = getattr(doc, 'fileId', None)
if file_id:

@@ -757,15 +755,15 @@ class MethodOutlook(MethodBase):
"contentBytes": base64_content
}
message["attachments"].append(attachment)

else:
logger.warning(f"No content found for attachment: {doc.fileName}")
except Exception as e:
logger.error(f"Error reading attachment file {doc.fileName}: {str(e)}")
else:
logger.warning(f"Attachment document has no fileId: {doc.fileName}")
else:
logger.warning(f"No attachment documents found for reference: {attachment_ref}")
else:
logger.warning(f"No attachment documents found for reference: {attachment_ref}")

# Create the draft message
# First, get the Drafts folder ID to ensure the draft is created there

File diff suppressed because it is too large

@@ -141,7 +141,7 @@ class WorkflowManager:
self.chatManager.handlingTasks._checkWorkflowStopped()

# Create initial message using interface
# Generate the correct documentsLabel that matches what getDocumentReferenceList() will create
# Generate the correct documentsLabel that matches what getDocumentReferenceString will create
round_num = workflow.currentRound
task_num = 0
action_num = 0

@@ -2,7 +2,9 @@
TODO

# System
- web
- sharepoint to fix
- document handling centralized
- ai handling centralized
- neutralizer to activate AND put back placeholders to the returned data

# Tests

@@ -1,128 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Management Summary: Methoden-basierte Chat-Architektur</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: Arial, sans-serif;
|
||||
line-height: 1.6;
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
padding: 20px;
|
||||
color: #333;
|
||||
}
|
||||
h1 {
|
||||
color: #2c3e50;
|
||||
border-bottom: 2px solid #3498db;
|
||||
padding-bottom: 10px;
|
||||
}
|
||||
h2 {
|
||||
color: #2c3e50;
|
||||
margin-top: 30px;
|
||||
}
|
||||
.example {
|
||||
background-color: #f8f9fa;
|
||||
border-left: 4px solid #3498db;
|
||||
padding: 15px;
|
||||
margin: 20px 0;
|
||||
}
|
||||
.old-arch, .new-arch {
|
||||
margin: 15px 0;
|
||||
padding: 15px;
|
||||
border-radius: 5px;
|
||||
}
|
||||
.old-arch {
|
||||
background-color: #fff3cd;
|
||||
border: 1px solid #ffeeba;
|
||||
}
|
||||
.new-arch {
|
||||
background-color: #d4edda;
|
||||
border: 1px solid #c3e6cb;
|
||||
}
|
||||
.benefits {
|
||||
background-color: #e8f4f8;
|
||||
padding: 15px;
|
||||
border-radius: 5px;
|
||||
margin: 20px 0;
|
||||
}
|
||||
.benefits ul {
|
||||
margin: 10px 0;
|
||||
padding-left: 20px;
|
||||
}
|
||||
.benefits li {
|
||||
margin: 5px 0;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Management Summary: Methoden-basierte Chat-Architektur</h1>
|
||||
|
||||
<p>Die Umstellung von einer Agenten-basierten auf eine Methoden-basierte Chat-Architektur stellt einen fundamentalen Paradigmenwechsel dar. Während die Mehrheit der KI-Chat-Systeme weiterhin auf Agenten-Architekturen setzt, ermöglicht unser methoden-basierter Ansatz eine präzisere Kontrolle und effizientere Integration.</p>
|
||||
|
||||
<p>Der methoden-basierte Ansatz definiert klare, selbstbeschreibende Operationen mit festgelegten Parametern und Ergebnissen. Im Gegensatz zu Agenten, die als Blackbox-Operationen fungieren, bieten Methoden eine transparente, validierbare und vorhersehbare Ausführung. Diese Struktur ermöglicht eine präzise Fehlerbehandlung und Retry-Logik auf Aktions-Ebene, anstatt auf Agenten-Ebene.</p>
|
||||
|
||||
<p>Die Integration mit Benutzerdaten erfolgt direkt über definierte Authentifizierungspfade, was die Sicherheit erhöht und die Komplexität reduziert. Jede Methode ist selbstbeschreibend und enthält ihre eigenen Validierungsregeln, was die Wartbarkeit verbessert und die Entwicklung neuer Funktionen beschleunigt.</p>
|
||||
|
||||
<p>Der methoden-basierte Ansatz reduziert die KI-Abhängigkeit bei der Ausführung von Operationen, während die KI weiterhin für die Planung und Koordination der Methoden eingesetzt wird. Diese Trennung von Planung und Ausführung führt zu zuverlässigeren Ergebnissen und besserer Nachvollziehbarkeit.</p>
|
||||
|
||||
<p>Die Architektur ermöglicht eine präzise Dokumentation und Validierung jeder Operation, was in einer regulierten Umgebung von besonderem Wert ist. Die klare Struktur erleichtert die Integration neuer Dienste und die Erweiterung bestehender Funktionalitäten.</p>
|
||||
|
||||
<h2>Praktisches Beispiel: Dokumentenverarbeitung und E-Mail-Versand</h2>
|
||||
|
||||
<div class="example">
|
||||
<div class="old-arch">
|
||||
<strong>Old agent-based architecture:</strong><br>
|
||||
<pre>
|
||||
Benutzer: "Suche nach Verträgen im SharePoint und sende mir eine Zusammenfassung per E-Mail"
|
||||
|
||||
Agent SharePoint:
|
||||
- Searches for contracts
|
||||
- Extracts content
|
||||
- Stores results
|
||||
|
||||
Agent Outlook:
|
||||
- Reads results
|
||||
- Composes the email
|
||||
- Sends the email</pre>
|
||||
</div>
|
||||
|
||||
<div class="new-arch">
|
||||
<strong>New method-based architecture:</strong><br>
|
||||
<pre>
|
||||
Benutzer: "Suche nach Verträgen im SharePoint und sende mir eine Zusammenfassung per E-Mail"
|
||||
|
||||
Method catalog:
|
||||
1. SharePoint.searchDocuments
|
||||
- Parameters: {query: "Verträge", site: "valueon"}
|
||||
- Retry: 3x on network errors
|
||||
- Auth: MSFT
|
||||
|
||||
2. Document.extractContent
|
||||
- Parameters: {documents: [...], sections: ["Zusammenfassung"]}
|
||||
- Retry: 2x on extraction errors
|
||||
- Auth: LOCAL
|
||||
|
||||
3. Outlook.sendMail
|
||||
- Parameters: {to: ["user@example.com"], subject: "Vertragszusammenfassung"}
|
||||
- Retry: 1x on SMTP errors
|
||||
- Auth: MSFT</pre>
|
||||
</div>
|
||||
|
||||
<div class="benefits">
|
||||
<strong>Benefits in this example:</strong>
|
||||
<ul>
|
||||
<li>Each operation is clearly defined and validatable</li>
|
||||
<li>Retry logic is specific to each operation</li>
|
||||
<li>Authentication is defined explicitly</li>
|
||||
<li>Errors can be attributed precisely</li>
|
||||
<li>Operations can be tested independently of each other</li>
|
||||
<li>New operations can be added easily</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p>The AI plans the execution of these methods, but the actual execution is carried out by the defined methods with clear parameters and results. This leads to more reliable and more traceable execution.</p>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,999 +0,0 @@
|
|||
# Chat System Process Flow Specification
|
||||
|
||||
## 1. System Overview
|
||||
|
||||
### 1.1 Core Components
|
||||
- **WorkflowManager**: Orchestrates the overall workflow process
|
||||
- **ChatManager**: Manages chat interactions and task execution
|
||||
- **ServiceCenter**: Central state and context management
|
||||
- **AgentTask**: Core data object for task execution
|
||||
|
||||
### 1.2 Service Center Structure
|
||||
```python
|
||||
import asyncio
import json

from enum import Enum
|
||||
from typing import Dict, List, Optional, Any, Callable, Literal
|
||||
from datetime import datetime, UTC
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
class TaskStatus(str, Enum):
|
||||
PENDING = "pending"
|
||||
SUCCESS = "success"
|
||||
FAILED = "failed"
|
||||
RETRY = "retry"
|
||||
TIMEOUT = "timeout"
|
||||
ROLLBACK = "rollback"
|
||||
|
||||
class ActionStatus(str, Enum):
|
||||
PENDING = "pending"
|
||||
SUCCESS = "success"
|
||||
FAILED = "failed"
|
||||
RETRY = "retry"
|
||||
TIMEOUT = "timeout"
|
||||
SKIPPED = "skipped"
|
||||
DEPENDENCY_FAILED = "dependency_failed"
|
||||
|
||||
class AuthSource(str, Enum):
|
||||
LOCAL = "local"
|
||||
MSFT = "msft"
|
||||
GOOGLE = "google"
|
||||
# Add more auth sources as needed
|
||||
|
||||
class MethodParameter(BaseModel):
|
||||
"""Model for method parameters"""
|
||||
name: str
|
||||
type: str
|
||||
required: bool
|
||||
validation: Optional[Callable] = None
|
||||
description: str
|
||||
|
||||
class ActionResult(BaseModel):
|
||||
"""Model for method results"""
|
||||
success: bool
|
||||
data: Dict[str, Any]
|
||||
metadata: Dict[str, Any]
|
||||
validation: List[str]
|
||||
|
||||
class MethodBase:
|
||||
"""Base class for all methods"""
|
||||
|
||||
def __init__(self, service):
|
||||
self.service = service
|
||||
self.name: str
|
||||
self.description: str
|
||||
self.auth_source: AuthSource = AuthSource.LOCAL # Default to local auth
|
||||
|
||||
@property
|
||||
def actions(self) -> Dict[str, Dict[str, Any]]:
|
||||
"""Available actions and their parameters"""
|
||||
raise NotImplementedError
|
||||
|
||||
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> ActionResult:
|
||||
"""Execute method action with authentication data"""
|
||||
raise NotImplementedError
|
||||
|
||||
async def validate_parameters(self, action: str, parameters: Dict[str, Any]) -> bool:
|
||||
"""Validate action parameters"""
|
||||
if action not in self.actions:
|
||||
return False
|
||||
|
||||
action_def = self.actions[action]
|
||||
required_params = {k for k, v in action_def['parameters'].items() if v['required']}
|
||||
return all(param in parameters for param in required_params)
|
||||
|
||||
async def rollback(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> None:
|
||||
"""Rollback action if needed"""
|
||||
pass
|
||||
|
||||
class Action(BaseModel):
|
||||
"""Action model with validation"""
|
||||
method: str
|
||||
action: str
|
||||
parameters: Dict[str, Any]
|
||||
retryCount: int = 0
|
||||
retryMax: int
|
||||
status: ActionStatus = ActionStatus.PENDING
|
||||
timeout: Optional[int] = None
|
||||
dependencies: List[str] = []
|
||||
rollback_on_failure: bool = False
|
||||
auth_source: Optional[AuthSource] = None # Auth source for this action
|
||||
|
||||
class Config:
|
||||
use_enum_values = True
|
||||
|
||||
class AgentTask(BaseModel):
|
||||
"""Task model with validation"""
|
||||
id: str
|
||||
workflowId: str
|
||||
status: TaskStatus = TaskStatus.PENDING
|
||||
userInput: str
|
||||
dataList: List[Dict[str, str]] # List of available connections
|
||||
actionList: List[Action]
|
||||
chatHistory: str
|
||||
taskHistory: str
|
||||
previousTaskFeedback: Optional[str]
|
||||
thisTaskFeedback: Optional[str]
|
||||
result: Optional[Dict[str, Any]]
|
||||
documentsInput: List[Dict]
|
||||
documentsOutput: List[Dict]
|
||||
startedAt: str
|
||||
finishedAt: Optional[str]
|
||||
error: Optional[str]
|
||||
dependencies: List[str] = []
|
||||
requiredOutputs: List[str] = []
|
||||
|
||||
class Config:
|
||||
use_enum_values = True
|
||||
|
||||
def get_auth_data(self, auth_source: AuthSource) -> Optional[Dict[str, Any]]:
|
||||
"""Get authentication data for the specified source"""
|
||||
return next(
|
||||
(conn for conn in self.dataList if conn.get('source') == auth_source),
|
||||
None
|
||||
)
|
||||
|
||||
def get_action_by_id(self, action_id: str) -> Optional[Action]:
|
||||
"""Get action by its ID (method:action)"""
|
||||
return next((a for a in self.actionList if f"{a.method}:{a.action}" == action_id), None)
|
||||
|
||||
def can_execute_action(self, action: Action) -> bool:
|
||||
"""Check if action can be executed based on dependencies and auth"""
|
||||
# Check dependencies
|
||||
if action.dependencies:
|
||||
if not all(
|
||||
self.get_action_by_id(dep).status == ActionStatus.SUCCESS
|
||||
for dep in action.dependencies
|
||||
):
|
||||
return False
|
||||
|
||||
# Check authentication
|
||||
if action.auth_source and action.auth_source != AuthSource.LOCAL:
|
||||
if not self.get_auth_data(action.auth_source):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def is_complete(self) -> bool:
|
||||
"""Check if all actions are complete"""
|
||||
return all(a.status in [ActionStatus.SUCCESS, ActionStatus.SKIPPED]
|
||||
for a in self.actionList)
|
||||
|
||||
def has_failed(self) -> bool:
|
||||
"""Check if any action has failed"""
|
||||
return any(a.status == ActionStatus.FAILED for a in self.actionList)
|
||||
|
||||
class ServiceCenter:
|
||||
"""Service center with improved state management"""
|
||||
|
||||
def __init__(self):
|
||||
self.state = {
|
||||
'status': TaskStatus.PENDING,
|
||||
'retryCount': 0,
|
||||
'retryMax': 3,
|
||||
'timeout': 300, # 5 minutes
|
||||
'lastError': None,
|
||||
'lastErrorTime': None
|
||||
}
|
||||
self.methods: Dict[str, MethodBase] = {}
|
||||
self.tasks: Dict[str, AgentTask] = {}
|
||||
self.promptManager = AIPromptManager()
|
||||
self.taskStateManager = TaskStateManager()
|
||||
self.documentProcessor = DocumentExtraction()
|
||||
|
||||
async def execute_task(self, task: AgentTask) -> None:
|
||||
"""Execute task with improved error handling and timeout"""
|
||||
try:
|
||||
# Check for timeout
|
||||
if (datetime.now(UTC) - datetime.fromisoformat(task.startedAt)).seconds > self.state['timeout']:
|
||||
task.status = TaskStatus.TIMEOUT
|
||||
return
|
||||
|
||||
# Execute actions
|
||||
for action in task.actionList:
|
||||
if not task.can_execute_action(action):
|
||||
if action.auth_source and action.auth_source != AuthSource.LOCAL and not task.get_auth_data(action.auth_source):
|
||||
action.status = ActionStatus.FAILED
|
||||
task.error = f"Missing authentication for {action.auth_source}"
|
||||
else:
|
||||
action.status = ActionStatus.DEPENDENCY_FAILED
|
||||
continue
|
||||
|
||||
try:
|
||||
# Get method
|
||||
method = self.methods.get(action.method)
|
||||
if not method:
|
||||
raise ValueError(f"Unknown method: {action.method}")
|
||||
|
||||
# Validate parameters
|
||||
if not await method.validate_parameters(action.action, action.parameters):
|
||||
raise ValueError(f"Invalid parameters for {action.method}:{action.action}")
|
||||
|
||||
# Get auth data if needed
|
||||
auth_data = None
|
||||
if action.auth_source and action.auth_source != AuthSource.LOCAL:
|
||||
auth_data = task.get_auth_data(action.auth_source)
|
||||
if not auth_data:
|
||||
raise ValueError(f"Missing authentication data for {action.auth_source}")
|
||||
|
||||
# Execute with timeout
|
||||
result = await asyncio.wait_for(
|
||||
method.execute(action.action, action.parameters, auth_data),
|
||||
timeout=action.timeout or 60
|
||||
)
|
||||
|
||||
if result.success:
|
||||
action.status = ActionStatus.SUCCESS
|
||||
else:
|
||||
if self._should_retry(result.data.get('error')):
|
||||
action.retryCount += 1
|
||||
if action.retryCount > action.retryMax:
|
||||
action.status = ActionStatus.FAILED
|
||||
if action.rollback_on_failure:
|
||||
await method.rollback(action.action, action.parameters, auth_data)
|
||||
else:
|
||||
action.status = ActionStatus.RETRY
|
||||
else:
|
||||
action.status = ActionStatus.FAILED
|
||||
if action.rollback_on_failure:
|
||||
await method.rollback(action.action, action.parameters, auth_data)
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
action.status = ActionStatus.TIMEOUT
|
||||
except Exception as e:
|
||||
action.status = ActionStatus.FAILED
|
||||
if action.rollback_on_failure:
|
||||
await method.rollback(action.action, action.parameters, auth_data)
|
||||
|
||||
# Update task status
|
||||
if task.has_failed():
|
||||
task.status = TaskStatus.FAILED
|
||||
elif task.is_complete():
|
||||
task.status = TaskStatus.SUCCESS
|
||||
task.finishedAt = datetime.now(UTC).isoformat()
|
||||
|
||||
except Exception as e:
|
||||
task.status = TaskStatus.FAILED
|
||||
task.error = str(e)
|
||||
|
||||
class AIPromptManager:
|
||||
"""Manages AI prompts and response validation"""
|
||||
|
||||
def generatePrompt(self, context: Dict[str, Any], examples: List[Dict]) -> str:
|
||||
"""Generate a context-aware prompt with few-shot examples"""
|
||||
prompt = (
|
||||
f"Task: {context['task']}\n"
|
||||
f"Document: {context['document']['name']} ({context['document']['type']})\n"
|
||||
"Examples:\n"
|
||||
)
|
||||
for ex in examples:
|
||||
prompt += f"- {ex['input']} => {ex['output']}\n"
|
||||
prompt += "Extract the most relevant information for the task above."
|
||||
return prompt
|
||||
|
||||
def validateResponse(self, response: str, schema: Dict) -> bool:
|
||||
"""Validate AI response against a schema"""
|
||||
import jsonschema
|
||||
try:
|
||||
jsonschema.validate(instance=json.loads(response), schema=schema)
|
||||
return True
|
||||
except (jsonschema.ValidationError, json.JSONDecodeError):
|
||||
return False
|
||||
|
||||
class TaskStateManager:
|
||||
"""Manages task state and retry tracking"""
|
||||
|
||||
def __init__(self):
|
||||
self.taskStates = {}
|
||||
|
||||
def trackState(self, task: AgentTask):
|
||||
"""Track task state"""
|
||||
self.taskStates[task.id] = {
|
||||
"status": task.status,
|
||||
"retryState": getattr(task, "retryState", {}),
|
||||
"history": getattr(task, "history", [])
|
||||
}
|
||||
|
||||
def canRetry(self, task: AgentTask, method: str) -> bool:
|
||||
"""Check if task can be retried"""
|
||||
retryState = self.taskStates[task.id].get("retryState", {})
|
||||
return retryState.get(method, 0) < getattr(task, "retryMax", 3)
|
||||
|
||||
class DocumentContext(BaseModel):
|
||||
"""Model for document context"""
|
||||
id: str
|
||||
extractionHistory: List[Dict]
|
||||
relevantSections: List[str]
|
||||
processingStatus: Dict[str, str]
|
||||
|
||||
class DocumentExtraction:
|
||||
"""Processes documents with context awareness"""
|
||||
|
||||
def process_with_context(self, doc: Dict, context: DocumentContext) -> Dict:
|
||||
"""Process document with context"""
|
||||
extracted = {}
|
||||
for section in context.relevantSections:
|
||||
extracted[section] = doc.get(section)
|
||||
return extracted
|
||||
|
||||
def track_extraction(self, doc: Dict, extraction: Dict):
|
||||
"""Track document extraction"""
|
||||
if 'extractionHistory' not in doc:
|
||||
doc['extractionHistory'] = []
|
||||
doc['extractionHistory'].append(extraction)
|
||||
|
||||
class ErrorRecovery(BaseModel):
|
||||
"""Model for error recovery strategies"""
|
||||
strategy: str # e.g., "retry", "fallback", "skip"
|
||||
fallbackActions: List[str]
|
||||
contextPreservation: bool
|
||||
|
||||
```

### 1.3 Method-Based Module Structure
|
||||
```python
|
||||
# Example: methodSharepoint.py
|
||||
class MethodSharepoint:
|
||||
"""SharePoint method implementation"""
|
||||
|
||||
def __init__(self, service):
|
||||
self.service = service
|
||||
self.name = "sharepoint"
|
||||
self.description = "Search and process SharePoint documents"
|
||||
self.auth_source = AuthSource.MSFT # Requires Microsoft authentication
|
||||
|
||||
@property
|
||||
def actions(self) -> Dict[str, Dict[str, Any]]:
|
||||
"""Available actions and their parameters"""
|
||||
return {
|
||||
"search": {
|
||||
"description": "Search SharePoint documents",
|
||||
"retryMax": 3,
|
||||
"timeout": 30,
|
||||
"parameters": {
|
||||
"query": {"type": "string", "required": True},
|
||||
"site": {"type": "string", "required": False},
|
||||
"folder": {"type": "string", "required": False},
|
||||
"maxResults": {"type": "number", "required": False}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async def execute(self, action: str, parameters: Dict[str, Any], auth_data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
||||
"""Execute SharePoint method"""
|
||||
if not auth_data:
|
||||
return {"success": False, "error": "Missing Microsoft authentication"}
|
||||
|
||||
if action == "search":
|
||||
return await self._searchDocuments(parameters, auth_data)
|
||||
return {"success": False, "error": f"Unknown action: {action}"}
|
||||
|
||||
async def _searchDocuments(self, parameters: Dict[str, Any], auth_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Search SharePoint documents"""
|
||||
# Implementation using existing SharePoint agent functionality
|
||||
pass
|
||||
|
||||
# Example: methodOutlook.py
|
||||
class MethodOutlook:
|
||||
"""Outlook method implementation"""
|
||||
|
||||
def __init__(self, service):
|
||||
self.service = service
|
||||
self.name = "outlook"
|
||||
self.description = "Handle Outlook email operations"
|
||||
|
||||
@property
|
||||
def actions(self) -> Dict[str, Dict[str, Any]]:
|
||||
"""Available actions and their parameters"""
|
||||
return {
|
||||
"readMails": {
|
||||
"description": "Read emails from specified folder",
|
||||
"retryMax": 2, # Action-specific retry limit
|
||||
"parameters": {
|
||||
"folder": {"type": "string", "required": False},
|
||||
"unreadOnly": {"type": "boolean", "required": False},
|
||||
"fromAddress": {"type": "string", "required": False},
|
||||
"maxResults": {"type": "number", "required": False}
|
||||
}
|
||||
},
|
||||
"sendMail": {
|
||||
"description": "Send an email",
|
||||
"retryMax": 1, # Action-specific retry limit
|
||||
"parameters": {
|
||||
"to": {"type": "array", "items": "string", "required": True},
|
||||
"subject": {"type": "string", "required": True},
|
||||
"body": {"type": "string", "required": True},
|
||||
"attachments": {"type": "array", "items": "FileRef", "required": False}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async def execute(self, action: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Execute Outlook method"""
|
||||
if action == "readMails":
|
||||
return await self._readMails(parameters)
|
||||
elif action == "sendMail":
|
||||
return await self._sendMail(parameters)
|
||||
return {"success": False, "error": f"Unknown action: {action}"}
|
||||
```
|
||||
|
||||
### 1.4 Key Data Objects
|
||||
```python
|
||||
class ChatWorkflow:
|
||||
id: str
|
||||
mandateId: str
|
||||
status: str
|
||||
name: Optional[str]
|
||||
currentRound: int
|
||||
lastActivity: str
|
||||
startedAt: str
|
||||
logs: List[ChatLog]
|
||||
messages: List[ChatMessage]
|
||||
stats: Optional[ChatStat]
|
||||
tasks: List[Task]
|
||||
|
||||
class AgentTask:
|
||||
id: str
|
||||
workflowId: str
|
||||
status: str # pending, success, failed, retry
|
||||
userInput: str # AI-processed summary
|
||||
dataList: List[Dict[str, str]] # User connections
|
||||
actionList: List[Dict[str, Any]] # Actions to execute, e.g.:
|
||||
# [
|
||||
# {
|
||||
# "method": "sharepoint",
|
||||
# "action": "search",
|
||||
# "parameters": {
|
||||
# "query": "offerings",
|
||||
# "site": "valueon"
|
||||
# },
|
||||
# "retryCount": 0,
|
||||
# "retryMax": 3,
|
||||
# "status": "pending" # pending, success, failed, retry
|
||||
# },
|
||||
# {
|
||||
# "method": "outlook",
|
||||
# "action": "sendMail",
|
||||
# "parameters": {
|
||||
# "to": ["user@example.com"],
|
||||
# "subject": "Offer Summary",
|
||||
# "body": "..."
|
||||
# },
|
||||
# "retryCount": 0,
|
||||
# "retryMax": 1,
|
||||
# "status": "pending"
|
||||
# }
|
||||
# ]
|
||||
chatHistory: str # Summary of previous messages
|
||||
taskHistory: str # Summary of previous tasks
|
||||
previousTaskFeedback: Optional[str]
|
||||
thisTaskFeedback: Optional[str]
|
||||
result: Optional[ChatMessage]
|
||||
documentsInput: List[Dict]
|
||||
documentsOutput: List[Dict]
|
||||
startedAt: str
|
||||
finishedAt: Optional[str]
|
||||
error: Optional[str]
|
||||
dependencies: List[str] = [] # Task dependencies
|
||||
requiredOutputs: List[str] = [] # Required outputs from dependencies
|
||||
```
|
||||
|
||||
## 2. Process Flow
|
||||
|
||||
### 2.1 Initialization Phase
|
||||
```mermaid
|
||||
graph TD
|
||||
A[User Input] --> B[WorkflowManager.workflowProcess]
|
||||
B --> C[ChatManager.initialize]
|
||||
C --> D[Create ServiceCenter]
|
||||
D --> E[Create Initial Task]
|
||||
```
|
||||
|
||||
1. **WorkflowManager.workflowProcess**
|
||||
- Receives user input and workflow
|
||||
- Initializes chat manager
|
||||
- Starts task processing loop
|
||||
|
||||
2. **ChatManager.initialize**
|
||||
- Creates ServiceCenter with all required components
|
||||
- Initializes service interfaces
|
||||
- Sets up task and state management
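
Taken together, the flow above can be summarized in a short sketch. This is illustrative only: the class and method names follow this specification, while the constructor signatures and the exact wiring are assumptions rather than the real implementation.

```python
# Sketch of the initialization flow described above (names follow this spec, signatures are assumed).

class ChatManager:
    def initialize(self, workflow) -> "ServiceCenter":
        """Create the ServiceCenter, register methods, and set up task/state management."""
        self.service = ServiceCenter()           # see section 1.2
        self.service.workflow = workflow
        self._registerMethods()                   # populates self.service.methods (section 3.1)
        return self.service


class WorkflowManager:
    async def workflowProcess(self, userInput, workflow) -> None:
        """Receive user input, initialize the chat manager, and run the task processing loop."""
        chat_manager = ChatManager()
        service = chat_manager.initialize(workflow)

        task = await chat_manager.createInitialTask(userInput)   # section 2.2
        while task is not None:
            await service.execute_task(task)                      # section 1.2
            task = await chat_manager.defineNextTask(task)        # section 2.4
```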
|
||||
|
||||
### 2.2 Task Creation Phase
|
||||
|
||||
1. **Create Initial Task**
|
||||
```python
|
||||
async def createInitialTask(self, userInput: UserInputRequest) -> AgentTask:
|
||||
# 1. Get available methods and their actions
|
||||
available_methods = self._getAvailableMethods()
|
||||
method_catalog = {
|
||||
method.name: {
|
||||
"description": method.description,
|
||||
"actions": method.actions
|
||||
}
|
||||
for method in available_methods
|
||||
}
|
||||
|
||||
# 2. Process user input with AI including document analysis
|
||||
processedInput = await self.service.model['callAiBasic'](
|
||||
f"""Analyze user request and documents:
|
||||
User Prompt: {userInput.prompt}
|
||||
Documents: {userInput.listFileId}
|
||||
|
||||
Available Methods:
|
||||
{json.dumps(method_catalog, indent=2)}
|
||||
|
||||
Please provide:
|
||||
1. Main objective
|
||||
2. Required actions (using available methods and their actions)
|
||||
3. Required data sources
|
||||
4. Document processing requirements
|
||||
5. Expected output format
|
||||
|
||||
Format your response as JSON:
|
||||
{{
|
||||
"objective": "string",
|
||||
"actions": [
|
||||
{{
|
||||
"method": "string",
|
||||
"action": "string",
|
||||
"parameters": {{
|
||||
"param1": "value1",
|
||||
"param2": "value2"
|
||||
}}
|
||||
}}
|
||||
],
|
||||
"dataSources": ["string"],
|
||||
"documentRequirements": ["string"],
|
||||
"outputFormat": "string"
|
||||
}}
|
||||
"""
|
||||
)
|
||||
|
||||
# 3. Create task with processed input and initialize action states
|
||||
actions = []
|
||||
for action in processedInput['actions']:
|
||||
method = next(m for m in available_methods if m.name == action['method'])
|
||||
action_info = method.actions[action['action']]
|
||||
actions.append({
|
||||
**action,
|
||||
"retryCount": 0,
|
||||
"retryMax": action_info['retryMax'],
|
||||
"status": "pending"
|
||||
})
|
||||
|
||||
task = AgentTask(
|
||||
workflowId=self.service.workflow.id,
|
||||
userInput=processedInput,
|
||||
dataList=self.service.context['dataConnections'],
|
||||
actionList=actions,
|
||||
chatHistory=await self.workflowSummarize(userInput),
|
||||
startedAt=datetime.now(UTC).isoformat()
|
||||
)
|
||||
|
||||
# 4. Store in service
|
||||
self.service.tasks['current'] = task
|
||||
return task
|
||||
```
|
||||
|
||||
### 2.3 Task Execution Phase
|
||||
|
||||
1. **Execute Task**
|
||||
```python
|
||||
async def executeTask(self, task: AgentTask) -> None:
|
||||
"""Execute task actions in sequence"""
|
||||
for action in task.actionList:
|
||||
if action['status'] == 'pending':
|
||||
try:
|
||||
# Get method instance
|
||||
method = self.service.methods[action['method']]
|
||||
|
||||
# Execute action
|
||||
result = await method.execute(
|
||||
action['action'],
|
||||
action['parameters']
|
||||
)
|
||||
|
||||
if result['success']:
|
||||
action['status'] = 'success'
|
||||
else:
|
||||
if self._shouldRetry(result['error']):
|
||||
action['retryCount'] += 1
|
||||
if action['retryCount'] > action['retryMax']:
|
||||
action['status'] = 'failed'
|
||||
task.status = 'failed'
|
||||
task.error = "Maximum retries exceeded"
|
||||
else:
|
||||
action['status'] = 'retry'
|
||||
task.status = 'retry'
|
||||
else:
|
||||
action['status'] = 'failed'
|
||||
task.status = 'failed'
|
||||
task.error = result['error']
|
||||
|
||||
except Exception as e:
|
||||
action['status'] = 'failed'
|
||||
task.status = 'failed'
|
||||
task.error = str(e)
|
||||
|
||||
# Update task status based on action status
|
||||
if action['status'] == 'failed':
|
||||
break
|
||||
|
||||
# Mark task as complete if all actions succeeded
|
||||
if all(a['status'] == 'success' for a in task.actionList):
|
||||
task.status = 'success'
|
||||
task.finishedAt = datetime.now(UTC).isoformat()
|
||||
```
|
||||
|
||||
### 2.4 Task Analysis Phase
|
||||
|
||||
1. **Define Next Task**
|
||||
```python
|
||||
async def defineNextTask(self, currentTask: AgentTask) -> Optional[AgentTask]:
|
||||
try:
|
||||
# 1. Analyze current task results using basic AI
|
||||
analysis = await self.service.model['callAiBasic'](
|
||||
f"""Analyze task results and determine next steps:
|
||||
Previous Feedback: {currentTask.previousTaskFeedback}
|
||||
Current Feedback: {currentTask.thisTaskFeedback}
|
||||
User Input: {currentTask.userInput}
|
||||
Current Documents: {currentTask.documentsOutput}
|
||||
|
||||
Please provide:
|
||||
1. Task completion status
|
||||
2. Next required actions
|
||||
3. Required documents
|
||||
4. Method recommendations
|
||||
|
||||
Format your response as JSON:
|
||||
{{
|
||||
"isComplete": boolean,
|
||||
"nextActions": ["string"],
|
||||
"requiredDocuments": ["string"],
|
||||
"recommendedMethods": ["string"]
|
||||
}}
|
||||
"""
|
||||
)
|
||||
|
||||
# 2. Parse and validate AI response
|
||||
analysis_data = json.loads(analysis)
|
||||
|
||||
# 3. Determine if next task needed
|
||||
if not analysis_data["isComplete"]:
|
||||
# 4. Create next task
|
||||
nextTask = self._createNextTask(currentTask, analysis_data)
|
||||
self.service.tasks['previous'] = currentTask
|
||||
self.service.tasks['current'] = nextTask
|
||||
return nextTask
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error defining next task: {str(e)}")
|
||||
return None
|
||||
```
|
||||
|
||||
## 3. Method Integration
|
||||
|
||||
### 3.1 Method Registration
|
||||
```python
|
||||
def _registerMethods(self):
|
||||
"""Register available methods in service center"""
|
||||
self.service.methods = {
|
||||
"sharepoint": MethodSharepoint(self.service),
|
||||
"outlook": MethodOutlook(self.service),
|
||||
"web": MethodWeb(self.service),
|
||||
"document": MethodDocument(self.service)
|
||||
}
|
||||
```
|
||||
|
||||
### 3.2 Method Execution
|
||||
```python
|
||||
async def _executeMethod(self, method: str, action: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Execute a method with parameters"""
|
||||
try:
|
||||
# Get method implementation
|
||||
method_impl = self.service.methods.get(method)
|
||||
if not method_impl:
|
||||
return {"success": False, "error": f"Unknown method: {method}"}
|
||||
|
||||
# Execute method
|
||||
return await method_impl.execute(action, parameters)
|
||||
|
||||
except Exception as e:
|
||||
return {"success": False, "error": str(e)}
|
||||
```
|
||||
|
||||
## 4. Error Handling
|
||||
|
||||
### 4.1 Error Types
|
||||
1. **AI Errors**
|
||||
- Model unavailable
|
||||
- Invalid response
|
||||
- Timeout
|
||||
|
||||
2. **Method Errors**
|
||||
- Invalid method
|
||||
- Execution failure
|
||||
- Resource unavailable
|
||||
|
||||
3. **Task Errors**
|
||||
- Invalid state
|
||||
- Missing data
|
||||
- Timeout
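
These categories are currently carried only as error strings. A hedged sketch of how they could be made explicit as exception types is shown below; these classes are not part of the current code base, and the retry logic in 4.2 continues to operate on message strings.

```python
# Sketch only: explicit exception types for the error categories above (not in the current code base).

class ChatSystemError(Exception):
    """Base class for chat system errors."""

class AiError(ChatSystemError):
    """Model unavailable, invalid response, or AI timeout."""

class MethodError(ChatSystemError):
    """Invalid method, execution failure, or resource unavailable."""

class TaskError(ChatSystemError):
    """Invalid task state, missing data, or task timeout."""
```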
|
||||
|
||||
### 4.2 Retry Logic
|
||||
```python
|
||||
def _shouldRetry(self, error: str) -> bool:
|
||||
"""Determine if error is retryable"""
|
||||
retryable_errors = [
|
||||
"AI down",
|
||||
"Document not found",
|
||||
"Content extraction failed"
|
||||
]
|
||||
return any(err in error for err in retryable_errors)
|
||||
|
||||
def _shouldCreateNextTask(self, analysis: Dict[str, Any]) -> bool:
|
||||
"""Determine if next task is needed based on AI analysis"""
|
||||
return not analysis.get("isComplete", True)
|
||||
```
|
||||
|
||||
## 5. AI Integration Points
|
||||
|
||||
### 5.1 User Input Processing
|
||||
```python
|
||||
async def _processUserInput(self, input: str, documents: List[str]) -> str:
|
||||
"""Process user input including document analysis"""
|
||||
context = {
|
||||
"task": "Process user input",
|
||||
"document": {"name": "User Input", "type": "text"}
|
||||
}
|
||||
examples = [
|
||||
{"input": "Search documents", "output": "Extract relevant information"}
|
||||
]
|
||||
prompt = self.service.promptManager.generatePrompt(context, examples)
|
||||
|
||||
return await self.service.model['callAiBasic'](
|
||||
f"""Analyze user request and documents:
|
||||
User Input: {input}
|
||||
Documents: {documents}
|
||||
|
||||
{prompt}
|
||||
|
||||
Please provide:
|
||||
1. Main objective
|
||||
2. Required actions
|
||||
3. Required data sources
|
||||
4. Document processing requirements
|
||||
5. Expected output format
|
||||
|
||||
Format your response as JSON:
|
||||
{{
|
||||
"objective": "string",
|
||||
"actions": ["string"],
|
||||
"dataSources": ["string"],
|
||||
"documentRequirements": ["string"],
|
||||
"outputFormat": "string"
|
||||
}}
|
||||
"""
|
||||
)
|
||||
```
|
||||
|
||||
### 5.2 Task Analysis
|
||||
```python
|
||||
async def _analyzeTaskResults(self, task: AgentTask) -> str:
|
||||
"""Analyze task results and determine next steps"""
|
||||
context = {
|
||||
"task": "Analyze task results",
|
||||
"document": {"name": "Task Results", "type": "json"}
|
||||
}
|
||||
examples = [
|
||||
{"input": "Task completed", "output": "Generate next steps"}
|
||||
]
|
||||
prompt = self.service.promptManager.generatePrompt(context, examples)
|
||||
|
||||
return await self.service.model['callAiBasic'](
|
||||
f"""Analyze task results and determine next steps:
|
||||
Task Input: {task.userInput}
|
||||
Previous Feedback: {task.previousTaskFeedback}
|
||||
Current Feedback: {task.thisTaskFeedback}
|
||||
Current Documents: {task.documentsOutput}
|
||||
|
||||
{prompt}
|
||||
|
||||
Please provide:
|
||||
1. Task completion status
|
||||
2. Next required actions
|
||||
3. Required documents
|
||||
4. Method recommendations
|
||||
|
||||
Format your response as JSON:
|
||||
{{
|
||||
"isComplete": boolean,
|
||||
"nextActions": ["string"],
|
||||
"requiredDocuments": ["string"],
|
||||
"recommendedMethods": ["string"]
|
||||
}}
|
||||
"""
|
||||
)
|
||||
```
|
||||
|
||||
### 5.3 Result Processing
|
||||
```python
|
||||
async def _processTaskResults(self, task: AgentTask) -> str:
|
||||
"""Process task results and generate feedback"""
|
||||
context = {
|
||||
"task": "Process task results",
|
||||
"document": {"name": "Task Results", "type": "json"}
|
||||
}
|
||||
examples = [
|
||||
{"input": "Task results", "output": "Generate summary"}
|
||||
]
|
||||
prompt = self.service.promptManager.generatePrompt(context, examples)
|
||||
|
||||
return await self.service.model['callAiBasic'](
|
||||
f"""Process task results and generate feedback:
|
||||
Task Input: {task.userInput}
|
||||
Method Results: {task.result}
|
||||
Generated Documents: {task.documentsOutput}
|
||||
|
||||
{prompt}
|
||||
|
||||
Please provide:
|
||||
1. Summary of completed actions
|
||||
2. Generated document descriptions
|
||||
3. Next steps or completion status
|
||||
|
||||
Format your response as JSON:
|
||||
{{
|
||||
"summary": "string",
|
||||
"documents": ["string"],
|
||||
"nextSteps": ["string"]
|
||||
}}
|
||||
"""
|
||||
)
|
||||
```
|
||||
|
||||
## 6. File Structure and Implementation Plan
|
||||
|
||||
### 6.1 File Structure
|
||||
```
|
||||
gateway/
|
||||
├── modules/
|
||||
│ ├── workflow/
|
||||
│ │ ├── managerWorkflow.py # Workflow management and state machine
|
||||
│ │ ├── managerChat.py # Chat management and AI response validation
|
||||
│ │ ├── managerPrompt.py # AI prompt generation and management
|
||||
│ │ ├── methodBase.py # Base method class with result validation
|
||||
│ │ └── documentExtraction.py # Document content extraction
|
||||
│ │
|
||||
│ ├── agents/ # To be refactored into methods
|
||||
│ │ ├── agentSharepoint.py → methods/methodSharepoint.py
|
||||
│ │ ├── agentOutlook.py → methods/methodOutlook.py
|
||||
│ │ ├── agentWebcrawler.py → methods/methodWeb.py
|
||||
│ │ ├── agentDocument.py → methods/methodDocument.py
|
||||
│ │ └── agentCoder.py → methods/methodCoder.py
|
||||
│ │
|
||||
│ ├── methods/ # New directory for method implementations
|
||||
│ │ ├── methodSharepoint.py # SharePoint operations
|
||||
│ │ ├── methodOutlook.py # Outlook operations
|
||||
│ │ ├── methodWeb.py # Web operations
|
||||
│ │ ├── methodDocument.py # Document operations
|
||||
│ │ ├── methodCoder.py # Code generation operations
|
||||
│ │ └── methodPowerpoint.py # PowerPoint operations
|
||||
│ │
|
||||
│ └── interfaces/
|
||||
│ ├── interfaceChatModel.py # Chat system models and enums
|
||||
│ └── interfaceAppModel.py # Application models including UserConnection
|
||||
```
|
||||
|
||||
### 6.2 Implementation Plan
|
||||
|
||||
#### Phase 1: Core Structure Setup
|
||||
1. **File Renaming and Organization**
|
||||
- Rename manager files to follow `manager*.py` pattern
|
||||
- Move document processor to `documentExtraction.py`
|
||||
- Create new `methods` directory
|
||||
|
||||
2. **Model Updates**
|
||||
- Update `interfaceChatModel.py` with new enums and models
|
||||
- Integrate `UserConnection` from `interfaceAppModel.py`
|
||||
- Update validation logic in respective modules
|
||||
|
||||
#### Phase 2: Method Migration
|
||||
1. **Base Method Implementation**
|
||||
- Implement `methodBase.py` with core functionality
|
||||
- Add method result validation
|
||||
- Set up authentication handling
|
||||
|
||||
2. **Agent to Method Conversion**
|
||||
- Convert each agent to its method implementation
|
||||
- Migrate functionality while maintaining existing behavior
|
||||
- Add method-specific validation
|
||||
|
||||
3. **New Method Implementation**
|
||||
- Implement `methodPowerpoint.py`
|
||||
- Add PowerPoint-specific operations
|
||||
- Integrate with document processing
|
||||
|
||||
#### Phase 3: Manager Updates
|
||||
1. **Chat Manager Enhancement**
|
||||
- Integrate AI response validation
|
||||
- Update service center structure
|
||||
- Improve error handling
|
||||
|
||||
2. **Document Manager Integration**
|
||||
- Update document operations for new method structure
|
||||
- Enhance content extraction capabilities
|
||||
- Improve file handling
|
||||
|
||||
3. **Workflow Manager Updates**
|
||||
- Update state machine for method-based approach
|
||||
- Improve task management
|
||||
- Enhance error recovery
|
||||
|
||||
#### Phase 4: Testing and Validation
|
||||
1. **Unit Testing**
|
||||
- Test each method implementation
|
||||
- Validate error handling
|
||||
- Verify authentication flow
|
||||
|
||||
2. **Integration Testing**
|
||||
- Test method interactions
|
||||
- Validate document processing
|
||||
- Verify workflow execution
|
||||
|
||||
3. **Performance Testing**
|
||||
- Measure response times
|
||||
- Validate resource usage
|
||||
- Test concurrent operations
|
||||
|
||||
#### Phase 5: Documentation and Cleanup
|
||||
1. **Documentation**
|
||||
- Update API documentation
|
||||
- Document method implementations
|
||||
- Add usage examples
|
||||
|
||||
2. **Code Cleanup**
|
||||
- Remove deprecated code
|
||||
- Clean up old agent files
|
||||
- Optimize imports
|
||||
|
||||
3. **Final Review**
|
||||
- Code review
|
||||
- Security audit
|
||||
- Performance optimization
|
||||
|
||||
### 6.3 Migration Strategy
|
||||
1. **Incremental Migration**
|
||||
- Migrate one agent at a time
|
||||
- Maintain backward compatibility
|
||||
- Use feature flags for gradual rollout
|
||||
|
||||
2. **Testing Strategy**
|
||||
- Unit tests for each method
|
||||
- Integration tests for workflows
|
||||
- End-to-end tests for complete scenarios
|
||||
|
||||
3. **Rollback Plan**
|
||||
- Keep old agent implementations until stable
|
||||
- Maintain version control
|
||||
- Document rollback procedures
|
||||
|
||||
### 6.4 Success Criteria
|
||||
1. **Functionality**
|
||||
- All existing features working
|
||||
- New method-based structure operational
|
||||
- Improved error handling
|
||||
|
||||
2. **Performance**
|
||||
- Equal or better response times
|
||||
- Reduced resource usage
|
||||
- Improved scalability
|
||||
|
||||
3. **Maintainability**
|
||||
- Clear code structure
|
||||
- Comprehensive documentation
|
||||
- Easy to extend
|
||||
|
||||
4. **Security**
|
||||
- Proper authentication handling
|
||||
- Secure data processing
|
||||
- Access control implementation
|
||||
test_ai_calls.md
|
|
@ -1,235 +0,0 @@
|
|||
# AI Call Functions Test and Content Size Analysis
|
||||
|
||||
## Overview
|
||||
This file documents the ServiceCenter AI functions that risk sending oversized content to the AI model,
|
||||
along with their usage patterns and potential size issues.
|
||||
|
||||
## High-Risk AI Functions
|
||||
|
||||
### 1. summarizeChat() -> callAiTextBasic()
|
||||
**Location**: gateway/modules/chat/handling/promptFactory.py:122
|
||||
**Risk Level**: MEDIUM
|
||||
**Content**: Entire workflow message history
|
||||
**Usage**:
|
||||
```python
|
||||
messageSummary = await service.summarizeChat(context.workflow.messages) if context.workflow else ""
|
||||
```
|
||||
**Potential Issues**:
|
||||
- Long conversations can generate very large summaries
|
||||
- Includes all previous messages in workflow
|
||||
- No size limits or truncation
|
||||
|
||||
### 2. callAiTextAdvanced() -> interfaceAiCalls.callAiTextAdvanced()
|
||||
**Risk Level**: HIGH
|
||||
**Multiple Usage Points**:
|
||||
|
||||
#### A. Task Planning (handlingTasks.py:116)
|
||||
```python
|
||||
prompt = await self.service.callAiTextAdvanced(task_planning_prompt)
|
||||
```
|
||||
**Content**: User input + document context + connection context + previous results
|
||||
**Risk**: VERY HIGH - includes all available documents and context
|
||||
|
||||
#### B. Action Definition (handlingTasks.py:388)
|
||||
```python
|
||||
prompt = await self.service.callAiTextAdvanced(action_prompt)
|
||||
```
|
||||
**Content**: Task context + available documents + connections + previous results
|
||||
**Risk**: HIGH - comprehensive context for action planning
|
||||
|
||||
#### C. Result Review (handlingTasks.py:894)
|
||||
```python
|
||||
response = await self.service.callAiTextAdvanced(prompt)
|
||||
```
|
||||
**Content**: Action results + success criteria + context
|
||||
**Risk**: MEDIUM-HIGH - depends on result size
|
||||
|
||||
#### D. Email Composition (methodOutlook.py:1609)
|
||||
```python
|
||||
composed_email = await self.service.interfaceAiCalls.callAiTextAdvanced(ai_prompt)
|
||||
```
|
||||
**Content**: Document content + email requirements
|
||||
**Risk**: MEDIUM - depends on document size
|
||||
|
||||
#### E. AI Processing (methodAi.py:175)
|
||||
```python
|
||||
result = await self.service.callAiTextAdvanced(enhanced_prompt, context)
|
||||
```
|
||||
**Content**: User prompt + extracted document content
|
||||
**Risk**: HIGH - includes full document content
|
||||
|
||||
### 3. callAiTextBasic() -> interfaceAiCalls.callAiTextBasic()
|
||||
**Risk Level**: MEDIUM
|
||||
**Multiple Usage Points**:
|
||||
|
||||
#### A. Document Format Conversion (methodDocument.py:429)
|
||||
```python
|
||||
formatted_content = await self.service.callAiTextBasic(ai_prompt, content)
|
||||
```
|
||||
**Content**: Document content + format requirements
|
||||
**Risk**: MEDIUM - depends on document size
|
||||
|
||||
#### B. HTML Report Generation (methodDocument.py:642)
|
||||
```python
|
||||
aiReport = await self.service.callAiTextBasic(aiPrompt, combinedContent)
|
||||
```
|
||||
**Content**: Combined content from multiple documents
|
||||
**Risk**: HIGH - combines multiple documents
|
||||
|
||||
#### C. AI Processing Fallback (methodAi.py:177)
|
||||
```python
|
||||
result = await self.service.callAiTextBasic(enhanced_prompt, context)
|
||||
```
|
||||
**Content**: User prompt + document context
|
||||
**Risk**: MEDIUM - includes document content
|
||||
|
||||
#### D. Document Content Processing (documentExtraction.py:1459)
|
||||
```python
|
||||
processedContent = await self._serviceCenter.callAiTextBasic(aiPrompt, contentToProcess)
|
||||
```
|
||||
**Content**: Document chunks + AI prompt
|
||||
**Risk**: MEDIUM - processes document chunks
|
||||
|
||||
### 4. extractContentFromDocument() -> documentProcessor.processFileData()
|
||||
**Risk Level**: HIGH
|
||||
**Multiple Usage Points**:
|
||||
|
||||
#### A. Document Content Extraction (methodDocument.py:74)
|
||||
```python
|
||||
extracted_content = await self.service.extractContentFromDocument(
|
||||
prompt=aiPrompt,
|
||||
document=chatDocument
|
||||
)
|
||||
```
|
||||
**Content**: Full document + extraction prompt
|
||||
**Risk**: HIGH - processes entire documents
|
||||
|
||||
#### B. HTML Report Generation (methodDocument.py:581)
|
||||
```python
|
||||
extracted_content = await self.service.extractContentFromDocument(
|
||||
prompt="Extract readable text content for HTML report generation",
|
||||
document=doc
|
||||
)
|
||||
```
|
||||
**Content**: Full document content
|
||||
**Risk**: HIGH - processes documents for reports
|
||||
|
||||
#### C. Email Composition (methodOutlook.py:1510)
|
||||
```python
|
||||
extracted_content = await self.service.extractContentFromDocument(
|
||||
prompt="Extract readable text content for email composition",
|
||||
document=doc
|
||||
)
|
||||
```
|
||||
**Content**: Full document content
|
||||
**Risk**: HIGH - processes documents for emails
|
||||
|
||||
#### D. AI Processing (methodAi.py:94)
|
||||
```python
|
||||
extracted_content = await self.service.extractContentFromDocument(
|
||||
prompt=extraction_prompt.strip(),
|
||||
document=doc
|
||||
)
|
||||
```
|
||||
**Content**: Full document content
|
||||
**Risk**: HIGH - processes documents for AI analysis
|
||||
|
||||
## Risk Assessment Summary
|
||||
|
||||
### CRITICAL RISK (Immediate Attention Required)
|
||||
1. **Task Planning** (handlingTasks.py:116) - Entire workflow context
|
||||
2. **Action Definition** (handlingTasks.py:388) - Comprehensive context
|
||||
3. **Document Processing** (all extractContentFromDocument calls) - Full documents
|
||||
4. **AI Method Processing** (methodAi.py:175) - Document content + context
|
||||
5. **Report Generation** (methodDocument.py:642) - Multiple documents combined
|
||||
|
||||
### HIGH RISK (Monitor Closely)
|
||||
1. **Chat Summarization** (promptFactory.py:122) - Message history
|
||||
2. **Document Format Conversion** (methodDocument.py:429) - Single documents
|
||||
3. **Email Composition** (methodOutlook.py:1609) - Document content
|
||||
|
||||
## Potential Issues
|
||||
|
||||
### Content Size Problems
|
||||
- Large documents (PDFs, Word docs, Excel files) can exceed AI model limits
|
||||
- Combined document content in reports can be massive
|
||||
- Long conversation histories in chat summarization
|
||||
- Full workflow context in task planning
|
||||
|
||||
### Performance Issues
|
||||
- Timeout errors for large content
|
||||
- Memory issues with large document processing
|
||||
- API rate limiting with large requests
|
||||
- Cost implications for large AI calls
|
||||
|
||||
### Error Scenarios
|
||||
- OpenAI API 400 errors (content too large)
|
||||
- Timeout errors (processing too slow)
|
||||
- Memory exhaustion (large document processing)
|
||||
- Incomplete processing (truncated content)
|
||||
|
||||
## Recommended Solutions
|
||||
|
||||
### 1. Content Size Limits
|
||||
- Implement maximum content size checks before AI calls (see the sketch below)
|
||||
- Truncate large content with appropriate warnings
|
||||
- Split large documents into chunks
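
A minimal sketch of such a size check. The character budget and the helper name are assumptions for illustration, not existing configuration values:

```python
import logging

logger = logging.getLogger(__name__)

MAX_AI_CONTENT_CHARS = 60_000  # assumed budget, not an actual configuration value

def enforce_content_limit(content: str, limit: int = MAX_AI_CONTENT_CHARS) -> str:
    """Truncate oversized content before an AI call and log the truncation."""
    if len(content) <= limit:
        return content
    logger.warning(f"Content truncated from {len(content)} to {limit} characters before AI call")
    return content[:limit] + "\n\n[... content truncated due to size limits ...]"
```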
|
||||
|
||||
### 2. Content Filtering
|
||||
- Remove unnecessary context from prompts
|
||||
- Filter out large binary content
|
||||
- Use document summaries instead of full content
|
||||
|
||||
### 3. Chunking Strategy
|
||||
- Process large documents in smaller chunks (sketched below)
|
||||
- Implement progressive processing
|
||||
- Use streaming for large responses
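
A possible chunking sketch, assuming the existing `callAiTextBasic(prompt, content)` call is run per chunk; the chunk size and the combination prompt are illustrative assumptions:

```python
def split_into_chunks(text: str, chunk_size: int = 20_000) -> list:
    """Split text into fixed-size character chunks (a token-based split would be more precise)."""
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

async def process_in_chunks(service, prompt: str, content: str) -> str:
    """Run the prompt over each chunk, then merge the partial results in a final pass."""
    partial_results = [
        await service.callAiTextBasic(prompt, chunk)
        for chunk in split_into_chunks(content)
    ]
    return await service.callAiTextBasic(
        "Combine the following partial results into one coherent answer.",
        "\n\n".join(partial_results),
    )
```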
|
||||
|
||||
### 4. Caching and Optimization
|
||||
- Cache processed document content (see the sketch below)
|
||||
- Reuse extracted content across operations
|
||||
- Implement smart content selection
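
A simple in-memory cache sketch for extracted content. It assumes each document carries an `id` and reuses the `extractContentFromDocument(prompt=..., document=...)` call shown earlier; cache key and eviction policy are left open:

```python
_extraction_cache = {}  # document id -> extracted text (in-memory only, no eviction)

async def get_extracted_content(service, document) -> str:
    """Extract document content once and reuse it across report, email, and AI operations."""
    doc_id = getattr(document, "id", None) or document.get("id")  # assumes the document exposes an id
    if doc_id not in _extraction_cache:
        _extraction_cache[doc_id] = await service.extractContentFromDocument(
            prompt="Extract readable text content",
            document=document,
        )
    return _extraction_cache[doc_id]
```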
|
||||
|
||||
### 5. Error Handling
|
||||
- Graceful degradation for oversized content (see the sketch below)
|
||||
- Fallback strategies for failed AI calls
|
||||
- User notifications for content size issues
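
A hedged sketch of graceful degradation around a single AI call. It reuses the `ContextLengthExceededException` exercised by the fallback test further below; the interfaceAiCalls layer reportedly implements a similar Basic-to-Advanced fallback internally, so this standalone wrapper only illustrates the pattern, and the fallback order and truncation limit are assumptions:

```python
import logging

from modules.connectors.connectorAiOpenai import ContextLengthExceededException

logger = logging.getLogger(__name__)

async def call_ai_with_degradation(service, prompt: str, content: str) -> str:
    """Try the basic call, fall back to the advanced call, then retry with truncated content."""
    try:
        return await service.callAiTextBasic(prompt, content)
    except ContextLengthExceededException:
        logger.warning("Context too large for basic AI call, falling back to advanced call")
    try:
        return await service.callAiTextAdvanced(prompt, content)
    except ContextLengthExceededException:
        logger.warning("Context still too large, retrying basic call with truncated content")
        return await service.callAiTextBasic(prompt, content[:60_000])  # assumed truncation limit
```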
|
||||
|
||||
## Test Scenarios
|
||||
|
||||
### Test Case 1: Large Document Processing
|
||||
- Upload a 10MB PDF document
|
||||
- Try to extract content for AI processing
|
||||
- Monitor for size limit errors
|
||||
|
||||
### Test Case 2: Multiple Document Reports
|
||||
- Upload 5+ large documents
|
||||
- Generate HTML report
|
||||
- Check for combined content size issues
|
||||
|
||||
### Test Case 3: Long Conversation History
|
||||
- Create workflow with 50+ messages
|
||||
- Test chat summarization
|
||||
- Monitor for context size limits
|
||||
|
||||
### Test Case 4: Task Planning with Large Context
|
||||
- Create workflow with many documents
|
||||
- Test task planning functionality
|
||||
- Check for prompt size limits
|
||||
|
||||
## Monitoring Recommendations
|
||||
|
||||
1. **Log Content Sizes**: Track the size of content sent to AI functions (a decorator sketch follows this list)
|
||||
2. **Monitor API Errors**: Watch for 400 errors indicating content too large
|
||||
3. **Performance Metrics**: Track processing times for large content
|
||||
4. **User Feedback**: Monitor for incomplete or failed operations
|
||||
5. **Cost Tracking**: Monitor AI API costs for large requests
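
For item 1, a small decorator sketch that logs content sizes before an AI call runs. The decorator name and the assumption that the wrapped call takes `(prompt, context)` are illustrative:

```python
import functools
import logging

logger = logging.getLogger(__name__)

def log_content_size(ai_call):
    """Log prompt and context sizes of an async AI call before it runs."""
    @functools.wraps(ai_call)
    async def wrapper(prompt: str, context: str = "", *args, **kwargs):
        logger.info(
            f"{ai_call.__name__}: prompt={len(prompt)} chars, context={len(context)} chars"
        )
        return await ai_call(prompt, context, *args, **kwargs)
    return wrapper
```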
|
||||
|
||||
## Implementation Priority
|
||||
|
||||
1. **Immediate**: Add content size checks to extractContentFromDocument
|
||||
2. **High**: Implement chunking for large document processing
|
||||
3. **Medium**: Add content filtering to task planning prompts
|
||||
4. **Low**: Implement caching for processed content
|
||||
|
||||
This analysis should help identify and mitigate the risk of sending oversized content to AI functions.
|
||||
|
|
@ -1,103 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script to verify AI fallback mechanism from Basic to Advanced when context length is exceeded.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from modules.interfaces.interfaceAiCalls import AiCalls
|
||||
from modules.connectors.connectorAiOpenai import ContextLengthExceededException
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def test_context_length_fallback():
|
||||
"""Test the fallback mechanism when context length is exceeded"""
|
||||
|
||||
# Create AI calls instance
|
||||
ai_calls = AiCalls()
|
||||
|
||||
# Create a very large context that would exceed OpenAI's context limit
|
||||
large_context = "This is a test context. " * 10000 # Create a large context
|
||||
prompt = "Please summarize this context in one sentence."
|
||||
|
||||
logger.info("Testing AI Basic with large context (should trigger fallback)...")
|
||||
|
||||
try:
|
||||
# This should trigger the context length exceeded error and fallback to Advanced
|
||||
result = await ai_calls.callAiTextBasic(prompt, large_context)
|
||||
logger.info(f"✅ Fallback successful! Result: {result[:100]}...")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Test failed: {str(e)}")
|
||||
return False
|
||||
|
||||
async def test_direct_context_length_exception():
|
||||
"""Test that the ContextLengthExceededException is properly raised"""
|
||||
|
||||
from modules.connectors.connectorAiOpenai import AiOpenai
|
||||
|
||||
logger.info("Testing direct ContextLengthExceededException...")
|
||||
|
||||
try:
|
||||
# Create OpenAI connector
|
||||
openai_connector = AiOpenai()
|
||||
|
||||
# Create messages that would exceed context length
|
||||
large_messages = [
|
||||
{"role": "user", "content": "Test message. " * 50000} # Very large message
|
||||
]
|
||||
|
||||
# This should raise ContextLengthExceededException
|
||||
await openai_connector.callAiBasic(large_messages)
|
||||
logger.error("❌ Expected ContextLengthExceededException but none was raised")
|
||||
return False
|
||||
|
||||
except ContextLengthExceededException as e:
|
||||
logger.info(f"✅ ContextLengthExceededException properly raised: {str(e)}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Unexpected exception: {str(e)}")
|
||||
return False
|
||||
|
||||
async def main():
|
||||
"""Run all tests"""
|
||||
logger.info("Starting AI fallback mechanism tests...")
|
||||
|
||||
tests = [
|
||||
("Context Length Fallback", test_context_length_fallback),
|
||||
("Direct Exception Test", test_direct_context_length_exception),
|
||||
]
|
||||
|
||||
results = []
|
||||
for test_name, test_func in tests:
|
||||
logger.info(f"\n--- Running {test_name} ---")
|
||||
try:
|
||||
result = await test_func()
|
||||
results.append((test_name, result))
|
||||
except Exception as e:
|
||||
logger.error(f"Test {test_name} crashed: {str(e)}")
|
||||
results.append((test_name, False))
|
||||
|
||||
# Summary
|
||||
logger.info("\n" + "="*50)
|
||||
logger.info("TEST SUMMARY")
|
||||
logger.info("="*50)
|
||||
|
||||
passed = 0
|
||||
for test_name, result in results:
|
||||
status = "✅ PASSED" if result else "❌ FAILED"
|
||||
logger.info(f"{test_name}: {status}")
|
||||
if result:
|
||||
passed += 1
|
||||
|
||||
logger.info(f"\nTotal: {passed}/{len(results)} tests passed")
|
||||
|
||||
if passed == len(results):
|
||||
logger.info("🎉 All tests passed! Fallback mechanism is working correctly.")
|
||||
else:
|
||||
logger.warning("⚠️ Some tests failed. Please check the implementation.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
|
|
@ -1,855 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for DocumentExtraction class.
|
||||
Processes all files in d:/temp folder and stores extracted content in d:/temp/extracted.
|
||||
|
||||
Features:
|
||||
- Option to extract content WITH AI processing (default)
|
||||
- Option to extract content WITHOUT AI processing (content-only mode)
|
||||
- Supports all document types: text, images, PDFs, Office documents, etc.
|
||||
- Detailed logging and progress tracking
|
||||
- Separate output directories for AI vs content-only modes
|
||||
|
||||
Usage:
|
||||
- Interactive mode: python test_documentExtraction.py
|
||||
- Content-only mode: python test_documentExtraction.py --no-ai
|
||||
- Content-only mode: python test_documentExtraction.py --content-only
|
||||
- Specify custom input/output: python test_documentExtraction.py --input-dir /path/to/input --output-dir /path/to/output --no-ai
|
||||
"""
|
||||
|
||||
import os
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
from datetime import datetime, UTC
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG, # Changed from INFO to DEBUG
|
||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Filter out specific unwanted log messages
|
||||
class LogFilter(logging.Filter):
|
||||
"""Filter to hide specific unwanted log messages."""
|
||||
|
||||
def filter(self, record):
|
||||
# Hide workflow stats update errors
|
||||
if "Workflow" in record.getMessage() and "not found for stats update" in record.getMessage():
|
||||
return False
|
||||
|
||||
# Hide HTTP request info messages
|
||||
if "HTTP Request:" in record.getMessage() and "POST https://api.openai.com" in record.getMessage():
|
||||
return False
|
||||
|
||||
# Hide HTTP response info messages
|
||||
if "HTTP/1.1 200 OK" in record.getMessage():
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
# Apply the filter to the root logger
|
||||
root_logger = logging.getLogger()
|
||||
root_logger.addFilter(LogFilter())
|
||||
|
||||
def check_dependencies():
    """Check if required dependencies are available and provide installation instructions."""
    missing_deps = []

    # Check for required dependencies
    try:
        import bs4
        logger.info("beautifulsoup4 is available")
    except ImportError:
        missing_deps.append("beautifulsoup4")
        logger.error("beautifulsoup4 is missing")

    try:
        import PyPDF2
        logger.info("PyPDF2 is available")
    except ImportError:
        missing_deps.append("PyPDF2")
        logger.error("PyPDF2 is missing")

    try:
        import fitz
        logger.info("PyMuPDF (fitz) is available")
    except ImportError:
        missing_deps.append("PyMuPDF")
        logger.error("PyMuPDF (fitz) is missing")

    try:
        import docx
        logger.info("python-docx is available")
    except ImportError:
        missing_deps.append("python-docx")
        logger.error("python-docx is missing")

    try:
        import openpyxl
        logger.info("openpyxl is available")
    except ImportError:
        missing_deps.append("openpyxl")
        logger.error("openpyxl is missing")

    try:
        import pptx
        logger.info("python-pptx is available")
    except ImportError:
        missing_deps.append("python-pptx")
        logger.error("python-pptx is missing")

    try:
        from PIL import Image
        logger.info("Pillow (PIL) is available")
    except ImportError:
        missing_deps.append("Pillow")
        logger.error("Pillow (PIL) is missing")

    if missing_deps:
        logger.error("\n" + "="*60)
        logger.error("MISSING DEPENDENCIES DETECTED!")
        logger.error("="*60)
        logger.error("The following packages are required but not installed:")
        for dep in missing_deps:
            logger.error(f"  - {dep}")
        logger.error("\nTo install all dependencies, run:")
        logger.error("pip install -r requirements.txt")
        logger.error("\nOr install individual packages:")
        for dep in missing_deps:
            # Every listed package installs under the same name via pip, so the
            # original per-package if/elif branches (all identical) are collapsed here
            logger.error(f"  pip install {dep}")
        logger.error("="*60)
        return False

    logger.info("All required dependencies are available!")
    return True

def check_module_imports():
    """Check if we can import the required modules."""
    try:
        # Add the gateway directory to the path so we can import our modules
        sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..'))

        from modules.chat.documents.documentExtraction import DocumentExtraction
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserConnection
        from modules.interfaces.interfaceChatModel import ChatWorkflow, TaskItem

        logger.info("All required modules imported successfully")
        return True
    except ImportError as e:
        logger.error(f"Failed to import required modules: {e}")
        logger.error("Make sure you're running this script from the gateway directory")
        return False
    except Exception as e:
        logger.error(f"Unexpected error importing modules: {e}")
        return False

def create_mock_service_center():
    """Create a proper ServiceCenter for testing purposes with all required fields."""
    try:
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
        from modules.interfaces.interfaceChatModel import ChatWorkflow, TaskItem, TaskStatus
        from modules.interfaces.interfaceChatModel import ChatLog, ChatMessage, ChatStat

        # Create proper user with all required fields
        mock_user = User(
            id="test_user_001",
            username="testuser",
            email="test@example.com",
            fullName="Test User",
            language="en",
            enabled=True,
            privilege=UserPrivilege.USER,
            authenticationAuthority=AuthAuthority.LOCAL,
            mandateId="test_mandate_001"
        )

        # Create proper workflow with all required fields
        current_time = datetime.now(UTC).isoformat()
        mock_workflow = ChatWorkflow(
            id="test_workflow_001",
            mandateId="test_mandate_001",
            status="active",
            name="Test Document Extraction Workflow",
            currentRound=1,
            lastActivity=current_time,
            startedAt=current_time,
            logs=[],
            messages=[],
            stats=None,
            tasks=[]
        )

        # Create service center
        service_center = ServiceCenter(mock_user, mock_workflow)
        logger.info("ServiceCenter created successfully with proper objects")
        return service_center

    except Exception as e:
        logger.error(f"Failed to create ServiceCenter: {e}")
        return None

class DocumentExtractionTester:
|
||||
"""Test class for DocumentExtraction functionality."""
|
||||
|
||||
def __init__(self, input_dir: str = "d:/temp/test-extraction", output_dir: str = None, enable_ai: bool = True):
|
||||
"""
|
||||
Initialize the tester.
|
||||
|
||||
Args:
|
||||
input_dir: Directory containing files to process
|
||||
output_dir: Directory to store extracted content (auto-generated if None)
|
||||
enable_ai: Whether to enable AI processing (default: True)
|
||||
"""
|
||||
self.input_dir = Path(input_dir)
|
||||
|
||||
# Auto-generate output directory if not specified
|
||||
if output_dir is None:
|
||||
if enable_ai:
|
||||
self.output_dir = Path(input_dir) / "extracted"
|
||||
else:
|
||||
self.output_dir = Path(input_dir) / "extracted-raw"
|
||||
else:
|
||||
self.output_dir = Path(output_dir)
|
||||
|
||||
self.extractor = None
|
||||
self.service_center = None
|
||||
self.enable_ai = enable_ai
|
||||
|
||||
if enable_ai:
|
||||
self.prompt = "Make a summary of each sentence for each page or chapter of the document"
|
||||
else:
|
||||
self.prompt = None # No prompt needed for content-only extraction
|
||||
|
||||
# Track processing results for summary
|
||||
self.processing_results = []
|
||||
|
||||
# Ensure output directory exists
|
||||
logger.info(f"Creating output directory: {self.output_dir}")
|
||||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Verify directory was created
|
||||
if self.output_dir.exists():
|
||||
logger.info(f"Output directory created/verified: {self.output_dir}")
|
||||
logger.info(f"Output directory absolute path: {self.output_dir.absolute()}")
|
||||
else:
|
||||
logger.error(f"Failed to create output directory: {self.output_dir}")
|
||||
|
||||
# Log configuration
|
||||
logger.info(f"Configuration: AI processing = {'ENABLED' if self.enable_ai else 'DISABLED'}")
|
||||
logger.info(f"Input directory: {self.input_dir}")
|
||||
logger.info(f"Output directory: {self.output_dir}")
|
||||
|
||||
# Test basic file writing capability
|
||||
test_file = self.output_dir / "test_write_capability.txt"
|
||||
try:
|
||||
logger.info(f"Testing file write capability to: {test_file}")
|
||||
logger.info(f"Absolute path: {test_file.absolute()}")
|
||||
|
||||
with open(test_file, 'w', encoding='utf-8') as f:
|
||||
f.write("Test file to verify write capability")
|
||||
|
||||
if test_file.exists():
|
||||
actual_size = test_file.stat().st_size
|
||||
logger.info(f"Basic file writing test passed: {test_file} (size: {actual_size} bytes)")
|
||||
|
||||
# Test reading the file back
|
||||
with open(test_file, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
logger.info(f"File read test passed: content length = {len(content)}")
|
||||
|
||||
# Clean up test file
|
||||
test_file.unlink()
|
||||
logger.info("Test file cleaned up")
|
||||
else:
|
||||
logger.error(f"Basic file writing test failed: {test_file}")
|
||||
except Exception as e:
|
||||
logger.error(f"Basic file writing test failed with error: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
# Supported file extensions for content extraction
|
||||
self.supported_extensions = {
|
||||
# Text and data files
|
||||
'.txt', '.csv', '.json', '.xml', '.html', '.htm', '.svg',
|
||||
'.md', '.markdown', '.rst', '.log', '.ini', '.cfg', '.conf',
|
||||
|
||||
# Programming languages
|
||||
'.js', '.ts', '.jsx', '.tsx', '.py', '.java', '.c', '.cpp', '.cc', '.cxx',
|
||||
'.h', '.hpp', '.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.scala',
|
||||
'.r', '.m', '.pl', '.sh', '.bash', '.zsh', '.fish', '.ps1', '.bat', '.cmd',
|
||||
'.vbs', '.lua', '.sql', '.r', '.dart', '.elm', '.clj', '.hs', '.fs', '.ml',
|
||||
|
||||
# Web technologies
|
||||
'.css', '.scss', '.sass', '.less', '.vue', '.svelte', '.astro',
|
||||
|
||||
# Configuration and build files
|
||||
'.yaml', '.yml', '.toml', '.env', '.gitignore', '.dockerfile', '.dockerignore',
|
||||
'.makefile', '.cmake', '.gradle', '.maven', '.pom', '.sln', '.vcxproj',
|
||||
'.csproj', '.fsproj', '.vbproj', '.xcodeproj', '.pbxproj',
|
||||
|
||||
# Documentation and markup
|
||||
'.tex', '.bib', '.adoc', '.asciidoc', '.wiki', '.creole',
|
||||
|
||||
# Images
|
||||
'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff', '.ico',
|
||||
|
||||
# Documents
|
||||
'.pdf', '.docx', '.xlsx', '.pptx', '.odt', '.ods', '.odp',
|
||||
|
||||
# Legacy Office formats
|
||||
'.doc', '.xls', '.ppt',
|
||||
|
||||
# Archives and binaries
|
||||
'.zip', '.tar', '.gz', '.7z', '.rar', '.exe', '.dll', '.so', '.dylib'
|
||||
}
|
||||
|
||||
    def initialize_extractor(self):
        """Initialize the DocumentExtraction instance with a proper ServiceCenter."""
        try:
            # First create the service center
            self.service_center = create_mock_service_center()
            if not self.service_center:
                logger.error("Failed to create ServiceCenter!")
                return False

            # Now create DocumentExtraction with the service center
            from modules.chat.documents.documentExtraction import DocumentExtraction
            self.extractor = DocumentExtraction(self.service_center)
            logger.info("DocumentExtraction initialized successfully with ServiceCenter")
            return True
        except Exception as e:
            logger.error(f"Failed to initialize DocumentExtraction: {e}")
            return False

    def get_files_to_process(self) -> List[Path]:
        """Get list of files to process from input directory."""
        if not self.input_dir.exists():
            logger.error(f"Input directory {self.input_dir} does not exist!")
            logger.info("Creating input directory and adding a test file...")
            self.input_dir.mkdir(parents=True, exist_ok=True)

            # Create a test file if none exist
            test_file = self.input_dir / "test.txt"
            with open(test_file, 'w') as f:
                f.write("This is a test file for document extraction.\nIt contains multiple lines.\nAnd some special characters: äöüß")
            logger.info(f"Created test file: {test_file}")

        files = []
        all_files = list(self.input_dir.iterdir())
        logger.info(f"All files in directory: {[f.name for f in all_files]}")

        for file_path in all_files:
            if file_path.is_file():
                logger.debug(f"Checking file: {file_path.name} (extension: {file_path.suffix})")
                if file_path.suffix.lower() in self.supported_extensions:
                    files.append(file_path)
                    logger.debug(f"Added file: {file_path.name}")
                else:
                    logger.debug(f"Skipped file: {file_path.name} (unsupported extension)")

        logger.info(f"Found {len(files)} supported files to process")
        if files:
            logger.info(f"Files to process: {[f.name for f in files]}")
        return files

async def process_single_file(self, file_path: Path) -> bool:
|
||||
"""
|
||||
Process a single file and extract its content.
|
||||
|
||||
Args:
|
||||
file_path: Path to the file to process
|
||||
|
||||
Returns:
|
||||
True if successful, False otherwise
|
||||
"""
|
||||
if not self.extractor:
|
||||
logger.error("DocumentExtraction not initialized!")
|
||||
return False
|
||||
|
||||
try:
|
||||
logger.info(f"Processing file: {file_path.name}")
|
||||
|
||||
# Read file data
|
||||
with open(file_path, 'rb') as f:
|
||||
file_data = f.read()
|
||||
|
||||
logger.debug(f"File size: {len(file_data)} bytes")
|
||||
|
||||
# Determine MIME type based on extension
|
||||
mime_type = self._get_mime_type(file_path.suffix)
|
||||
logger.debug(f"MIME type: {mime_type}")
|
||||
|
||||
# Process the file with or without AI based on configuration
|
||||
extracted_content = await self.extractor.processFileData(
|
||||
fileData=file_data,
|
||||
fileName=file_path.name,
|
||||
mimeType=mime_type,
|
||||
base64Encoded=False,
|
||||
prompt=self.prompt,
|
||||
enableAI=self.enable_ai
|
||||
)
|
||||
|
||||
logger.debug(f"Extracted {len(extracted_content.contents)} content items")
|
||||
|
||||
# Debug: Show content details
|
||||
for i, content_item in enumerate(extracted_content.contents):
|
||||
logger.debug(f"Content item {i+1}: label='{content_item.label}', has_data={content_item.data is not None}, data_length={len(content_item.data) if content_item.data else 0}")
|
||||
|
||||
# Special logging for JavaScript files
|
||||
if mime_type == "application/javascript":
|
||||
logger.debug(f"JavaScript file detected: {file_path.name}")
|
||||
logger.debug(f"Original file size: {len(file_data)} bytes")
|
||||
for i, content_item in enumerate(extracted_content.contents):
|
||||
if content_item.data:
|
||||
content_size = len(content_item.data.encode('utf-8'))
|
||||
logger.debug(f"JavaScript content item {i+1}: {content_size} bytes")
|
||||
# Check if content was truncated
|
||||
if content_size < len(file_data) * 0.9: # If less than 90% of original
|
||||
logger.warning(f"JavaScript content may be truncated: {content_size} bytes vs {len(file_data)} bytes original")
|
||||
|
||||
# Track processing result
|
||||
result = {
|
||||
'fileName': file_path.name,
|
||||
'status': 'OK',
|
||||
'content_items': 0,
|
||||
'output_files': [],
|
||||
'total_content_size': 0
|
||||
}
|
||||
|
||||
# Save each content item as a separate file
|
||||
if extracted_content.contents:
|
||||
for i, content_item in enumerate(extracted_content.contents):
|
||||
if content_item.data:
|
||||
content_size = len(content_item.data.encode('utf-8'))
|
||||
result['total_content_size'] += content_size
|
||||
logger.debug(f"Content item {i+1}: {content_item.label}, size: {content_size} bytes")
|
||||
|
||||
# Generate fileName with new naming convention
|
||||
if len(extracted_content.contents) == 1:
|
||||
# Single content item
|
||||
output_fileName = f"{file_path.stem} - {content_item.label} 1.txt"
|
||||
else:
|
||||
# Multiple content items - add sequence number
|
||||
output_fileName = f"{file_path.stem} - {content_item.label} {i+1}.txt"
|
||||
|
||||
output_file = self.output_dir / output_fileName
|
||||
|
||||
# Write only the raw extracted content
|
||||
logger.debug(f"Attempting to write to: {output_file}")
|
||||
try:
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
f.write(content_item.data)
|
||||
|
||||
# Verify file was created
|
||||
if output_file.exists():
|
||||
actual_size = output_file.stat().st_size
|
||||
logger.info(f"File created successfully: {output_fileName} (expected: {content_size} bytes, actual: {actual_size} bytes)")
|
||||
else:
|
||||
logger.error(f"File was not created: {output_file}")
|
||||
|
||||
result['output_files'].append(output_fileName)
|
||||
result['content_items'] += 1
|
||||
except Exception as write_error:
|
||||
logger.error(f"Error writing file {output_fileName}: {write_error}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
else:
|
||||
logger.warning(f"Content item {i+1} has no data, skipping")
|
||||
else:
|
||||
logger.warning(f"No content extracted from {file_path.name}")
|
||||
result['status'] = 'FAIL'
|
||||
result['error'] = 'No content extracted'
|
||||
|
||||
# Add result to tracking list
|
||||
self.processing_results.append(result)
|
||||
|
||||
logger.info(f"Successfully processed {file_path.name} - Total content: {result['total_content_size']} bytes")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
error_msg = str(e)
|
||||
logger.error(f"Error processing {file_path.name}: {error_msg}")
|
||||
|
||||
# Track failed result
|
||||
result = {
|
||||
'fileName': file_path.name,
|
||||
'status': 'FAIL',
|
||||
'content_items': 0,
|
||||
'output_files': [],
|
||||
'error': error_msg,
|
||||
'total_content_size': 0
|
||||
}
|
||||
self.processing_results.append(result)
|
||||
|
||||
return False
|
||||
|
||||
def _get_mime_type(self, extension: str) -> str:
|
||||
"""Get MIME type based on file extension."""
|
||||
mime_types = {
|
||||
# Text and data files
|
||||
'.txt': 'text/plain',
|
||||
'.csv': 'text/csv',
|
||||
'.json': 'application/json',
|
||||
'.xml': 'application/xml',
|
||||
'.html': 'text/html',
|
||||
'.htm': 'text/html',
|
||||
'.svg': 'image/svg+xml',
|
||||
'.md': 'text/markdown',
|
||||
'.markdown': 'text/markdown',
|
||||
'.rst': 'text/x-rst',
|
||||
'.log': 'text/plain',
|
||||
'.ini': 'text/plain',
|
||||
'.cfg': 'text/plain',
|
||||
'.conf': 'text/plain',
|
||||
|
||||
# Programming languages
|
||||
'.js': 'application/javascript',
|
||||
'.ts': 'application/typescript',
|
||||
'.jsx': 'text/jsx',
|
||||
'.tsx': 'text/tsx',
|
||||
'.py': 'text/x-python',
|
||||
'.java': 'text/x-java-source',
|
||||
'.c': 'text/x-c',
|
||||
'.cpp': 'text/x-c++src',
|
||||
'.cc': 'text/x-c++src',
|
||||
'.cxx': 'text/x-c++src',
|
||||
'.h': 'text/x-c',
|
||||
'.hpp': 'text/x-c++hdr',
|
||||
'.cs': 'text/x-csharp',
|
||||
'.php': 'application/x-httpd-php',
|
||||
'.rb': 'text/x-ruby',
|
||||
'.go': 'text/x-go',
|
||||
'.rs': 'text/x-rust',
|
||||
'.swift': 'text/x-swift',
|
||||
'.kt': 'text/x-kotlin',
|
||||
'.scala': 'text/x-scala',
|
||||
'.r': 'text/x-r',
|
||||
'.m': 'text/x-matlab',
|
||||
'.pl': 'text/x-perl',
|
||||
'.sh': 'application/x-sh',
|
||||
'.bash': 'application/x-sh',
|
||||
'.zsh': 'application/x-sh',
|
||||
'.fish': 'application/x-sh',
|
||||
'.ps1': 'application/x-powershell',
|
||||
'.bat': 'application/x-msdos-program',
|
||||
'.cmd': 'application/x-msdos-program',
|
||||
'.vbs': 'text/vbscript',
|
||||
'.lua': 'text/x-lua',
|
||||
'.sql': 'application/sql',
|
||||
'.dart': 'application/dart',
|
||||
'.elm': 'text/x-elm',
|
||||
'.clj': 'text/x-clojure',
|
||||
'.hs': 'text/x-haskell',
|
||||
'.fs': 'text/x-fsharp',
|
||||
'.ml': 'text/x-ocaml',
|
||||
|
||||
# Web technologies
|
||||
'.css': 'text/css',
|
||||
'.scss': 'text/x-scss',
|
||||
'.sass': 'text/x-sass',
|
||||
'.less': 'text/x-less',
|
||||
'.vue': 'text/x-vue',
|
||||
'.svelte': 'text/x-svelte',
|
||||
'.astro': 'text/x-astro',
|
||||
|
||||
# Configuration and build files
|
||||
'.yaml': 'application/x-yaml',
|
||||
'.yml': 'application/x-yaml',
|
||||
'.toml': 'application/toml',
|
||||
'.env': 'text/plain',
|
||||
'.gitignore': 'text/plain',
|
||||
'.dockerfile': 'text/x-dockerfile',
|
||||
'.dockerignore': 'text/plain',
|
||||
'.makefile': 'text/x-makefile',
|
||||
'.cmake': 'text/x-cmake',
|
||||
'.gradle': 'text/x-gradle',
|
||||
'.maven': 'text/x-maven',
|
||||
'.pom': 'application/xml',
|
||||
'.sln': 'text/plain',
|
||||
'.vcxproj': 'application/xml',
|
||||
'.csproj': 'application/xml',
|
||||
'.fsproj': 'application/xml',
|
||||
'.vbproj': 'application/xml',
|
||||
'.xcodeproj': 'text/plain',
|
||||
'.pbxproj': 'text/plain',
|
||||
|
||||
# Documentation and markup
|
||||
'.tex': 'application/x-tex',
|
||||
'.bib': 'text/x-bibtex',
|
||||
'.adoc': 'text/asciidoc',
|
||||
'.asciidoc': 'text/asciidoc',
|
||||
'.wiki': 'text/x-wiki',
|
||||
'.creole': 'text/x-wiki',
|
||||
|
||||
# Images
|
||||
'.jpg': 'image/jpeg',
|
||||
'.jpeg': 'image/jpeg',
|
||||
'.png': 'image/png',
|
||||
'.gif': 'image/gif',
|
||||
'.webp': 'image/webp',
|
||||
'.bmp': 'image/bmp',
|
||||
'.tiff': 'image/tiff',
|
||||
'.ico': 'image/x-icon',
|
||||
|
||||
# Documents
|
||||
'.pdf': 'application/pdf',
|
||||
'.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
'.odt': 'application/vnd.oasis.opendocument.text',
|
||||
'.ods': 'application/vnd.oasis.opendocument.spreadsheet',
|
||||
'.odp': 'application/vnd.oasis.opendocument.presentation',
|
||||
|
||||
# Legacy Office formats
|
||||
'.doc': 'application/msword',
|
||||
'.xls': 'application/vnd.ms-excel',
|
||||
'.ppt': 'application/vnd.ms-powerpoint',
|
||||
|
||||
# Archives and binaries (will be processed as binary)
|
||||
'.zip': 'application/zip',
|
||||
'.tar': 'application/x-tar',
|
||||
'.gz': 'application/gzip',
|
||||
'.7z': 'application/x-7z-compressed',
|
||||
'.rar': 'application/vnd.rar',
|
||||
'.exe': 'application/x-msdownload',
|
||||
'.dll': 'application/x-msdownload',
|
||||
'.so': 'application/x-sharedlib',
|
||||
'.dylib': 'application/x-mach-binary'
|
||||
}
|
||||
return mime_types.get(extension.lower(), 'application/octet-stream')
|
||||
|
||||
async def run_tests(self) -> None:
|
||||
"""Run the document extraction tests on all files."""
|
||||
mode = "WITH AI" if self.enable_ai else "CONTENT ONLY (No AI)"
|
||||
logger.info(f"Starting document extraction tests - {mode}")
|
||||
logger.info(f"Input directory: {self.input_dir}")
|
||||
logger.info(f"Output directory: {self.output_dir}")
|
||||
if self.enable_ai:
|
||||
logger.info(f"Processing prompt: {self.prompt}")
|
||||
else:
|
||||
logger.info("AI processing: DISABLED - Raw content extraction only")
|
||||
|
||||
# Initialize the extractor
|
||||
if not self.initialize_extractor():
|
||||
logger.error("Cannot proceed without DocumentExtraction!")
|
||||
return
|
||||
|
||||
# Get files to process
|
||||
files = self.get_files_to_process()
|
||||
|
||||
if not files:
|
||||
logger.warning("No files found to process!")
|
||||
return
|
||||
|
||||
# Process each file
|
||||
successful = 0
|
||||
failed = 0
|
||||
|
||||
logger.info(f"Starting to process {len(files)} files...")
|
||||
for i, file_path in enumerate(files):
|
||||
logger.info(f"Processing file {i+1}/{len(files)}: {file_path.name}")
|
||||
try:
|
||||
if await self.process_single_file(file_path):
|
||||
successful += 1
|
||||
logger.info(f"File {i+1} processed successfully")
|
||||
else:
|
||||
failed += 1
|
||||
logger.error(f"File {i+1} processing failed")
|
||||
except Exception as e:
|
||||
failed += 1
|
||||
logger.error(f"Exception processing file {i+1}: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
# Print detailed summary
|
||||
mode = "WITH AI" if self.enable_ai else "CONTENT ONLY (No AI)"
|
||||
logger.info("\n" + "=" * 80)
|
||||
logger.info(f"DETAILED TEST SUMMARY - {mode}")
|
||||
logger.info("=" * 80)
|
||||
logger.info(f"Total files processed: {len(files)}")
|
||||
logger.info(f"Successful: {successful}")
|
||||
logger.info(f"Failed: {failed}")
|
||||
logger.info(f"Output directory: {self.output_dir}")
|
||||
if self.enable_ai:
|
||||
logger.info("AI processing: ENABLED")
|
||||
else:
|
||||
logger.info("AI processing: DISABLED")
|
||||
logger.info("=" * 80)
|
||||
|
||||
# List all processed documents with results
|
||||
logger.info("\nPROCESSING RESULTS:")
|
||||
logger.info("-" * 80)
|
||||
|
||||
for result in self.processing_results:
|
||||
status_icon = "✅" if result['status'] == 'OK' else "❌"
|
||||
logger.info(f"{status_icon} {result['fileName']} - {result['status']}")
|
||||
|
||||
if result['status'] == 'OK':
|
||||
if result['content_items'] == 1:
|
||||
logger.info(f" └─ Generated: {result['output_files'][0]} ({result['total_content_size']} bytes)")
|
||||
else:
|
||||
logger.info(f" └─ Generated {result['content_items']} files ({result['total_content_size']} total bytes):")
|
||||
for output_file in result['output_files']:
|
||||
logger.info(f" └─ {output_file}")
|
||||
else:
|
||||
error_msg = result.get('error', 'Unknown error')
|
||||
logger.info(f" └─ Error: {error_msg}")
|
||||
|
||||
logger.info("-" * 80)
|
||||
logger.info("=" * 80)
|
||||
|
||||
def parse_arguments():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(description='Document Extraction Test Script')
    parser.add_argument('--no-ai', '--content-only', action='store_true',
                        help='Run in content-only mode without AI processing')
    parser.add_argument('--input-dir', type=str, default='d:/temp/test-extraction',
                        help='Input directory containing files to process (default: d:/temp/test-extraction)')
    parser.add_argument('--output-dir', type=str,
                        help='Output directory for extracted content (auto-generated if not specified)')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Enable verbose logging')

    return parser.parse_args()

async def main():
|
||||
"""Main function to run the tests."""
|
||||
# Parse command line arguments
|
||||
args = parse_arguments()
|
||||
|
||||
# Set logging level based on verbosity
|
||||
if args.verbose:
|
||||
logging.getLogger().setLevel(logging.DEBUG)
|
||||
else:
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
|
||||
logger.info("DocumentExtraction Test Script")
|
||||
logger.info("=" * 50)
|
||||
logger.info(f"Source: {args.input_dir}")
|
||||
|
||||
# Determine output directory
|
||||
if args.output_dir:
|
||||
output_dir = args.output_dir
|
||||
else:
|
||||
if args.no_ai:
|
||||
output_dir = f"{args.input_dir}/extracted-raw"
|
||||
else:
|
||||
output_dir = f"{args.input_dir}/extracted"
|
||||
|
||||
logger.info(f"Output: {output_dir}")
|
||||
logger.info("=" * 50)
|
||||
|
||||
# Check dependencies first
|
||||
if not check_dependencies():
|
||||
logger.error("Please install missing dependencies before running tests.")
|
||||
return
|
||||
|
||||
# Check module imports
|
||||
if not check_module_imports():
|
||||
logger.error("Cannot import required modules. Please check your setup.")
|
||||
return
|
||||
|
||||
# Determine mode based on command line arguments
|
||||
if args.no_ai:
|
||||
enable_ai = False
|
||||
logger.info("Running in CONTENT ONLY mode (no AI processing)")
|
||||
else:
|
||||
# Interactive mode: ask user for choice
|
||||
print("\n" + "=" * 50)
|
||||
print("SELECT EXTRACTION MODE:")
|
||||
print("=" * 50)
|
||||
print("1. With AI processing (default)")
|
||||
print("2. Content only (no AI processing)")
|
||||
print("=" * 50)
|
||||
|
||||
        try:
            choice = input("Enter your choice (1 or 2, default is 1): ").strip()
            if choice == "2":
                enable_ai = False
                if not args.output_dir:  # keep an explicitly passed --output-dir
                    output_dir = f"{args.input_dir}/extracted-raw"
                logger.info("Selected: Content only mode (no AI processing)")
            else:
                enable_ai = True
                if not args.output_dir:
                    output_dir = f"{args.input_dir}/extracted"
                logger.info("Selected: AI processing mode")
        except (EOFError, KeyboardInterrupt):
            # Default to AI mode if input fails
            enable_ai = True
            if not args.output_dir:
                output_dir = f"{args.input_dir}/extracted"
            logger.info("Defaulting to AI processing mode")
|
||||
|
||||
# Run tests with selected mode
|
||||
tester = DocumentExtractionTester(
|
||||
input_dir=args.input_dir,
|
||||
output_dir=output_dir,
|
||||
enable_ai=enable_ai
|
||||
)
|
||||
await tester.run_tests()
|
||||
|
||||
if __name__ == "__main__":
    # main() parses its own arguments and falls back to interactive prompts,
    # so it is invoked the same way with or without command-line arguments
    asyncio.run(main())

# Convenience function for easy content-only extraction
async def extract_documents_content_only(input_folder: str, output_folder: str = None):
    """
    Convenience function to extract documents without AI processing.

    Args:
        input_folder: Path to folder containing documents to extract
        output_folder: Path to folder where extracted content will be stored (optional)

    Example:
        # Extract from d:/temp to d:/temp/extracted-raw
        asyncio.run(extract_documents_content_only("d:/temp"))

        # Extract from custom folders
        asyncio.run(extract_documents_content_only("c:/my_docs", "c:/my_docs/extracted"))
    """
    if output_folder is None:
        output_folder = f"{input_folder}/extracted-raw"

    logger.info(f"Running content-only extraction from {input_folder} to {output_folder}")

    # Check dependencies and imports
    if not check_dependencies():
        logger.error("Missing dependencies. Please install required packages.")
        return False

    if not check_module_imports():
        logger.error("Cannot import required modules. Please check your setup.")
        return False

    # Create tester and run
    tester = DocumentExtractionTester(
        input_dir=input_folder,
        output_dir=output_folder,
        enable_ai=False
    )

    await tester.run_tests()
    return True

# Example usage (uncomment to use):
# if __name__ == "__main__":
#     # For content-only extraction from d:/temp to d:/temp/extracted-raw
#     asyncio.run(extract_documents_content_only("d:/temp"))

@@ -1,189 +0,0 @@
#!/usr/bin/env python3
"""
Simple test script for enhanced Excel processing functionality.
This script tests the DocumentExtraction class with Excel files.
"""

import os
import sys
import asyncio
import logging
from pathlib import Path

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Add the gateway directory to the path
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..'))

async def test_excel_processing():
|
||||
"""Test Excel processing functionality."""
|
||||
try:
|
||||
# Import required modules
|
||||
from modules.chat.documents.documentExtraction import DocumentExtraction
|
||||
from modules.chat.serviceCenter import ServiceCenter
|
||||
from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
|
||||
from modules.interfaces.interfaceChatModel import ChatWorkflow
|
||||
from datetime import datetime, UTC
|
||||
|
||||
logger.info("Testing Excel processing functionality...")
|
||||
|
||||
# Create mock service center
|
||||
mock_user = User(
|
||||
id="test_user_001",
|
||||
username="testuser",
|
||||
email="test@example.com",
|
||||
fullName="Test User",
|
||||
language="en",
|
||||
enabled=True,
|
||||
privilege=UserPrivilege.USER,
|
||||
authenticationAuthority=AuthAuthority.LOCAL,
|
||||
mandateId="test_mandate_001"
|
||||
)
|
||||
|
||||
current_time = datetime.now(UTC).isoformat()
|
||||
mock_workflow = ChatWorkflow(
|
||||
id="test_workflow_001",
|
||||
mandateId="test_mandate_001",
|
||||
status="active",
|
||||
name="Test Excel Processing Workflow",
|
||||
currentRound=1,
|
||||
lastActivity=current_time,
|
||||
startedAt=current_time,
|
||||
logs=[],
|
||||
messages=[],
|
||||
stats=None,
|
||||
tasks=[]
|
||||
)
|
||||
|
||||
service_center = ServiceCenter(mock_user, mock_workflow)
|
||||
logger.info("ServiceCenter created successfully")
|
||||
|
||||
# Create DocumentExtraction instance
|
||||
extractor = DocumentExtraction(service_center)
|
||||
logger.info("DocumentExtraction created successfully")
|
||||
|
||||
# Test with a sample Excel file if available
|
||||
test_file_path = "d:/temp/test-extraction/test.xlsx"
|
||||
|
||||
if os.path.exists(test_file_path):
|
||||
logger.info(f"Found test file: {test_file_path}")
|
||||
|
||||
# Read the file
|
||||
with open(test_file_path, 'rb') as f:
|
||||
file_data = f.read()
|
||||
|
||||
logger.info(f"File size: {len(file_data)} bytes")
|
||||
|
||||
# Process the Excel file
|
||||
logger.info("Processing Excel file...")
|
||||
result = await extractor.processFileData(
|
||||
fileData=file_data,
|
||||
fileName="test.xlsx",
|
||||
mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
base64Encoded=False,
|
||||
prompt=None,
|
||||
enableAI=False
|
||||
)
|
||||
|
||||
logger.info(f"Excel processing completed successfully!")
|
||||
logger.info(f"Generated {len(result.contents)} content items:")
|
||||
|
||||
for i, content_item in enumerate(result.contents):
|
||||
logger.info(f" Item {i+1}: {content_item.label}")
|
||||
logger.info(f" MIME type: {content_item.metadata.mimeType}")
|
||||
logger.info(f" Size: {content_item.metadata.size} bytes")
|
||||
if content_item.data:
|
||||
logger.info(f" Data preview: {content_item.data[:100]}...")
|
||||
else:
|
||||
logger.info(f" Data: None")
|
||||
|
||||
else:
|
||||
logger.info("No test Excel file found. Creating a simple test...")
|
||||
|
||||
# Test the openpyxl library directly
|
||||
try:
|
||||
import openpyxl
|
||||
from openpyxl import Workbook
|
||||
|
||||
# Create a test workbook
|
||||
wb = Workbook()
|
||||
ws = wb.active
|
||||
ws.title = "Test Sheet"
|
||||
|
||||
# Add some test data
|
||||
ws['A1'] = "Name"
|
||||
ws['B1'] = "Age"
|
||||
ws['C1'] = "City"
|
||||
ws['A2'] = "John Doe"
|
||||
ws['B2'] = 30
|
||||
ws['C2'] = "New York"
|
||||
ws['A3'] = "Jane Smith"
|
||||
ws['B3'] = 25
|
||||
ws['C3'] = "Los Angeles"
|
||||
|
||||
# Test properties
|
||||
wb.properties.title = "Test Workbook"
|
||||
wb.properties.creator = "Test User"
|
||||
wb.properties.subject = "Test Subject"
|
||||
|
||||
logger.info("Test workbook created successfully")
|
||||
logger.info(f" Title: {wb.properties.title}")
|
||||
logger.info(f" Creator: {wb.properties.creator}")
|
||||
logger.info(f" Subject: {wb.properties.subject}")
|
||||
logger.info(f" Sheets: {wb.sheetnames}")
|
||||
|
||||
# Test the DocumentExtraction with this workbook
|
||||
from io import BytesIO
|
||||
|
||||
# Save to bytes
|
||||
buffer = BytesIO()
|
||||
wb.save(buffer)
|
||||
buffer.seek(0)
|
||||
file_data = buffer.getvalue()
|
||||
|
||||
logger.info(f"Test workbook size: {len(file_data)} bytes")
|
||||
|
||||
# Process with DocumentExtraction
|
||||
result = await extractor.processFileData(
|
||||
fileData=file_data,
|
||||
fileName="test_workbook.xlsx",
|
||||
mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
base64Encoded=False,
|
||||
prompt=None,
|
||||
enableAI=False
|
||||
)
|
||||
|
||||
logger.info(f"Test workbook processing completed successfully!")
|
||||
logger.info(f"Generated {len(result.contents)} content items:")
|
||||
|
||||
for i, content_item in enumerate(result.contents):
|
||||
logger.info(f" Item {i+1}: {content_item.label}")
|
||||
logger.info(f" MIME type: {content_item.metadata.mimeType}")
|
||||
logger.info(f" Size: {content_item.metadata.size} bytes")
|
||||
if content_item.data:
|
||||
logger.info(f" Data preview: {content_item.data[:200]}...")
|
||||
else:
|
||||
logger.info(f" Data: None")
|
||||
|
||||
except ImportError as e:
|
||||
logger.error(f"openpyxl not available: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error testing Excel functionality: {e}")
|
||||
|
||||
logger.info("Excel processing test completed!")
|
||||
|
||||
except ImportError as e:
|
||||
logger.error(f"Failed to import required modules: {e}")
|
||||
logger.error("Make sure you're running this script from the gateway directory")
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(test_excel_processing())
@@ -1,658 +0,0 @@
#!/usr/bin/env python3
"""
Test script for MethodWeb class.
Tests all web actions: search, crawl, and scrape with various parameter sets.

Features:
- Tests web search functionality with different queries
- Tests web crawling with URL lists
- Tests web scraping (search + crawl combined)
- Detailed logging and progress tracking
- Error handling and validation testing
- Configuration validation

Usage:
- Interactive mode: python test_methodWeb.py
- Automated mode: python test_methodWeb.py --auto
- Verbose mode: python test_methodWeb.py --verbose
"""

import os
import asyncio
import logging
import sys
import argparse
import json
from pathlib import Path
from typing import List, Optional, Dict, Any
from datetime import datetime, UTC

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Filter out specific unwanted log messages
class LogFilter(logging.Filter):
    """Filter to hide specific unwanted log messages."""

    def filter(self, record):
        # Hide HTTP request info messages
        if "HTTP Request:" in record.getMessage() and "POST https://api.tavily.com" in record.getMessage():
            return False

        # Hide HTTP response info messages
        if "HTTP/1.1 200 OK" in record.getMessage():
            return False

        return True

# Apply the filter to the root logger
root_logger = logging.getLogger()
root_logger.addFilter(LogFilter())

def check_dependencies():
    """Check if required dependencies are available."""
    missing_deps = []

    # Check for required dependencies
    try:
        import tavily
        logger.info("tavily-python is available")
    except ImportError:
        missing_deps.append("tavily-python")
        logger.error("tavily-python is missing")

    try:
        import httpx
        logger.info("httpx is available")
    except ImportError:
        missing_deps.append("httpx")
        logger.error("httpx is missing")

    if missing_deps:
        logger.error("\n" + "="*60)
        logger.error("MISSING DEPENDENCIES DETECTED!")
        logger.error("="*60)
        logger.error("The following packages are required but not installed:")
        for dep in missing_deps:
            logger.error(f"  - {dep}")
        logger.error("\nTo install all dependencies, run:")
        logger.error("pip install -r requirements.txt")
        logger.error("="*60)
        return False

    logger.info("All required dependencies are available!")
    return True

def check_module_imports():
|
||||
"""Check if we can import the required modules."""
|
||||
try:
|
||||
# Add the gateway directory to the path so we can import our modules
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..'))
|
||||
|
||||
from modules.methods.methodWeb import MethodWeb
|
||||
from modules.chat.serviceCenter import ServiceCenter
|
||||
from modules.interfaces.interfaceAppModel import User, UserConnection, UserPrivilege, AuthAuthority
|
||||
from modules.interfaces.interfaceChatModel import ChatWorkflow, TaskItem, TaskStatus
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
logger.info("All required modules imported successfully")
|
||||
return True
|
||||
except ImportError as e:
|
||||
logger.error(f"Failed to import required modules: {e}")
|
||||
logger.error("Make sure you're running this script from the gateway directory")
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error importing modules: {e}")
|
||||
return False
|
||||
|
||||
def check_configuration():
|
||||
"""Check if required configuration is available."""
|
||||
try:
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
# Check Tavily API key
|
||||
tavily_api_key = APP_CONFIG.get("Connector_WebTavily_API_KEY")
|
||||
if not tavily_api_key or tavily_api_key == "your_tavily_api_key_here":
|
||||
logger.error("Tavily API key not configured!")
|
||||
logger.error("Please set Connector_WebTavily_API_KEY in config.ini")
|
||||
return False
|
||||
|
||||
logger.info("Tavily API key is configured")
|
||||
|
||||
# Check other web configuration
|
||||
web_configs = [
|
||||
"Web_Search_MAX_QUERY_LENGTH",
|
||||
"Web_Search_MAX_RESULTS",
|
||||
"Web_Search_MIN_RESULTS",
|
||||
"Web_Crawl_TIMEOUT",
|
||||
"Web_Crawl_MAX_RETRIES",
|
||||
"Web_Crawl_RETRY_DELAY"
|
||||
]
|
||||
|
||||
for config_key in web_configs:
|
||||
value = APP_CONFIG.get(config_key)
|
||||
if value:
|
||||
logger.info(f"Configuration {config_key}: {value}")
|
||||
else:
|
||||
logger.warning(f"Configuration {config_key} not set, using default")
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to check configuration: {e}")
|
||||
return False
|
||||
|
||||
def create_mock_service_center():
|
||||
"""Create a proper ServiceCenter for testing purposes."""
|
||||
try:
|
||||
from modules.chat.serviceCenter import ServiceCenter
|
||||
from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
|
||||
from modules.interfaces.interfaceChatModel import ChatWorkflow, TaskItem, TaskStatus
|
||||
from modules.interfaces.interfaceChatModel import ChatLog, ChatMessage, ChatStat
|
||||
|
||||
# Create proper user with all required fields
|
||||
mock_user = User(
|
||||
id="test_user_web_001",
|
||||
username="testuser_web",
|
||||
email="testweb@example.com",
|
||||
fullName="Test Web User",
|
||||
language="en",
|
||||
enabled=True,
|
||||
privilege=UserPrivilege.USER,
|
||||
authenticationAuthority=AuthAuthority.LOCAL,
|
||||
mandateId="test_mandate_web_001"
|
||||
)
|
||||
|
||||
# Create proper workflow with all required fields
|
||||
current_time = datetime.now(UTC).timestamp()
|
||||
mock_workflow = ChatWorkflow(
|
||||
id="test_workflow_web_001",
|
||||
mandateId="test_mandate_web_001",
|
||||
status="active",
|
||||
name="Test Web Method Workflow",
|
||||
currentRound=1,
|
||||
lastActivity=current_time,
|
||||
startedAt=current_time,
|
||||
logs=[],
|
||||
messages=[],
|
||||
stats=None,
|
||||
tasks=[]
|
||||
)
|
||||
|
||||
# Create service center
|
||||
service_center = ServiceCenter(mock_user, mock_workflow)
|
||||
logger.info("ServiceCenter created successfully for web testing")
|
||||
return service_center
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create ServiceCenter: {e}")
|
||||
return None
|
||||
|
||||
class MethodWebTester:
|
||||
"""Test class for MethodWeb functionality."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the tester."""
|
||||
self.method_web = None
|
||||
self.service_center = None
|
||||
|
||||
# Test results tracking
|
||||
self.test_results = []
|
||||
|
||||
# Test parameter sets
|
||||
self.test_queries = [
|
||||
"Python programming tutorial",
|
||||
"FastAPI documentation",
|
||||
"machine learning basics",
|
||||
"web scraping best practices"
|
||||
]
|
||||
|
||||
self.test_urls = [
|
||||
"https://docs.python.org/3/tutorial/",
|
||||
"https://fastapi.tiangolo.com/",
|
||||
"https://scikit-learn.org/stable/",
|
||||
"https://requests.readthedocs.io/en/latest/"
|
||||
]
|
||||
|
||||
def initialize_method_web(self):
|
||||
"""Initialize the MethodWeb instance with a proper ServiceCenter."""
|
||||
try:
|
||||
# First create the service center
|
||||
self.service_center = create_mock_service_center()
|
||||
if not self.service_center:
|
||||
logger.error("Failed to create ServiceCenter!")
|
||||
return False
|
||||
|
||||
# Now create MethodWeb with the service center
|
||||
from modules.methods.methodWeb import MethodWeb
|
||||
self.method_web = MethodWeb(self.service_center)
|
||||
logger.info("MethodWeb initialized successfully with ServiceCenter")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize MethodWeb: {e}")
|
||||
return False
|
||||
|
||||
async def test_search_action(self, query: str, max_results: int = 5) -> Dict[str, Any]:
|
||||
"""Test the search action with given parameters."""
|
||||
logger.info(f"Testing search action with query: '{query}', max_results: {max_results}")
|
||||
|
||||
try:
|
||||
parameters = {
|
||||
"query": query,
|
||||
"maxResults": max_results
|
||||
}
|
||||
|
||||
result = await self.method_web.search(parameters)
|
||||
|
||||
test_result = {
|
||||
"action": "search",
|
||||
"query": query,
|
||||
"max_results": max_results,
|
||||
"success": result.success,
|
||||
"error": result.error if not result.success else None,
|
||||
"documents_count": len(result.documents) if result.documents else 0,
|
||||
"result_label": result.resultLabel if hasattr(result, 'resultLabel') else None
|
||||
}
|
||||
|
||||
if result.success:
|
||||
logger.info(f"✅ Search successful: {test_result['documents_count']} documents returned")
|
||||
if result.documents:
|
||||
for i, doc in enumerate(result.documents):
|
||||
logger.info(f" Document {i+1}: {doc.documentName}")
|
||||
if hasattr(doc, 'documentData') and hasattr(doc.documentData, 'results'):
|
||||
logger.info(f" Results count: {len(doc.documentData.results)}")
|
||||
else:
|
||||
logger.error(f"❌ Search failed: {result.error}")
|
||||
|
||||
return test_result
|
||||
|
||||
except Exception as e:
|
||||
error_msg = str(e)
|
||||
logger.error(f"❌ Search action exception: {error_msg}")
|
||||
return {
|
||||
"action": "search",
|
||||
"query": query,
|
||||
"max_results": max_results,
|
||||
"success": False,
|
||||
"error": f"Exception: {error_msg}",
|
||||
"documents_count": 0,
|
||||
"result_label": None
|
||||
}
|
||||
|
||||
async def test_crawl_action(self, urls: List[str]) -> Dict[str, Any]:
|
||||
"""Test the crawl action with given URLs."""
|
||||
logger.info(f"Testing crawl action with {len(urls)} URLs")
|
||||
|
||||
try:
|
||||
# Monkeypatch the service to return a mock document and file data
|
||||
class _MockDoc:
|
||||
def __init__(self, fileId: str, fileName: str = "mock_search_results.json"):
|
||||
self.fileId = fileId
|
||||
self.fileName = fileName
|
||||
|
||||
def _mock_get_docs(_doc_ids):
|
||||
return [_MockDoc(fileId="mock_file_id", fileName="mock_search_results.json")]
|
||||
|
||||
# Build minimal JSON structure expected by methodWeb.crawl
|
||||
mock_payload = {
|
||||
"documentData": {
|
||||
"results": [{"url": u} for u in urls]
|
||||
}
|
||||
}
|
||||
|
||||
def _mock_get_file_data(_file_id):
|
||||
return json.dumps(mock_payload).encode("utf-8")
|
||||
|
||||
# Apply monkeypatches to the method's service
|
||||
self.method_web.service.getChatDocumentsFromDocumentList = _mock_get_docs
|
||||
self.method_web.service.getFileData = _mock_get_file_data
|
||||
|
||||
# Use any string as the document list reference; service is mocked
|
||||
parameters = {"documentList": "mock_document_list_ref"}
|
||||
|
||||
result = await self.method_web.crawl(parameters)
|
||||
|
||||
test_result = {
|
||||
"action": "crawl",
|
||||
"urls_count": len(urls),
|
||||
"success": result.success,
|
||||
"error": result.error if not result.success else None,
|
||||
"documents_count": len(result.documents) if result.documents else 0,
|
||||
"result_label": result.resultLabel if hasattr(result, 'resultLabel') else None
|
||||
}
|
||||
|
||||
if result.success:
|
||||
logger.info(f"✅ Crawl successful: {test_result['documents_count']} documents returned")
|
||||
if result.documents:
|
||||
for i, doc in enumerate(result.documents):
|
||||
logger.info(f" Document {i+1}: {doc.documentName}")
|
||||
else:
|
||||
logger.error(f"❌ Crawl failed: {result.error}")
|
||||
|
||||
return test_result
|
||||
|
||||
except Exception as e:
|
||||
error_msg = str(e)
|
||||
logger.error(f"❌ Crawl action exception: {error_msg}")
|
||||
return {
|
||||
"action": "crawl",
|
||||
"urls_count": len(urls),
|
||||
"success": False,
|
||||
"error": f"Exception: {error_msg}",
|
||||
"documents_count": 0,
|
||||
"result_label": None
|
||||
}
|
||||
|
||||
async def test_scrape_action(self, query: str, max_results: int = 3) -> Dict[str, Any]:
|
||||
"""Test the scrape action (search + crawl combined) with given parameters."""
|
||||
logger.info(f"Testing scrape action with query: '{query}', max_results: {max_results}")
|
||||
|
||||
try:
|
||||
parameters = {
|
||||
"query": query,
|
||||
"maxResults": max_results
|
||||
}
|
||||
|
||||
result = await self.method_web.scrape(parameters)
|
||||
|
||||
test_result = {
|
||||
"action": "scrape",
|
||||
"query": query,
|
||||
"max_results": max_results,
|
||||
"success": result.success,
|
||||
"error": result.error if not result.success else None,
|
||||
"documents_count": len(result.documents) if result.documents else 0,
|
||||
"result_label": result.resultLabel if hasattr(result, 'resultLabel') else None
|
||||
}
|
||||
|
||||
if result.success:
|
||||
logger.info(f"✅ Scrape successful: {test_result['documents_count']} documents returned")
|
||||
if result.documents:
|
||||
for i, doc in enumerate(result.documents):
|
||||
logger.info(f" Document {i+1}: {doc.documentName}")
|
||||
if hasattr(doc, 'documentData') and hasattr(doc.documentData, 'results'):
|
||||
logger.info(f" Results count: {len(doc.documentData.results)}")
|
||||
else:
|
||||
logger.error(f"❌ Scrape failed: {result.error}")
|
||||
|
||||
return test_result
|
||||
|
||||
except Exception as e:
|
||||
error_msg = str(e)
|
||||
logger.error(f"❌ Scrape action exception: {error_msg}")
|
||||
return {
|
||||
"action": "scrape",
|
||||
"query": query,
|
||||
"max_results": max_results,
|
||||
"success": False,
|
||||
"error": f"Exception: {error_msg}",
|
||||
"documents_count": 0,
|
||||
"result_label": None
|
||||
}
|
||||
|
||||
async def test_parameter_validation(self) -> List[Dict[str, Any]]:
|
||||
"""Test parameter validation with invalid inputs."""
|
||||
logger.info("Testing parameter validation with invalid inputs")
|
||||
|
||||
validation_tests = []
|
||||
|
||||
# Test 1: Empty query
|
||||
logger.info("Test 1: Empty query")
|
||||
result = await self.test_search_action("", 5)
|
||||
# For validation tests, we expect the request to fail with validation error
|
||||
if not result["success"] and "validation error" in result.get("error", "").lower():
|
||||
result["success"] = True # Mark as successful validation test
|
||||
result["validation_test"] = True
|
||||
result["expected_behavior"] = "Correctly rejected empty query"
|
||||
logger.info("✅ Validation test PASSED: Empty query correctly rejected")
|
||||
validation_tests.append(result)
|
||||
|
||||
# Test 2: Query too long (over 400 characters)
|
||||
long_query = "a" * 500
|
||||
logger.info("Test 2: Query too long")
|
||||
result = await self.test_search_action(long_query, 5)
|
||||
if not result["success"] and "validation error" in result.get("error", "").lower():
|
||||
result["success"] = True # Mark as successful validation test
|
||||
result["validation_test"] = True
|
||||
result["expected_behavior"] = "Correctly rejected overly long query"
|
||||
logger.info("✅ Validation test PASSED: Long query correctly rejected")
|
||||
validation_tests.append(result)
|
||||
|
||||
# Test 3: Max results too high
|
||||
logger.info("Test 3: Max results too high")
|
||||
result = await self.test_search_action("test", 25)
|
||||
if not result["success"] and "validation error" in result.get("error", "").lower():
|
||||
result["success"] = True # Mark as successful validation test
|
||||
result["validation_test"] = True
|
||||
result["expected_behavior"] = "Correctly rejected excessive max results"
|
||||
logger.info("✅ Validation test PASSED: High max results correctly rejected")
|
||||
validation_tests.append(result)
|
||||
|
||||
# Test 4: Max results too low
|
||||
logger.info("Test 4: Max results too low")
|
||||
result = await self.test_search_action("test", 0)
|
||||
if not result["success"] and "validation error" in result.get("error", "").lower():
|
||||
result["success"] = True # Mark as successful validation test
|
||||
result["validation_test"] = True
|
||||
result["expected_behavior"] = "Correctly rejected zero max results"
|
||||
logger.info("✅ Validation test PASSED: Zero max results correctly rejected")
|
||||
validation_tests.append(result)
|
||||
|
||||
return validation_tests
|
||||
|
||||
async def run_all_tests(self) -> None:
|
||||
"""Run all web method tests."""
|
||||
logger.info("Starting MethodWeb comprehensive tests")
|
||||
logger.info("=" * 60)
|
||||
|
||||
# Initialize the method
|
||||
if not self.initialize_method_web():
|
||||
logger.error("Cannot proceed without MethodWeb!")
|
||||
return
|
||||
|
||||
# Test 1: Search actions with different queries
|
||||
logger.info("\n" + "=" * 60)
|
||||
logger.info("TEST 1: SEARCH ACTIONS")
|
||||
logger.info("=" * 60)
|
||||
|
||||
for i, query in enumerate(self.test_queries):
|
||||
logger.info(f"\nSearch test {i+1}/{len(self.test_queries)}")
|
||||
result = await self.test_search_action(query, 3)
|
||||
self.test_results.append(result)
|
||||
await asyncio.sleep(1) # Rate limiting
|
||||
|
||||
# Test 2: Scrape actions (search + crawl combined)
|
||||
logger.info("\n" + "=" * 60)
|
||||
logger.info("TEST 2: SCRAPE ACTIONS")
|
||||
logger.info("=" * 60)
|
||||
|
||||
scrape_queries = self.test_queries[:2] # Use first 2 queries for scraping
|
||||
for i, query in enumerate(scrape_queries):
|
||||
logger.info(f"\nScrape test {i+1}/{len(scrape_queries)}")
|
||||
result = await self.test_scrape_action(query, 2)
|
||||
self.test_results.append(result)
|
||||
await asyncio.sleep(2) # Rate limiting for scraping
|
||||
|
||||
        # Test 3: Parameter validation
        logger.info("\n" + "=" * 60)
        logger.info("TEST 3: PARAMETER VALIDATION")
        logger.info("=" * 60)

        validation_results = await self.test_parameter_validation()
        self.test_results.extend(validation_results)

        # Test 4: Crawl action (if we have search results)
        logger.info("\n" + "=" * 60)
        logger.info("TEST 4: CRAWL ACTIONS")
        logger.info("=" * 60)

        logger.info("Testing crawl with sample URLs")
        result = await self.test_crawl_action(self.test_urls[:2])
        self.test_results.append(result)

        # Print comprehensive summary
        self.print_test_summary()

    def print_test_summary(self):
        """Print comprehensive test summary."""
        logger.info("\n" + "=" * 80)
        logger.info("COMPREHENSIVE TEST SUMMARY")
        logger.info("=" * 80)

        total_tests = len(self.test_results)
        successful_tests = sum(1 for result in self.test_results if result["success"])
        failed_tests = total_tests - successful_tests

        logger.info(f"Total tests run: {total_tests}")
        logger.info(f"Successful: {successful_tests}")
        logger.info(f"Failed: {failed_tests}")
        logger.info(f"Success rate: {(successful_tests/total_tests)*100:.1f}%")

        # Group results by action type
        action_groups = {}
        for result in self.test_results:
            action = result["action"]
            if action not in action_groups:
                action_groups[action] = []
            action_groups[action].append(result)

        logger.info("\n" + "-" * 80)
        logger.info("RESULTS BY ACTION TYPE:")
        logger.info("-" * 80)

        for action, results in action_groups.items():
            action_successful = sum(1 for r in results if r["success"])
            action_total = len(results)
            logger.info(f"\n{action.upper()} ACTIONS:")
            logger.info(f"  Total: {action_total}, Successful: {action_successful}, Failed: {action_total - action_successful}")

            for i, result in enumerate(results):
                status_icon = "✅" if result["success"] else "❌"

                # Handle validation tests specially
                if result.get("validation_test", False):
                    logger.info(f"  {status_icon} Validation Test {i+1}: {result.get('expected_behavior', 'Validation working correctly')}")
                    if result.get("error"):
                        logger.info(f"     Validation Error: {result['error']}")
                elif action == "search":
                    logger.info(f"  {status_icon} Test {i+1}: '{result['query']}' -> {result['documents_count']} docs")
                elif action == "scrape":
                    logger.info(f"  {status_icon} Test {i+1}: '{result['query']}' -> {result['documents_count']} docs")
                elif action == "crawl":
                    logger.info(f"  {status_icon} Test {i+1}: {result['urls_count']} URLs -> {result['documents_count']} docs")

                if not result["success"] and not result.get("validation_test", False):
                    logger.info(f"     Error: {result['error']}")

        logger.info("\n" + "-" * 80)
        logger.info("CONFIGURATION STATUS:")
        logger.info("-" * 80)

        try:
            from modules.shared.configuration import APP_CONFIG
            tavily_key = APP_CONFIG.get("Connector_WebTavily_API_KEY")
            if tavily_key and tavily_key != "your_tavily_api_key_here":
                logger.info("✅ Tavily API key: Configured")
            else:
                logger.info("❌ Tavily API key: Not configured")

            web_configs = [
                ("Web_Search_MAX_QUERY_LENGTH", "400"),
                ("Web_Search_MAX_RESULTS", "20"),
                ("Web_Search_MIN_RESULTS", "1"),
                ("Web_Crawl_TIMEOUT", "30"),
                ("Web_Crawl_MAX_RETRIES", "3"),
                ("Web_Crawl_RETRY_DELAY", "2")
            ]

            for config_key, default_value in web_configs:
                value = APP_CONFIG.get(config_key, default_value)
                logger.info(f"✅ {config_key}: {value}")

        except Exception as e:
            logger.error(f"❌ Configuration check failed: {e}")

        logger.info("=" * 80)
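
# For reference, a sample of the config.ini entries the configuration check
# above looks for. The section header is an assumption; the key names and
# default values are taken directly from this script:
#
#   [DEFAULT]
#   Connector_WebTavily_API_KEY = your_tavily_api_key_here
#   Web_Search_MAX_QUERY_LENGTH = 400
#   Web_Search_MAX_RESULTS = 20
#   Web_Search_MIN_RESULTS = 1
#   Web_Crawl_TIMEOUT = 30
#   Web_Crawl_MAX_RETRIES = 3
#   Web_Crawl_RETRY_DELAY = 2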


def parse_arguments():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(description='MethodWeb Test Script')
    parser.add_argument('--auto', action='store_true',
                        help='Run tests automatically without user interaction')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Enable verbose logging')
    parser.add_argument('--quick', action='store_true',
                        help='Run quick tests with fewer queries')

    return parser.parse_args()
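
# Example invocations (the filename test_method_web.py is an assumption; it is
# not shown in this diff):
#   python test_method_web.py --auto --quick   # non-interactive run, reduced query set
#   python test_method_web.py --verbose        # interactive run with DEBUG logging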


async def main():
    """Main function to run the tests."""
    # Parse command line arguments
    args = parse_arguments()

    # Set logging level based on verbosity
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.getLogger().setLevel(logging.INFO)

    logger.info("MethodWeb Test Script")
    logger.info("=" * 50)

    # Check dependencies first
    if not check_dependencies():
        logger.error("Please install missing dependencies before running tests.")
        return

    # Check module imports
    if not check_module_imports():
        logger.error("Cannot import required modules. Please check your setup.")
        return

    # Check configuration
    if not check_configuration():
        logger.error("Configuration check failed. Please check your config.ini file.")
        return

    # Determine test mode
    if args.auto:
        logger.info("Running in automated mode")
    else:
        # Interactive mode: ask user for confirmation
        print("\n" + "=" * 50)
        print("METHODWEB TEST SCRIPT")
        print("=" * 50)
        print("This script will test the MethodWeb functionality including:")
        print("- Web search actions")
        print("- Web scraping actions")
        print("- Web crawling actions")
        print("- Parameter validation")
        print("=" * 50)

        try:
            choice = input("Do you want to proceed? (y/N): ").strip().lower()
            if choice not in ['y', 'yes']:
                logger.info("Test cancelled by user")
                return
        except (EOFError, KeyboardInterrupt):
            logger.info("Test cancelled by user")
            return

    # Create tester and run tests
    tester = MethodWebTester()

    # Modify test queries for quick mode
    if args.quick:
        tester.test_queries = tester.test_queries[:2]  # Use only first 2 queries
        logger.info("Running in quick mode with reduced test set")

    await tester.run_all_tests()


if __name__ == "__main__":
    # Run the tests
    asyncio.run(main())


@ -1,51 +0,0 @@
#!/usr/bin/env python3
"""
Test script for Outlook filter logic
"""

def test_build_graph_filter():
    """Test the filter building logic"""

    # Mock the _buildGraphFilter method
    def _buildGraphFilter(filter_text):
        if not filter_text:
            return {}

        filter_text = filter_text.strip()

        # Handle email address filters
        if '@' in filter_text and '.' in filter_text and ' ' not in filter_text:
            return {"$filter": f"from/fromAddress/address eq '{filter_text}'"}

        # Handle search queries (from:, to:, subject:, etc.)
        if any(filter_text.startswith(prefix) for prefix in ['from:', 'to:', 'subject:', 'received:', 'hasattachment:']):
            return {"$search": f'"{filter_text}"'}

        # Handle text content - search in subject
        return {"$filter": f"contains(subject,'{filter_text}')"}

    # Test cases
    test_cases = [
        ("peter.muster@domain.com", {"$filter": "from/fromAddress/address eq 'peter.muster@domain.com'"}),
        ("from:user@example.com", {"$search": '"from:user@example.com"'}),
        ("subject:meeting", {"$search": '"subject:meeting"'}),
        ("project update", {"$filter": "contains(subject,'project update')"}),
        ("", {}),
        (" hello world ", {"$filter": "contains(subject,'hello world')"}),
    ]

    print("Testing Outlook filter logic:")
    print("=" * 50)

    for test_input, expected_output in test_cases:
        result = _buildGraphFilter(test_input)
        status = "✓ PASS" if result == expected_output else "✗ FAIL"
        print(f"{status} | Input: '{test_input}'")
        print(f"       | Expected: {expected_output}")
        print(f"       | Got: {result}")
        print()

    print("Test completed!")

if __name__ == "__main__":
    test_build_graph_filter()


@ -1,70 +0,0 @@
#!/usr/bin/env python3
"""
Test script for fixed Outlook filter logic
"""

def test_build_graph_filter():
    """Test the corrected filter building logic"""

    # Mock the corrected _buildGraphFilter method
    def _buildGraphFilter(filter_text):
        if not filter_text:
            return {}

        filter_text = filter_text.strip()

        # Handle search queries (from:, to:, subject:, etc.) - check this FIRST
        if any(filter_text.startswith(prefix) for prefix in ['from:', 'to:', 'subject:', 'received:', 'hasattachment:']):
            return {"$search": f'"{filter_text}"'}

        # Handle email address filters (only if it's NOT a search query)
        if '@' in filter_text and '.' in filter_text and ' ' not in filter_text and not filter_text.startswith('from:'):
            return {"$filter": f"from/fromAddress/address eq '{filter_text}'"}

        # Handle text content - search in subject
        return {"$filter": f"contains(subject,'{filter_text}')"}

    # Test cases
    test_cases = [
        ("peter.muster@domain.com", {"$filter": "from/fromAddress/address eq 'peter.muster@domain.com'"}),
        ("from:user@example.com", {"$search": '"from:user@example.com"'}),
        ("subject:meeting", {"$search": '"subject:meeting"'}),
        ("project update", {"$filter": "contains(subject,'project update')"}),
        ("", {}),
        (" hello world ", {"$filter": "contains(subject,'hello world')"}),
        # Additional edge cases
        ("to:manager@company.com", {"$search": '"to:manager@company.com"'}),
        ("received:today", {"$search": '"received:today"'}),
        ("hasattachment:true", {"$search": '"hasattachment:true"'}),
        ("user@domain.com", {"$filter": "from/fromAddress/address eq 'user@domain.com'"}),
        ("from:user@domain.com subject:budget", {"$search": '"from:user@domain.com subject:budget"'}),
    ]

    print("Testing FIXED Outlook filter logic:")
    print("=" * 50)

    passed = 0
    failed = 0

    for test_input, expected_output in test_cases:
        result = _buildGraphFilter(test_input)
        status = "✓ PASS" if result == expected_output else "✗ FAIL"
        if result == expected_output:
            passed += 1
        else:
            failed += 1

        print(f"{status} | Input: '{test_input}'")
        print(f"       | Expected: {expected_output}")
        print(f"       | Got: {result}")
        print()

    print(f"Test completed! {passed} passed, {failed} failed")

    if failed == 0:
        print("🎉 All tests passed!")
    else:
        print("❌ Some tests failed. Please check the logic.")

if __name__ == "__main__":
    test_build_graph_filter()
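
# Illustrative use of the returned dict when querying Microsoft Graph; the
# endpoint and the parameter merge below are assumptions, not code from this
# repository:
#
#   params = {"$top": 25}
#   params.update(_buildGraphFilter("from:user@example.com"))
#   # params is now {"$top": 25, "$search": '"from:user@example.com"'}
#   # GET https://graph.microsoft.com/v1.0/me/messages with these query parameters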


@ -1,100 +0,0 @@
#!/usr/bin/env python3
"""
Test script for Pydantic compatibility module.
This script tests the version-aware functionality for both Pydantic v1 and v2.
"""

import sys
import os

# Add the modules directory to the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'modules'))

def test_compatibility_module():
    """Test the Pydantic compatibility module"""
    try:
        from shared.pydanticCompat import (
            PYDANTIC_VERSION,
            create_private_field,
            create_model_config,
            model_to_dict,
            model_from_dict,
            get_version_info
        )

        print(f"✅ Successfully imported Pydantic compatibility module")
        print(f"📊 Pydantic version detected: {PYDANTIC_VERSION}")

        # Test version info
        version_info = get_version_info()
        print(f"🔍 Version info: {version_info}")

        # Test field creation
        private_field = create_private_field(default="test")
        print(f"✅ Private field created: {type(private_field)}")

        # Test model config
        config = create_model_config(validate_assignment=True)
        print(f"✅ Model config created: {type(config)}")

        return True

    except Exception as e:
        print(f"❌ Error testing compatibility module: {e}")
        return False
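
# Rough sketch of the kind of version switch a compatibility layer like
# pydanticCompat presumably performs internally; this is an illustration under
# assumptions, not the module's actual code.
import pydantic

_IS_PYDANTIC_V2 = pydantic.VERSION.startswith("2")

def _sketch_model_to_dict(model):
    """Serialize a model on either major version (v2 renamed .dict() to .model_dump())."""
    return model.model_dump() if _IS_PYDANTIC_V2 else model.dict()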


def test_chat_document_model():
    """Test the ChatDocument model with compatibility"""
    try:
        from interfaces.interfaceChatModel import ChatDocument

        print(f"✅ Successfully imported ChatDocument model")

        # Test creating a document
        doc = ChatDocument(fileId="test-file-123")
        print(f"✅ ChatDocument created: {doc.id}")

        # Test setting component interface
        doc.setComponentInterface("mock_interface")
        print(f"✅ Component interface set")

        # Test serialization
        doc_dict = doc.to_dict()
        print(f"✅ Document serialized: {doc_dict}")

        # Test validation
        is_valid = doc.validate_component_interface()
        print(f"✅ Component interface validation: {is_valid}")

        return True

    except Exception as e:
        print(f"❌ Error testing ChatDocument model: {e}")
        import traceback
        traceback.print_exc()
        return False


def main():
    """Main test function"""
    print("🧪 Testing Pydantic Compatibility Module")
    print("=" * 50)

    # Test compatibility module
    compat_ok = test_compatibility_module()
    print()

    # Test ChatDocument model
    model_ok = test_chat_document_model()
    print()

    # Summary
    print("=" * 50)
    if compat_ok and model_ok:
        print("🎉 All tests passed! Pydantic compatibility is working correctly.")
        return 0
    else:
        print("💥 Some tests failed. Check the errors above.")
        return 1


if __name__ == "__main__":
    sys.exit(main())
@ -1,207 +0,0 @@
#!/usr/bin/env python3
"""
Test script for web CSV functionality
Tests both CSV output generation and CSV input reading
"""

import sys
import os
import asyncio
from typing import Dict, Any

# Add the gateway directory to the Python path
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))

from modules.methods.methodWeb import MethodWeb
from modules.interfaces.interfaceWebModel import WebSearchResultItem, WebSearchDocumentData, WebSearchActionDocument, WebSearchActionResult
from pydantic import HttpUrl


def create_mock_web_search_result():
    """Create a mock WebSearchActionResult with the provided example data"""

    # Create mock search result items based on the provided example
    results = [
        WebSearchResultItem(
            title="Switzerland Market Analysis :: Fitch Solutions",
            url=HttpUrl("https://www.fitchsolutions.com/bmi/region/switzerland")
        ),
        WebSearchResultItem(
            title="OECD Economic Outlook, Volume 2024 Issue 2: Switzerland",
            url=HttpUrl("https://www.oecd.org/en/publications/2024/12/oecd-economic-outlook-volume-2024-issue-2_67bb8fac/full-report/switzerland_605fd31f.html")
        ),
        WebSearchResultItem(
            title="The economic context of Switzerland - International Trade Portal",
            url=HttpUrl("https://www.lloydsbanktrade.com/en/market-potential/switzerland/economical-context")
        ),
        WebSearchResultItem(
            title="Switzerland: Country File, Economic Risk Analysis | Coface",
            url=HttpUrl("https://www.coface.com/news-economy-and-insights/business-risk-dashboard/country-risk-files/switzerland")
        ),
        WebSearchResultItem(
            title="Swiss Economic Outlook 2025 - Roland Berger",
            url=HttpUrl("https://www.rolandberger.com/en/Insights/Publications/Swiss-Economic-Outlook-2025.html")
        )
    ]

    # Create document data
    document_data = WebSearchDocumentData(
        query="current market trends Switzerland business economy 2024 analysis report",
        results=results,
        total_count=len(results)
    )

    # Create action document
    action_document = WebSearchActionDocument(
        documentName="test_search_results.json",
        documentData=document_data,
        mimeType="application/json"
    )

    # Create action result
    action_result = WebSearchActionResult(
        success=True,
        documents=[action_document]
    )

    return action_result


def test_csv_output_generation():
    """Test CSV output generation from web search results"""
    print("Testing CSV output generation...")

    # Create method instance (without service center for testing)
    method = MethodWeb(None)

    # Create mock search result
    mock_result = create_mock_web_search_result()

    # Convert to CSV
    csv_content = method._convert_search_results_to_csv(mock_result)

    print("Generated CSV content:")
    print(csv_content)
    print()

    # Verify CSV format
    lines = csv_content.strip().split('\n')
    assert len(lines) == 6, f"Expected 6 lines (header + 5 results), got {len(lines)}"

    # Check header
    assert lines[0] == "url;title", f"Expected header 'url;title', got '{lines[0]}'"

    # Check that URLs are present
    for i, line in enumerate(lines[1:], 1):
        parts = line.split(';')
        assert len(parts) == 2, f"Line {i} should have 2 parts separated by ';', got {len(parts)}"
        url, title = parts
        assert url.startswith('https://'), f"Line {i} URL should start with 'https://', got '{url}'"
        assert title, f"Line {i} should have a title, got empty title"

    print("✓ CSV output generation test passed!")
    return csv_content
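
# Reference sketch of the "url;title" format asserted above. The real conversion
# lives in MethodWeb._convert_search_results_to_csv; the names below are
# assumptions for illustration only.
def _sketch_results_to_csv(result_items) -> str:
    lines = ["url;title"]
    for item in result_items:
        # each item is expected to expose .url and .title, like WebSearchResultItem
        lines.append(f"{item.url};{item.title}")
    return "\n".join(lines)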


def test_csv_input_reading():
    """Test CSV input reading functionality"""
    print("Testing CSV input reading...")

    # Create method instance
    method = MethodWeb(None)

    # Test semicolon-separated CSV
    semicolon_csv = """url;title
https://www.fitchsolutions.com/bmi/region/switzerland;Switzerland Market Analysis :: Fitch Solutions
https://www.oecd.org/en/publications/2024/12/oecd-economic-outlook-volume-2024-issue-2_67bb8fac/full-report/switzerland_605fd31f.html;OECD Economic Outlook, Volume 2024 Issue 2: Switzerland
https://www.lloydsbanktrade.com/en/market-potential/switzerland/economical-context;The economic context of Switzerland - International Trade Portal"""

    urls_semicolon = method._read_csv_with_urls(semicolon_csv)
    print(f"Extracted {len(urls_semicolon)} URLs from semicolon CSV:")
    for url in urls_semicolon:
        print(f"  - {url}")

    assert len(urls_semicolon) == 3, f"Expected 3 URLs, got {len(urls_semicolon)}"
    assert all(url.startswith('https://') for url in urls_semicolon), "All URLs should start with https://"

    print("✓ Semicolon CSV reading test passed!")

    # Test comma-separated CSV
    comma_csv = """url,title
https://www.fitchsolutions.com/bmi/region/switzerland,Switzerland Market Analysis :: Fitch Solutions
https://www.oecd.org/en/publications/2024/12/oecd-economic-outlook-volume-2024-issue-2_67bb8fac/full-report/switzerland_605fd31f.html,OECD Economic Outlook, Volume 2024 Issue 2: Switzerland"""

    urls_comma = method._read_csv_with_urls(comma_csv)
    print(f"Extracted {len(urls_comma)} URLs from comma CSV:")
    for url in urls_comma:
        print(f"  - {url}")

    assert len(urls_comma) == 2, f"Expected 2 URLs, got {len(urls_comma)}"
    assert all(url.startswith('https://') for url in urls_comma), "All URLs should start with https://"

    print("✓ Comma CSV reading test passed!")

    # Test case-insensitive column names
    case_insensitive_csv = """URL;Title
https://example.com/test;Test Title"""

    urls_case = method._read_csv_with_urls(case_insensitive_csv)
    assert len(urls_case) == 1, f"Expected 1 URL, got {len(urls_case)}"
    assert urls_case[0] == "https://example.com/test", f"Expected 'https://example.com/test', got '{urls_case[0]}'"

    print("✓ Case-insensitive CSV reading test passed!")
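
# Minimal sketch of a delimiter- and case-tolerant URL reader matching the
# behaviour exercised above; an assumption, not the actual MethodWeb code.
import csv
import io

def _sketch_read_csv_urls(csv_text: str) -> list:
    lines = csv_text.splitlines()
    if not lines:
        return []
    # Pick the delimiter from the header row, then find the url column case-insensitively
    delimiter = ";" if ";" in lines[0] else ","
    reader = csv.DictReader(io.StringIO(csv_text), delimiter=delimiter)
    url_column = next((name for name in reader.fieldnames if name.lower() == "url"), None)
    if not url_column:
        return []
    return [row[url_column] for row in reader if row.get(url_column)]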


def test_integration():
    """Test the complete integration: generate CSV and then read it back"""
    print("Testing integration: generate CSV and read it back...")

    method = MethodWeb(None)

    # Generate CSV from mock data
    mock_result = create_mock_web_search_result()
    csv_content = method._convert_search_results_to_csv(mock_result)

    # Read URLs back from the generated CSV
    extracted_urls = method._read_csv_with_urls(csv_content)

    print(f"Generated CSV with {len(mock_result.documents[0].documentData.results)} results")
    print(f"Extracted {len(extracted_urls)} URLs from generated CSV")

    # Verify we got the same number of URLs
    assert len(extracted_urls) == len(mock_result.documents[0].documentData.results), \
        f"Expected {len(mock_result.documents[0].documentData.results)} URLs, got {len(extracted_urls)}"

    # Verify URLs match
    original_urls = [str(result.url) for result in mock_result.documents[0].documentData.results]
    for i, (original, extracted) in enumerate(zip(original_urls, extracted_urls)):
        assert original == extracted, f"URL {i} mismatch: expected '{original}', got '{extracted}'"

    print("✓ Integration test passed!")


if __name__ == "__main__":
    print("Running Web CSV Functionality Tests")
    print("=" * 50)

    try:
        # Test CSV output generation
        csv_content = test_csv_output_generation()
        print()

        # Test CSV input reading
        test_csv_input_reading()
        print()

        # Test integration
        test_integration()
        print()

        print("=" * 50)
        print("🎉 All tests passed successfully!")

    except Exception as e:
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)