gateway/modules/workflow/serviceCenter.py
2025-07-10 20:03:50 +02:00

824 lines
35 KiB
Python

import logging
import importlib
import pkgutil
import inspect
import os
from typing import Dict, Any, List, Optional
from modules.interfaces.interfaceAppModel import User, UserConnection
from modules.interfaces.interfaceChatModel import (
TaskStatus, ChatDocument, TaskItem, TaskAction, TaskResult,
ChatStat, ChatLog, ChatMessage, ChatWorkflow, DocumentExchange, ExtractedContent
)
from modules.interfaces.interfaceAiCalls import AiCalls
from modules.interfaces.interfaceChatObjects import getInterface as getChatObjects
from modules.interfaces.interfaceChatModel import ActionResult
from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
from modules.workflow.processorDocument import DocumentProcessor
from modules.workflow.methodBase import MethodBase
import uuid
import base64
import hashlib
logger = logging.getLogger(__name__)
class ServiceCenter:
    """Service center that provides access to all services and their functions.

    One instance is bound to a single user and a single chat workflow. It holds
    the chat, component and app interfaces, the AI call interface, a document
    processor and a catalog of dynamically discovered method actions.
    """
def __init__(self, currentUser: User, workflow: ChatWorkflow) -> None:
    """Bind the service center to a user and a workflow and create its collaborators.

    Args:
        currentUser: User passed to the chat, component and app interface factories
        workflow: Chat workflow this service center operates on
    """
    # Core services
    self.user = currentUser
    self.workflow = workflow
    self.tasks = workflow.tasks
    self.statusEnums = TaskStatus
    self.currentTask = None # Initialize current task as None
    # Initialize managers
    self.interfaceChat = getChatObjects(currentUser)
    self.interfaceComponent = getComponentObjects(currentUser)
    self.interfaceApp = getAppObjects(currentUser)
    self.interfaceAiCalls = AiCalls()
    # The processor receives this (partially built) service center, so it is
    # created only after the interfaces above exist
    self.documentProcessor = DocumentProcessor(self)
    # Initialize methods catalog
    self.methods = {}
    # Discover additional methods (fills self.methods, so it must run last)
    self._discoverMethods()
def _discoverMethods(self):
    """Dynamically discover all method classes and their actions in modules.methods package.

    Every non-package module whose name starts with ``method`` is imported.
    Each class found in it that derives from MethodBase (MethodBase itself
    excluded) is instantiated with this service center and registered in
    ``self.methods`` under the instance's ``name``. Its actions are the public
    coroutine functions of the class, except ``execute`` and
    ``validateParameters``.

    A module that fails to load is logged and skipped so the remaining modules
    are still discovered; a failure to load the package itself is logged and
    leaves ``self.methods`` as it was.
    """
    # Sentinel used by inspect for "no default" / "no annotation". It is compared
    # by identity so that a default value's own __eq__/__ne__ is never invoked.
    noValue = inspect.Parameter.empty

    def describeParameters(function):
        """Build the parameter catalog of one action from its signature."""
        described = {}
        for paramName, param in inspect.signature(function).parameters.items():
            if paramName in ('self', 'authData'):
                continue
            hasDefault = param.default is not noValue
            described[paramName] = {
                'type': param.annotation if param.annotation is not noValue else Any,
                'required': not hasDefault,
                # NOTE(review): this is the __doc__ of the default VALUE (for a
                # string default that is str's docstring), not a description of
                # the parameter - kept as is because catalog consumers see it.
                'description': getattr(param.default, '__doc__', None) if hasDefault else None,
                'default': param.default if hasDefault else None
            }
        return described

    def collectActions(methodInstance):
        """Collect the public coroutine functions of a method instance as actions."""
        methodType = type(methodInstance)
        actions = {}
        for actionName, function in inspect.getmembers(methodType, predicate=inspect.iscoroutinefunction):
            if actionName.startswith('_') or actionName in ('execute', 'validateParameters'):
                continue
            actions[actionName] = {
                'description': function.__doc__ or '',
                'parameters': describeParameters(function),
                # Bind the function to the instance so it can be awaited directly
                'method': function.__get__(methodInstance, methodType)
            }
        return actions

    try:
        methodsPackage = importlib.import_module('modules.methods')
        for _, name, isPkg in pkgutil.iter_modules(methodsPackage.__path__):
            if isPkg or not name.startswith('method'):
                continue
            try:
                module = importlib.import_module(f'modules.methods.{name}')
                for _, item in inspect.getmembers(module, inspect.isclass):
                    if not issubclass(item, MethodBase) or item is MethodBase:
                        continue
                    methodInstance = item(self)
                    actions = collectActions(methodInstance)
                    self.methods[methodInstance.name] = {
                        'instance': methodInstance,
                        'description': methodInstance.description,
                        'actions': actions
                    }
                    logger.info(f"Discovered method: {methodInstance.name} with {len(actions)} actions")
            except Exception as e:
                logger.error(f"Error loading method module {name}: {str(e)}", exc_info=True)
    except Exception as e:
        # Keep the traceback: without it a broken package import is hard to diagnose
        logger.error(f"Error discovering methods: {str(e)}", exc_info=True)
def detectContentTypeFromData(self, fileData: bytes, filename: str) -> str:
    """
    Detect content type from file data and filename.

    A known filename extension wins. Otherwise the leading bytes of the data
    are checked against well-known signatures. A missing filename or missing
    data is tolerated: the corresponding step is simply skipped.

    Args:
        fileData: Raw file data as bytes (may be None or empty)
        filename: Name of the file (may be None or empty)
    Returns:
        str: Detected MIME type, 'application/octet-stream' when nothing matches
    """
    fallback = 'application/octet-stream'
    # Map common extensions to MIME types
    extToMime = {
        '.txt': 'text/plain',
        '.md': 'text/markdown',
        '.csv': 'text/csv',
        '.json': 'application/json',
        '.xml': 'application/xml',
        '.js': 'application/javascript',
        '.py': 'application/x-python',
        '.svg': 'image/svg+xml',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.bmp': 'image/bmp',
        '.webp': 'image/webp',
        '.pdf': 'application/pdf',
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.doc': 'application/msword',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.xls': 'application/vnd.ms-excel',
        '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        '.ppt': 'application/vnd.ms-powerpoint',
        '.html': 'text/html',
        '.htm': 'text/html',
        '.css': 'text/css',
        '.zip': 'application/zip',
        '.rar': 'application/x-rar-compressed',
        '.7z': 'application/x-7z-compressed',
        '.tar': 'application/x-tar',
        '.gz': 'application/gzip'
    }
    try:
        # Check file extension first; no filename just means "no extension"
        ext = os.path.splitext(filename)[1].lower() if filename else ''
        if ext in extToMime:
            return extToMime[ext]
        # Nothing to sniff
        if not fileData:
            return fallback
        # Try to detect from content
        if fileData.startswith(b'%PDF'):
            return 'application/pdf'
        if fileData.startswith(b'PK\x03\x04'):
            # ZIP-based formats (docx, xlsx, pptx) are reported as plain zip
            return 'application/zip'
        if fileData.startswith(b'<'):
            # Markup: decoding with errors='ignore' cannot fail, so no guard is needed
            text = fileData.decode('utf-8', errors='ignore').lower()
            if '<svg' in text:
                return 'image/svg+xml'
            if '<html' in text:
                return 'text/html'
            return 'application/xml'
        if fileData.startswith(b'\x89PNG\r\n\x1a\n'):
            return 'image/png'
        if fileData.startswith(b'\xff\xd8\xff'):
            return 'image/jpeg'
        if fileData.startswith((b'GIF87a', b'GIF89a')):
            return 'image/gif'
        if fileData.startswith(b'BM'):
            return 'image/bmp'
        if fileData.startswith(b'RIFF') and fileData[8:12] == b'WEBP':
            return 'image/webp'
        return fallback
    except Exception as e:
        logger.error(f"Error detecting content type from data: {str(e)}")
        return fallback
def getMimeTypeFromExtension(self, extension: str) -> str:
    """
    Look up the MIME type that belongs to a file extension.

    Args:
        extension: File extension, with or without one leading dot, any case
    Returns:
        str: MIME type for the extension, 'application/octet-stream' if unknown
    """
    # Only a single leading dot is dropped
    bareExtension = extension[1:] if extension.startswith('.') else extension
    textTypes = {
        'txt': 'text/plain',
        'json': 'application/json',
        'xml': 'application/xml',
        'csv': 'text/csv',
        'html': 'text/html',
        'htm': 'text/html',
        'md': 'text/markdown',
        'py': 'text/x-python',
        'js': 'application/javascript',
        'css': 'text/css',
    }
    officeTypes = {
        'pdf': 'application/pdf',
        'doc': 'application/msword',
        'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'xls': 'application/vnd.ms-excel',
        'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'ppt': 'application/vnd.ms-powerpoint',
        'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    }
    imageTypes = {
        'svg': 'image/svg+xml',
        'jpg': 'image/jpeg',
        'jpeg': 'image/jpeg',
        'png': 'image/png',
        'gif': 'image/gif',
        'bmp': 'image/bmp',
        'webp': 'image/webp',
    }
    archiveTypes = {
        'zip': 'application/zip',
        'rar': 'application/x-rar-compressed',
        '7z': 'application/x-7z-compressed',
        'tar': 'application/x-tar',
        'gz': 'application/gzip',
    }
    knownTypes = {**textTypes, **officeTypes, **imageTypes, **archiveTypes}
    return knownTypes.get(bareExtension.lower(), 'application/octet-stream')
def getFileExtension(self, filename: str) -> str:
    """
    Return the lower-cased text after the last dot of a filename.

    Args:
        filename: Name of the file
    Returns:
        str: File extension (without dot); "txt" when the name contains no dot
    """
    _, dot, suffix = filename.rpartition('.')
    # No dot at all: default to text
    return suffix.lower() if dot else "txt"
# ===== Functions =====
async def extractContent(self, prompt: str, document: ChatDocument) -> ExtractedContent:
    """Extract content from document using prompt.

    Short alias for extractContentFromDocument. It is a coroutine function, so
    it must be awaited; awaiting it yields the extracted content, which is what
    the return annotation promises.

    Args:
        prompt: Prompt forwarded to the extraction
        document: Document to extract from
    Returns:
        ExtractedContent: Result of extractContentFromDocument
    """
    return await self.extractContentFromDocument(prompt, document)
def getMethodsCatalog(self) -> Dict[str, Any]:
    """Return the discovered methods as plain data: description plus per-action description and parameters."""

    def publicActions(actions):
        # Only the descriptive fields are exposed, not the bound callables
        return {
            actionName: {
                'description': spec['description'],
                'parameters': spec['parameters']
            }
            for actionName, spec in actions.items()
        }

    return {
        methodName: {
            'description': entry['description'],
            'actions': publicActions(entry['actions'])
        }
        for methodName, entry in self.methods.items()
    }
def getMethodsList(self) -> List[str]:
    """Return the signature of every discovered action, as produced by each method's getActionSignature.

    Actions for which getActionSignature returns a falsy value are left out.
    """
    signatures = []
    for entry in self.methods.values():
        instance = entry['instance']
        described = (instance.getActionSignature(actionName) for actionName in entry['actions'])
        signatures.extend(filter(None, described))
    return signatures
def getDocumentReferenceList(self) -> Dict[str, List[DocumentExchange]]:
    """Collect the document exchanges of the workflow messages, split into "chat" and "history".

    Messages are walked from newest to oldest and the walk stops at the first
    message whose status is "first". Exchanges of messages with status "first"
    or "step" go to "chat", all others to "history". Both lists are sorted by
    documentsLabel in descending order.
    """
    chatRound = []
    historic = []
    for message in reversed(self.workflow.messages):
        if message.documents:
            exchange = None
            if message.actionId and message.documentsLabel:
                # Action context: one docList reference stands for all documents
                listReference = self.getDocumentReferenceFromMessage(message)
                if listReference:
                    exchange = DocumentExchange(
                        documentsLabel=message.documentsLabel,
                        documents=[listReference]
                    )
            else:
                # Regular message: one docItem reference per document
                itemReferences = [
                    self.getDocumentReferenceFromChatDocument(doc)
                    for doc in message.documents
                ]
                if itemReferences:
                    exchange = DocumentExchange(
                        documentsLabel=f"{message.id}:documents",
                        documents=itemReferences
                    )
            if exchange is not None:
                if message.status == "first" or message.status == "step":
                    chatRound.append(exchange)
                else:
                    historic.append(exchange)
        # The "first" message marks the start of the current round
        if message.status == "first":
            break
    byLabel = lambda exchange: exchange.documentsLabel
    return {
        "chat": sorted(chatRound, key=byLabel, reverse=True),
        "history": sorted(historic, key=byLabel, reverse=True)
    }
def getDocumentReferenceFromChatDocument(self, document: ChatDocument) -> str:
    """Build the ``docItem:<id>:<filename>`` reference of a single document."""
    documentId, documentName = document.id, document.filename
    return f"docItem:{documentId}:{documentName}"
def getDocumentReferenceFromMessage(self, message: ChatMessage) -> str:
    """Build the ``docList:<message id>:<label>`` reference of a message's documents.

    A documentsLabel that already starts with ``docList:`` is returned unchanged.
    """
    label = message.documentsLabel
    alreadyQualified = label.startswith("docList:")
    return label if alreadyQualified else f"docList:{message.id}:{label}"
def resolveDocumentReference(self, intent_label: str) -> str:
    """Turn an intent label (e.g., 'task1_extract_results') into a docList reference.

    The first workflow message that carries documents under exactly this label
    supplies the message ID. Returns None when no such message exists.
    """
    owners = (
        message for message in self.workflow.messages
        if message.documentsLabel == intent_label and message.documents
    )
    owner = next(owners, None)
    if owner is None:
        return None
    return f"docList:{owner.id}:{intent_label}"
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
    """Get ChatDocuments from a list of document references.

    Supported reference formats:
        docItem:<document id>:<filename>  - a single document, found by its id
        docList:<message id>:<label>      - all documents of one message

    For a docList reference the message whose id AND documentsLabel both match
    is preferred. If no message has that id (for example because the id part is
    only a placeholder), the first message carrying documents under the label
    is used. References with fewer than three parts and references that match
    nothing are skipped.

    Args:
        documentList: Document references to resolve
    Returns:
        List[ChatDocument]: Matching documents in reference order; an empty
        list if an error occurs
    """
    try:
        # Only messages that actually carry documents can satisfy a reference
        carriers = [message for message in self.workflow.messages if message.documents]
        collected = []
        for reference in documentList:
            # The label/filename part may itself contain ':', so split at most twice
            parts = reference.split(':', 2)
            if len(parts) < 3:
                continue
            refType, refId, refLabel = parts
            if refType == "docItem":
                document = next(
                    (doc for message in carriers for doc in message.documents if doc.id == refId),
                    None
                )
                if document is not None:
                    collected.append(document)
            elif refType == "docList":
                labelled = [message for message in carriers if message.documentsLabel == refLabel]
                owner = next((message for message in labelled if str(message.id) == refId), None)
                if owner is None and labelled:
                    # Unknown message id: fall back to the label alone
                    owner = labelled[0]
                if owner is not None:
                    collected.extend(owner.documents)
        return collected
    except Exception as e:
        logger.error(f"Error getting documents from document list: {str(e)}")
        return []
def getConnectionReferenceList(self) -> List[str]:
    """Return the connection references of all connections of the current user, sorted ascending."""
    # Connections are fetched through the AppObjects interface
    userConnections = self.interfaceApp.getUserConnections(self.user.id)
    return sorted(
        self.getConnectionReferenceFromUserConnection(connection)
        for connection in userConnections
    )
def getConnectionReferenceFromUserConnection(self, connection: UserConnection) -> str:
    """Build the ``connection:<authority>:<external username>:<id>`` reference of a connection."""
    authority = connection.authority
    username = connection.externalUsername
    connectionId = connection.id
    return f"connection:{authority}:{username}:{connectionId}"
def getUserConnectionFromConnectionReference(self, connectionReference: str) -> Optional[UserConnection]:
    """Get UserConnection from reference string.

    The reference format is ``connection:{authority}:{username}:{id}``. The
    prefix and authority are taken from the left and the id from the right, so
    a username that itself contains ':' still round-trips.

    Args:
        connectionReference: Reference as built by getConnectionReferenceFromUserConnection
    Returns:
        Optional[UserConnection]: The current user's connection whose id,
        authority and external username all match; None if the reference is
        malformed, nothing matches, or an error occurs
    """
    try:
        prefix, separator, remainder = connectionReference.partition(':')
        if prefix != "connection" or not separator:
            return None
        authority, separator, remainder = remainder.partition(':')
        if not separator:
            return None
        # Everything between authority and the last ':' is the username
        username, separator, connectionId = remainder.rpartition(':')
        if not separator:
            return None
        # Get user connections through AppObjects interface
        for connection in self.interfaceApp.getUserConnections(self.user.id):
            if (str(connection.id) == connectionId
                    and connection.authority == authority
                    and connection.externalUsername == username):
                return connection
        return None
    except Exception as e:
        logger.error(f"Error parsing connection reference: {str(e)}")
        return None
async def summarizeChat(self, messages: List[ChatMessage]) -> str:
    """
    Ask the AI for a summary of the current chat round.

    The round consists of the trailing messages, going back to and including
    the most recent message with status="first" (or all messages if there is
    none).

    Args:
        messages: List of chat messages to summarize
    Returns:
        str: Summary returned by the AI call, or an error text if anything fails
    """
    try:
        # Walk backwards until the message that opened the round
        newestFirst = []
        for msg in reversed(messages):
            newestFirst.append(msg)
            if msg.status == "first":
                break
        chronological = newestFirst[::-1]
        promptLines = [
            "You are an AI assistant providing a summary of a chat conversation.",
            f"Please respond in '{self.user.language}' language.",
            "Chat History:",
            "\n".join(f"- {msg.message}" for msg in chronological),
            "Instructions:",
            "1. Summarize the conversation's key points and outcomes",
            "2. Be concise but informative",
            "3. Use a professional but friendly tone",
            "4. Focus on important decisions and next steps if any",
            "Please provide a comprehensive summary of this conversation.",
        ]
        # Get summary using AI
        return await self.callAiTextBasic("\n".join(promptLines))
    except Exception as e:
        logger.error(f"Error summarizing chat: {str(e)}")
        return f"Error summarizing chat: {str(e)}"
async def summarizeMessage(self, message: ChatMessage) -> str:
    """
    Ask the AI for a summary of one chat message.

    Args:
        message: Chat message to summarize
    Returns:
        str: Summary returned by the AI call, or an error text if anything fails
    """
    try:
        promptLines = [
            "You are an AI assistant providing a summary of a chat message.",
            f"Please respond in '{self.user.language}' language.",
            "Message:",
            f"{message.message}",
            "Instructions:",
            "1. Summarize the key points of this message",
            "2. Be concise but informative",
            "3. Use a professional but friendly tone",
            "4. Focus on important information and any actions needed",
            "Please provide a clear summary of this message.",
        ]
        # Get summary using AI
        return await self.callAiTextBasic("\n".join(promptLines))
    except Exception as e:
        logger.error(f"Error summarizing message: {str(e)}")
        return f"Error summarizing message: {str(e)}"
async def callAiTextBasic(self, prompt: str, context: str = None) -> str:
    """Run the basic text AI call and record its traffic under "aicall.openai.text".

    Args:
        prompt: Prompt text
        context: Optional context; counted as sent bytes only when truthy
    Returns:
        str: The AI response
    """
    outgoing = [prompt, context] if context else [prompt]
    sentBytes = sum(self.calculateObjectSize(part) for part in outgoing)
    answer = await self.interfaceAiCalls.callAiTextBasic(prompt, context)
    self.updateWorkflowStats(
        eventLabel="aicall.openai.text",
        bytesSent=sentBytes,
        bytesReceived=self.calculateObjectSize(answer)
    )
    return answer
async def callAiTextAdvanced(self, prompt: str, context: str = None) -> str:
    """Run the advanced text AI call and record its traffic under "aicall.anthropic.text".

    Args:
        prompt: Prompt text
        context: Optional context; counted as sent bytes only when truthy
    Returns:
        str: The AI response
    """
    outgoing = [prompt, context] if context else [prompt]
    sentBytes = sum(self.calculateObjectSize(part) for part in outgoing)
    answer = await self.interfaceAiCalls.callAiTextAdvanced(prompt, context)
    self.updateWorkflowStats(
        eventLabel="aicall.anthropic.text",
        bytesSent=sentBytes,
        bytesReceived=self.calculateObjectSize(answer)
    )
    return answer
async def callAiImageBasic(self, prompt: str, imageData: str, mimeType: str) -> str:
    """Run the basic image AI call and record its traffic under "aicall.openai.image".

    Args:
        prompt: Prompt text
        imageData: Image payload; counted together with the prompt as sent bytes
        mimeType: MIME type forwarded to the AI call
    Returns:
        str: The AI response
    """
    sentBytes = sum(self.calculateObjectSize(part) for part in (prompt, imageData))
    answer = await self.interfaceAiCalls.callAiImageBasic(prompt, imageData, mimeType)
    self.updateWorkflowStats(
        eventLabel="aicall.openai.image",
        bytesSent=sentBytes,
        bytesReceived=self.calculateObjectSize(answer)
    )
    return answer
async def callAiImageAdvanced(self, prompt: str, imageData: str, mimeType: str) -> str:
    """Run the advanced image AI call and record its traffic under "aicall.anthropic.image".

    Args:
        prompt: Prompt text
        imageData: Image payload; counted together with the prompt as sent bytes
        mimeType: MIME type forwarded to the AI call
    Returns:
        str: The AI response
    """
    sentBytes = sum(self.calculateObjectSize(part) for part in (prompt, imageData))
    answer = await self.interfaceAiCalls.callAiImageAdvanced(prompt, imageData, mimeType)
    self.updateWorkflowStats(
        eventLabel="aicall.anthropic.image",
        bytesSent=sentBytes,
        bytesReceived=self.calculateObjectSize(answer)
    )
    return answer
def getFileInfo(self, fileId: str) -> Dict[str, Any]:
    """Return the metadata of a stored file as a dict, or None when the component interface has no such file."""
    record = self.interfaceComponent.getFile(fileId)
    if not record:
        return None
    # Result key -> attribute on the file record
    fieldMap = (
        ("id", "id"),
        ("filename", "filename"),
        ("size", "fileSize"),
        ("mimeType", "mimeType"),
        ("fileHash", "fileHash"),
        ("creationDate", "creationDate"),
    )
    return {key: getattr(record, attribute) for key, attribute in fieldMap}
def getFileData(self, fileId: str) -> bytes:
    """Fetch the stored data of a file from the component interface."""
    component = self.interfaceComponent
    return component.getFileData(fileId)
async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
    """Extract content from ChatDocument using prompt.

    The file data is taken from ``document.data`` when present and non-empty
    (strings are encoded as UTF-8). Otherwise it is loaded through
    ``getFileData`` using ``document.fileId``. All optional document attributes
    (data, fileId, filename, mimeType) are read defensively, so a document that
    lacks one of them falls back instead of failing.

    Args:
        prompt: Prompt forwarded to the document processor
        document: Document to extract from
    Returns:
        ExtractedContent: Processor result with objectId set to the document id
        and objectType set to "ChatDocument"
    Raises:
        ValueError: If the document has neither data nor a fileId
        Exception: Any processing error is logged and re-raised
    """
    try:
        inlineData = getattr(document, 'data', None)
        if inlineData:
            fileData = inlineData.encode('utf-8') if isinstance(inlineData, str) else inlineData
        elif getattr(document, 'fileId', None):
            # No inline data: load the stored file instead
            fileData = self.getFileData(document.fileId)
        else:
            logger.error(f"No file data available in document: {document}")
            raise ValueError("No file data available in document")
        # Get filename and mime type from document
        filename = getattr(document, 'filename', "document")
        mimeType = getattr(document, 'mimeType', "application/octet-stream")
        # Process with document processor directly
        extractedContent = await self.documentProcessor.processFileData(
            fileData=fileData,
            filename=filename,
            mimeType=mimeType,
            base64Encoded=False,
            prompt=prompt,
            documentId=document.id
        )
        # Update objectId to match document ID
        extractedContent.objectId = document.id
        extractedContent.objectType = "ChatDocument"
        return extractedContent
    except Exception as e:
        logger.error(f"Error extracting from document: {str(e)}")
        raise
async def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, documentId: str = None) -> ExtractedContent:
    """Hand raw file data to the document processor and return what it extracts.

    All arguments are forwarded unchanged as keyword arguments to
    ``documentProcessor.processFileData``. Errors are logged and re-raised.
    """
    request = {
        "prompt": prompt,
        "fileData": fileData,
        "filename": filename,
        "mimeType": mimeType,
        "base64Encoded": base64Encoded,
        "documentId": documentId,
    }
    try:
        return await self.documentProcessor.processFileData(**request)
    except Exception as e:
        logger.error(f"Error extracting from file data: {str(e)}")
        raise
def createFile(self, fileName: str, mimeType: str, content: str, base64encoded: bool = False) -> str:
    """Create new file and return its ID.

    Args:
        fileName: Name of the file to create
        mimeType: MIME type of the file
        content: File content. With base64encoded=True it is base64 text that
            gets decoded; otherwise a string is stored as UTF-8 and content
            that is already bytes is stored as is
        base64encoded: Whether content is base64-encoded
    Returns:
        str: ID of the created file
    """
    # Convert content to bytes based on base64 flag
    if base64encoded:
        content_bytes = base64.b64decode(content)
    elif isinstance(content, (bytes, bytearray)):
        # Already binary: encoding it again would fail
        content_bytes = bytes(content)
    else:
        content_bytes = content.encode('utf-8')
    # Create the file (hash and size are computed inside interfaceComponent)
    file_item = self.interfaceComponent.createFile(
        name=fileName,
        mimeType=mimeType,
        content=content_bytes
    )
    # Then store the file data
    self.interfaceComponent.createFileData(file_item.id, content_bytes)
    return file_item.id
def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True) -> ChatDocument:
    """Store content as a new file and return a ChatDocument that points to it.

    The document gets a fresh UUID as id and takes its fileSize from the
    stored file's metadata. Note that base64encoded defaults to True here.
    """
    storedFileId = self.createFile(fileName, mimeType, content, base64encoded)
    # Metadata of the file that was just written
    storedFile = self.interfaceComponent.getFile(storedFileId)
    documentFields = {
        "id": str(uuid.uuid4()),
        "fileId": storedFileId,
        "filename": fileName,
        "fileSize": storedFile.fileSize,
        "mimeType": mimeType,
    }
    return ChatDocument(**documentFields)
def updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None:
    """
    Report incremental traffic of the current workflow to the chat interface.

    Nothing happens when no workflow is set. Errors are logged, never raised.

    Args:
        eventLabel: Label for the event (e.g., "userinput", "taskplan", "action", "aicall<ainame>"); only logged
        bytesSent: Bytes sent (incremental)
        bytesReceived: Bytes received (incremental)
        tokenCount: Token count (incremental, default 0); only logged, not forwarded
    """
    try:
        activeWorkflow = getattr(self, 'workflow', None)
        if not activeWorkflow:
            return
        # Update the running workflow stats
        self.interfaceChat.updateWorkflowStats(
            activeWorkflow.id,
            bytesSent=bytesSent,
            bytesReceived=bytesReceived
        )
        # Log the stats event
        logger.debug(f"Workflow stats updated - Event: {eventLabel}, Sent: {bytesSent}, Received: {bytesReceived}, Tokens: {tokenCount}")
    except Exception as e:
        logger.error(f"Error updating workflow stats: {str(e)}")
def calculateObjectSize(self, obj: Any) -> int:
    """
    Measure an object as the UTF-8 byte length of its JSON serialization.

    Values JSON cannot encode are serialized via str(). None counts as 0, and
    0 is also returned (after logging) when serialization fails.

    Args:
        obj: Object to calculate size for
    Returns:
        int: Size in bytes
    """
    if obj is None:
        return 0
    try:
        import json
        serialized = json.dumps(obj, ensure_ascii=False, default=str)
        return len(serialized.encode('utf-8'))
    except Exception as e:
        logger.error(f"Error calculating object size: {str(e)}")
        return 0
def calculateUserInputSize(self, userInput: Any) -> int:
    """
    Add up the size of a user input's prompt and of the files it lists.

    The prompt is measured with calculateObjectSize; each id in listFileId
    contributes the 'size' of its file info (files without info contribute
    nothing). Returns 0 (after logging) if anything fails.

    Args:
        userInput: User input object; 'prompt' and 'listFileId' are both optional
    Returns:
        int: Total size in bytes
    """
    try:
        promptBytes = self.calculateObjectSize(userInput.prompt) if hasattr(userInput, 'prompt') else 0
        fileIds = getattr(userInput, 'listFileId', None) or []
        fileInfos = (self.getFileInfo(fileId) for fileId in fileIds)
        return promptBytes + sum(info.get('size', 0) for info in fileInfos if info)
    except Exception as e:
        logger.error(f"Error calculating user input size: {str(e)}")
        return 0
async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
    """Look up a discovered action and await it with the given parameters.

    Raises:
        ValueError: If the method or the action is not in the catalog
        Exception: Any error of the action itself; every error is logged and re-raised
    """
    try:
        catalog = self.methods
        if methodName not in catalog:
            raise ValueError(f"Unknown method: {methodName}")
        availableActions = catalog[methodName]['actions']
        if actionName not in availableActions:
            raise ValueError(f"Unknown action: {actionName} for method {methodName}")
        handler = availableActions[actionName]['method']
        # Execute the action
        return await handler(parameters)
    except Exception as e:
        logger.error(f"Error executing method {methodName}.{actionName}: {str(e)}")
        raise
# Shared service center instance; created lazily by initializeServiceCenter
serviceObject = None


def initializeServiceCenter(currentUser: User, workflow: ChatWorkflow) -> ServiceCenter:
    """Initialize the service center singleton.

    The shared instance is reused only while it is bound to the same user and
    the same workflow. A call with a different user or workflow builds a new
    instance, so a caller never receives a service center that operates on
    someone else's user or workflow.

    Args:
        currentUser: User the service center must be bound to
        workflow: Workflow the service center must be bound to
    Returns:
        ServiceCenter: The shared instance for this user and workflow
    """
    global serviceObject

    def sameEntity(bound, requested):
        # Compare by id when both sides have one; otherwise by identity
        boundId = getattr(bound, 'id', None)
        requestedId = getattr(requested, 'id', None)
        if boundId is None or requestedId is None:
            return bound is requested
        return boundId == requestedId

    if (serviceObject is None
            or not sameEntity(serviceObject.user, currentUser)
            or not sameEntity(serviceObject.workflow, workflow)):
        serviceObject = ServiceCenter(currentUser, workflow)
    return serviceObject