gateway/modules/services/serviceWorkflow/mainServiceWorkflow.py
2025-09-25 16:59:44 +02:00

581 lines
31 KiB
Python

import logging
import uuid
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelUam import User, UserConnection
from modules.datamodels.datamodelChat import ChatDocument, ChatMessage
from modules.datamodels.datamodelChat import ExtractedContent
from modules.services.serviceDocument.mainServiceDocumentExtraction import DocumentExtractionService
from modules.services.serviceDocument.documentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
from modules.shared.timezoneUtils import get_utc_timestamp
logger = logging.getLogger(__name__)
class WorkflowService:
"""Service class containing methods for document processing, chat operations, and workflow management"""
def __init__(self, serviceCenter):
self.serviceCenter = serviceCenter
self.user = serviceCenter.user
self.workflow = serviceCenter.workflow
self.interfaceChat = serviceCenter.interfaceChat
self.interfaceComponent = serviceCenter.interfaceComponent
self.interfaceApp = serviceCenter.interfaceApp
async def summarizeChat(self, messages: List[ChatMessage]) -> str:
"""
Summarize chat messages from last to first message with status="first"
Args:
messages: List of chat messages to summarize
Returns:
str: Summary of the chat in user's language
"""
try:
# Get messages from last to first, stopping at first message with status="first"
relevantMessages = []
for msg in reversed(messages):
relevantMessages.append(msg)
if msg.status == "first":
break
# Create prompt for AI
prompt = f"""You are an AI assistant providing a summary of a chat conversation.
Please respond in '{self.user.language}' language.
Chat History:
{chr(10).join(f"- {msg.message}" for msg in reversed(relevantMessages))}
Instructions:
1. Summarize the conversation's key points and outcomes
2. Be concise but informative
3. Use a professional but friendly tone
4. Focus on important decisions and next steps if any
Please provide a comprehensive summary of this conversation."""
# Get summary using AI service directly (avoiding circular dependency)
from modules.services.serviceAi.mainServiceAi import AiService
ai_service = AiService(self)
return await ai_service.callAi(
prompt=prompt,
documents=None,
options={
"process_type": "text",
"operation_type": "generate_content",
"priority": "speed",
"compress_prompt": True,
"compress_documents": False,
"max_cost": 0.01
}
)
except Exception as e:
logger.error(f"Error summarizing chat: {str(e)}")
return f"Error summarizing chat: {str(e)}"
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
"""Get ChatDocuments from a list of document references using all three formats."""
try:
all_documents = []
for doc_ref in documentList:
if doc_ref.startswith("docItem:"):
# docItem:<id>:<filename> - extract ID and find document
parts = doc_ref.split(':')
if len(parts) >= 2:
doc_id = parts[1]
# Find the document by ID
for message in self.workflow.messages:
if message.documents:
for doc in message.documents:
if doc.id == doc_id:
doc_name = getattr(doc, 'fileName', 'unknown')
logger.debug(f"Found docItem reference {doc_ref}: {doc_name}")
all_documents.append(doc)
break
elif doc_ref.startswith("docList:"):
# docList:<messageId>:<label> or docList:<label> - extract message ID and find document list
parts = doc_ref.split(':')
if len(parts) >= 3:
# Format: docList:<messageId>:<label>
message_id = parts[1]
label = parts[2]
# Find the message by ID and get all its documents
for message in self.workflow.messages:
if str(message.id) == message_id:
if message.documents:
doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
logger.debug(f"Found docList reference {doc_ref}: {len(message.documents)} documents - {doc_names}")
all_documents.extend(message.documents)
else:
logger.debug(f"Found docList reference {doc_ref} but message has no documents")
break
elif len(parts) >= 2:
# Format: docList:<label> - find message by documentsLabel
label = parts[1]
logger.debug(f"Looking for message with documentsLabel: {label}")
# Find messages with matching documentsLabel
matching_messages = []
for message in self.workflow.messages:
# Check both attribute and raw data for documentsLabel
msg_label = getattr(message, 'documentsLabel', None)
if msg_label == label:
matching_messages.append(message)
logger.debug(f"Found message {message.id} with matching documentsLabel: {msg_label}")
else:
# Debug: show what labels we're comparing
logger.debug(f"Message {message.id} has documentsLabel: '{msg_label}' (looking for: '{label}')")
if matching_messages:
# Use the newest message (highest publishedAt)
matching_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
newest_message = matching_messages[0]
if newest_message.documents:
doc_names = [doc.fileName for doc in newest_message.documents if hasattr(doc, 'fileName')]
logger.debug(f"Found docList reference {doc_ref}: {len(newest_message.documents)} documents - {doc_names}")
all_documents.extend(newest_message.documents)
else:
logger.debug(f"Found docList reference {doc_ref} but message has no documents")
else:
logger.debug(f"No messages found with documentsLabel: {label}")
else:
# Direct label reference (round1_task2_action3_contextinfo)
# Search for messages with matching documentsLabel to find the actual documents
if doc_ref.startswith("round"):
# Parse round/task/action to find the corresponding document list
label_parts = doc_ref.split('_', 3)
if len(label_parts) >= 4:
round_num = int(label_parts[0].replace('round', ''))
task_num = int(label_parts[1].replace('task', ''))
action_num = int(label_parts[2].replace('action', ''))
context_info = label_parts[3]
logger.debug(f"Resolving round reference: round{round_num}_task{task_num}_action{action_num}_{context_info}")
logger.debug(f"Looking for messages with documentsLabel matching: {doc_ref}")
# Find messages with matching documentsLabel (this is the correct way!)
# In case of retries, we want the NEWEST message (most recent publishedAt)
matching_messages = []
for message in self.workflow.messages:
msg_documents_label = getattr(message, 'documentsLabel', '')
# Check if this message's documentsLabel matches our reference
if msg_documents_label == doc_ref:
# Found a matching message, collect it for comparison
matching_messages.append(message)
logger.debug(f"Found message {message.id} with matching documentsLabel: {msg_documents_label}")
# If we found matching messages, take the newest one (highest publishedAt)
if matching_messages:
# Sort by publishedAt descending (newest first)
matching_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
newest_message = matching_messages[0]
logger.debug(f"Found {len(matching_messages)} matching messages, using newest: {newest_message.id} (publishedAt: {getattr(newest_message, 'publishedAt', 'unknown')})")
logger.debug(f"Newest message has {len(newest_message.documents) if newest_message.documents else 0} documents")
if newest_message.documents:
doc_names = [doc.fileName for doc in newest_message.documents if hasattr(doc, 'fileName')]
logger.debug(f"Added {len(newest_message.documents)} documents from newest message {newest_message.id}: {doc_names}")
all_documents.extend(newest_message.documents)
else:
logger.debug(f"No documents found in newest message {newest_message.id}")
else:
logger.debug(f"No messages found with documentsLabel: {doc_ref}")
# Fallback: also check if any message has this documentsLabel as a prefix
logger.debug(f"Trying fallback search for messages with documentsLabel containing: {doc_ref}")
fallback_messages = []
for message in self.workflow.messages:
msg_documents_label = getattr(message, 'documentsLabel', '')
if msg_documents_label and msg_documents_label.startswith(doc_ref):
fallback_messages.append(message)
logger.debug(f"Found fallback message {message.id} with documentsLabel: {msg_documents_label}")
if fallback_messages:
# Sort by publishedAt descending (newest first)
fallback_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
newest_fallback = fallback_messages[0]
logger.debug(f"Using fallback message {newest_fallback.id} with documentsLabel: {getattr(newest_fallback, 'documentsLabel', 'unknown')}")
if newest_fallback.documents:
doc_names = [doc.fileName for doc in newest_fallback.documents if hasattr(doc, 'fileName')]
logger.debug(f"Added {len(newest_fallback.documents)} documents from fallback message {newest_fallback.id}: {doc_names}")
all_documents.extend(newest_fallback.documents)
else:
logger.debug(f"No documents found in fallback message {newest_fallback.id}")
else:
logger.debug(f"No fallback messages found either")
logger.debug(f"Resolved {len(all_documents)} documents from document list: {documentList}")
return all_documents
except Exception as e:
logger.error(f"Error getting documents from document list: {str(e)}")
return []
def getConnectionReferenceFromUserConnection(self, connection: UserConnection) -> str:
"""Get connection reference from UserConnection with enhanced state information"""
# Get token information to check if it's expired
token = None
token_status = "unknown"
try:
# Get a fresh token via TokenManager convenience method
logger.debug(f"Getting fresh token for connection {connection.id}")
from modules.security.tokenManager import TokenManager
token = TokenManager().getFreshToken(self.interfaceApp, connection.id)
if token:
if hasattr(token, 'expiresAt') and token.expiresAt:
current_time = get_utc_timestamp()
logger.debug(f"getConnectionReferenceFromUserConnection: Current time: {current_time}")
logger.debug(f"getConnectionReferenceFromUserConnection: Token expires at: {token.expiresAt}")
if current_time > token.expiresAt:
token_status = "expired"
else:
# Check if this token was recently refreshed (within last 5 minutes)
time_since_creation = current_time - token.createdAt if hasattr(token, 'createdAt') else 0
if time_since_creation < 300: # 5 minutes
token_status = "valid (refreshed)"
else:
token_status = "valid"
else:
token_status = "no_expiration"
else:
token_status = "no_token"
except Exception as e:
token_status = f"error: {str(e)}"
# Build enhanced reference with state information
base_ref = f"connection:{connection.authority.value}:{connection.externalUsername}:{connection.id}"
state_info = f" [status:{connection.status.value}, token:{token_status}]"
logger.debug(f"getConnectionReferenceFromUserConnection: Built reference: {base_ref + state_info}")
return base_ref + state_info
def getUserConnectionByExternalUsername(self, authority: str, externalUsername: str) -> Optional[UserConnection]:
"""Fetch the user's connection by authority and external username."""
try:
if not authority or not externalUsername:
return None
user_connections = self.interfaceApp.getUserConnections(self.user.id)
for connection in user_connections:
# Normalize authority for comparison (enum vs string)
connection_authority = connection.authority.value if hasattr(connection.authority, 'value') else str(connection.authority)
if connection_authority.lower() == authority.lower() and connection.externalUsername == externalUsername:
return connection
return None
except Exception as e:
logger.error(f"Error getting connection by external username: {str(e)}")
return None
def getUserConnectionFromConnectionReference(self, connectionReference: str) -> Optional[UserConnection]:
"""Get UserConnection from reference string (handles both old and enhanced formats)"""
try:
# Parse reference format: connection:{authority}:{username}:{id} [status:..., token:...]
# Remove state information if present
base_reference = connectionReference.split(' [')[0]
parts = base_reference.split(':')
if len(parts) != 4 or parts[0] != "connection":
return None
authority = parts[1]
username = parts[2]
conn_id = parts[3]
# Get user connections through AppObjects interface
user_connections = self.interfaceApp.getUserConnections(self.user.id)
# Find matching connection
for conn in user_connections:
if str(conn.id) == conn_id and conn.authority.value == authority and conn.externalUsername == username:
return conn
return None
except Exception as e:
logger.error(f"Error parsing connection reference: {str(e)}")
return None
def getFileInfo(self, fileId: str) -> Dict[str, Any]:
"""Get file information"""
file_item = self.interfaceComponent.getFile(fileId)
if file_item:
return {
"id": file_item.id,
"fileName": file_item.fileName,
"size": file_item.fileSize,
"mimeType": file_item.mimeType,
"fileHash": file_item.fileHash,
"creationDate": file_item.creationDate
}
return None
def getFileData(self, fileId: str) -> bytes:
"""Get file data by ID"""
return self.interfaceComponent.getFileData(fileId)
async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
"""Extract content from ChatDocument using prompt"""
try:
# ChatDocument is just a reference, so we need to get file data using fileId
if not hasattr(document, 'fileId') or not document.fileId:
logger.error(f"Document {document.id} has no fileId")
raise ValueError("Document has no fileId")
# Get file data from service center using document's fileId
fileData = self.getFileData(document.fileId)
if not fileData:
logger.error(f"No file data found for fileId: {document.fileId}")
raise ValueError("No file data found for document")
# Get fileName and mime type from document properties
try:
fileName = document.fileName
mimeType = document.mimeType
except Exception as e:
# Try to diagnose and recover the issue
diagnosis = self._diagnoseDocumentAccess(document)
logger.error(f"Critical error: Cannot access document properties for document {document.id}. Diagnosis: {diagnosis}")
# Attempt recovery
if self._recoverDocumentAccess(document):
try:
fileName = document.fileName
mimeType = document.mimeType
logger.info(f"Document access recovered for {document.id} - proceeding with AI extraction")
except Exception as recovery_error:
logger.error(f"Recovery failed for document {document.id}: {str(recovery_error)}")
raise RuntimeError(f"Document {document.id} properties are permanently inaccessible after recovery attempt - cannot proceed with AI extraction: {str(recovery_error)}")
else:
# Recovery failed - don't continue with invalid data
raise RuntimeError(f"Document {document.id} properties are inaccessible and recovery failed. Diagnosis: {diagnosis}")
# Process with DocumentExtractionService directly (no circular dependency)
from modules.services.serviceDocument.mainServiceDocumentExtraction import DocumentExtractionService
docService = DocumentExtractionService(None) # Pass None to avoid circular dependency
content_items = await docService.processFileData(
fileData=fileData,
fileName=fileName,
mimeType=mimeType,
base64Encoded=False,
prompt=prompt,
enableAI=True
)
# Convert ContentItem list to ExtractedContent
contents = []
for item in content_items:
contents.append({
'label': item.label,
'data': item.data,
'metadata': {
'mimeType': item.metadata.mimeType if hasattr(item.metadata, 'mimeType') else mimeType,
'size': item.metadata.size if hasattr(item.metadata, 'size') else len(fileData),
'base64Encoded': item.metadata.base64Encoded if hasattr(item.metadata, 'base64Encoded') else False
}
})
extractedContent = ExtractedContent(
id=document.id,
contents=contents
)
# Note: ExtractedContent model only has 'id' and 'contents' fields
# No need to set objectId or objectType as they don't exist in the model
return extractedContent
except Exception as e:
logger.error(f"Error extracting from document: {str(e)}")
raise
def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
"""
Diagnose document access issues and provide recovery information.
This method helps identify why document properties are inaccessible.
"""
try:
diagnosis = {
'document_id': document.id,
'file_id': document.fileId,
'has_component_interface': document._componentInterface is not None,
'component_interface_type': type(document._componentInterface).__name__ if document._componentInterface else None,
'file_exists': False,
'file_info': None,
'error_details': None
}
# Check if component interface is set
if not document._componentInterface:
diagnosis['error_details'] = "Component interface not set - document cannot access file system"
return diagnosis
# Try to access the file directly
try:
file_info = self.interfaceComponent.getFile(document.fileId)
if file_info:
diagnosis['file_exists'] = True
diagnosis['file_info'] = {
'fileName': file_info.fileName if hasattr(file_info, 'fileName') else 'N/A',
'fileSize': file_info.fileSize if hasattr(file_info, 'fileSize') else 'N/A',
'mimeType': file_info.mimeType if hasattr(file_info, 'mimeType') else 'N/A'
}
else:
diagnosis['error_details'] = f"File with ID {document.fileId} not found in component interface"
except Exception as e:
diagnosis['error_details'] = f"Error accessing file {document.fileId}: {str(e)}"
return diagnosis
except Exception as e:
return {
'document_id': document.id if hasattr(document, 'id') else 'unknown',
'file_id': document.fileId if hasattr(document, 'fileId') else 'unknown',
'error_details': f"Error during diagnosis: {str(e)}"
}
def _recoverDocumentAccess(self, document: ChatDocument) -> bool:
"""
Attempt to recover document access by re-setting the component interface.
Returns True if recovery was successful.
"""
try:
logger.info(f"Attempting to recover document access for document {document.id}")
# Re-set the component interface
document.setComponentInterface(self.interfaceComponent)
# Test if we can now access the fileName
try:
test_fileName = document.fileName
logger.info(f"Document access recovered for {document.id} -> {test_fileName}")
return True
except Exception as e:
logger.error(f"Document access recovery failed for {document.id}: {str(e)}")
return False
except Exception as e:
logger.error(f"Error during document access recovery for {document.id}: {str(e)}")
return False
def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: str = None) -> ChatDocument:
"""Create document with file in one step - handles file creation internally"""
# Convert content to bytes based on base64 flag
if base64encoded:
import base64
content_bytes = base64.b64decode(content)
else:
content_bytes = content.encode('utf-8')
# Create the file (hash and size are computed inside interfaceComponent)
file_item = self.interfaceComponent.createFile(
name=fileName,
mimeType=mimeType,
content=content_bytes
)
# Then store the file data
self.interfaceComponent.createFileData(file_item.id, content_bytes)
# Get file info to copy attributes
file_info = self.getFileInfo(file_item.id)
if not file_info:
logger.error(f"Could not get file info for fileId: {file_item.id}")
raise ValueError(f"File info not found for fileId: {file_item.id}")
# Create document with all file attributes copied
document = ChatDocument(
id=str(uuid.uuid4()),
messageId=messageId or "", # Use provided messageId or empty string as fallback
fileId=file_item.id,
fileName=file_info.get("fileName", fileName),
fileSize=file_info.get("size", 0),
mimeType=file_info.get("mimeType", mimeType)
)
return document
def calculateObjectSize(self, obj: Any) -> int:
"""
Calculate the size of an object in bytes.
Args:
obj: Object to calculate size for
Returns:
int: Size in bytes
"""
try:
import json
import sys
if obj is None:
return 0
# Convert object to JSON string and calculate size
json_str = json.dumps(obj, ensure_ascii=False, default=str)
return len(json_str.encode('utf-8'))
except Exception as e:
logger.error(f"Error calculating object size: {str(e)}")
return 0
def getWorkflowContext(self) -> Dict[str, int]:
"""Get current workflow context for document generation"""
try:
return {
'currentRound': self.workflow.currentRound if hasattr(self.workflow, 'currentRound') else 0,
'currentTask': self.workflow.currentTask if hasattr(self.workflow, 'currentTask') else 0,
'currentAction': self.workflow.currentAction if hasattr(self.workflow, 'currentAction') else 0
}
except Exception as e:
logger.error(f"Error getting workflow context: {str(e)}")
return {'currentRound': 0, 'currentTask': 0, 'currentAction': 0}
def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
"""Set current workflow context for document generation and routing"""
try:
# Prepare update data
update_data = {}
if round_number is not None:
self.workflow.currentRound = round_number
update_data["currentRound"] = round_number
if task_number is not None:
self.workflow.currentTask = task_number
update_data["currentTask"] = task_number
if action_number is not None:
self.workflow.currentAction = action_number
update_data["currentAction"] = action_number
# Persist changes to database if any updates were made
if update_data:
self.interfaceChat.updateWorkflow(self.workflow.id, update_data)
logger.debug(f"Updated workflow context: Round {self.workflow.currentRound if hasattr(self.workflow, 'currentRound') else 'N/A'}, Task {self.workflow.currentTask if hasattr(self.workflow, 'currentTask') else 'N/A'}, Action {self.workflow.currentAction if hasattr(self.workflow, 'currentAction') else 'N/A'}")
except Exception as e:
logger.error(f"Error setting workflow context: {str(e)}")
def getWorkflowStats(self) -> Dict[str, Any]:
"""Get comprehensive workflow statistics including current context"""
try:
workflow_context = self.getWorkflowContext()
return {
'currentRound': workflow_context['currentRound'],
'currentTask': workflow_context['currentTask'],
'currentAction': workflow_context['currentAction'],
'totalTasks': self.workflow.totalTasks if hasattr(self.workflow, 'totalTasks') else 0,
'totalActions': self.workflow.totalActions if hasattr(self.workflow, 'totalActions') else 0,
'workflowStatus': self.workflow.status if hasattr(self.workflow, 'status') else 'unknown',
'workflowId': self.workflow.id if hasattr(self.workflow, 'id') else 'unknown'
}
except Exception as e:
logger.error(f"Error getting workflow stats: {str(e)}")
return {
'currentRound': 0,
'currentTask': 0,
'currentAction': 0,
'totalTasks': 0,
'totalActions': 0,
'workflowStatus': 'unknown',
'workflowId': 'unknown'
}