gateway/modules/services/serviceWorkflow/mainServiceWorkflow.py
2025-09-24 23:18:10 +02:00

580 lines
30 KiB
Python

import logging
import uuid
from typing import Dict, Any, List, Optional
from modules.interfaces.interfaceAppModel import User, UserConnection
from modules.interfaces.interfaceChatModel import ChatDocument, ChatMessage, ExtractedContent
from modules.services.serviceDocument.mainServiceDocumentExtraction import DocumentExtractionService
from modules.services.serviceDocument.documentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
from modules.shared.timezoneUtils import get_utc_timestamp
logger = logging.getLogger(__name__)
class WorkflowService:
    """Workflow-facing service: document lookup and extraction, chat summaries, connection references and workflow context."""

    def __init__(self, serviceCenter):
        """
        Bind the service to its service center.

        Args:
            serviceCenter: Object exposing ``user``, ``workflow``, ``interfaceChat``,
                ``interfaceComponent`` and ``interfaceApp``. The center itself and
                each of these attributes are stored on the instance.
        """
        center = serviceCenter
        self.serviceCenter = center
        # Keep direct handles so methods do not have to reach through the center.
        self.user = center.user
        self.workflow = center.workflow
        self.interfaceChat = center.interfaceChat
        self.interfaceComponent = center.interfaceComponent
        self.interfaceApp = center.interfaceApp
async def summarizeChat(self, messages: List[ChatMessage]) -> str:
    """
    Ask the AI service for a summary of the latest conversation segment.

    The segment is gathered by walking ``messages`` from the end towards the
    start and stopping right after the first message whose ``status`` is
    ``"first"`` (that message is included). It is then listed in the prompt
    in its original order.

    Args:
        messages: List of chat messages; the end of the list is treated as
            the most recent message.

    Returns:
        str: The result of ``AiService.callAi`` for the summary prompt, or an
        ``"Error summarizing chat: ..."`` string when anything raises.
    """
    try:
        # Collect newest-to-oldest up to and including the "first" marker.
        segment = []
        for entry in reversed(messages):
            segment.append(entry)
            if entry.status == "first":
                break

        language = self.user.language
        # Restore the original ordering for the prompt.
        history = "\n".join(f"- {entry.message}" for entry in reversed(segment))

        prompt = f"""You are an AI assistant providing a summary of a chat conversation.
Please respond in '{language}' language.
Chat History:
{history}
Instructions:
1. Summarize the conversation's key points and outcomes
2. Be concise but informative
3. Use a professional but friendly tone
4. Focus on important decisions and next steps if any
Please provide a comprehensive summary of this conversation."""

        # Imported here rather than at module level (avoiding circular dependency).
        from modules.services.serviceAi.mainServiceAi import AiService

        call_options = {
            "process_type": "text",
            "operation_type": "generate_content",
            "priority": "speed",
            "compress_prompt": True,
            "compress_documents": False,
            "max_cost": 0.01
        }
        ai_service = AiService(self)
        return await ai_service.callAi(prompt=prompt, documents=None, options=call_options)
    except Exception as e:
        failure = f"Error summarizing chat: {str(e)}"
        logger.error(failure)
        return failure
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
    """
    Resolve document references to the ChatDocuments held by workflow messages.

    Supported reference formats:
        - ``docItem:<id>:<filename>``: the first document whose id equals ``<id>``.
        - ``docList:<messageId>:<label>``: all documents of the message with that id.
        - ``docList:<label>``: all documents of the newest message whose
          ``documentsLabel`` equals ``<label>``.
        - ``round<N>_task<N>_action<N>_<context>``: all documents of the newest
          message whose ``documentsLabel`` equals the reference; if none match,
          the newest message whose label starts with the reference.

    References in any other format are ignored. A malformed reference is
    skipped with a warning instead of discarding documents already resolved.

    Args:
        documentList: Reference strings to resolve.

    Returns:
        List[ChatDocument]: Resolved documents in reference order; an empty
        list if resolution raises unexpectedly.
    """
    try:
        all_documents = []
        for doc_ref in documentList:
            if not isinstance(doc_ref, str):
                logger.warning(f"Skipping non-string document reference: {doc_ref!r}")
                continue
            if doc_ref.startswith("docItem:"):
                all_documents.extend(self._resolveDocItemReference(doc_ref))
            elif doc_ref.startswith("docList:"):
                all_documents.extend(self._resolveDocListReference(doc_ref))
            elif doc_ref.startswith("round"):
                all_documents.extend(self._resolveRoundLabelReference(doc_ref))
        logger.debug(f"Resolved {len(all_documents)} documents from document list: {documentList}")
        return all_documents
    except Exception as e:
        logger.error(f"Error getting documents from document list: {str(e)}")
        return []


def _newestMessageByPublishedAt(self, candidates: list):
    """Return the candidate with the highest ``publishedAt``; a missing or None value counts as 0."""
    # max() keeps the first maximal item, matching a stable descending sort + [0].
    return max(candidates, key=lambda msg: getattr(msg, 'publishedAt', 0) or 0)


def _resolveDocItemReference(self, doc_ref: str) -> list:
    """Resolve ``docItem:<id>:<filename>`` to a one-element list, or an empty list if the id is unknown."""
    # The "docItem:" prefix guarantees at least two ':'-separated parts.
    doc_id = doc_ref.split(':')[1]
    for message in self.workflow.messages:
        for doc in message.documents or []:
            # Compare as strings, consistent with how message ids are compared.
            if str(doc.id) == doc_id:
                doc_name = getattr(doc, 'fileName', 'unknown')
                logger.debug(f"Found docItem reference {doc_ref}: {doc_name}")
                # Return immediately: one reference yields one document, even
                # if the same id is attached to several messages.
                return [doc]
    return []


def _resolveDocListReference(self, doc_ref: str) -> list:
    """Resolve ``docList:<messageId>:<label>`` or ``docList:<label>`` to that message's documents."""
    parts = doc_ref.split(':')
    if len(parts) >= 3:
        # Format: docList:<messageId>:<label> - the label is not needed for lookup.
        message_id = parts[1]
        for message in self.workflow.messages:
            if str(message.id) != message_id:
                continue
            if message.documents:
                doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
                logger.debug(f"Found docList reference {doc_ref}: {len(message.documents)} documents - {doc_names}")
                return list(message.documents)
            logger.debug(f"Found docList reference {doc_ref} but message has no documents")
            return []
        return []

    # Format: docList:<label> - find message by documentsLabel.
    label = parts[1]
    logger.debug(f"Looking for message with documentsLabel: {label}")
    matching_messages = []
    for message in self.workflow.messages:
        msg_label = getattr(message, 'documentsLabel', None)
        if msg_label == label:
            matching_messages.append(message)
            logger.debug(f"Found message {message.id} with matching documentsLabel: {msg_label}")
        else:
            logger.debug(f"Message {message.id} has documentsLabel: '{msg_label}' (looking for: '{label}')")
    if not matching_messages:
        logger.debug(f"No messages found with documentsLabel: {label}")
        return []

    newest_message = self._newestMessageByPublishedAt(matching_messages)
    if newest_message.documents:
        doc_names = [doc.fileName for doc in newest_message.documents if hasattr(doc, 'fileName')]
        logger.debug(f"Found docList reference {doc_ref}: {len(newest_message.documents)} documents - {doc_names}")
        return list(newest_message.documents)
    logger.debug(f"Found docList reference {doc_ref} but message has no documents")
    return []


def _resolveRoundLabelReference(self, doc_ref: str) -> list:
    """Resolve a ``round<N>_task<N>_action<N>_<context>`` label to the newest matching message's documents."""
    label_parts = doc_ref.split('_', 3)
    if len(label_parts) < 4:
        return []
    try:
        round_num = int(label_parts[0].replace('round', ''))
        task_num = int(label_parts[1].replace('task', ''))
        action_num = int(label_parts[2].replace('action', ''))
    except ValueError:
        # The numbers only validate the format; a bad label must not abort
        # resolution of the remaining references.
        logger.warning(f"Skipping malformed round reference: {doc_ref}")
        return []
    context_info = label_parts[3]
    logger.debug(f"Resolving round reference: round{round_num}_task{task_num}_action{action_num}_{context_info}")
    logger.debug(f"Looking for messages with documentsLabel matching: {doc_ref}")

    # Exact label match first; with retries several messages may share a label.
    matching_messages = []
    for message in self.workflow.messages:
        msg_documents_label = getattr(message, 'documentsLabel', '')
        if msg_documents_label == doc_ref:
            matching_messages.append(message)
            logger.debug(f"Found message {message.id} with matching documentsLabel: {msg_documents_label}")

    if matching_messages:
        newest_message = self._newestMessageByPublishedAt(matching_messages)
        logger.debug(f"Found {len(matching_messages)} matching messages, using newest: {newest_message.id} (publishedAt: {getattr(newest_message, 'publishedAt', 'unknown')})")
        logger.debug(f"Newest message has {len(newest_message.documents) if newest_message.documents else 0} documents")
        if newest_message.documents:
            doc_names = [doc.fileName for doc in newest_message.documents if hasattr(doc, 'fileName')]
            logger.debug(f"Added {len(newest_message.documents)} documents from newest message {newest_message.id}: {doc_names}")
            return list(newest_message.documents)
        logger.debug(f"No documents found in newest message {newest_message.id}")
        return []

    logger.debug(f"No messages found with documentsLabel: {doc_ref}")
    # Fallback: accept labels that merely start with the reference.
    logger.debug(f"Trying fallback search for messages with documentsLabel containing: {doc_ref}")
    fallback_messages = []
    for message in self.workflow.messages:
        msg_documents_label = getattr(message, 'documentsLabel', '')
        if msg_documents_label and msg_documents_label.startswith(doc_ref):
            fallback_messages.append(message)
            logger.debug(f"Found fallback message {message.id} with documentsLabel: {msg_documents_label}")
    if not fallback_messages:
        logger.debug("No fallback messages found either")
        return []

    newest_fallback = self._newestMessageByPublishedAt(fallback_messages)
    logger.debug(f"Using fallback message {newest_fallback.id} with documentsLabel: {getattr(newest_fallback, 'documentsLabel', 'unknown')}")
    if newest_fallback.documents:
        doc_names = [doc.fileName for doc in newest_fallback.documents if hasattr(doc, 'fileName')]
        logger.debug(f"Added {len(newest_fallback.documents)} documents from fallback message {newest_fallback.id}: {doc_names}")
        return list(newest_fallback.documents)
    logger.debug(f"No documents found in fallback message {newest_fallback.id}")
    return []
def getConnectionReferenceFromUserConnection(self, connection: UserConnection) -> str:
    """
    Build a connection reference string annotated with connection and token state.

    The result has the form
    ``connection:<authority>:<externalUsername>:<id> [status:<status>, token:<token_status>]``
    where ``token_status`` is one of ``no_token``, ``no_expiration``,
    ``expired``, ``valid``, ``valid (refreshed)`` or ``error: <message>``.

    Args:
        connection: Connection to describe; ``authority`` and ``status`` are
            read through their ``.value``.

    Returns:
        str: The reference string. Token lookup failures never raise; they
        are reported inside the token status instead.
    """
    # Window after creation in which a token is labelled as freshly refreshed (5 minutes).
    refresh_window = 300
    token_status = "unknown"
    try:
        # Get a fresh token via TokenManager convenience method
        logger.debug(f"Getting fresh token for connection {connection.id}")
        from modules.security.tokenManager import TokenManager
        token = TokenManager().getFreshToken(self.interfaceApp, connection.id)
        if not token:
            token_status = "no_token"
        elif not getattr(token, 'expiresAt', None):
            token_status = "no_expiration"
        else:
            current_time = get_utc_timestamp()
            logger.debug(f"getConnectionReferenceFromUserConnection: Current time: {current_time}")
            logger.debug(f"getConnectionReferenceFromUserConnection: Token expires at: {token.expiresAt}")
            if current_time > token.expiresAt:
                token_status = "expired"
            else:
                # Only claim a recent refresh when the creation time is known;
                # a missing or None createdAt must not read as "just created".
                created_at = getattr(token, 'createdAt', None)
                if created_at is not None and current_time - created_at < refresh_window:
                    token_status = "valid (refreshed)"
                else:
                    token_status = "valid"
    except Exception as e:
        token_status = f"error: {str(e)}"

    # Build enhanced reference with state information
    base_ref = f"connection:{connection.authority.value}:{connection.externalUsername}:{connection.id}"
    state_info = f" [status:{connection.status.value}, token:{token_status}]"
    logger.debug(f"getConnectionReferenceFromUserConnection: Built reference: {base_ref + state_info}")
    return base_ref + state_info
def getUserConnectionByExternalUsername(self, authority: str, externalUsername: str) -> Optional[UserConnection]:
    """
    Look up one of the current user's connections by authority and external username.

    Args:
        authority: Authority name; compared case-insensitively.
        externalUsername: External account name; compared exactly.

    Returns:
        Optional[UserConnection]: The first matching connection, or None when
        an argument is empty, nothing matches, or the lookup raises.
    """
    try:
        if not (authority and externalUsername):
            return None
        for candidate in self.interfaceApp.getUserConnections(self.user.id):
            raw_authority = candidate.authority
            # The authority may be an enum (use .value) or already a plain string.
            if hasattr(raw_authority, 'value'):
                authority_name = raw_authority.value
            else:
                authority_name = str(raw_authority)
            if authority_name.lower() != authority.lower():
                continue
            if candidate.externalUsername == externalUsername:
                return candidate
        return None
    except Exception as e:
        logger.error(f"Error getting connection by external username: {str(e)}")
        return None
def getUserConnectionFromConnectionReference(self, connectionReference: str) -> Optional[UserConnection]:
    """
    Resolve a connection reference string back to the user's UserConnection.

    Accepts both the plain format ``connection:{authority}:{username}:{id}``
    and the enhanced format with a trailing `` [status:..., token:...]`` block,
    which is ignored.

    Args:
        connectionReference: Reference string to parse.

    Returns:
        Optional[UserConnection]: The connection whose id, authority and
        external username all match, or None when the reference is empty,
        malformed, unmatched, or the lookup raises.
    """
    try:
        if not connectionReference:
            return None
        # Remove state information if present
        base_reference = connectionReference.split(' [')[0]
        parts = base_reference.split(':')
        if len(parts) != 4 or parts[0] != "connection":
            return None
        _, authority, username, conn_id = parts

        for conn in self.interfaceApp.getUserConnections(self.user.id):
            # Normalize authority (enum vs string) the same way the lookup by
            # external username does, so plain-string authorities also match.
            conn_authority = conn.authority.value if hasattr(conn.authority, 'value') else str(conn.authority)
            if str(conn.id) == conn_id and conn_authority == authority and conn.externalUsername == username:
                return conn
        return None
    except Exception as e:
        logger.error(f"Error parsing connection reference: {str(e)}")
        return None
def getFileInfo(self, fileId: str) -> Optional[Dict[str, Any]]:
    """
    Return basic metadata for a stored file.

    Args:
        fileId: Identifier passed to ``interfaceComponent.getFile``.

    Returns:
        Optional[Dict[str, Any]]: A dict with the keys ``id``, ``fileName``,
        ``size``, ``mimeType``, ``fileHash`` and ``creationDate``, or None when
        the component interface returns no file for ``fileId``.
    """
    file_item = self.interfaceComponent.getFile(fileId)
    if not file_item:
        # Callers test the result for truthiness, so "not found" stays None.
        return None
    return {
        "id": file_item.id,
        "fileName": file_item.fileName,
        "size": file_item.fileSize,
        "mimeType": file_item.mimeType,
        "fileHash": file_item.fileHash,
        "creationDate": file_item.creationDate
    }
def getFileData(self, fileId: str) -> bytes:
    """
    Fetch the stored content of a file.

    Args:
        fileId: Identifier passed to ``interfaceComponent.getFileData``.

    Returns:
        bytes: Whatever the component interface returns for ``fileId``.
    """
    component = self.interfaceComponent
    return component.getFileData(fileId)
async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
    """
    Run prompt-guided extraction over the file a ChatDocument refers to.

    Args:
        prompt: Passed through to ``DocumentExtractionService.processFileData``.
        document: Document reference; its ``fileId`` is used to load the file
            data, and its ``fileName`` / ``mimeType`` describe that data.

    Returns:
        ExtractedContent: Carries the document id and one ``contents`` entry
        (label, data, metadata) per item returned by the extraction service.

    Raises:
        ValueError: The document has no ``fileId`` or no data exists for it.
        RuntimeError: ``fileName`` / ``mimeType`` cannot be read, even after a
            recovery attempt.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    try:
        # The document is only a reference; the data lives behind its fileId.
        if not getattr(document, 'fileId', None):
            logger.error(f"Document {document.id} has no fileId")
            raise ValueError("Document has no fileId")

        fileData = self.getFileData(document.fileId)
        if not fileData:
            logger.error(f"No file data found for fileId: {document.fileId}")
            raise ValueError("No file data found for document")

        try:
            fileName, mimeType = document.fileName, document.mimeType
        except Exception:
            # Reading the properties failed: diagnose first, then try to repair.
            diagnosis = self._diagnoseDocumentAccess(document)
            logger.error(f"Critical error: Cannot access document properties for document {document.id}. Diagnosis: {diagnosis}")
            if not self._recoverDocumentAccess(document):
                # Recovery failed - don't continue with invalid data
                raise RuntimeError(f"Document {document.id} properties are inaccessible and recovery failed. Diagnosis: {diagnosis}")
            try:
                fileName, mimeType = document.fileName, document.mimeType
                logger.info(f"Document access recovered for {document.id} - proceeding with AI extraction")
            except Exception as recovery_error:
                logger.error(f"Recovery failed for document {document.id}: {str(recovery_error)}")
                raise RuntimeError(f"Document {document.id} properties are permanently inaccessible after recovery attempt - cannot proceed with AI extraction: {str(recovery_error)}")

        # Imported locally and constructed with None (no circular dependency).
        from modules.services.serviceDocument.mainServiceDocumentExtraction import DocumentExtractionService
        extractor = DocumentExtractionService(None)
        content_items = await extractor.processFileData(
            fileData=fileData,
            fileName=fileName,
            mimeType=mimeType,
            base64Encoded=False,
            prompt=prompt,
            enableAI=True
        )

        # Flatten each item; fall back to document-level values where the
        # item metadata lacks a field.
        contents = [
            {
                'label': item.label,
                'data': item.data,
                'metadata': {
                    'mimeType': item.metadata.mimeType if hasattr(item.metadata, 'mimeType') else mimeType,
                    'size': item.metadata.size if hasattr(item.metadata, 'size') else len(fileData),
                    'base64Encoded': item.metadata.base64Encoded if hasattr(item.metadata, 'base64Encoded') else False
                }
            }
            for item in content_items
        ]

        # ExtractedContent is populated with 'id' and 'contents' only.
        return ExtractedContent(id=document.id, contents=contents)
    except Exception as e:
        logger.error(f"Error extracting from document: {str(e)}")
        raise
def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
    """
    Collect facts explaining why a document's properties may be unreadable.

    Args:
        document: The document to inspect.

    Returns:
        Dict[str, Any]: A report with ``document_id``, ``file_id``,
        ``has_component_interface``, ``component_interface_type``,
        ``file_exists``, ``file_info`` and ``error_details``. If the
        inspection itself raises, a reduced report with only ``document_id``,
        ``file_id`` and ``error_details`` is returned. Never raises.
    """
    try:
        report = {
            'document_id': document.id,
            'file_id': document.fileId,
        }
        interface = document._componentInterface
        report.update({
            'has_component_interface': interface is not None,
            'component_interface_type': type(interface).__name__ if interface else None,
            'file_exists': False,
            'file_info': None,
            'error_details': None,
        })

        if not interface:
            report['error_details'] = "Component interface not set - document cannot access file system"
            return report

        # Probe the file through this service's own component interface.
        try:
            stored = self.interfaceComponent.getFile(document.fileId)
            if not stored:
                report['error_details'] = f"File with ID {document.fileId} not found in component interface"
            else:
                report['file_exists'] = True
                report['file_info'] = {
                    field: getattr(stored, field, 'N/A')
                    for field in ('fileName', 'fileSize', 'mimeType')
                }
        except Exception as e:
            report['error_details'] = f"Error accessing file {document.fileId}: {str(e)}"
        return report
    except Exception as e:
        return {
            'document_id': getattr(document, 'id', 'unknown'),
            'file_id': getattr(document, 'fileId', 'unknown'),
            'error_details': f"Error during diagnosis: {str(e)}"
        }
def _recoverDocumentAccess(self, document: ChatDocument) -> bool:
    """
    Try to make a document's properties readable again.

    Re-attaches this service's component interface to the document and then
    reads ``fileName`` as a probe.

    Args:
        document: The document to repair.

    Returns:
        bool: True if ``fileName`` could be read afterwards, False otherwise.
        Failures are logged, never raised.
    """
    try:
        logger.info(f"Attempting to recover document access for document {document.id}")
        document.setComponentInterface(self.interfaceComponent)

        # Probe: reading fileName is the access that recovery must restore.
        try:
            probed_name = document.fileName
            logger.info(f"Document access recovered for {document.id} -> {probed_name}")
            return True
        except Exception as probe_error:
            logger.error(f"Document access recovery failed for {document.id}: {str(probe_error)}")
            return False
    except Exception as setup_error:
        logger.error(f"Error during document access recovery for {document.id}: {str(setup_error)}")
        return False
def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: str = None) -> ChatDocument:
    """
    Store content as a new file and return a ChatDocument referencing it.

    Args:
        fileName: Name given to the created file.
        mimeType: MIME type given to the created file.
        content: File content; base64 text when ``base64encoded`` is True,
            otherwise plain text that is encoded as UTF-8.
        base64encoded: Whether ``content`` must be base64-decoded first.
        messageId: Message the document belongs to; an empty string is used
            when omitted.

    Returns:
        ChatDocument: New document with a fresh UUID whose file attributes
        are copied from the stored file's info.

    Raises:
        ValueError: No file info can be read back for the created file.
    """
    import base64

    raw_bytes = base64.b64decode(content) if base64encoded else content.encode('utf-8')

    # Register the file first (hash and size are computed inside
    # interfaceComponent), then store its data.
    stored_file = self.interfaceComponent.createFile(
        name=fileName,
        mimeType=mimeType,
        content=raw_bytes
    )
    self.interfaceComponent.createFileData(stored_file.id, raw_bytes)

    # Read the info back so the document mirrors what was actually stored.
    info = self.getFileInfo(stored_file.id)
    if not info:
        logger.error(f"Could not get file info for fileId: {stored_file.id}")
        raise ValueError(f"File info not found for fileId: {stored_file.id}")

    return ChatDocument(
        id=str(uuid.uuid4()),
        messageId=messageId or "",
        fileId=stored_file.id,
        fileName=info.get("fileName", fileName),
        fileSize=info.get("size", 0),
        mimeType=info.get("mimeType", mimeType)
    )
def calculateObjectSize(self, obj: Any) -> int:
    """
    Measure an object as the UTF-8 byte length of its JSON serialization.

    Values that JSON cannot encode natively are converted with ``str``.

    Args:
        obj: Object to measure.

    Returns:
        int: Size in bytes; 0 for None or when serialization fails.
    """
    if obj is None:
        return 0
    try:
        import json
        serialized = json.dumps(obj, ensure_ascii=False, default=str)
        encoded = serialized.encode('utf-8')
        return len(encoded)
    except Exception as e:
        logger.error(f"Error calculating object size: {str(e)}")
        return 0
def getWorkflowContext(self) -> Dict[str, int]:
    """
    Report the workflow's current round, task and action.

    Returns:
        Dict[str, int]: Keys ``currentRound``, ``currentTask`` and
        ``currentAction``; each is 0 when the workflow lacks the attribute
        or when reading the workflow raises.
    """
    context_fields = ('currentRound', 'currentTask', 'currentAction')
    try:
        workflow = self.workflow
        return {field: getattr(workflow, field, 0) for field in context_fields}
    except Exception as e:
        logger.error(f"Error getting workflow context: {str(e)}")
        return {'currentRound': 0, 'currentTask': 0, 'currentAction': 0}
def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
    """
    Update the workflow's current round, task and/or action.

    Each argument that is not None is written to the in-memory workflow; all
    written values are then persisted with one ``interfaceChat.updateWorkflow``
    call. Errors are logged and not raised.

    Args:
        round_number: New ``currentRound``, or None to leave it unchanged.
        task_number: New ``currentTask``, or None to leave it unchanged.
        action_number: New ``currentAction``, or None to leave it unchanged.
    """
    try:
        requested = (
            ("currentRound", round_number),
            ("currentTask", task_number),
            ("currentAction", action_number),
        )
        update_data = {}
        for field, value in requested:
            if value is None:
                continue
            setattr(self.workflow, field, value)
            update_data[field] = value

        # Skip the database call when nothing changed.
        if update_data:
            self.interfaceChat.updateWorkflow(self.workflow.id, update_data)

        # NOTE(review): the source indentation was lost, so it is unclear
        # whether this log belongs inside the `if` above - confirm.
        shown_round = getattr(self.workflow, 'currentRound', 'N/A')
        shown_task = getattr(self.workflow, 'currentTask', 'N/A')
        shown_action = getattr(self.workflow, 'currentAction', 'N/A')
        logger.debug(f"Updated workflow context: Round {shown_round}, Task {shown_task}, Action {shown_action}")
    except Exception as e:
        logger.error(f"Error setting workflow context: {str(e)}")
def getWorkflowStats(self) -> Dict[str, Any]:
    """
    Combine the current workflow context with workflow totals, status and id.

    Returns:
        Dict[str, Any]: Keys ``currentRound``, ``currentTask``,
        ``currentAction``, ``totalTasks``, ``totalActions``,
        ``workflowStatus`` and ``workflowId``. Missing workflow attributes
        yield 0 (totals) or ``'unknown'`` (status, id); the same defaults
        are returned for every key when collecting the stats raises.
    """
    try:
        context = self.getWorkflowContext()
        workflow = self.workflow
        stats = {
            field: context[field]
            for field in ('currentRound', 'currentTask', 'currentAction')
        }
        stats['totalTasks'] = getattr(workflow, 'totalTasks', 0)
        stats['totalActions'] = getattr(workflow, 'totalActions', 0)
        stats['workflowStatus'] = getattr(workflow, 'status', 'unknown')
        stats['workflowId'] = getattr(workflow, 'id', 'unknown')
        return stats
    except Exception as e:
        logger.error(f"Error getting workflow stats: {str(e)}")
        fallback = dict.fromkeys(
            ('currentRound', 'currentTask', 'currentAction', 'totalTasks', 'totalActions'), 0
        )
        fallback['workflowStatus'] = 'unknown'
        fallback['workflowId'] = 'unknown'
        return fallback