Fixed critical bug in models where the previous workflow object was used instead of the properly instantiated current object
This commit is contained in:
parent
55fb23f7c0
commit
2255c9009d
6 changed files with 402 additions and 371 deletions
|
|
@ -1,5 +1,6 @@
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
from typing import Dict, Any, List, Optional, Tuple, Union
|
from typing import Dict, Any, List, Optional, Tuple, Union
|
||||||
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
|
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
|
||||||
|
|
@ -731,28 +732,68 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
self.services.chat.progressLogFinish(aiOperationId, False)
|
self.services.chat.progressLogFinish(aiOperationId, False)
|
||||||
return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"}
|
return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"}
|
||||||
|
|
||||||
|
# Extract title and filename from generated document structure
|
||||||
|
extractedTitle = title # Default to user-provided title
|
||||||
|
extractedFilename = None
|
||||||
|
if isinstance(generated_data, dict) and "documents" in generated_data:
|
||||||
|
documents = generated_data["documents"]
|
||||||
|
if isinstance(documents, list) and len(documents) > 0:
|
||||||
|
firstDoc = documents[0]
|
||||||
|
if isinstance(firstDoc, dict):
|
||||||
|
# Extract title from document (preferred over user-provided title)
|
||||||
|
if firstDoc.get("title"):
|
||||||
|
extractedTitle = firstDoc["title"]
|
||||||
|
# Extract filename from document
|
||||||
|
if firstDoc.get("filename"):
|
||||||
|
extractedFilename = firstDoc["filename"]
|
||||||
|
|
||||||
|
# Ensure metadata contains the extracted title for renderers
|
||||||
|
if "metadata" not in generated_data:
|
||||||
|
generated_data["metadata"] = {}
|
||||||
|
if extractedTitle:
|
||||||
|
generated_data["metadata"]["title"] = extractedTitle
|
||||||
|
|
||||||
self.services.chat.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format")
|
self.services.chat.progressLogUpdate(aiOperationId, 0.8, f"Rendering to {outputFormat} format")
|
||||||
# Render to final format using the existing renderer
|
# Render to final format using the existing renderer
|
||||||
try:
|
try:
|
||||||
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
|
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
|
||||||
generationService = GenerationService(self.services)
|
generationService = GenerationService(self.services)
|
||||||
|
# Pass extracted title to renderer (will use metadata.title if available)
|
||||||
rendered_content, mime_type = await generationService.renderReport(
|
rendered_content, mime_type = await generationService.renderReport(
|
||||||
generated_data, outputFormat, title or "Generated Document", prompt, self
|
generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Use extracted filename if available, otherwise generate from title or use generic
|
||||||
|
if extractedFilename:
|
||||||
|
documentName = extractedFilename
|
||||||
|
elif extractedTitle and extractedTitle != "Generated Document":
|
||||||
|
# Sanitize title for filename
|
||||||
|
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle)
|
||||||
|
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
|
||||||
|
if sanitized:
|
||||||
|
# Ensure correct extension
|
||||||
|
if not sanitized.lower().endswith(f".{outputFormat}"):
|
||||||
|
documentName = f"{sanitized}.{outputFormat}"
|
||||||
|
else:
|
||||||
|
documentName = sanitized
|
||||||
|
else:
|
||||||
|
documentName = f"generated.{outputFormat}"
|
||||||
|
else:
|
||||||
|
documentName = f"generated.{outputFormat}"
|
||||||
|
|
||||||
# Build result in the expected format
|
# Build result in the expected format
|
||||||
result = {
|
result = {
|
||||||
"success": True,
|
"success": True,
|
||||||
"content": generated_data,
|
"content": generated_data,
|
||||||
"documents": [{
|
"documents": [{
|
||||||
"documentName": f"generated.{outputFormat}",
|
"documentName": documentName,
|
||||||
"documentData": rendered_content,
|
"documentData": rendered_content,
|
||||||
"mimeType": mime_type,
|
"mimeType": mime_type,
|
||||||
"title": title or "Generated Document"
|
"title": extractedTitle or "Generated Document"
|
||||||
}],
|
}],
|
||||||
"is_multi_file": False,
|
"is_multi_file": False,
|
||||||
"format": outputFormat,
|
"format": outputFormat,
|
||||||
"title": title,
|
"title": extractedTitle or title,
|
||||||
"split_strategy": "single",
|
"split_strategy": "single",
|
||||||
"total_documents": 1,
|
"total_documents": 1,
|
||||||
"processed_documents": 1
|
"processed_documents": 1
|
||||||
|
|
|
||||||
|
|
@ -29,153 +29,232 @@ class ChatService:
|
||||||
logger.error("getChatDocumentsFromDocumentList: No workflow available (self.services.workflow is not set)")
|
logger.error("getChatDocumentsFromDocumentList: No workflow available (self.services.workflow is not set)")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
workflow_id = workflow.id if hasattr(workflow, 'id') else 'NO_ID'
|
workflowId = workflow.id if hasattr(workflow, 'id') else 'NO_ID'
|
||||||
workflow_obj_id = id(workflow)
|
workflowObjId = id(workflow)
|
||||||
logger.debug(f"getChatDocumentsFromDocumentList: input documentList = {documentList}")
|
logger.debug(f"getChatDocumentsFromDocumentList: input documentList = {documentList}")
|
||||||
logger.debug(f"getChatDocumentsFromDocumentList: using workflow.id = {workflow_id}, workflow object id = {workflow_obj_id}")
|
logger.debug(f"getChatDocumentsFromDocumentList: using workflow.id = {workflowId}, workflow object id = {workflowObjId}")
|
||||||
|
|
||||||
# Debug: list available messages with their labels and document names
|
# Root cause analysis: Verify workflow.messages integrity and detect workflow changes
|
||||||
|
self._verifyWorkflowMessagesIntegrity(workflow, workflowId)
|
||||||
|
|
||||||
|
# Debug: list available messages with their labels and document names (filtered by workflowId)
|
||||||
try:
|
try:
|
||||||
if workflow and hasattr(workflow, 'messages') and workflow.messages:
|
if workflow and hasattr(workflow, 'messages') and workflow.messages:
|
||||||
msg_lines = []
|
msgLines = []
|
||||||
|
messagesFromOtherWorkflows = []
|
||||||
for message in workflow.messages:
|
for message in workflow.messages:
|
||||||
|
msgWorkflowId = getattr(message, 'workflowId', None)
|
||||||
|
# Only include messages that belong to this workflow
|
||||||
|
if msgWorkflowId and msgWorkflowId != workflowId:
|
||||||
|
messagesFromOtherWorkflows.append(f"id={getattr(message, 'id', None)}, label={getattr(message, 'documentsLabel', None)}, workflowId={msgWorkflowId}")
|
||||||
|
continue
|
||||||
|
# Also skip messages without workflowId (shouldn't happen, but be safe)
|
||||||
|
if not msgWorkflowId:
|
||||||
|
messagesFromOtherWorkflows.append(f"id={getattr(message, 'id', None)}, label={getattr(message, 'documentsLabel', None)}, workflowId=Missing")
|
||||||
|
continue
|
||||||
|
|
||||||
label = getattr(message, 'documentsLabel', None)
|
label = getattr(message, 'documentsLabel', None)
|
||||||
doc_names = []
|
docNames = []
|
||||||
if getattr(message, 'documents', None):
|
if getattr(message, 'documents', None):
|
||||||
for doc in message.documents:
|
for doc in message.documents:
|
||||||
name = getattr(doc, 'fileName', None) or getattr(doc, 'documentName', None) or 'Unnamed'
|
name = getattr(doc, 'fileName', None) or getattr(doc, 'documentName', None) or 'Unnamed'
|
||||||
doc_names.append(name)
|
docNames.append(name)
|
||||||
msg_lines.append(
|
msgLines.append(
|
||||||
f"- id={getattr(message, 'id', None)}, label={label}, docs={doc_names}"
|
f"- id={getattr(message, 'id', None)}, label={label}, workflowId={msgWorkflowId}, docs={docNames}"
|
||||||
)
|
)
|
||||||
if msg_lines:
|
if msgLines:
|
||||||
logger.debug("getChatDocumentsFromDocumentList: available messages:\n" + "\n".join(msg_lines))
|
logger.debug("getChatDocumentsFromDocumentList: available messages (filtered for workflow):\n" + "\n".join(msgLines))
|
||||||
|
if messagesFromOtherWorkflows:
|
||||||
|
logger.warning(f"getChatDocumentsFromDocumentList: Found {len(messagesFromOtherWorkflows)} messages from other workflows in workflow.messages list:\n" + "\n".join(messagesFromOtherWorkflows))
|
||||||
else:
|
else:
|
||||||
logger.debug("getChatDocumentsFromDocumentList: no messages available on current workflow")
|
logger.debug("getChatDocumentsFromDocumentList: no messages available on current workflow")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"getChatDocumentsFromDocumentList: unable to enumerate messages for debug: {e}")
|
logger.debug(f"getChatDocumentsFromDocumentList: unable to enumerate messages for debug: {e}")
|
||||||
|
|
||||||
all_documents = []
|
allDocuments = []
|
||||||
for doc_ref in documentList:
|
for docRef in documentList:
|
||||||
if doc_ref.startswith("docItem:"):
|
if docRef.startswith("docItem:"):
|
||||||
# docItem:<id>:<filename> - extract ID and find document
|
# docItem:<id>:<filename> - extract ID and find document
|
||||||
parts = doc_ref.split(':')
|
parts = docRef.split(':')
|
||||||
if len(parts) >= 2:
|
if len(parts) >= 2:
|
||||||
doc_id = parts[1]
|
docId = parts[1]
|
||||||
# Find the document by ID
|
# Find the document by ID
|
||||||
for message in workflow.messages:
|
for message in workflow.messages:
|
||||||
# Validate message belongs to this workflow
|
# Validate message belongs to this workflow
|
||||||
msg_workflow_id = getattr(message, 'workflowId', None)
|
msgWorkflowId = getattr(message, 'workflowId', None)
|
||||||
if msg_workflow_id and msg_workflow_id != workflow_id:
|
if not msgWorkflowId or msgWorkflowId != workflowId:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if message.documents:
|
if message.documents:
|
||||||
for doc in message.documents:
|
for doc in message.documents:
|
||||||
if doc.id == doc_id:
|
if doc.id == docId:
|
||||||
doc_name = getattr(doc, 'fileName', 'unknown')
|
docName = getattr(doc, 'fileName', 'unknown')
|
||||||
all_documents.append(doc)
|
allDocuments.append(doc)
|
||||||
break
|
break
|
||||||
elif doc_ref.startswith("docList:"):
|
elif docRef.startswith("docList:"):
|
||||||
# docList:<messageId>:<label> or docList:<label> - extract message ID and find document list
|
# docList:<messageId>:<label> or docList:<label> - extract message ID and find document list
|
||||||
parts = doc_ref.split(':')
|
parts = docRef.split(':')
|
||||||
if len(parts) >= 3:
|
if len(parts) >= 3:
|
||||||
# Format: docList:<messageId>:<label>
|
# Format: docList:<messageId>:<label>
|
||||||
message_id = parts[1]
|
messageId = parts[1]
|
||||||
label = parts[2]
|
label = parts[2]
|
||||||
# First try to find the message by ID in the current workflow
|
# First try to find the message by ID in the current workflow
|
||||||
message_found = None
|
messageFound = None
|
||||||
for message in workflow.messages:
|
for message in workflow.messages:
|
||||||
# Validate message belongs to this workflow
|
# Validate message belongs to this workflow
|
||||||
msg_workflow_id = getattr(message, 'workflowId', None)
|
msgWorkflowId = getattr(message, 'workflowId', None)
|
||||||
if msg_workflow_id and msg_workflow_id != workflow_id:
|
if not msgWorkflowId or msgWorkflowId != workflowId:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if str(message.id) == message_id:
|
if str(message.id) == messageId:
|
||||||
message_found = message
|
messageFound = message
|
||||||
break
|
break
|
||||||
|
|
||||||
# If message ID not found in current workflow, this is a stale reference
|
# If message ID not found in current workflow, this is a stale reference
|
||||||
# Log warning and return empty list (don't fall back to label - it might match wrong message)
|
# Log warning and return empty list (don't fall back to label - it might match wrong message)
|
||||||
if not message_found:
|
if not messageFound:
|
||||||
available_ids = [str(msg.id) for msg in workflow.messages]
|
availableIds = [str(msg.id) for msg in workflow.messages]
|
||||||
logger.warning(f"Document reference contains stale message ID {message_id} not found in current workflow {workflow.id}. Label: {label}. Available message IDs: {available_ids}")
|
logger.warning(f"Document reference contains stale message ID {messageId} not found in current workflow {workflow.id}. Label: {label}. Available message IDs: {availableIds}")
|
||||||
logger.warning(f"This indicates the document reference was created in a different workflow state. Returning empty list.")
|
logger.warning(f"This indicates the document reference was created in a different workflow state. Returning empty list.")
|
||||||
# Return empty list - don't fall back to label matching which could match wrong message
|
# Return empty list - don't fall back to label matching which could match wrong message
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# If found, add documents
|
# If found, add documents
|
||||||
if message_found and message_found.documents:
|
if messageFound and messageFound.documents:
|
||||||
all_documents.extend(message_found.documents)
|
allDocuments.extend(messageFound.documents)
|
||||||
elif len(parts) >= 2:
|
elif len(parts) >= 2:
|
||||||
# Format: docList:<label> - find message by documentsLabel
|
# Format: docList:<label> - find message by documentsLabel
|
||||||
label = parts[1]
|
label = parts[1]
|
||||||
message_found = None
|
messageFound = None
|
||||||
for message in workflow.messages:
|
for message in workflow.messages:
|
||||||
# Validate message belongs to this workflow
|
# Validate message belongs to this workflow
|
||||||
msg_workflow_id = getattr(message, 'workflowId', None)
|
msgWorkflowId = getattr(message, 'workflowId', None)
|
||||||
if msg_workflow_id and msg_workflow_id != workflow_id:
|
if not msgWorkflowId or msgWorkflowId != workflowId:
|
||||||
logger.warning(f"Message {message.id} has workflowId {msg_workflow_id} but belongs to workflow {workflow_id}. Skipping.")
|
if msgWorkflowId:
|
||||||
|
logger.warning(f"Message {message.id} has workflowId {msgWorkflowId} but belongs to workflow {workflowId}. Skipping.")
|
||||||
|
else:
|
||||||
|
logger.warning(f"Message {message.id} has no workflowId. Skipping.")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
msg_label = getattr(message, 'documentsLabel', None)
|
msgLabel = getattr(message, 'documentsLabel', None)
|
||||||
if msg_label == label:
|
if msgLabel == label:
|
||||||
message_found = message
|
messageFound = message
|
||||||
break
|
break
|
||||||
|
|
||||||
# If found, add documents
|
# If found, add documents
|
||||||
if message_found and message_found.documents:
|
if messageFound and messageFound.documents:
|
||||||
all_documents.extend(message_found.documents)
|
allDocuments.extend(messageFound.documents)
|
||||||
else:
|
else:
|
||||||
# Direct label reference (round1_task2_action3_contextinfo)
|
# Direct label reference (round1_task2_action3_contextinfo)
|
||||||
# Search for messages with matching documentsLabel to find the actual documents
|
# Search for messages with matching documentsLabel to find the actual documents
|
||||||
if doc_ref.startswith("round"):
|
if docRef.startswith("round"):
|
||||||
# Parse round/task/action to find the corresponding document list
|
# Parse round/task/action to find the corresponding document list
|
||||||
label_parts = doc_ref.split('_', 3)
|
labelParts = docRef.split('_', 3)
|
||||||
if len(label_parts) >= 4:
|
if len(labelParts) >= 4:
|
||||||
round_num = int(label_parts[0].replace('round', ''))
|
roundNum = int(labelParts[0].replace('round', ''))
|
||||||
task_num = int(label_parts[1].replace('task', ''))
|
taskNum = int(labelParts[1].replace('task', ''))
|
||||||
action_num = int(label_parts[2].replace('action', ''))
|
actionNum = int(labelParts[2].replace('action', ''))
|
||||||
context_info = label_parts[3]
|
contextInfo = labelParts[3]
|
||||||
|
|
||||||
# Find messages with matching documentsLabel (this is the correct way!)
|
# Find messages with matching documentsLabel (this is the correct way!)
|
||||||
# In case of retries, we want the NEWEST message (most recent publishedAt)
|
# In case of retries, we want the NEWEST message (most recent publishedAt)
|
||||||
matching_messages = []
|
matchingMessages = []
|
||||||
for message in workflow.messages:
|
for message in workflow.messages:
|
||||||
# Validate message belongs to this workflow
|
# Validate message belongs to this workflow
|
||||||
msg_workflow_id = getattr(message, 'workflowId', None)
|
msgWorkflowId = getattr(message, 'workflowId', None)
|
||||||
if msg_workflow_id and msg_workflow_id != workflow_id:
|
if not msgWorkflowId or msgWorkflowId != workflowId:
|
||||||
logger.debug(f"Skipping message {message.id} with workflowId {msg_workflow_id} (expected {workflow_id})")
|
if msgWorkflowId:
|
||||||
|
logger.debug(f"Skipping message {message.id} with workflowId {msgWorkflowId} (expected {workflowId})")
|
||||||
|
else:
|
||||||
|
logger.debug(f"Skipping message {message.id} with no workflowId (expected {workflowId})")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
msg_documents_label = getattr(message, 'documentsLabel', '')
|
msgDocumentsLabel = getattr(message, 'documentsLabel', '')
|
||||||
|
|
||||||
# Check if this message's documentsLabel matches our reference
|
# Check if this message's documentsLabel matches our reference
|
||||||
if msg_documents_label == doc_ref:
|
if msgDocumentsLabel == docRef:
|
||||||
# Found a matching message, collect it for comparison
|
# Found a matching message, collect it for comparison
|
||||||
matching_messages.append(message)
|
matchingMessages.append(message)
|
||||||
|
|
||||||
# If we found matching messages, take the newest one (highest publishedAt)
|
# If we found matching messages, take the newest one (highest publishedAt)
|
||||||
if matching_messages:
|
if matchingMessages:
|
||||||
# Sort by publishedAt descending (newest first)
|
# Sort by publishedAt descending (newest first)
|
||||||
matching_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
|
matchingMessages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
|
||||||
newest_message = matching_messages[0]
|
newestMessage = matchingMessages[0]
|
||||||
|
|
||||||
if newest_message.documents:
|
if newestMessage.documents:
|
||||||
doc_names = [doc.fileName for doc in newest_message.documents if hasattr(doc, 'fileName')]
|
docNames = [doc.fileName for doc in newestMessage.documents if hasattr(doc, 'fileName')]
|
||||||
logger.debug(f"Added {len(newest_message.documents)} documents from newest message {newest_message.id}: {doc_names}")
|
logger.debug(f"Added {len(newestMessage.documents)} documents from newest message {newestMessage.id}: {docNames}")
|
||||||
all_documents.extend(newest_message.documents)
|
allDocuments.extend(newestMessage.documents)
|
||||||
else:
|
else:
|
||||||
logger.debug(f"No documents found in newest message {newest_message.id}")
|
logger.debug(f"No documents found in newest message {newestMessage.id}")
|
||||||
else:
|
else:
|
||||||
logger.error(f"No messages found with documentsLabel: {doc_ref}")
|
logger.error(f"No messages found with documentsLabel: {docRef}")
|
||||||
raise ValueError(f"Document reference not found: {doc_ref}")
|
raise ValueError(f"Document reference not found: {docRef}")
|
||||||
|
|
||||||
logger.debug(f"Resolved {len(all_documents)} documents from document list: {documentList}")
|
logger.debug(f"Resolved {len(allDocuments)} documents from document list: {documentList}")
|
||||||
return all_documents
|
return allDocuments
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error getting documents from document list: {str(e)}")
|
logger.error(f"Error getting documents from document list: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
def _verifyWorkflowMessagesIntegrity(self, workflow, expectedWorkflowId: str) -> None:
|
||||||
|
"""
|
||||||
|
Verify that all messages in workflow.messages belong to the expected workflow.
|
||||||
|
This helps detect when workflow objects are being mixed up or when messages from
|
||||||
|
other workflows are incorrectly included.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if not workflow or not hasattr(workflow, 'messages') or not workflow.messages:
|
||||||
|
return
|
||||||
|
|
||||||
|
messagesFromOtherWorkflows = []
|
||||||
|
messagesWithoutWorkflowId = []
|
||||||
|
totalMessages = len(workflow.messages)
|
||||||
|
|
||||||
|
for message in workflow.messages:
|
||||||
|
msgWorkflowId = getattr(message, 'workflowId', None)
|
||||||
|
if not msgWorkflowId:
|
||||||
|
messagesWithoutWorkflowId.append({
|
||||||
|
'id': getattr(message, 'id', 'unknown'),
|
||||||
|
'label': getattr(message, 'documentsLabel', None)
|
||||||
|
})
|
||||||
|
elif msgWorkflowId != expectedWorkflowId:
|
||||||
|
messagesFromOtherWorkflows.append({
|
||||||
|
'id': getattr(message, 'id', 'unknown'),
|
||||||
|
'label': getattr(message, 'documentsLabel', None),
|
||||||
|
'workflowId': msgWorkflowId,
|
||||||
|
'expectedWorkflowId': expectedWorkflowId
|
||||||
|
})
|
||||||
|
|
||||||
|
if messagesFromOtherWorkflows:
|
||||||
|
logger.error(
|
||||||
|
f"CRITICAL: Workflow integrity violation detected! "
|
||||||
|
f"Workflow {expectedWorkflowId} contains {len(messagesFromOtherWorkflows)} messages from other workflows. "
|
||||||
|
f"Total messages: {totalMessages}. "
|
||||||
|
f"Foreign messages: {messagesFromOtherWorkflows}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if messagesWithoutWorkflowId:
|
||||||
|
logger.warning(
|
||||||
|
f"Workflow integrity issue: Workflow {expectedWorkflowId} contains {len(messagesWithoutWorkflowId)} messages without workflowId. "
|
||||||
|
f"Messages: {messagesWithoutWorkflowId}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Also check if self.services.workflow has changed (workflow object ID mismatch)
|
||||||
|
currentWorkflow = self.services.workflow
|
||||||
|
if currentWorkflow and hasattr(currentWorkflow, 'id'):
|
||||||
|
currentWorkflowId = currentWorkflow.id
|
||||||
|
if currentWorkflowId != expectedWorkflowId:
|
||||||
|
logger.error(
|
||||||
|
f"CRITICAL: Workflow object changed during execution! "
|
||||||
|
f"Expected workflow {expectedWorkflowId}, but self.services.workflow now points to {currentWorkflowId}. "
|
||||||
|
f"This indicates the workflow object was swapped mid-execution."
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Error during workflow integrity verification: {e}")
|
||||||
|
|
||||||
def getConnectionReferenceFromUserConnection(self, connection: UserConnection) -> str:
|
def getConnectionReferenceFromUserConnection(self, connection: UserConnection) -> str:
|
||||||
"""Get connection reference from UserConnection with enhanced state information"""
|
"""Get connection reference from UserConnection with enhanced state information"""
|
||||||
# Get token information to check if it's expired
|
# Get token information to check if it's expired
|
||||||
|
|
@ -640,9 +719,12 @@ class ChatService:
|
||||||
if not workflow or not hasattr(workflow, 'messages'):
|
if not workflow or not hasattr(workflow, 'messages'):
|
||||||
return "No documents available"
|
return "No documents available"
|
||||||
|
|
||||||
workflow_id = workflow.id if hasattr(workflow, 'id') else 'NO_ID'
|
workflowId = workflow.id if hasattr(workflow, 'id') else 'NO_ID'
|
||||||
workflow_obj_id = id(workflow)
|
workflowObjId = id(workflow)
|
||||||
logger.debug(f"getAvailableDocuments: workflow.id = {workflow_id}, workflow object id = {workflow_obj_id}")
|
logger.debug(f"getAvailableDocuments: workflow.id = {workflowId}, workflow object id = {workflowObjId}")
|
||||||
|
|
||||||
|
# Root cause analysis: Verify workflow.messages integrity and detect workflow changes
|
||||||
|
self._verifyWorkflowMessagesIntegrity(workflow, workflowId)
|
||||||
|
|
||||||
# Use the provided workflow object directly to avoid database reload issues
|
# Use the provided workflow object directly to avoid database reload issues
|
||||||
# that can cause filename truncation. The workflow object should already be up-to-date.
|
# that can cause filename truncation. The workflow object should already be up-to-date.
|
||||||
|
|
|
||||||
|
|
@ -1083,12 +1083,12 @@ class MethodOutlook(MethodBase):
|
||||||
return ActionResult.isFailure(error=str(e))
|
return ActionResult.isFailure(error=str(e))
|
||||||
|
|
||||||
@action
|
@action
|
||||||
async def composeAndSendEmailWithContext(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def composeAndDraftEmailWithContext(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
"""
|
||||||
GENERAL:
|
GENERAL:
|
||||||
- Purpose: Compose email content using AI from context and optional documents, then create a draft/send.
|
- Purpose: Compose email content using AI from context and optional documents, then create a draft.
|
||||||
- Input requirements: connectionReference (required); to (required); context (required); optional documentList, cc, bcc, emailStyle, maxLength.
|
- Input requirements: connectionReference (required); to (required); context (required); optional documentList, cc, bcc, emailStyle, maxLength.
|
||||||
- Output format: JSON confirmation with AI-generated draft/send metadata.
|
- Output format: JSON confirmation with AI-generated draft metadata.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
- connectionReference (str, required): Microsoft connection label.
|
||||||
|
|
@ -1336,8 +1336,9 @@ Return JSON:
|
||||||
draft_data = response.json()
|
draft_data = response.json()
|
||||||
draft_id = draft_data.get("id", "Unknown")
|
draft_id = draft_data.get("id", "Unknown")
|
||||||
|
|
||||||
result_data = {
|
# Create draft result data with full draft information
|
||||||
"status": "success",
|
draft_result_data = {
|
||||||
|
"status": "draft",
|
||||||
"message": "Email draft created successfully with AI-generated content",
|
"message": "Email draft created successfully with AI-generated content",
|
||||||
"draftId": draft_id,
|
"draftId": draft_id,
|
||||||
"folder": "Drafts (Entwürfe)",
|
"folder": "Drafts (Entwürfe)",
|
||||||
|
|
@ -1352,14 +1353,15 @@ Return JSON:
|
||||||
"aiGenerated": True,
|
"aiGenerated": True,
|
||||||
"context": context,
|
"context": context,
|
||||||
"emailStyle": emailStyle,
|
"emailStyle": emailStyle,
|
||||||
"timestamp": self.services.utils.timestampGetUtc()
|
"timestamp": self.services.utils.timestampGetUtc(),
|
||||||
|
"draftData": draft_data
|
||||||
}
|
}
|
||||||
|
|
||||||
return ActionResult(
|
return ActionResult(
|
||||||
success=True,
|
success=True,
|
||||||
documents=[ActionDocument(
|
documents=[ActionDocument(
|
||||||
documentName=f"ai_generated_email_draft_{self._format_timestamp_for_filename()}.json",
|
documentName=f"ai_generated_email_draft_{self._format_timestamp_for_filename()}.json",
|
||||||
documentData=json.dumps(result_data, indent=2),
|
documentData=json.dumps(draft_result_data, indent=2),
|
||||||
mimeType="application/json"
|
mimeType="application/json"
|
||||||
)]
|
)]
|
||||||
)
|
)
|
||||||
|
|
@ -1372,7 +1374,142 @@ Return JSON:
|
||||||
return ActionResult.isFailure(error=f"Failed to create email: {str(e)}")
|
return ActionResult.isFailure(error=f"Failed to create email: {str(e)}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in composeAndSendEmailWithContext: {str(e)}")
|
logger.error(f"Error in composeAndDraftEmailWithContext: {str(e)}")
|
||||||
|
return ActionResult.isFailure(error=str(e))
|
||||||
|
|
||||||
|
@action
|
||||||
|
async def sendDraftEmail(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
|
"""
|
||||||
|
GENERAL:
|
||||||
|
- Purpose: Send a draft email using the draft email JSON data from action outlook.composeAndDraftEmailWithContext. This action is used to send the email after the email has been composed and drafted.
|
||||||
|
- Input requirements: connectionReference (required); draftEmailJson (required).
|
||||||
|
- Output format: JSON confirmation with sent mail metadata.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
- connectionReference (str, required): Microsoft connection label.
|
||||||
|
- draftEmailJson (str or dict, required): Draft email JSON data containing draftId or draftData with id field.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
connectionReference = parameters.get("connectionReference")
|
||||||
|
draftEmailJson = parameters.get("draftEmailJson")
|
||||||
|
|
||||||
|
if not connectionReference:
|
||||||
|
return ActionResult.isFailure(error="Connection reference is required")
|
||||||
|
|
||||||
|
if not draftEmailJson:
|
||||||
|
return ActionResult.isFailure(error="Draft email JSON is required")
|
||||||
|
|
||||||
|
# Parse draft email JSON if it's a string
|
||||||
|
if isinstance(draftEmailJson, str):
|
||||||
|
try:
|
||||||
|
draftEmailJson = json.loads(draftEmailJson)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
return ActionResult.isFailure(error="Invalid JSON format in draftEmailJson parameter")
|
||||||
|
|
||||||
|
# Extract draft ID from the JSON
|
||||||
|
draft_id = None
|
||||||
|
if isinstance(draftEmailJson, dict):
|
||||||
|
# Try to get draftId directly
|
||||||
|
draft_id = draftEmailJson.get("draftId")
|
||||||
|
# If not found, try to get it from draftData
|
||||||
|
if not draft_id and "draftData" in draftEmailJson:
|
||||||
|
draft_data = draftEmailJson.get("draftData")
|
||||||
|
if isinstance(draft_data, dict):
|
||||||
|
draft_id = draft_data.get("id")
|
||||||
|
# If still not found, try id field directly
|
||||||
|
if not draft_id:
|
||||||
|
draft_id = draftEmailJson.get("id")
|
||||||
|
|
||||||
|
if not draft_id:
|
||||||
|
return ActionResult.isFailure(error="Could not extract draft ID from draftEmailJson. Ensure it contains 'draftId' or 'draftData.id' field")
|
||||||
|
|
||||||
|
# Get Microsoft connection
|
||||||
|
connection = self._getMicrosoftConnection(connectionReference)
|
||||||
|
if not connection:
|
||||||
|
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
|
||||||
|
|
||||||
|
# Check permissions
|
||||||
|
permissions_ok = await self._checkPermissions(connection)
|
||||||
|
if not permissions_ok:
|
||||||
|
return ActionResult.isFailure(error="Connection lacks necessary permissions for Outlook operations")
|
||||||
|
|
||||||
|
# Send the draft email
|
||||||
|
try:
|
||||||
|
graph_url = "https://graph.microsoft.com/v1.0"
|
||||||
|
headers = {
|
||||||
|
"Authorization": f"Bearer {connection['accessToken']}",
|
||||||
|
"Content-Type": "application/json"
|
||||||
|
}
|
||||||
|
|
||||||
|
send_url = f"{graph_url}/me/messages/{draft_id}/send"
|
||||||
|
send_response = requests.post(send_url, headers=headers)
|
||||||
|
|
||||||
|
# Extract email details from draft JSON for confirmation
|
||||||
|
subject = draftEmailJson.get("subject", "Unknown")
|
||||||
|
recipients = draftEmailJson.get("recipients", [])
|
||||||
|
cc = draftEmailJson.get("cc", [])
|
||||||
|
bcc = draftEmailJson.get("bcc", [])
|
||||||
|
attachments_count = draftEmailJson.get("attachments", 0)
|
||||||
|
|
||||||
|
if send_response.status_code in [200, 202, 204]:
|
||||||
|
sent_confirmation_data = {
|
||||||
|
"status": "sent",
|
||||||
|
"message": "Email sent successfully",
|
||||||
|
"draftId": draft_id,
|
||||||
|
"subject": subject,
|
||||||
|
"recipients": recipients,
|
||||||
|
"cc": cc,
|
||||||
|
"bcc": bcc,
|
||||||
|
"attachments": attachments_count,
|
||||||
|
"sentTimestamp": self.services.utils.timestampGetUtc(),
|
||||||
|
"confirmation": "Email has been successfully sent to recipients"
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(f"Email sent successfully. Draft ID: {draft_id}")
|
||||||
|
|
||||||
|
return ActionResult(
|
||||||
|
success=True,
|
||||||
|
documents=[ActionDocument(
|
||||||
|
documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json",
|
||||||
|
documentData=json.dumps(sent_confirmation_data, indent=2),
|
||||||
|
mimeType="application/json"
|
||||||
|
)]
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.error(f"Failed to send email. Status: {send_response.status_code}, Response: {send_response.text}")
|
||||||
|
|
||||||
|
sent_confirmation_data = {
|
||||||
|
"status": "error",
|
||||||
|
"message": "Failed to send draft email",
|
||||||
|
"draftId": draft_id,
|
||||||
|
"subject": subject,
|
||||||
|
"recipients": recipients,
|
||||||
|
"sendError": {
|
||||||
|
"statusCode": send_response.status_code,
|
||||||
|
"response": send_response.text
|
||||||
|
},
|
||||||
|
"sentTimestamp": self.services.utils.timestampGetUtc(),
|
||||||
|
"confirmation": "Email draft sending failed"
|
||||||
|
}
|
||||||
|
|
||||||
|
return ActionResult.isFailure(
|
||||||
|
error=f"Failed to send email: {send_response.status_code} - {send_response.text}",
|
||||||
|
documents=[ActionDocument(
|
||||||
|
documentName=f"sent_mail_confirmation_{self._format_timestamp_for_filename()}.json",
|
||||||
|
documentData=json.dumps(sent_confirmation_data, indent=2),
|
||||||
|
mimeType="application/json"
|
||||||
|
)]
|
||||||
|
)
|
||||||
|
|
||||||
|
except ImportError:
|
||||||
|
logger.error("requests module not available")
|
||||||
|
return ActionResult.isFailure(error="requests module not available")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error sending draft email via Microsoft Graph API: {str(e)}")
|
||||||
|
return ActionResult.isFailure(error=f"Failed to send draft email: {str(e)}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in sendDraftEmail: {str(e)}")
|
||||||
return ActionResult.isFailure(error=str(e))
|
return ActionResult.isFailure(error=str(e))
|
||||||
|
|
||||||
async def checkPermissions(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def checkPermissions(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
|
|
|
||||||
|
|
@ -1,266 +0,0 @@
|
||||||
# Content Validator - Deep Analysis & Target Design
|
|
||||||
|
|
||||||
## CURRENT STATE ANALYSIS
|
|
||||||
|
|
||||||
### How Validator Currently Works
|
|
||||||
|
|
||||||
#### 1. **Document Input Flow**
|
|
||||||
```
|
|
||||||
ActionResult.documents (List[ActionDocument])
|
|
||||||
→ modeReact.py extracts "structured content" with hardcoded checks
|
|
||||||
→ Creates SimpleNamespace objects with wrapped documentData
|
|
||||||
→ Passes to ContentValidator.validateContent()
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 2. **Current Problems in modeReact.py (Lines 99-136)**
|
|
||||||
- ❌ **Hardcoded document name checks**: `docName == "structured_content.json"`
|
|
||||||
- ❌ **Hardcoded mimeType checks**: `mimeType == "application/json"`
|
|
||||||
- ❌ **Hardcoded structure checks**: `'content' in docData or 'documents' in docData or 'sections' in docData`
|
|
||||||
- ❌ **Single document selection**: `break` after first match - ignores other documents
|
|
||||||
- ❌ **Non-generic logic**: Specific to certain document structures
|
|
||||||
- ❌ **Workaround approach**: Trying to find structured content in various ways
|
|
||||||
|
|
||||||
#### 3. **Current Problems in contentValidator.py**
|
|
||||||
|
|
||||||
**`_extractContent()` method (Lines 21-41)**:
|
|
||||||
- ❌ **Inconsistent handling**: Checks for `dict with 'content'` but then also handles raw `data`
|
|
||||||
- ❌ **Silent failures**: Returns empty string on any exception
|
|
||||||
- ❌ **Size limit hardcoded**: 10KB threshold is arbitrary
|
|
||||||
- ❌ **No format awareness**: Doesn't check if document is binary/base64 before extracting
|
|
||||||
- ❌ **No document type detection**: Doesn't distinguish text vs binary vs structured data
|
|
||||||
|
|
||||||
**`_validateWithAI()` method (Lines 60-200)**:
|
|
||||||
- ❌ **Forces all content to string**: `content[:2000]` truncation assumes text
|
|
||||||
- ❌ **No document metadata passed**: Only name and content, no size, format, mimeType info
|
|
||||||
- ❌ **No binary/base64 handling**: Will fail or show garbage for binary documents
|
|
||||||
- ❌ **Multiple JSON extraction strategies**: Indicates unreliable AI response parsing
|
|
||||||
- ❌ **Size limits inconsistent**: 10KB threshold in extraction vs. 2KB truncation in the prompt, with no documented reason for the difference
|
|
||||||
|
|
||||||
#### 4. **Missing Capabilities**
|
|
||||||
- ❌ No document size reporting to validator
|
|
||||||
- ❌ No format validation (txt vs md vs pdf vs docx)
|
|
||||||
- ❌ No binary data handling (images, PDFs, etc.)
|
|
||||||
- ❌ No document count/summary statistics
|
|
||||||
- ❌ No distinction between document types for validation
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## TARGET DESIGN
|
|
||||||
|
|
||||||
### Core Principles
|
|
||||||
1. **GENERIC**: No hardcoded document names, types, or structures
|
|
||||||
2. **DOCUMENT-AWARE**: Handle all document types (text, binary, base64, structured)
|
|
||||||
3. **SIZE-CONSCIOUS**: Never pass full large documents to AI
|
|
||||||
4. **METADATA-RICH**: Pass document metadata (name, size, format, mimeType) to validator
|
|
||||||
5. **FORMAT-FLEXIBLE**: Allow format flexibility (md ≈ text, but pdf ≠ docx)
|
|
||||||
|
|
||||||
### Target Architecture
|
|
||||||
|
|
||||||
```
|
|
||||||
Documents Input (List[ActionDocument])
|
|
||||||
↓
|
|
||||||
Document Analyzer (generic)
|
|
||||||
- Extract metadata (name, size, mimeType, format)
|
|
||||||
- Determine content type (text/binary/base64/structured)
|
|
||||||
- Create preview/summary for large documents
|
|
||||||
↓
|
|
||||||
Document Summary (for AI validation)
|
|
||||||
- Metadata only for binary/base64
|
|
||||||
- Preview/sample for large text documents
|
|
||||||
- Full content for small text/structured documents
|
|
||||||
↓
|
|
||||||
Validation Prompt Builder (generic)
|
|
||||||
- Include document summaries (not full content)
|
|
||||||
- Include document metadata
|
|
||||||
- Include format validation rules (generic)
|
|
||||||
↓
|
|
||||||
AI Validator
|
|
||||||
- Validates against task objective (generic)
|
|
||||||
- Validates format compliance (flexible)
|
|
||||||
- Validates document count/size
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## REQUIRED CHANGES
|
|
||||||
|
|
||||||
### 1. **Remove All Hardcoded Checks from modeReact.py**
|
|
||||||
- ❌ Remove document name checks
|
|
||||||
- ❌ Remove mimeType-specific logic
|
|
||||||
- ❌ Remove structure-specific checks
|
|
||||||
- ✅ Pass ALL documents to validator (let validator decide what to validate)
|
|
||||||
- ✅ Keep it simple: `validationDocs = result.documents`
|
|
||||||
|
|
||||||
### 2. **Redesign contentValidator.py - New Structure**
|
|
||||||
|
|
||||||
#### New Method: `_analyzeDocuments(documents)`
|
|
||||||
```python
|
|
||||||
def _analyzeDocuments(self, documents: List[Any]) -> List[Dict[str, Any]]:
|
|
||||||
"""
|
|
||||||
Generic document analysis - extract metadata and create summaries.
|
|
||||||
Returns list of document summaries ready for validation prompt.
|
|
||||||
"""
|
|
||||||
summaries = []
|
|
||||||
for doc in documents:
|
|
||||||
summary = {
|
|
||||||
"name": getattr(doc, 'documentName', 'Unknown'),
|
|
||||||
"mimeType": getattr(doc, 'mimeType', 'unknown'),
|
|
||||||
"format": self._detectFormat(doc),
|
|
||||||
"size": self._calculateSize(doc),
|
|
||||||
"type": self._detectContentType(doc), # text/binary/base64/structured
|
|
||||||
"preview": self._createPreview(doc), # None for binary, sample for large text
|
|
||||||
"isAccessible": self._isContentAccessible(doc) # Can we read content?
|
|
||||||
}
|
|
||||||
summaries.append(summary)
|
|
||||||
return summaries
|
|
||||||
```
|
|
||||||
|
|
||||||
#### New Method: `_detectFormat(doc)`
|
|
||||||
- Extract from filename extension or mimeType
|
|
||||||
- Generic mapping: `text/plain` → `txt`, `text/markdown` → `md`, etc.
|
|
||||||
- Return format string (txt, md, pdf, docx, json, etc.)
|
|
||||||
|
|
||||||
#### New Method: `_calculateSize(doc)`
|
|
||||||
- Calculate document size in bytes
|
|
||||||
- Handle string, dict, list, bytes, base64
|
|
||||||
- Return: `{"bytes": int, "readable": "1.5 MB"}`
|
|
||||||
|
|
||||||
#### New Method: `_detectContentType(doc)`
|
|
||||||
- `text`: Readable text content
|
|
||||||
- `structured`: JSON/dict/list structures
|
|
||||||
- `binary`: Binary data (PDF, images, etc.)
|
|
||||||
- `base64`: Base64-encoded data
|
|
||||||
- Return content type string
|
|
||||||
|
|
||||||
#### New Method: `_createPreview(doc)`
|
|
||||||
- **Binary/Base64**: Return `None` (metadata only)
|
|
||||||
- **Large text (>50KB)**: Return first 1KB + size indicator
|
|
||||||
- **Small text (≤50KB)**: Return full content
|
|
||||||
- **Structured data**: Return JSON string (truncated if large)
|
|
||||||
|
|
||||||
#### New Method: `_isContentAccessible(doc)`
|
|
||||||
- Check if document content can be extracted for validation
|
|
||||||
- Binary/base64 documents: `False` (validate by metadata only)
|
|
||||||
- Text/structured documents: `True`
|
|
||||||
|
|
||||||
### 3. **Redesign Validation Prompt (Generic)**
|
|
||||||
|
|
||||||
```python
|
|
||||||
validationPrompt = f"""TASK VALIDATION
|
|
||||||
|
|
||||||
USER REQUEST: '{intent.get('primaryGoal', 'Unknown')}'
|
|
||||||
EXPECTED DATA TYPE: {intent.get('dataType', 'unknown')}
|
|
||||||
EXPECTED FORMAT: {intent.get('expectedFormat', 'unknown')}
|
|
||||||
SUCCESS CRITERIA ({criteriaCount} items): {successCriteria}
|
|
||||||
|
|
||||||
DELIVERED DOCUMENTS ({len(documentSummaries)} items):
|
|
||||||
{json.dumps(documentSummaries, indent=2)}
|
|
||||||
|
|
||||||
VALIDATION RULES:
|
|
||||||
1. Check if delivered documents match expected data type
|
|
||||||
2. Check if delivered formats are compatible with expected format
|
|
||||||
(Note: text formats like txt/md are compatible; pdf ≠ docx but both are documents)
|
|
||||||
3. Verify each success criterion is met based on document content/metadata
|
|
||||||
4. Check document sizes are reasonable for the task
|
|
||||||
5. Rate overall quality (0.0-1.0)
|
|
||||||
6. Identify specific gaps
|
|
||||||
7. Suggest next steps
|
|
||||||
|
|
||||||
OUTPUT FORMAT - JSON ONLY (no prose):
|
|
||||||
{{
|
|
||||||
"overallSuccess": false,
|
|
||||||
"qualityScore": 0.0,
|
|
||||||
"dataTypeMatch": false,
|
|
||||||
"formatMatch": false,
|
|
||||||
"documentCount": {len(documentSummaries)},
|
|
||||||
"successCriteriaMet": {[False] * criteriaCount},
|
|
||||||
"gapAnalysis": "Specific gaps found",
|
|
||||||
"improvementSuggestions": ["NEXT STEP: Action 1"],
|
|
||||||
"validationDetails": [
|
|
||||||
{{
|
|
||||||
"documentName": "document.ext",
|
|
||||||
"issues": ["Issue 1"],
|
|
||||||
"suggestions": ["NEXT STEP: Fix 1"]
|
|
||||||
}}
|
|
||||||
]
|
|
||||||
}}
|
|
||||||
"""
|
|
||||||
```
|
|
||||||
|
|
||||||
### 4. **Format Validation Logic (Generic & Flexible)**
|
|
||||||
|
|
||||||
```python
|
|
||||||
def _isFormatCompatible(self, deliveredFormat: str, expectedFormat: str) -> bool:
|
|
||||||
"""
|
|
||||||
Generic format compatibility check.
|
|
||||||
- txt/md/html are text formats (compatible with each other)
|
|
||||||
- pdf/docx/xlsx are document formats (not compatible with each other)
|
|
||||||
- json/xml are structured formats
|
|
||||||
- images are image formats
|
|
||||||
"""
|
|
||||||
# Text formats are interchangeable
|
|
||||||
textFormats = ['txt', 'md', 'html', 'text', 'plain']
|
|
||||||
if deliveredFormat.lower() in textFormats and expectedFormat.lower() in textFormats:
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Exact match
|
|
||||||
if deliveredFormat.lower() == expectedFormat.lower():
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Structured formats
|
|
||||||
if deliveredFormat.lower() in ['json', 'xml'] and expectedFormat.lower() in ['json', 'xml']:
|
|
||||||
return True # Could be made more flexible
|
|
||||||
|
|
||||||
return False
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## IMPLEMENTATION PLAN
|
|
||||||
|
|
||||||
### Phase 1: Clean Up modeReact.py
|
|
||||||
- Remove all hardcoded checks
|
|
||||||
- Simply pass `result.documents` to validator
|
|
||||||
|
|
||||||
### Phase 2: Redesign Document Analysis
|
|
||||||
- Implement `_analyzeDocuments()`
|
|
||||||
- Implement helper methods: `_detectFormat()`, `_calculateSize()`, `_detectContentType()`, `_createPreview()`, `_isContentAccessible()`
|
|
||||||
|
|
||||||
### Phase 3: Redesign Validation Prompt
|
|
||||||
- Generic prompt with document summaries
|
|
||||||
- Include metadata, not full content
|
|
||||||
- Size-aware handling
|
|
||||||
|
|
||||||
### Phase 4: Implement Format Validation
|
|
||||||
- Generic format compatibility logic
|
|
||||||
- Flexible matching (text formats, document formats, etc.)
|
|
||||||
|
|
||||||
### Phase 5: Testing
|
|
||||||
- Test with text documents (small & large)
|
|
||||||
- Test with binary documents (PDF, images)
|
|
||||||
- Test with base64 documents
|
|
||||||
- Test with structured data (JSON)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## KEY DESIGN DECISIONS
|
|
||||||
|
|
||||||
1. **Pass ALL documents**: Validator decides what to validate, not the caller
|
|
||||||
2. **Metadata over content**: For binary/base64 documents, pass metadata only (no content preview)
|
|
||||||
3. **Preview samples**: For large text documents, pass preview + size info
|
|
||||||
4. **Generic prompts**: No task-specific or format-specific logic
|
|
||||||
5. **Flexible format matching**: Text formats compatible, document formats strict
|
|
||||||
6. **Size limits**: 50KB threshold for full content (configurable)
|
|
||||||
7. **Content type detection**: Explicit type detection (text/binary/base64/structured)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## BENEFITS OF TARGET DESIGN
|
|
||||||
|
|
||||||
✅ **Generic**: Works with any document type without hardcoding
|
|
||||||
✅ **Scalable**: Handles large documents without issues
|
|
||||||
✅ **Flexible**: Format validation is flexible where appropriate
|
|
||||||
✅ **Maintainable**: Clear separation of concerns
|
|
||||||
✅ **Robust**: Handles edge cases (binary, base64, large files)
|
|
||||||
✅ **Testable**: Each component can be tested independently
|
|
||||||
|
|
||||||
|
|
@ -15,7 +15,12 @@ logger = logging.getLogger(__name__)
|
||||||
methods = {}
|
methods = {}
|
||||||
|
|
||||||
def discoverMethods(serviceCenter):
|
def discoverMethods(serviceCenter):
|
||||||
"""Dynamically discover all method classes and their actions in modules methods package"""
|
"""Dynamically discover all method classes and their actions in modules methods package.
|
||||||
|
|
||||||
|
CRITICAL: If methods are already discovered, updates their Services reference to ensure
|
||||||
|
they use the current workflow (self.services.workflow). This prevents stale workflow IDs
|
||||||
|
from being used when a new workflow starts.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
# Import the methods package
|
# Import the methods package
|
||||||
methodsPackage = importlib.import_module('modules.workflows.methods')
|
methodsPackage = importlib.import_module('modules.workflows.methods')
|
||||||
|
|
@ -32,7 +37,29 @@ def discoverMethods(serviceCenter):
|
||||||
if (inspect.isclass(item) and
|
if (inspect.isclass(item) and
|
||||||
issubclass(item, MethodBase) and
|
issubclass(item, MethodBase) and
|
||||||
item != MethodBase):
|
item != MethodBase):
|
||||||
# Instantiate the method
|
|
||||||
|
# Check if method already exists in cache
|
||||||
|
shortName = itemName.replace('Method', '').lower()
|
||||||
|
if itemName in methods or shortName in methods:
|
||||||
|
# Method already discovered - update Services reference to use current workflow
|
||||||
|
existingMethodInfo = methods.get(itemName) or methods.get(shortName)
|
||||||
|
if existingMethodInfo and existingMethodInfo.get('instance'):
|
||||||
|
existingMethodInfo['instance'].services = serviceCenter
|
||||||
|
logger.debug(f"Updated Services reference for cached method {itemName} to use current workflow")
|
||||||
|
else:
|
||||||
|
# Method exists but instance is missing - recreate it
|
||||||
|
methodInstance = item(serviceCenter)
|
||||||
|
actions = methodInstance.actions
|
||||||
|
methodInfo = {
|
||||||
|
'instance': methodInstance,
|
||||||
|
'actions': actions,
|
||||||
|
'description': item.__doc__ or f"Method {itemName}"
|
||||||
|
}
|
||||||
|
methods[itemName] = methodInfo
|
||||||
|
methods[shortName] = methodInfo
|
||||||
|
logger.info(f"Recreated method {itemName} (short: {shortName}) with {len(actions)} actions")
|
||||||
|
else:
|
||||||
|
# Method not discovered yet - create new instance
|
||||||
methodInstance = item(serviceCenter)
|
methodInstance = item(serviceCenter)
|
||||||
|
|
||||||
# Use the actions property from MethodBase which handles @action decorator
|
# Use the actions property from MethodBase which handles @action decorator
|
||||||
|
|
@ -49,7 +76,6 @@ def discoverMethods(serviceCenter):
|
||||||
methods[itemName] = methodInfo
|
methods[itemName] = methodInfo
|
||||||
|
|
||||||
# Also store with short name for action executor access
|
# Also store with short name for action executor access
|
||||||
shortName = itemName.replace('Method', '').lower()
|
|
||||||
methods[shortName] = methodInfo
|
methods[shortName] = methodInfo
|
||||||
|
|
||||||
logger.info(f"Discovered method {itemName} (short: {shortName}) with {len(actions)} actions")
|
logger.info(f"Discovered method {itemName} (short: {shortName}) with {len(actions)} actions")
|
||||||
|
|
@ -58,7 +84,7 @@ def discoverMethods(serviceCenter):
|
||||||
logger.error(f"Error discovering method {name}: {str(e)}")
|
logger.error(f"Error discovering method {name}: {str(e)}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
logger.info(f"Discovered {len(methods)} method entries total")
|
logger.info(f"Discovered/updated {len(methods)} method entries total")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error discovering methods: {str(e)}")
|
logger.error(f"Error discovering methods: {str(e)}")
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,11 @@ class WorkflowManager:
|
||||||
# Store workflow in services for reference (this is the ChatWorkflow object)
|
# Store workflow in services for reference (this is the ChatWorkflow object)
|
||||||
self.services.workflow = workflow
|
self.services.workflow = workflow
|
||||||
|
|
||||||
|
# CRITICAL: Update all method instances to use the current Services object with the correct workflow
|
||||||
|
from modules.workflows.processing.shared.methodDiscovery import discoverMethods
|
||||||
|
discoverMethods(self.services)
|
||||||
|
logger.debug(f"Updated method instances to use workflow {self.services.workflow.id}")
|
||||||
|
|
||||||
if workflow.status == "running":
|
if workflow.status == "running":
|
||||||
logger.info(f"Stopping running workflow {workflowId} before processing new prompt")
|
logger.info(f"Stopping running workflow {workflowId} before processing new prompt")
|
||||||
workflow.status = "stopped"
|
workflow.status = "stopped"
|
||||||
|
|
@ -102,6 +107,12 @@ class WorkflowManager:
|
||||||
# Store workflow in services (this is the ChatWorkflow object)
|
# Store workflow in services (this is the ChatWorkflow object)
|
||||||
self.services.workflow = workflow
|
self.services.workflow = workflow
|
||||||
|
|
||||||
|
# CRITICAL: Update all method instances to use the current Services object with the correct workflow
|
||||||
|
# This ensures cached method instances don't use stale workflow IDs from previous workflows
|
||||||
|
from modules.workflows.processing.shared.methodDiscovery import discoverMethods
|
||||||
|
discoverMethods(self.services)
|
||||||
|
logger.debug(f"Updated method instances to use workflow {self.services.workflow.id}")
|
||||||
|
|
||||||
# Start workflow processing asynchronously
|
# Start workflow processing asynchronously
|
||||||
asyncio.create_task(self._workflowProcess(userInput))
|
asyncio.create_task(self._workflowProcess(userInput))
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue