stable workflow

parent a0219181e9
commit 8592cdd790

20 changed files with 3701 additions and 3581 deletions
[File diff suppressed because it is too large]
[File diff suppressed because it is too large]
@@ -22,13 +22,11 @@ class DocumentGenerator:
         """
         try:
             documents = action_result.data.get("documents", [])
-            logger.debug(f"Processing {len(documents)} documents from action result")
             processed_documents = []
             for doc in documents:
                 processed_doc = self.processSingleDocument(doc, action)
                 if processed_doc:
                     processed_documents.append(processed_doc)
-            logger.debug(f"Successfully processed {len(processed_documents)} documents")
             return processed_documents
         except Exception as e:
             logger.error(f"Error processing action result documents: {str(e)}")
@@ -54,19 +52,29 @@ class DocumentGenerator:
             # Dictionary format document - handle both 'documentName' and 'filename' keys
             filename = doc.get('documentName', doc.get('filename', \
                 f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"))
-            fileSize = doc.get('fileSize', len(str(doc.get('documentData', ''))))
             mimeType = doc.get('mimeType', 'application/octet-stream')
-            if mimeType == "application/octet-stream":
-                document_data = doc.get('documentData', '')
-                mimeType = detectMimeTypeFromContent(document_data, filename, self.service)

             # Handle documentData structure - it might be a dict with 'content' key or direct content
             document_data = doc.get('documentData', '')
             if isinstance(document_data, dict) and 'content' in document_data:
+                # This is the structure returned by extract action: documentData.content
                 content = document_data['content']
+                # Also check for other potential content fields
+                if not content and 'data' in document_data:
+                    content = document_data['data']
             else:
+                # Direct content (fallback)
                 content = document_data
+
+            # Calculate file size from actual content
+            fileSize = len(str(content)) if content else 0
+
+            # Detect mime type if not specified
+            if mimeType == "application/octet-stream":
+                mimeType = detectMimeTypeFromContent(content, filename, self.service)
+
+            logger.info(f"Processed document: {filename}, content length: {len(str(content))}, mimeType: {mimeType}")

             return {
                 'filename': filename,
                 'fileSize': fileSize,
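Note: a minimal standalone sketch of the two documentData shapes the rewritten block above now handles; the input dicts are hypothetical, the field names come from the diff.

    # Two hypothetical inputs: dict-shaped documentData (extract action) and direct string content
    doc_a = {"documentName": "report.txt", "documentData": {"content": "hello world"}}
    doc_b = {"documentName": "raw.txt", "documentData": "direct string content"}

    for doc in (doc_a, doc_b):
        document_data = doc.get("documentData", "")
        if isinstance(document_data, dict) and "content" in document_data:
            content = document_data["content"]
            if not content and "data" in document_data:
                content = document_data["data"]
        else:
            content = document_data
        # fileSize is now derived from the resolved content, not the raw field
        print(doc["documentName"], "->", len(str(content)) if content else 0)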
@@ -96,20 +104,32 @@ class DocumentGenerator:
         Returns a list of created document objects.
         """
         try:
+            logger.info(f"Creating documents from action result for {action.execMethod}.{action.execAction}")
+            logger.info(f"Action result data keys: {list(action_result.data.keys())}")
+
             processed_docs = self.processActionResultDocuments(action_result, action, workflow)
+            logger.info(f"Processed {len(processed_docs)} documents")
+
             created_documents = []
-            for doc_data in processed_docs:
+            for i, doc_data in enumerate(processed_docs):
                 try:
                     document_name = doc_data['filename']
                     document_data = doc_data['content']
                     mime_type = doc_data['mimeType']
+
+                    logger.info(f"Creating document {i+1}: {document_name} (mime: {mime_type}, content length: {len(str(document_data))})")
+
                     # Convert document data to string content
                     content = convertDocumentDataToString(document_data, getFileExtension(document_name))
+
                     # Skip empty or minimal content
                     minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
                     if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
                         logger.warning(f"Empty or minimal content for document {document_name}, skipping")
                         continue
+
+                    logger.info(f"Document {document_name} has content: {len(content)} characters")
+
                     # Create file in system
                     file_id = self.service.createFile(
                         fileName=document_name,
@@ -120,22 +140,27 @@ class DocumentGenerator:
                     if not file_id:
                         logger.error(f"Failed to create file for document {document_name}")
                         continue
-                    # Create document object
+                    logger.info(f"Created file with ID: {file_id}")
+
+                    # Create document object using existing file ID
                     document = self.service.createDocument(
                         fileName=document_name,
                         mimeType=mime_type,
                         content=content,
-                        base64encoded=False
+                        base64encoded=False,
+                        existing_file_id=file_id
                     )
                     if document:
                         created_documents.append(document)
-                        logger.debug(f"Created document: {document_name} ({len(content)} bytes, {mime_type})")
+                        logger.info(f"Successfully created ChatDocument: {document_name} (ID: {getattr(document, 'id', 'N/A')}, fileId: {getattr(document, 'fileId', 'N/A')})")
                     else:
                         logger.error(f"Failed to create ChatDocument object for {document_name}")
                 except Exception as e:
                     logger.error(f"Error creating document {doc_data.get('filename', 'unknown')}: {str(e)}")
                     continue
-            logger.info(f"Created {len(created_documents)} documents from action result")
+
+            logger.info(f"Successfully created {len(created_documents)} documents")
             return created_documents
         except Exception as e:
             logger.error(f"Error creating documents from action result: {str(e)}")
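Note: the new existing_file_id keyword ties the ChatDocument to the file created just above it, so no second file is created. A sketch of the intended call order, with a hypothetical stub Service; only the call shape is taken from the diff.

    class Service:
        """Hypothetical stand-in for the real service; return values are invented."""
        def createFile(self, fileName, content):
            return "file_001"
        def createDocument(self, fileName, mimeType, content, base64encoded, existing_file_id):
            return {"fileName": fileName, "fileId": existing_file_id}

    service = Service()
    file_id = service.createFile(fileName="report.txt", content="hello")
    document = service.createDocument(
        fileName="report.txt",
        mimeType="text/plain",
        content="hello",
        base64encoded=False,
        existing_file_id=file_id,  # reuse the file created above instead of making a new one
    )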
@@ -16,22 +16,53 @@ from modules.chat.documents.documentGeneration import DocumentGenerator

 logger = logging.getLogger(__name__)

+class WorkflowStoppedException(Exception):
+    """Exception raised when a workflow is stopped by the user."""
+    pass
+
 class HandlingTasks:
     def __init__(self, chatInterface, service, workflow=None):
         self.chatInterface = chatInterface
         self.service = service
         self.workflow = workflow
         self.documentGenerator = DocumentGenerator(service)

+    def _checkWorkflowStopped(self):
+        """
+        Check if workflow has been stopped by user and raise exception if so.
+        This function centralizes all workflow stop checking logic to avoid code duplication.
+        """
+        try:
+            # Get the current workflow status from the database to avoid stale data
+            current_workflow = self.chatInterface.getWorkflow(self.service.workflow.id)
+            if current_workflow and current_workflow.status == "stopped":
+                logger.info("Workflow stopped by user, aborting execution")
+                raise WorkflowStoppedException("Workflow was stopped by user")
+        except WorkflowStoppedException:
+            # Re-raise the WorkflowStoppedException immediately
+            raise
+        except Exception as e:
+            # If we can't get the current status due to other database issues, fall back to the in-memory object
+            logger.warning(f"Could not check current workflow status from database: {str(e)}")
+            if self.service.workflow.status == "stopped":
+                logger.info("Workflow stopped by user (from in-memory object), aborting execution")
+                raise WorkflowStoppedException("Workflow was stopped by user")
+
     async def generateTaskPlan(self, userInput: str, workflow) -> TaskPlan:
         """Generate a high-level task plan for the workflow."""
         try:
+            # Check workflow status before generating task plan
+            self._checkWorkflowStopped()
+
             logger.info(f"Generating task plan for workflow {workflow.id}")
             available_docs = self.service.getAvailableDocuments(workflow)
-            logger.debug(f"Available documents: {available_docs}")
+
+            # Check workflow status before calling AI service
+            self._checkWorkflowStopped()
+
             prompt = await self.service.callAiTextAdvanced(
-                createTaskPlanningPrompt(self, {
+                createTaskPlanningPrompt({
                     'user_request': userInput,
                     'available_documents': available_docs,
                     'workflow_id': workflow.id
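Note: a hypothetical driver loop showing how callers are expected to consume WorkflowStoppedException; executeTask and the exception class come from the diff, the loop itself is illustrative and assumes the exception is importable from this module.

    async def run_all_tasks(handler, tasks, workflow, context):
        results = []
        for i, step in enumerate(tasks, start=1):
            try:
                results.append(await handler.executeTask(
                    step, workflow, context, task_index=i, total_tasks=len(tasks)))
            except WorkflowStoppedException:
                # The user stopped the workflow; abort the remaining tasks cleanly.
                break
        return results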
@@ -53,16 +84,38 @@ class HandlingTasks:

             if not self._validateTaskPlan(task_plan_dict):
                 logger.error("Generated task plan failed validation")
+                logger.error(f"AI Response: {prompt}")
+                logger.error(f"Parsed Task Plan: {json.dumps(task_plan_dict, indent=2)}")
                 raise Exception("AI-generated task plan failed validation - AI is required for task planning")

-            tasks = [TaskStep(**task_dict) for task_dict in task_plan_dict.get('tasks', [])]
+            tasks = []
+            for task_dict in task_plan_dict.get('tasks', []):
+                # Map old 'description' field to new 'objective' field
+                if 'description' in task_dict and 'objective' not in task_dict:
+                    task_dict['objective'] = task_dict.pop('description')
+                tasks.append(TaskStep(**task_dict))
             task_plan = TaskPlan(
                 overview=task_plan_dict.get('overview', ''),
                 tasks=tasks
             )

             logger.info(f"Task plan generated successfully with {len(tasks)} tasks")
-            logger.debug(f"Task plan: {json.dumps(task_plan_dict, indent=2)}")
+            # Log the generated tasks
+            for i, task in enumerate(tasks):
+                logger.info(f"  Task {i+1}: {task.objective}")
+                if hasattr(task, 'success_criteria') and task.success_criteria:
+                    logger.info(f"    Success criteria: {task.success_criteria}")
+
+            # Log the complete task plan
+            logger.info("=== GENERATED TASK PLAN ===")
+            logger.info(f"Overview: {task_plan.overview}")
+            logger.info(f"Total tasks: {len(tasks)}")
+
+            # Log the RAW AI-generated task plan JSON for debugging
+            logger.info("=== RAW AI TASK PLAN JSON ===")
+            logger.info(f"AI Response with task plan: {prompt}")
+            logger.info("=== END RAW AI TASK PLAN JSON ===")
+
             return task_plan
         except Exception as e:
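Note: a standalone illustration of the backward-compatibility mapping above, so plans produced with the old 'description' field still load into TaskStep.

    task_dict = {"id": "task_1", "description": "Draft professional email"}
    if "description" in task_dict and "objective" not in task_dict:
        task_dict["objective"] = task_dict.pop("description")
    assert task_dict == {"id": "task_1", "objective": "Draft professional email"}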
@@ -72,12 +125,14 @@ class HandlingTasks:
     async def generateTaskActions(self, task_step, workflow, previous_results=None, enhanced_context=None) -> List[TaskAction]:
         """Generate actions for a given task step."""
         try:
-            logger.info(f"Generating actions for task: {task_step.description}")
+            # Check workflow status before generating actions
+            self._checkWorkflowStopped()
+
+            logger.info(f"Generating actions for task: {task_step.objective}")
+
             available_docs = self.service.getAvailableDocuments(workflow)
             available_connections = self.service.getConnectionReferenceList()
-            logger.debug(f"Available documents: {available_docs}")
-            logger.debug(f"Available connections: {available_connections}")

             context = enhanced_context or TaskContext(
                 task_step=task_step,
@@ -94,8 +149,11 @@ class HandlingTasks:
                 failed_actions=[],
                 successful_actions=[]
             )
+            # Check workflow status before calling AI service
+            self._checkWorkflowStopped()
+
             prompt = await self.service.callAiTextAdvanced(
-                await createActionDefinitionPrompt(self, context)
+                await createActionDefinitionPrompt(context, self.service)
             )
             # Inline parseActionResponse logic here
             json_start = prompt.find('{')
@@ -126,40 +184,169 @@ class HandlingTasks:
             }) for a in actions]

             valid_actions = [ta for ta in task_actions if ta]
-            logger.info(f"Generated {len(valid_actions)} actions for task: {task_step.description}")
-            logger.debug(f"Task actions plan: {json.dumps(action_data, indent=2)}")
+            logger.info(f"Generated {len(valid_actions)} actions for task: {task_step.objective}")
+            # Log the generated actions
+            for i, action in enumerate(valid_actions):
+                logger.info(f"  Action {i+1}: {action.execMethod}.{action.execAction}")
+                if action.expectedDocumentFormats:
+                    logger.info(f"    Expected formats: {action.expectedDocumentFormats}")
+                if action.execParameters.get('documentList'):
+                    logger.info(f"    Input documents: {action.execParameters['documentList']}")
+
+            # Log the complete action plan
+            logger.info("=== GENERATED ACTION PLAN ===")
+            logger.info(f"Task: {task_step.objective}")
+            logger.info(f"Total actions: {len(valid_actions)}")
+
+            # Log the RAW AI-generated action plan JSON for debugging
+            logger.info("=== RAW AI ACTION PLAN JSON ===")
+            logger.info(f"AI Response with parsed actions: {prompt}")
+            logger.info("=== END RAW AI ACTION PLAN JSON ===")
+
             return valid_actions
         except Exception as e:
             logger.error(f"Error in generateTaskActions: {str(e)}")
             return []

-    async def executeTask(self, task_step, workflow, context) -> TaskResult:
+    async def executeTask(self, task_step, workflow, context, task_index=None, total_tasks=None) -> TaskResult:
         """Execute all actions for a task step, with state management and retries."""
-        logger.info(f"Executing task: {task_step.description}")
+        logger.info(f"=== STARTING TASK {task_index or '?'}: {task_step.objective} ===")
+
+        # Create database log entry for task start in format expected by frontend
+        if task_index is not None:
+            if total_tasks is not None:
+                self.chatInterface.createWorkflowLog({
+                    "workflowId": workflow.id,
+                    "message": f"Executing task {task_index}/{total_tasks}",
+                    "type": "info"
+                })
+            else:
+                self.chatInterface.createWorkflowLog({
+                    "workflowId": workflow.id,
+                    "message": f"Executing task {task_index}/?",
+                    "type": "info"
+                })
+
+            # Create a task start message for the user
+            task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index)
+            task_start_message = {
+                "workflowId": workflow.id,
+                "role": "assistant",
+                "message": f"🚀 Starting Task {task_progress}\n\nObjective: {task_step.objective}",
+                "status": "step",
+                "sequenceNr": len(workflow.messages) + 1,
+                "publishedAt": datetime.now(UTC).isoformat(),
+                "documentsLabel": f"task_{task_index}_start",
+                "documents": []
+            }
+
+            message = self.chatInterface.createWorkflowMessage(task_start_message)
+            if message:
+                workflow.messages.append(message)
+                logger.info(f"Task start message created for task {task_index}")
+
         state = TaskExecutionState(task_step)
         retry_context = context
         max_retries = state.max_retries
         for attempt in range(max_retries):
             logger.info(f"Task execution attempt {attempt+1}/{max_retries}")
+
+            # Check workflow status before starting task execution
+            self._checkWorkflowStopped()
+
             actions = await self.generateTaskActions(task_step, workflow, previous_results=retry_context.previous_results, enhanced_context=retry_context)
             if not actions:
                 logger.error("No actions defined for task step, aborting task execution")
                 break
+
+            # Log total actions count for this task
+            total_actions = len(actions)
+            logger.info(f"Task {task_index or '?'} has {total_actions} actions")
+
             action_results = []
-            for action in actions:
-                result = await self.executeSingleAction(action, workflow)
+            for action_idx, action in enumerate(actions):
+                # Check workflow status before each action execution
+                self._checkWorkflowStopped()
+
+                # Log action start in format expected by frontend
+                action_number = action_idx + 1
+                logger.info(f"Task {task_index} - Starting action {action_number}/{total_actions}")
+
+                # Create database log entry for action start
+                self.chatInterface.createWorkflowLog({
+                    "workflowId": workflow.id,
+                    "message": f"Task {task_index} - Starting action {action_number}/{total_actions}",
+                    "type": "info"
+                })
+
+                # Create an action start message for the user
+                action_start_message = {
+                    "workflowId": workflow.id,
+                    "role": "assistant",
+                    "message": f"⚡ Task {task_index} - Action {action_number}/{total_actions}\n\nMethod: {action.execMethod}.{action.execAction}",
+                    "status": "step",
+                    "sequenceNr": len(workflow.messages) + 1,
+                    "publishedAt": datetime.now(UTC).isoformat(),
+                    "documentsLabel": f"action_{action_number}_start",
+                    "documents": []
+                }
+
+                message = self.chatInterface.createWorkflowMessage(action_start_message)
+                if message:
+                    workflow.messages.append(message)
+                    logger.info(f"Action start message created for action {action_number}")
+
+                # Pass action index to executeSingleAction with task context
+                result = await self.executeSingleAction(action, workflow, task_step, task_index, action_number, total_actions)
                 action_results.append(result)
                 if result.success:
                     state.addSuccessfulAction(result)
                 else:
                     state.addFailedAction(result)
+
+            # Check workflow status before review
+            self._checkWorkflowStopped()
+
             review_result = await self.reviewTaskCompletion(task_step, actions, action_results, workflow)
             success = review_result.status == 'success'
             feedback = review_result.reason
             error = None if success else review_result.reason
             if success:
-                logger.info(f"Task step '{task_step.description}' completed successfully")
+                logger.info(f"=== TASK {task_index or '?'} COMPLETED SUCCESSFULLY: {task_step.objective} ===")
+
+                # Create database log entry for task completion
+                if total_tasks is not None:
+                    self.chatInterface.createWorkflowLog({
+                        "workflowId": workflow.id,
+                        "message": f"🎯 Task {task_index}/{total_tasks} completed",
+                        "type": "success"
+                    })
+                else:
+                    self.chatInterface.createWorkflowLog({
+                        "workflowId": workflow.id,
+                        "message": f"🎯 Task {task_index}/? completed",
+                        "type": "success"
+                    })
+
+                # Create a task completion message for the user
+                task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index)
+                task_completion_message = {
+                    "workflowId": workflow.id,
+                    "role": "assistant",
+                    "message": f"🎯 Task {task_progress} Completed Successfully!\n\nObjective: {task_step.objective}\n\nFeedback: {feedback or 'Task completed successfully'}",
+                    "status": "step",
+                    "sequenceNr": len(workflow.messages) + 1,
+                    "publishedAt": datetime.now(UTC).isoformat(),
+                    "documentsLabel": f"task_{task_index}_completion",
+                    "documents": []
+                }
+
+                message = self.chatInterface.createWorkflowMessage(task_completion_message)
+                if message:
+                    workflow.messages.append(message)
+                    logger.info(f"Task completion message created for task {task_index}")
+
                 return TaskResult(
                     taskId=task_step.id,
                     status=TaskStatus.COMPLETED,
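Note: shape of the createWorkflowLog payloads introduced above, which the diff says match the format expected by the frontend; values here are illustrative.

    log_entry = {
        "workflowId": "wf_123",           # hypothetical workflow ID
        "message": "Executing task 2/4",  # or "🎯 Task 2/4 completed"
        "type": "info",                   # "success" for completion entries
    }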
@@ -168,7 +355,7 @@ class HandlingTasks:
                     error=None
                 )
             elif review_result.status == 'retry' and state.canRetry():
-                logger.warning(f"Task step '{task_step.description}' requires retry: {review_result.improvements}")
+                logger.warning(f"Task step '{task_step.objective}' requires retry: {review_result.improvements}")
                 state.incrementRetryCount()
                 retry_context.retry_count = state.retry_count
                 retry_context.improvements = review_result.improvements
@@ -180,7 +367,7 @@ class HandlingTasks:
                 retry_context.successful_actions = state.successful_actions
                 continue
             else:
-                logger.error(f"Task step '{task_step.description}' failed after {attempt+1} attempts")
+                logger.error(f"=== TASK {task_index or '?'} FAILED: {task_step.objective} after {attempt+1} attempts ===")
                 return TaskResult(
                     taskId=task_step.id,
                     status=TaskStatus.FAILED,
@@ -188,7 +375,7 @@ class HandlingTasks:
                     feedback=feedback,
                     error=error
                 )
-        logger.error(f"Task step '{task_step.description}' failed after all retries")
+        logger.error(f"=== TASK {task_index or '?'} FAILED AFTER ALL RETRIES: {task_step.objective} ===")
         return TaskResult(
             taskId=task_step.id,
             status=TaskStatus.FAILED,
@@ -199,6 +386,9 @@ class HandlingTasks:

     async def reviewTaskCompletion(self, task_step, task_actions, action_results, workflow):
         try:
+            # Check workflow status before reviewing task completion
+            self._checkWorkflowStopped()
+
             review_context = ReviewContext(
                 task_step=task_step,
                 action_results=action_results,
@@ -210,8 +400,11 @@ class HandlingTasks:
                     'errors': [result.error for result in action_results if not result.success]
                 }
             )
+            # Check workflow status before calling AI service
+            self._checkWorkflowStopped()
+
             # Use promptFactory for review prompt
-            prompt = await createResultReviewPrompt(self, review_context)
+            prompt = await createResultReviewPrompt(review_context)
             response = await self.service.callAiTextAdvanced(prompt)
             # Inline parseReviewResponse logic here
             json_start = response.find('{')
@@ -239,10 +432,6 @@ class HandlingTasks:
                 improvements = []

             # Ensure all list fields are properly typed
-            missing_outputs = review.get('missing_outputs', [])
-            if not isinstance(missing_outputs, list):
-                missing_outputs = []
-
             met_criteria = review.get('met_criteria', [])
             if not isinstance(met_criteria, list):
                 met_criteria = []
@@ -256,14 +445,14 @@ class HandlingTasks:
                 reason=review.get('reason', 'No reason provided'),
                 improvements=improvements,
                 quality_score=review.get('quality_score', 5),
-                missing_outputs=missing_outputs,
+                missing_outputs=[],
                 met_criteria=met_criteria,
                 unmet_criteria=unmet_criteria,
                 confidence=review.get('confidence', 0.5)
             )

             # Enhanced validation logging
-            logger.info(f"VALIDATION RESULT - Task: '{task_step.description}' - Status: {review_result.status.upper()}, Quality: {review_result.quality_score}/10")
+            logger.info(f"VALIDATION RESULT - Task: '{task_step.objective}' - Status: {review_result.status.upper()}, Quality: {review_result.quality_score}/10")
             if review_result.status == 'success':
                 logger.info(f"VALIDATION SUCCESS - Task completed successfully")
                 if review_result.met_criteria:
@@ -274,8 +463,6 @@ class HandlingTasks:
                     logger.warning(f"Unmet criteria: {', '.join(review_result.unmet_criteria)}")
             else:
                 logger.error(f"VALIDATION FAILED - Task failed: {review_result.reason}")
-                if review_result.missing_outputs:
-                    logger.error(f"Missing outputs: {', '.join(review_result.missing_outputs)}")

             return review_result
         except Exception as e:
@@ -288,26 +475,22 @@ class HandlingTasks:

     async def prepareTaskHandover(self, task_step, task_actions, review_result, workflow):
         try:
+            # Check workflow status before preparing task handover
+            self._checkWorkflowStopped()
+
             # Log handover status summary
             if hasattr(review_result, 'status'):
                 status = review_result.status
-            if hasattr(review_result, 'missing_outputs'):
-                missing = review_result.missing_outputs
-            else:
-                missing = []
             if hasattr(review_result, 'met_criteria'):
                 met = review_result.met_criteria
             else:
                 met = []

-            logger.debug(f"Task handover status: {status}")
-            logger.debug(f"Promised documents: {task_step.expected_outputs}")
-            logger.debug(f"Delivered documents: {met}")
-            logger.debug(f"Missing documents: {missing}")
-
             handover_data = {
                 'task_id': task_step.id,
-                'task_description': task_step.description,
+                'task_description': task_step.objective,
                 'actions': [action.to_dict() for action in task_actions],
                 'review_result': review_result.to_dict() if hasattr(review_result, 'to_dict') else review_result,
                 'workflow_id': workflow.id,
@@ -321,20 +504,35 @@ class HandlingTasks:

     # --- Helper action handling methods ---

-    async def executeSingleAction(self, action, workflow):
+    async def executeSingleAction(self, action, workflow, task_step, task_index=None, action_index=None, total_actions=None):
         """Execute a single action and return ActionResult with enhanced document processing"""
         try:
-            logger.info(f"Executing action: {action.execMethod}.{action.execAction}")
+            # Check workflow status before executing action
+            self._checkWorkflowStopped()
+
+            # Use passed indices or fallback to '?'
+            task_num = task_index if task_index is not None else '?'
+            action_num = action_index if action_index is not None else '?'
+
+            logger.info(f"=== TASK {task_num} ACTION {action_num}: {action.execMethod}.{action.execAction} ===")
+
-            # Log input documents and connections
+            # Log input parameters
             input_docs = action.execParameters.get('documentList', [])
-            logger.debug(f"Input documents: {input_docs}")
-            logger.debug(f"Input connections: {action.execParameters.get('connections', [])}")
+            input_connections = action.execParameters.get('connections', [])
+            logger.info(f"Input documents: {input_docs} (type: {type(input_docs)})")
+            if input_connections:
+                logger.info(f"Input connections: {input_connections}")
+
+            # Log all action parameters for debugging
+            logger.info(f"All action parameters: {action.execParameters}")
+
             enhanced_parameters = action.execParameters.copy()
             if action.expectedDocumentFormats:
                 enhanced_parameters['expectedDocumentFormats'] = action.expectedDocumentFormats
-                logger.debug(f"Expected document formats: {action.expectedDocumentFormats}")
+                logger.info(f"Expected formats: {action.expectedDocumentFormats}")
+
+            # Check workflow status before executing the action
+            self._checkWorkflowStopped()
+
             result = await self.service.executeAction(
                 methodName=action.execMethod,
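Note: a sketch of a call with the new task-context parameters (signature from the diff; the caller objects are assumed to exist), showing what each index is used for.

    async def run_one(handler, action, workflow, task_step):
        return await handler.executeSingleAction(
            action, workflow, task_step,
            task_index=2,      # which task this action belongs to
            action_index=1,    # 1-based action number within the task
            total_actions=3,   # lets logs read "Action 1/3"
        )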
@@ -350,11 +548,51 @@ class HandlingTasks:
                 action.setSuccess()
                 action.result = result.data.get("result", "")
                 action.execResultLabel = result_label
-                await self.createActionMessage(action, result, workflow, result_label, created_documents)
-                logger.info(f"Action {action.execMethod}.{action.execAction} executed successfully")
+                await self.createActionMessage(action, result, workflow, result_label, created_documents, task_step, task_index)
+
+                # Log action results
+                logger.info(f"✓ Action completed successfully")
+
+                # Create database log entry for action completion
+                if total_actions is not None:
+                    self.chatInterface.createWorkflowLog({
+                        "workflowId": workflow.id,
+                        "message": f"✅ Task {task_num} - Action {action_num}/{total_actions} completed",
+                        "type": "success"
+                    })
+                else:
+                    self.chatInterface.createWorkflowLog({
+                        "workflowId": workflow.id,
+                        "message": f"✅ Task {task_num} - Action {action_num}/? completed",
+                        "type": "success"
+                    })
+
+                if created_documents:
+                    logger.info(f"Output documents ({len(created_documents)}):")
+                    for i, doc in enumerate(created_documents):
+                        if hasattr(doc, 'filename'):
+                            logger.info(f"  {i+1}. {doc.filename}")
+                        elif isinstance(doc, dict) and 'filename' in doc:
+                            logger.info(f"  {i+1}. {doc['filename']}")
+                        else:
+                            logger.info(f"  {i+1}. {type(doc).__name__}")
+
+                    # Log document details for debugging
+                    logger.info("Document details:")
+                    for i, doc in enumerate(created_documents):
+                        if hasattr(doc, 'filename'):
+                            logger.info(f"  Doc {i+1}: filename={doc.filename}, type={type(doc)}")
+                            if hasattr(doc, 'id'):
+                                logger.info(f"    ID: {doc.id}")
+                            if hasattr(doc, 'fileId'):
+                                logger.info(f"    File ID: {doc.fileId}")
+                        elif isinstance(doc, dict):
+                            logger.info(f"  Doc {i+1}: dict with keys: {list(doc.keys())}")
+                else:
+                    logger.info("Output: No documents created")
             else:
                 action.setError(result.error or "Action execution failed")
-                logger.error(f"Action {action.execMethod}.{action.execAction} failed: {result.error}")
+                logger.error(f"✗ Action failed: {result.error}")

             # Extract document filenames for the ActionResult
             document_filenames = []
@@ -367,6 +605,9 @@ class HandlingTasks:
             # Also include the original documents from the service result for validation
             original_documents = result.data.get("documents", [])

+            # Log action summary
+            logger.info(f"=== TASK {task_num} ACTION {action_num} COMPLETED ===")
+
             return ActionResult(
                 success=result.success,
                 data={
@@ -407,9 +648,12 @@ class HandlingTasks:
                 error=str(e)
             )

-    async def createActionMessage(self, action, result, workflow, result_label=None, created_documents=None):
+    async def createActionMessage(self, action, result, workflow, result_label=None, created_documents=None, task_step=None, task_index=None):
         """Create and store a message for the action result in the workflow with enhanced document processing"""
         try:
+            # Check workflow status before creating action message
+            self._checkWorkflowStopped()
+
             if result_label is None:
                 result_label = action.execResultLabel
||||||
|
|
@ -417,25 +661,29 @@ class HandlingTasks:
|
||||||
if created_documents is None:
|
if created_documents is None:
|
||||||
created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow)
|
created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow)
|
||||||
|
|
||||||
# Log delivered documents with sizes
|
# Log delivered documents
|
||||||
if created_documents:
|
if created_documents:
|
||||||
doc_info = []
|
logger.info(f"Result label: {result_label} - {len(created_documents)} documents")
|
||||||
for doc in created_documents:
|
|
||||||
if hasattr(doc, 'filename') and hasattr(doc, 'fileSize'):
|
|
||||||
doc_info.append(f"{doc.filename} ({doc.fileSize} bytes)")
|
|
||||||
elif hasattr(doc, 'filename'):
|
|
||||||
doc_info.append(f"{doc.filename}")
|
|
||||||
else:
|
|
||||||
doc_info.append("unknown document")
|
|
||||||
logger.debug(f"Produced result label: {result_label}")
|
|
||||||
logger.debug(f"Delivered documents: {doc_info}")
|
|
||||||
else:
|
else:
|
||||||
logger.debug(f"Produced result label: {result_label} (no documents)")
|
logger.info(f"Result label: {result_label} - No documents")
|
||||||
|
|
||||||
|
# Create a more meaningful message that includes task context
|
||||||
|
task_objective = task_step.objective if task_step else 'Unknown task'
|
||||||
|
|
||||||
|
# Build a user-friendly message
|
||||||
|
if created_documents and len(created_documents) > 0:
|
||||||
|
doc_names = [doc.filename if hasattr(doc, 'filename') else str(doc) for doc in created_documents[:3]]
|
||||||
|
if len(created_documents) > 3:
|
||||||
|
doc_names.append(f"... and {len(created_documents) - 3} more")
|
||||||
|
|
||||||
|
message_text = f"✅ Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} completed\n\nObjective: {task_objective}\n\nGenerated {len(created_documents)} document(s): {', '.join(doc_names)}"
|
||||||
|
else:
|
||||||
|
message_text = f"✅ Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} completed\n\nObjective: {task_objective}\n\nAction executed successfully"
|
||||||
|
|
||||||
message_data = {
|
message_data = {
|
||||||
"workflowId": workflow.id,
|
"workflowId": workflow.id,
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
"message": f"Executed action {action.execMethod}.{action.execAction}",
|
"message": message_text,
|
||||||
"status": "step",
|
"status": "step",
|
||||||
"sequenceNr": len(workflow.messages) + 1,
|
"sequenceNr": len(workflow.messages) + 1,
|
||||||
"publishedAt": datetime.now(UTC).isoformat(),
|
"publishedAt": datetime.now(UTC).isoformat(),
|
||||||
|
|
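Note: a standalone illustration of the "first three names plus a count" truncation used to build the user-facing message above.

    created = ["a.txt", "b.txt", "c.txt", "d.txt", "e.txt"]
    doc_names = created[:3]
    if len(created) > 3:
        doc_names.append(f"... and {len(created) - 3} more")
    print(", ".join(doc_names))  # a.txt, b.txt, c.txt, ... and 2 more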
@@ -449,7 +697,7 @@ class HandlingTasks:
             message = self.chatInterface.createWorkflowMessage(message_data)
             if message:
                 workflow.messages.append(message)
-                logger.info(f"Created action message for {action.execMethod}.{action.execAction} with {len(created_documents)} documents")
+                logger.info(f"Message created: {action.execMethod}.{action.execAction}")
             else:
                 logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}")
         except Exception as e:
@@ -459,29 +707,59 @@ class HandlingTasks:

     def _validateTaskPlan(self, task_plan: Dict[str, Any]) -> bool:
         try:

             if not isinstance(task_plan, dict):
+                logger.error("Task plan is not a dictionary")
                 return False

             if 'tasks' not in task_plan or not isinstance(task_plan['tasks'], list):
+                logger.error(f"Task plan missing 'tasks' field or not a list. Found: {type(task_plan.get('tasks', 'MISSING'))}")
                 return False

+            # First pass: collect all task IDs to validate dependencies
             task_ids = set()
             for task in task_plan['tasks']:
                 if not isinstance(task, dict):
+                    logger.error(f"Task is not a dictionary: {type(task)}")
                     return False
-                required_fields = ['id', 'description', 'expected_outputs', 'success_criteria']
-                if not all(field in task for field in required_fields):
-                    return False
-                if task['id'] in task_ids:
+                if 'id' not in task:
+                    logger.error(f"Task missing 'id' field: {task}")
                     return False
                 task_ids.add(task['id'])

+            # Second pass: validate each task
+            for i, task in enumerate(task_plan['tasks']):
+                if not isinstance(task, dict):
+                    logger.error(f"Task {i} is not a dictionary: {type(task)}")
+                    return False
+
+                required_fields = ['id', 'objective', 'success_criteria']
+                missing_fields = [field for field in required_fields if field not in task]
+                if missing_fields:
+                    logger.error(f"Task {i} missing required fields: {missing_fields}")
+                    return False
+
+                # Check for duplicate IDs (shouldn't happen after first pass, but safety check)
+                if task['id'] in task_ids and list(task_plan['tasks']).count(task['id']) > 1:
+                    logger.error(f"Task {i} has duplicate ID: {task['id']}")
+                    return False
+
                 dependencies = task.get('dependencies', [])
                 if not isinstance(dependencies, list):
+                    logger.error(f"Task {i} dependencies is not a list: {type(dependencies)}")
                     return False

                 for dep in dependencies:
                     if dep not in task_ids and dep != 'task_0':
+                        logger.error(f"Task {i} has invalid dependency: {dep} (available: {list(task_ids) + ['task_0']})")
                         return False
-                if 'ai_prompt' in task and not isinstance(task['ai_prompt'], str):
-                    return False
+
+            logger.info(f"Task plan validation successful with {len(task_ids)} tasks")
             return True

         except Exception as e:
             logger.error(f"Error validating task plan: {str(e)}")
             return False
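Note: a minimal plan that passes the two-pass validation above; every task carries 'id', 'objective', and 'success_criteria', and dependencies reference only known task IDs or the special 'task_0'. Values are illustrative.

    plan = {
        "overview": "Prepare and send a summary email",
        "tasks": [
            {"id": "task_1", "objective": "Extract key information",
             "success_criteria": ["Key points identified"], "dependencies": ["task_0"]},
            {"id": "task_2", "objective": "Draft and send the email",
             "success_criteria": ["Email sent with confirmation"], "dependencies": ["task_1"]},
        ],
    }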
@@ -7,7 +7,7 @@ from typing import Any, Dict

 # Prompt creation helpers extracted from managerChat.py

-def createTaskPlanningPrompt(self, context: Dict[str, Any]) -> str:
+def createTaskPlanningPrompt(context: Dict[str, Any]) -> str:
     """Create prompt for task planning"""
     return f"""You are a task planning AI that analyzes user requests and creates structured task plans.
||||||
|
|
@ -19,17 +19,16 @@ INSTRUCTIONS:
|
||||||
1. Analyze the user request and available documents
|
1. Analyze the user request and available documents
|
||||||
2. Break down the request into 2-4 meaningful high-level task steps
|
2. Break down the request into 2-4 meaningful high-level task steps
|
||||||
3. Focus on business outcomes, not technical operations
|
3. Focus on business outcomes, not technical operations
|
||||||
4. For document processing, create ONE task with a comprehensive AI prompt rather than multiple granular tasks
|
4. Each task should produce meaningful, usable outputs
|
||||||
5. Each task should produce meaningful, usable outputs
|
5. Ensure proper handover between tasks using result labels
|
||||||
6. Ensure proper handover between tasks using result labels
|
6. Return a JSON object with the exact structure shown below
|
||||||
7. Return a JSON object with the exact structure shown below
|
|
||||||
|
|
||||||
TASK PLANNING PRINCIPLES:
|
TASK PLANNING PRINCIPLES:
|
||||||
- Combine related operations into single tasks (e.g., \"Extract and analyze all candidate profiles\" instead of separate \"read file\" and \"analyze content\" tasks)
|
- Break down complex requests into logical, sequential steps
|
||||||
- Use comprehensive AI prompts for document processing rather than multiple small tasks
|
|
||||||
- Focus on business value and outcomes
|
- Focus on business value and outcomes
|
||||||
- Keep tasks at a meaningful level of abstraction
|
- Keep tasks at a meaningful level of abstraction
|
||||||
- Each task should produce results that can be used by subsequent tasks
|
- Each task should produce results that can be used by subsequent tasks
|
||||||
|
- Ensure clear dependencies and handovers between tasks
|
||||||
|
|
||||||
REQUIRED JSON STRUCTURE:
|
REQUIRED JSON STRUCTURE:
|
||||||
{{
|
{{
|
||||||
|
|
@@ -37,31 +36,34 @@ REQUIRED JSON STRUCTURE:
   \"tasks\": [
     {{
       \"id\": \"task_1\",
-      \"description\": \"Clear description of what this task accomplishes (business outcome)\",
+      \"objective\": \"Clear business objective this task accomplishes\",
       \"dependencies\": [\"task_0\"], // IDs of tasks that must complete first
-      \"expected_outputs\": [\"output1\", \"output2\"],
       \"success_criteria\": [\"criteria1\", \"criteria2\"],
-      \"required_documents\": [\"doc1\", \"doc2\"],
-      \"estimated_complexity\": \"low|medium|high\",
-      \"ai_prompt\": \"Comprehensive AI prompt for document processing tasks (if applicable)\"
+      \"estimated_complexity\": \"low|medium|high\"
     }}
   ]
 }}

-EXAMPLES OF GOOD TASK DESCRIPTIONS:
-- \"Extract and analyze all candidate profiles to identify key qualifications and experience\"
-- \"Create evaluation matrix and rate candidates against product designer criteria\"
-- \"Generate comprehensive PowerPoint presentation for management decision\"
-- \"Store final presentation in SharePoint for specified account\"
+EXAMPLES OF GOOD TASK OBJECTIVES:
+- \"Extract key information from documents for email preparation\"
+- \"Draft professional email incorporating analyzed information\"
+- \"Send email using specified email account\"
+- \"Store email draft and confirmation in system\"

-EXAMPLES OF BAD TASK DESCRIPTIONS:
+EXAMPLES OF GOOD SUCCESS CRITERIA:
+- \"Document analysis completed with key points identified\"
+- \"Email draft created with professional tone and clear structure\"
+- \"Email successfully sent with delivery confirmation\"
+- \"All outputs properly stored and accessible for future use\"
+
+EXAMPLES OF BAD TASK OBJECTIVES:
 - \"Open and read the PDF file\" (too granular)
 - \"Identify table structure\" (technical detail)
 - \"Convert data to CSV format\" (implementation detail)

 NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""

-async def createActionDefinitionPrompt(self, context) -> str:
+async def createActionDefinitionPrompt(context, service) -> str:
     """Create prompt for action generation with enhanced document extraction guidance and retry context"""
     task_step = context.task_step
     workflow = context.workflow
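Note: one concrete instance of the REQUIRED JSON STRUCTURE above, reusing objective and success-criteria phrasing from the prompt's own examples; values are illustrative.

    example_plan = {
        "overview": "Prepare and send a summary email",
        "tasks": [
            {
                "id": "task_1",
                "objective": "Extract key information from documents for email preparation",
                "dependencies": ["task_0"],
                "success_criteria": ["Document analysis completed with key points identified"],
                "estimated_complexity": "medium",
            }
        ],
    }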
@@ -71,23 +73,32 @@ async def createActionDefinitionPrompt(self, context) -> str:
     retry_count = context.retry_count or 0
     previous_action_results = context.previous_action_results or []
     previous_review_result = context.previous_review_result
-    methodList = self.service.getMethodsList()
+    methodList = service.getMethodsList()
     method_actions = {}
     for sig in methodList:
         if '.' in sig:
             method, rest = sig.split('.', 1)
             action = rest.split('(')[0]
             method_actions.setdefault(method, []).append((action, sig))
-    messageSummary = await self.service.summarizeChat(workflow.messages)
-    docRefs = self.service.getDocumentReferenceList()
-    connRefs = self.service.getConnectionReferenceList()
-    all_doc_refs = docRefs.get('chat', []) + docRefs.get('history', [])
+    messageSummary = await service.summarizeChat(workflow.messages)
+    # Get ALL documents from the entire workflow, not just current round
+    docRefs = service.getDocumentReferenceList()
+    connRefs = service.getConnectionReferenceList()
+
+    # Get documents from current round (chat) and entire workflow history
+    current_round_docs = docRefs.get('chat', [])
+    workflow_history_docs = docRefs.get('history', [])
+
+    # Combine all documents, prioritizing current round first, then workflow history
+    all_doc_refs = current_round_docs + workflow_history_docs
+
+    # Log document availability for debugging
+    logging.debug(f"Document references - Current round: {len(current_round_docs)}, Workflow history: {len(workflow_history_docs)}, Total: {len(all_doc_refs)}")
     available_methods_str = ''
     for method, actions in method_actions.items():
         available_methods_str += f"- {method}:\n"
         for action, sig in actions:
             available_methods_str += f"  - {action}: {sig}\n"
-    task_ai_prompt = task_step.ai_prompt or ''
     retry_context = ""
     if retry_count > 0:
         retry_context = f"""
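Note: a standalone illustration of the method-signature parsing above, with a hypothetical signature string.

    sig = "email.send(recipient, subject, body)"
    method, rest = sig.split(".", 1)  # "email", "send(recipient, subject, body)"
    action = rest.split("(")[0]       # "send"
    print(method, action)             # email send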
@ -105,17 +116,36 @@ Previous review feedback:
|
||||||
- Status: {previous_review_result.status or 'unknown'}
|
- Status: {previous_review_result.status or 'unknown'}
|
||||||
- Reason: {previous_review_result.reason or 'No reason provided'}
|
- Reason: {previous_review_result.reason or 'No reason provided'}
|
||||||
- Quality Score: {previous_review_result.quality_score or 0}/10
|
- Quality Score: {previous_review_result.quality_score or 0}/10
|
||||||
- Missing Outputs: {', '.join(previous_review_result.missing_outputs or [])}
|
|
||||||
- Unmet Criteria: {', '.join(previous_review_result.unmet_criteria or [])}
|
- Unmet Criteria: {', '.join(previous_review_result.unmet_criteria or [])}
|
||||||
"""
|
"""
|
||||||
expected_outputs_str = ', '.join(task_step.expected_outputs or [])
|
|
||||||
success_criteria_str = ', '.join(task_step.success_criteria or [])
|
success_criteria_str = ', '.join(task_step.success_criteria or [])
|
||||||
previous_results_str = ', '.join(previous_results) if previous_results else 'None'
|
previous_results_str = ', '.join(previous_results) if previous_results else 'None'
|
||||||
improvements_str = str(improvements) if improvements else 'None'
|
improvements_str = str(improvements) if improvements else 'None'
|
||||||
available_connections_str = '\n'.join(f"- {conn}" for conn in connRefs)
|
available_connections_str = '\n'.join(f"- {conn}" for conn in connRefs)
|
||||||
available_documents_str = '\n'.join(
|
# Build comprehensive document list showing both current round and workflow history
|
||||||
f"- {doc.documentsLabel} contains {', '.join(doc.documents)}" for doc in all_doc_refs
|
if all_doc_refs:
|
||||||
)
|
available_documents_str = "CURRENT ROUND DOCUMENTS:\n"
|
||||||
|
if current_round_docs:
|
||||||
|
for doc in current_round_docs:
|
||||||
|
available_documents_str += f"- {doc.documentsLabel} contains {', '.join(doc.documents)}\n"
|
||||||
|
else:
|
||||||
|
available_documents_str += "- No documents in current round\n"
|
||||||
|
|
||||||
|
available_documents_str += "\nWORKFLOW HISTORY DOCUMENTS:\n"
|
||||||
|
if workflow_history_docs:
|
||||||
|
for doc in workflow_history_docs:
|
||||||
|
available_documents_str += f"- {doc.documentsLabel} contains {', '.join(doc.documents)}\n"
|
||||||
|
else:
|
||||||
|
available_documents_str += "- No documents in workflow history\n"
|
||||||
|
else:
|
||||||
|
available_documents_str = "NO DOCUMENTS AVAILABLE - This workflow has no documents to process."
|
||||||
|
|
||||||
|
# Debug logging for document availability
|
||||||
|
logging.debug(f"Available documents string length: {len(available_documents_str)}")
|
||||||
|
logging.debug(f"Current round docs count: {len(current_round_docs)}")
|
||||||
|
logging.debug(f"Workflow history docs count: {len(workflow_history_docs)}")
|
||||||
|
logging.debug(f"Total doc refs: {len(all_doc_refs)}")
|
||||||
|
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
You are an action generation AI that creates specific actions to accomplish a task step.
|
You are an action generation AI that creates specific actions to accomplish a task step.
|
||||||
|
|
||||||
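With one docList in the current round and one in history, the new branch above renders an AVAILABLE DOCUMENTS section shaped roughly like this (labels invented):

    CURRENT ROUND DOCUMENTS:
    - task1_action2_results contains report_extracted.csv, summary.txt

    WORKFLOW HISTORY DOCUMENTS:
    - task0_action1_results contains source.pdf

If both lists are empty it collapses to the single NO DOCUMENTS AVAILABLE sentence, which the prompt rules in the next hunk key off.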
@@ -130,12 +160,11 @@ CRITICAL DOCUMENT REFERENCE RULES:
 - NEVER invent new labels or use message IDs
 - NEVER use formats like "msg_xxx:documents" or "task_X_results" (these will fail)
 - ONLY use the exact labels shown in AVAILABLE DOCUMENTS
-- **When generating multiple actions, you may only use as input documents those that are already present in AVAILABLE DOCUMENTS or produced by actions that come earlier in the list. Do NOT use as input any document label that will be produced by a later action.**
+- When generating multiple actions, you may only use as input documents those that are already present in AVAILABLE DOCUMENTS or produced by actions that come earlier in the list. Do NOT use as input any document label that will be produced by a later action.
+- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you CANNOT create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed.
 
-TASK STEP: {task_step.description} (ID: {task_step.id})
-EXPECTED OUTPUTS: {expected_outputs_str}
+TASK STEP: {task_step.objective} (ID: {task_step.id})
 SUCCESS CRITERIA: {success_criteria_str}
-TASK AI PROMPT: {task_ai_prompt if task_ai_prompt else 'None provided'}
 
 CONTEXT - Chat History:
 {messageSummary}
 
@@ -180,7 +209,8 @@ ACTION GENERATION PRINCIPLES:
 INSTRUCTIONS:
 - Generate actions to accomplish this task step using available documents, connections, and previous results
 - Use docItem for single documents and docList labels for groups of documents as shown in AVAILABLE DOCUMENTS
-- Always pass documentList as a LIST of references (docItem and/or docList)
+- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you cannot create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed.
+- Always pass documentList as a LIST of references (docItem and/or docList) - this list CANNOT be empty for document extraction actions
 - For resultLabel, use the format: "task{{task_id}}_action{{action_number}}_{{short_label}}" where:
   - {{task_id}} = the current task's id (e.g., 1)
   - {{action_number}} = the sequence number of the action within the task (e.g., 2)
 
@@ -202,8 +232,8 @@ REQUIRED JSON STRUCTURE:
     "resultLabel": "task1_action3_analysis_results",
     "expectedDocumentFormats": [ // OPTIONAL: Specify expected document formats when needed
       {{
-        "extension": ".csv",
-        "mimeType": "text/csv",
+        "extension": ".txt",
+        "mimeType": "text/plain",
         "description": "Structured data output"
       }}
     ],
 
@@ -314,19 +344,33 @@ EXAMPLES OF GOOD ACTIONS:
     ]
   }}
 
-NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
-        logging.debug(f"[ACTION PLAN PROMPT] Available Documents Section:\n{available_documents_str}\nUser Connections Section:\n{available_connections_str}\nAvailable Methods (summarized):\n{', '.join(method_actions.keys())}")
+6. When no documents are available (NO DOCUMENTS AVAILABLE scenario):
+   {{
+     "method": "document",
+     "action": "generateReport",
+     "parameters": {{
+       "documentList": [],
+       "title": "Workflow Status Report"
+     }},
+     "resultLabel": "task1_action1_status_report",
+     "description": "Generate a status report informing the user that no documents are available for processing and requesting document upload or alternative input."
+   }}
+
+IMPORTANT NOTES:
+- Respond with ONLY the JSON object. Do not include any explanatory text.
+- Before creating any document extraction action, verify that AVAILABLE DOCUMENTS contains actual document references.
+- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", use example 6 above to create a status report action instead of document extraction."""
+        logging.debug(f"[ACTION PLAN PROMPT] Available Documents Section:\n{available_documents_str}\nUser Connections Section:\n{available_connections_str}\nAvailable Methods (detailed):\n{available_methods_str}")
         return prompt
 
-    async def createResultReviewPrompt(self, review_context) -> str:
+    async def createResultReviewPrompt(review_context) -> str:
         """Create prompt for result review"""
         task_step = review_context.task_step
         step_result = review_context.step_result or {}
         step_result_serializable = {
             'task_step': {
                 'id': task_step.id,
-                'description': task_step.description,
-                'expected_outputs': task_step.expected_outputs or [],
+                'objective': task_step.objective,
                 'success_criteria': task_step.success_criteria or []
             },
             'action_results': [],
 
@@ -380,12 +424,10 @@ async def createResultReviewPrompt(self, review_context) -> str:
                 }
                 step_result_serializable['action_results'].append(serializable_action_result)
         step_result_json = json.dumps(step_result_serializable, indent=2, ensure_ascii=False)
-        expected_outputs_str = ', '.join(task_step.expected_outputs or [])
         success_criteria_str = ', '.join(task_step.success_criteria or [])
         return f"""You are a result review AI that evaluates task step completion with BASIC validation.
 
-TASK STEP: {task_step.description}
-EXPECTED OUTPUTS: {expected_outputs_str}
+TASK STEP: {task_step.objective}
 SUCCESS CRITERIA: {success_criteria_str}
 
 STEP RESULT: {step_result_json}
 
@@ -402,8 +444,8 @@ VALIDATION PRINCIPLES:
 - Text outputs are SECONDARY indicators
 - Only retry for CLEAR technical issues, not minor imperfections
 - Don't be picky about formatting or minor details
-- Check if ANY documents were produced (documents_count > 0), not specific expected output names
-- If documents were produced, consider it a SUCCESS regardless of expected output names
+- Check if ANY documents were produced (documents_count > 0)
+- If documents were produced, consider it a SUCCESS
 
 EXAMPLES OF SUCCESS:
 - Document extraction produced a file (even if imperfect)
 
@@ -428,7 +470,6 @@ REQUIRED JSON STRUCTURE:
   "reason": "Brief explanation",
   "improvements": ["specific technical fixes only"],
   "quality_score": 1-10,
-  "missing_outputs": [],
   "met_criteria": ["basic functionality achieved"],
   "unmet_criteria": []
 }}
 
@@ -437,6 +478,6 @@ VALIDATION LOGIC:
 - If ANY action has documents_count > 0, mark as SUCCESS
 - If ALL actions have documents_count = 0 AND no meaningful text output, mark as FAILED
 - Only mark as RETRY for clear technical issues that can be fixed
-- Do NOT fail based on expected output name mismatches - focus on actual document production
+- Focus on actual document production and functionality, not specific output names
 
 NOTE: Respond with ONLY the JSON object. Be GENEROUS with success ratings."""
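A reply satisfying the REQUIRED JSON STRUCTURE above might look like this (values illustrative; note that missing_outputs is gone after this change):

    {
      "status": "success",
      "reason": "Extraction produced one output document",
      "improvements": [],
      "quality_score": 8,
      "met_criteria": ["basic functionality achieved"],
      "unmet_criteria": []
    }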
@@ -4,15 +4,7 @@ from modules.interfaces.interfaceAppModel import User
 from modules.interfaces.interfaceChatModel import ChatWorkflow, UserInputRequest, TaskStep, TaskAction, ActionResult, ReviewResult, TaskPlan, WorkflowResult, TaskContext
 from modules.chat.serviceCenter import ServiceCenter
 from modules.interfaces.interfaceChatObjects import ChatObjects
-from .handling.handlingTasks import HandlingTasks
+from .handling.handlingTasks import HandlingTasks, WorkflowStoppedException
 
-logger = logging.getLogger(__name__)
-
-# ===== STATE MANAGEMENT AND VALIDATION CLASSES =====
-
-class WorkflowStoppedException(Exception):
-    """Exception raised when workflow is stopped by user"""
-    pass
-
 logger = logging.getLogger(__name__)
 
@@ -47,16 +39,15 @@ class ChatManager:
             raise Exception("No tasks generated in task plan.")
 
         # Phase 2-5: For each task, execute and get results
-        logger.info(f"Phase 2: Executing {len(task_plan.tasks)} tasks")
+        total_tasks = len(task_plan.tasks)
+        logger.info(f"Phase 2: Executing {total_tasks} tasks")
         all_task_results = []
         previous_results = []
         for idx, task_step in enumerate(task_plan.tasks):
-            logger.info(f"Task {idx+1}/{len(task_plan.tasks)}: {task_step.description}")
+            # Pass task index to executeTask method
+            current_task_index = idx + 1
 
-            # Check if workflow has been stopped before each task
-            if self.service.workflow.status == "stopped":
-                logger.info("Workflow stopped by user, aborting execution")
-                raise WorkflowStoppedException("Workflow was stopped by user")
+            logger.info(f"Task {idx+1}/{total_tasks}: {task_step.objective}")
 
             # Create task context for this task
             task_context = TaskContext(
 
@@ -67,7 +58,7 @@ class ChatManager:
                 previous_results=previous_results
             )
             # Execute task (this handles action generation, execution, and review internally)
-            task_result = await self.handlingTasks.executeTask(task_step, workflow, task_context)
+            task_result = await self.handlingTasks.executeTask(task_step, workflow, task_context, current_task_index, total_tasks)
             # Handover
             handover_data = await self.handlingTasks.prepareTaskHandover(task_step, [], task_result, workflow)
             # Collect results
 
@@ -90,6 +81,15 @@ class ChatManager:
             )
             logger.info(f"Unified workflow execution completed successfully for workflow {workflow.id}")
             return workflow_result
+        except WorkflowStoppedException:
+            logger.info(f"Workflow {workflow.id} was stopped by user")
+            return WorkflowResult(
+                status="stopped",
+                completed_tasks=0,
+                total_tasks=0,
+                execution_time=0.0,
+                final_results_count=0
+            )
         except Exception as e:
             logger.error(f"Error in executeUnifiedWorkflow: {str(e)}")
             return WorkflowResult(
 
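A condensed sketch of the stop path after this refactor, assuming (as the import change suggests) that HandlingTasks.executeTask now performs the status == "stopped" check itself and raises WorkflowStoppedException:

    class WorkflowStoppedException(Exception):
        """Raised by HandlingTasks when it sees status == 'stopped'."""

    def run_tasks(tasks, status_of):
        # Hypothetical outline, not the actual ChatManager code.
        for t in tasks:
            if status_of() == "stopped":     # check now lives inside executeTask
                raise WorkflowStoppedException()
        return "completed"

    try:
        run_tasks(["t1", "t2"], lambda: "stopped")
    except WorkflowStoppedException:
        result_status = "stopped"            # mapped to WorkflowResult(status="stopped")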
@@ -310,54 +310,43 @@ class ServiceCenter:
         chat_exchanges = []
         history_exchanges = []
 
-        # Process messages in reverse order to find current chat round
+        # Process messages in reverse order; "first" marks boundary: include up to and including
+        # the first "first" message in the chat container, older messages in the history container
+        in_current_round = True
         for message in reversed(self.workflow.messages):
-            # Get document references from message
+            is_first = getattr(message, "status", None) == "first"
+
+            # Build a DocumentExchange if message has documents
+            doc_exchange = None
             if message.documents:
-                # For messages with action context, create DocumentExchange with docList reference
                 if message.actionId and message.documentsLabel:
                     doc_ref = self.getDocumentReferenceFromMessage(message)
                     if doc_ref:
-                        # Create DocumentExchange with single docList reference
                         doc_exchange = DocumentExchange(
                             documentsLabel=message.documentsLabel,
                             documents=[doc_ref]
                         )
 
-                        # Add to appropriate list based on message status
-                        if message.status == "first":
-                            chat_exchanges.append(doc_exchange)
-                            break  # Stop after finding first message
-                        elif message.status == "step":
-                            chat_exchanges.append(doc_exchange)
-                        else:
-                            history_exchanges.append(doc_exchange)
-                # For regular messages, create DocumentExchange with individual docItem references
                 else:
                     doc_refs = []
                     for doc in message.documents:
                         doc_ref = self.getDocumentReferenceFromChatDocument(doc)
                         doc_refs.append(doc_ref)
 
                     if doc_refs:
-                        # Create DocumentExchange with individual document references
                         doc_exchange = DocumentExchange(
                             documentsLabel=f"{message.id}:documents",
                             documents=doc_refs
                         )
 
-                        # Add to appropriate list based on message status
-                        if message.status == "first":
-                            chat_exchanges.append(doc_exchange)
-                            break  # Stop after finding first message
-                        elif message.status == "step":
-                            chat_exchanges.append(doc_exchange)
-                        else:
-                            history_exchanges.append(doc_exchange)
 
-            # Stop processing if we hit a first message
-            if message.status == "first":
-                break
+            # Append to appropriate container based on boundary
+            if doc_exchange:
+                if in_current_round:
+                    chat_exchanges.append(doc_exchange)
+                else:
+                    history_exchanges.append(doc_exchange)
+
+            # Flip boundary after including the "first" message in chat
+            if in_current_round and is_first:
+                in_current_round = False
 
         # Sort both lists by datetime in descending order
         chat_exchanges.sort(key=lambda x: x.documentsLabel, reverse=True)
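A worked example of the boundary flip above, reduced to message statuses only:

    # Oldest-first statuses; reversed() walks newest-first.
    statuses_oldest_first = ["first", "step", "first", "step", "assistant"]
    chat, history = [], []
    in_current_round = True
    for status in reversed(statuses_oldest_first):
        (chat if in_current_round else history).append(status)
        if in_current_round and status == "first":
            in_current_round = False
    # chat == ["assistant", "step", "first"]  (current round, incl. its "first")
    # history == ["step", "first"]            (everything older)

Unlike the old version, messages older than the boundary are still collected into the history container instead of being dropped by an early break.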
@@ -393,31 +382,7 @@ class ServiceCenter:
         try:
             # ADDED LOGGING: Print workflow id, message count, and all message labels and document counts
             import logging
-            logger = logging.getLogger(__name__)
-            logger.debug(f"WORKFLOW STATE at getChatDocumentsFromDocumentList: id={id(self.workflow)}, message_count={len(self.workflow.messages) if hasattr(self.workflow, 'messages') else 'N/A'}")
-            for idx, message in enumerate(getattr(self.workflow, 'messages', [])):
-                label = getattr(message, 'documentsLabel', None)
-                docs = getattr(message, 'documents', None)
-                logger.debug(f" Message {idx}: label='{label}', documents_count={len(docs) if docs else 0}")
-            # DEBUG LOGGING: Print all document labels and their documents before extraction
-            import logging
-            logger = logging.getLogger(__name__)
-            logger.info("==== DEBUG: Listing all workflow message document labels and contained documents ====")
-            for message in self.workflow.messages:
-                label = getattr(message, 'documentsLabel', None)
-                docs = getattr(message, 'documents', None)
-                if label is not None:
-                    doc_names = []
-                    if docs:
-                        for doc in docs:
-                            if hasattr(doc, 'filename'):
-                                doc_names.append(doc.filename)
-                            elif isinstance(doc, dict) and 'filename' in doc:
-                                doc_names.append(doc['filename'])
-                            else:
-                                doc_names.append(str(doc))
-                    logger.info(f"Message label: '{label}' | Documents: {doc_names if doc_names else 'None'}")
-            logger.info("==== END DEBUG LIST ====")
             all_documents = []
             for doc_ref in documentList:
                 # Parse reference format
 
@@ -434,12 +399,12 @@ class ServiceCenter:
                         found = True
                         break
                 if not found:
-                    logger.warning(f"No documents found for label: {label}")
+                    logger.debug(f"No documents found for label: {label}")
                     continue
 
                 # Handle structured reference format
                 if len(parts) < 3:
-                    logger.warning(f"Invalid document reference format: {doc_ref}")
+                    logger.debug(f"Invalid document reference format: {doc_ref}")
                     continue
 
                 ref_type = parts[0]
 
@@ -487,7 +452,7 @@ class ServiceCenter:
 
     def getConnectionReferenceFromUserConnection(self, connection: UserConnection) -> str:
         """Get connection reference from UserConnection"""
-        return f"connection:{connection.authority}:{connection.externalUsername}:{connection.id}"
+        return f"connection:{connection.authority.value}:{connection.externalUsername}:{connection.id}"
 
     def getUserConnectionFromConnectionReference(self, connectionReference: str) -> Optional[UserConnection]:
         """Get UserConnection from reference string"""
 
@@ -506,7 +471,7 @@ class ServiceCenter:
 
         # Find matching connection
         for conn in user_connections:
-            if str(conn.id) == conn_id and conn.authority == authority and conn.externalUsername == username:
+            if str(conn.id) == conn_id and conn.authority.value == authority and conn.externalUsername == username:
                 return conn
         return None
 
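Round-tripping a reference through the two helpers above, with authority now serialized via its enum .value (sample values invented):

    ref = "connection:google:alice:42"   # from getConnectionReferenceFromUserConnection
    parts = ref.split(":")               # ["connection", "google", "alice", "42"]
    _, authority, username, conn_id = parts
    # Lookup now compares conn.authority.value == authority, so the enum
    # serializes to the same plain string that was embedded in the reference.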
@@ -700,16 +665,16 @@ Please provide a clear summary of this message."""
     async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
         """Extract content from ChatDocument using prompt"""
         try:
-            # Extract file data from ChatDocument
-            if document.data:
-                fileData = document.data.encode('utf-8') if isinstance(document.data, str) else document.data
-            else:
-                # Try to get file data from service center if document has fileId
-                if hasattr(document, 'fileId') and document.fileId:
-                    fileData = self.getFileData(document.fileId)
-                else:
-                    logger.error(f"No file data available in document: {document}")
-                    raise ValueError("No file data available in document")
+            # ChatDocument is just a reference, so we need to get file data using fileId
+            if not hasattr(document, 'fileId') or not document.fileId:
+                logger.error(f"Document {document.id} has no fileId")
+                raise ValueError("Document has no fileId")
+
+            # Get file data from service center using document's fileId
+            fileData = self.getFileData(document.fileId)
+            if not fileData:
+                logger.error(f"No file data found for fileId: {document.fileId}")
+                raise ValueError("No file data found for document")
 
             # Get filename and mime type from document
             filename = document.filename if hasattr(document, 'filename') else "document"
 
@@ -739,11 +704,11 @@ Please provide a clear summary of this message."""
         """Extract content from file data directly using prompt"""
         try:
             return await self.documentProcessor.processFileData(
-                prompt=prompt,
                 fileData=fileData,
                 filename=filename,
                 mimeType=mimeType,
                 base64Encoded=base64Encoded,
+                prompt=prompt,
                 documentId=documentId
             )
         except Exception as e:
 
@@ -771,15 +736,19 @@ Please provide a clear summary of this message."""
 
         return file_item.id
 
-    def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True) -> ChatDocument:
+    def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, existing_file_id: str = None) -> ChatDocument:
         """Create document from file data object created by AI call"""
-        # First create the file and get its ID
-        file_id = self.createFile(fileName, mimeType, content, base64encoded)
+        # Use existing file ID if provided, otherwise create new file
+        if existing_file_id:
+            file_id = existing_file_id
+        else:
+            # First create the file and get its ID
+            file_id = self.createFile(fileName, mimeType, content, base64encoded)
 
         # Get file info for metadata
         file_info = self.interfaceComponent.getFile(file_id)
 
-        # Create document with file reference
+        # Create document with file reference (ChatDocument is just a reference, not a data container)
         return ChatDocument(
             id=str(uuid.uuid4()),
             fileId=file_id,
 
@@ -807,8 +776,7 @@ Please provide a clear summary of this message."""
                 bytesReceived=bytesReceived
             )
 
-            # Log the stats event
-            logger.debug(f"Workflow stats updated - Event: {eventLabel}, Sent: {bytesSent}, Received: {bytesReceived}, Tokens: {tokenCount}")
 
         except Exception as e:
             logger.error(f"Error updating workflow stats: {str(e)}")
 
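A hedged sketch of the new existing_file_id path, which lets callers wrap an already-stored file in a fresh ChatDocument without re-writing the payload (call shape assumed from the signature above; prior_file_id is hypothetical):

    # Reuse a stored file: createFile is skipped, content is not re-written.
    doc = service.createDocument(
        fileName="report.csv",
        mimeType="text/csv",
        content="",                      # unused when existing_file_id is given
        existing_file_id=prior_file_id,  # e.g. the fileId of a previous result
    )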
@@ -62,33 +62,42 @@ class AiCalls:
         Advanced text processing using Anthropic.
         Fallback to OpenAI if Anthropic is overloaded or rate-limited.
         """
-        messages = []
-        if context:
-            messages.append({
-                "role": "system",
-                "content": context
-            })
-        messages.append({
-            "role": "user",
-            "content": prompt
-        })
-        if hasattr(self, 'userLanguage') and self.userLanguage:
-            ltext = f"Please respond in '{self.userLanguage}' language."
-            if messages and messages[0]["role"] == "system":
-                if "language" not in messages[0]["content"].lower():
-                    messages[0]["content"] = f"{ltext} {messages[0]['content']}"
-            else:
-                messages.insert(0, {
-                    "role": "system",
-                    "content": ltext
-                })
+        # For Anthropic, we need to handle system content differently
+        # Anthropic expects system content in a top-level parameter, not as a message role
         try:
-            response = await self.anthropicService.callAiBasic(messages)
+            # Create messages without system role for Anthropic
+            anthropic_messages = []
+            if hasattr(self, 'userLanguage') and self.userLanguage:
+                ltext = f"Please respond in '{self.userLanguage}' language."
+                if context:
+                    # Combine context and language instruction
+                    full_context = f"{ltext}\n\n{context}"
+                else:
+                    full_context = ltext
+            else:
+                full_context = context
+
+            # Add user message
+            anthropic_messages.append({
+                "role": "user",
+                "content": prompt
+            })
+
+            # Call Anthropic - let the connector handle system content conversion
+            if full_context:
+                # Send context as part of the user message for Anthropic
+                enhanced_prompt = f"Context:\n{full_context}\n\nUser Request:\n{prompt}"
+                response = await self.anthropicService.callAiBasic([
+                    {"role": "user", "content": enhanced_prompt}
+                ])
+            else:
+                response = await self.anthropicService.callAiBasic(anthropic_messages)
 
             return response["choices"][0]["message"]["content"]
         except Exception as e:
             err_str = str(e)
             logger.warning(f"[UI NOTICE] Advanced AI failed, falling back to Basic AI (OpenAI). Reason: {err_str}")
-            # Optionally, you could surface this message to the UI via a return value or error object
+            # Fallback to OpenAI basic
             return await self.callAiTextBasic(prompt, context)
 
     async def callAiImageBasic(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str:
 
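Side by side, the payload change above (sample values invented). Anthropic's Messages API takes system text as a top-level parameter rather than a "system" message role, so the old OpenAI-style list could fail there; the new code folds context into the user turn instead:

    context = "You are a helpful assistant."
    prompt = "Summarize the attached report."
    full_context = context  # plus optional language hint, per the code above

    # Old: OpenAI-style system role, which Anthropic rejects as a message role
    old_messages = [{"role": "system", "content": context},
                    {"role": "user", "content": prompt}]

    # New: a single user turn carrying the context inline
    enhanced_prompt = f"Context:\n{full_context}\n\nUser Request:\n{prompt}"
    new_messages = [{"role": "user", "content": enhanced_prompt}]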
@@ -753,8 +753,8 @@ class AppObjects:
             logger.error(f"Error saving token: {str(e)}")
             raise
 
-    def getToken(self, authority: AuthAuthority) -> Optional[Token]:
-        """Get the latest token for the current user and authority"""
+    def getToken(self, authority: str) -> Optional[Token]:
+        """Get the latest valid token for the current user and authority"""
         try:
             # Get tokens for this user and authority
             tokens = self.db.getRecordset("tokens", recordFilter={
 
@@ -767,13 +767,20 @@ class AppObjects:
 
             # Sort by creation date and get the latest
             tokens.sort(key=lambda x: x.get("createdAt", ""), reverse=True)
-            return Token(**tokens[0])
+            latest_token = Token(**tokens[0])
+
+            # Check if token is expired
+            if latest_token.expiresAt and latest_token.expiresAt < datetime.now().timestamp():
+                logger.warning(f"Token for {authority} is expired (expiresAt: {latest_token.expiresAt})")
+                return None  # Don't return expired tokens
+
+            return latest_token
 
         except Exception as e:
             logger.error(f"Error getting token: {str(e)}")
             return None
 
-    def deleteToken(self, authority: AuthAuthority) -> None:
+    def deleteToken(self, authority: str) -> None:
         """Delete all tokens for the current user and authority"""
         try:
             # Get tokens to delete
 
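The expiry test above in isolation, assuming expiresAt holds a Unix timestamp in seconds:

    from datetime import datetime

    def is_expired(expires_at) -> bool:
        # Mirrors getToken: a missing/zero expiresAt means non-expiring.
        return bool(expires_at) and expires_at < datetime.now().timestamp()

Because both sides are epoch seconds, the comparison is timezone-safe even though datetime.now() is naive.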
@@ -256,6 +256,7 @@ class TaskAction(BaseModel, ModelMixin):
     execResultLabel: Optional[str] = Field(None, description="Label for the set of result documents")
     # NEW: Optional document format specification
     expectedDocumentFormats: Optional[List[Dict[str, str]]] = Field(None, description="Expected document formats (optional)")
+
     status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status")
     error: Optional[str] = Field(None, description="Error message if action failed")
     retryCount: int = Field(default=0, description="Number of retries attempted")
 
@@ -530,13 +531,11 @@ register_model_labels(
 
 class TaskStep(BaseModel, ModelMixin):
     id: str
-    description: str
+    objective: str
     dependencies: Optional[list[str]] = []
-    expected_outputs: Optional[list[str]] = []
     success_criteria: Optional[list[str]] = []
-    required_documents: Optional[list[str]] = []
     estimated_complexity: Optional[str] = None
-    ai_prompt: Optional[str] = None
 
 class TaskContext(BaseModel, ModelMixin):
     task_step: TaskStep
 
@@ -880,7 +880,30 @@ class ChatObjects:
         workflow = self.loadWorkflowState(workflowId)
         if not workflow:
             raise ValueError(f"Workflow {workflowId} not found")
 
+        # Check if workflow is currently running and stop it first
+        if workflow.status == "running":
+            logger.info(f"Stopping running workflow {workflowId} before processing new prompt")
+
+            # Stop the running workflow
+            workflow.status = "stopped"
+            workflow.lastActivity = currentTime
+            self.updateWorkflow(workflowId, {
+                "status": "stopped",
+                "lastActivity": currentTime
+            })
+
+            # Add log entry for workflow stop
+            self.createWorkflowLog({
+                "workflowId": workflowId,
+                "message": "Workflow stopped for new prompt",
+                "type": "info",
+                "status": "stopped",
+                "progress": 100
+            })
+
+            # Wait a moment for any running processes to detect the stop
+            await asyncio.sleep(0.1)
+
         # Update workflow - set status back to running for resumed workflows
         self.updateWorkflow(workflowId, {
 
@@ -5,7 +5,6 @@ Handles document operations using the document service.
 
 import logging
 from typing import Dict, Any, List, Optional
-import uuid
 from datetime import datetime, UTC
 
 from modules.chat.methodBase import MethodBase, ActionResult, action
 
@@ -24,19 +23,19 @@ class MethodDocument(MethodBase):
     @action
     async def extract(self, parameters: Dict[str, Any]) -> ActionResult:
         """
-        Extract specific content from document with ai prompt and return it in the specified format
+        Extract specific content from document with AI prompt and return it in the specified format.
 
         Parameters:
             documentList (str): Reference to the document list to extract content from
             aiPrompt (str): AI prompt for content extraction
-            includeMetadata (bool, optional): Whether to include metadata (default: True)
             expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
+            includeMetadata (bool, optional): Whether to include metadata (default: True)
         """
         try:
             documentList = parameters.get("documentList")
             aiPrompt = parameters.get("aiPrompt")
-            includeMetadata = parameters.get("includeMetadata", True)
             expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
+            includeMetadata = parameters.get("includeMetadata", True)
 
             if not documentList:
                 return self._createResult(
 
@@ -60,32 +59,7 @@ class MethodDocument(MethodBase):
                     error="No documents found for the provided reference"
                 )
 
-            # Determine output format based on expected formats
-            output_extension = ".txt"  # Default
-            output_mime_type = "text/plain"  # Default
-
-            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
-                # Use the first expected format
-                expected_format = expectedDocumentFormats[0]
-                output_extension = expected_format.get("extension", ".txt")
-                output_mime_type = expected_format.get("mimeType", "text/plain")
-                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
-                logger.info(f"Expected document formats: {expectedDocumentFormats}")
-            else:
-                logger.info("No expected format specified, using default .txt format")
-
-            # Enhance AI prompt to specify output format
-            enhanced_prompt = aiPrompt
-            if output_extension == ".csv":
-                enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows. Do not include ```csv or ``` markers."
-            elif output_extension == ".json":
-                enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure JSON data without any markdown formatting, code blocks, or additional text. Output only the JSON content. Do not include ```json or ``` markers."
-            elif output_extension == ".xml":
-                enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure XML data without any markdown formatting, code blocks, or additional text. Output only the XML content. Do not include ```xml or ``` markers."
-            elif output_extension != ".txt":
-                enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure {output_extension.upper()} data without any markdown formatting, code blocks, or additional text. Output only the {output_extension.upper()} content. Do not include any markdown markers."
-
-            # Extract content from all documents
+            # Extract content from all documents using AI
             all_extracted_content = []
             file_infos = []
 
@@ -99,7 +73,7 @@ class MethodDocument(MethodBase):
                     continue
 
                 extracted_content = await self.service.extractContentFromFileData(
-                    prompt=enhanced_prompt,  # Use enhanced prompt instead of original
+                    prompt=aiPrompt,
                     fileData=file_data,
                     filename=file_info.get('name', 'document'),
                     mimeType=file_info.get('mimeType', 'application/octet-stream'),
 
@@ -118,21 +92,7 @@ class MethodDocument(MethodBase):
                     error="No content could be extracted from any documents"
                 )
 
-            # Extract text content from ExtractedContent objects
-            text_contents = []
-            for content_obj in all_extracted_content:
-                if hasattr(content_obj, 'contents') and content_obj.contents:
-                    # Extract text from ContentItem objects
-                    for content_item in content_obj.contents:
-                        if hasattr(content_item, 'data') and content_item.data:
-                            text_contents.append(content_item.data)
-                elif isinstance(content_obj, str):
-                    text_contents.append(content_obj)
-                else:
-                    # Fallback: convert to string representation
-                    text_contents.append(str(content_obj))
-
-            # Process each document individually and create separate output files
+            # Process each document individually with its own format conversion
             output_documents = []
 
             for i, (chatDocument, extracted_content) in enumerate(zip(chatDocuments, all_extracted_content)):
 
@@ -140,36 +100,68 @@ class MethodDocument(MethodBase):
                 text_content = ""
                 if hasattr(extracted_content, 'contents') and extracted_content.contents:
                     # Extract text from ContentItem objects
+                    text_parts = []
                     for content_item in extracted_content.contents:
                         if hasattr(content_item, 'data') and content_item.data:
-                            text_content += content_item.data + "\n"
+                            text_parts.append(content_item.data)
+                    text_content = "\n".join(text_parts)
                 elif isinstance(extracted_content, str):
                     text_content = extracted_content
                 else:
-                    # Fallback: convert to string representation
                     text_content = str(extracted_content)
 
-                # Create output filename based on original filename
+                # Get the expected format for this document (or use default)
+                target_format = None
+                if expectedDocumentFormats and i < len(expectedDocumentFormats):
+                    target_format = expectedDocumentFormats[i]
+                elif expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+                    # If fewer formats than documents, use the last format for remaining documents
+                    target_format = expectedDocumentFormats[-1]
+
+                # Determine output format and filename
+                if target_format:
+                    target_extension = target_format.get("extension", ".txt")
+                    target_mime_type = target_format.get("mimeType", "text/plain")
+
+                    # Check if format conversion is needed
+                    if target_extension not in [".txt", ".text"] or target_mime_type != "text/plain":
+                        logger.info(f"Converting document {i+1} to format: {target_extension} ({target_mime_type})")
+                        # Use AI to convert format
+                        formatted_content = await self._convertContentToFormat(text_content, target_format)
+                        final_content = formatted_content
+                        final_mime_type = target_mime_type
+                        final_extension = target_extension
+                    else:
+                        logger.info(f"Document {i+1}: No format conversion needed, using plain text")
+                        final_content = text_content
+                        final_mime_type = "text/plain"
+                        final_extension = ".txt"
+                else:
+                    logger.info(f"Document {i+1}: No expected format specified, using plain text")
+                    final_content = text_content
+                    final_mime_type = "text/plain"
+                    final_extension = ".txt"
+
+                # Create output filename based on original filename and target format
                 original_filename = chatDocument.filename
                 base_name = original_filename.rsplit('.', 1)[0] if '.' in original_filename else original_filename
-                output_filename = f"{base_name}_extracted_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}"
+                output_filename = f"{base_name}_extracted_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{final_extension}"
 
                 # Create result data for this document
                 result_data = {
                     "documentCount": 1,
-                    "content": text_content,
+                    "content": final_content,
                     "originalFilename": original_filename,
                     "fileInfos": [file_infos[i]] if includeMetadata and i < len(file_infos) else None,
                     "timestamp": datetime.now(UTC).isoformat()
                 }
 
-                logger.info(f"Created output document: {output_filename} with {len(text_content)} characters")
-                logger.info(f"Content preview: {text_content[:200]}...")
+                logger.info(f"Created output document: {output_filename} with {len(final_content)} characters")
 
                 output_documents.append({
                     "documentName": output_filename,
                     "documentData": result_data,
-                    "mimeType": output_mime_type
+                    "mimeType": final_mime_type
                 })
 
             return self._createResult(
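How the index-to-format mapping above behaves when there are fewer formats than documents (values invented):

    formats = [{"extension": ".csv"}, {"extension": ".json"}]
    documents = ["a.pdf", "b.pdf", "c.pdf"]
    for i, _ in enumerate(documents):
        target = formats[i] if i < len(formats) else formats[-1]
    # a.pdf -> .csv, b.pdf -> .json, c.pdf -> .json (last format reused)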
|
|
@ -186,6 +178,327 @@ class MethodDocument(MethodBase):
|
||||||
error=str(e)
|
error=str(e)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@action
|
||||||
|
async def generate(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
|
"""
|
||||||
|
Generate documents in specific formats from document references.
|
||||||
|
This action automatically extracts content from documents and converts it to the specified format.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
documentList (list): List of document references to extract content from
|
||||||
|
expectedDocumentFormats (list): Expected document formats with extension, mimeType, description
|
||||||
|
originalDocuments (list, optional): List of original document names
|
||||||
|
includeMetadata (bool, optional): Whether to include metadata (default: True)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
document_list = parameters.get("documentList", [])
|
||||||
|
expected_document_formats = parameters.get("expectedDocumentFormats", [])
|
||||||
|
original_documents = parameters.get("originalDocuments", [])
|
||||||
|
include_metadata = parameters.get("includeMetadata", True)
|
||||||
|
|
||||||
|
if not document_list:
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={},
|
||||||
|
error="Document list is required for generation"
|
||||||
|
)
|
||||||
|
|
||||||
|
if not expected_document_formats or len(expected_document_formats) == 0:
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={},
|
||||||
|
error="Expected document formats specification is required"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get chat documents for original documents list
|
||||||
|
chat_documents = self.service.getChatDocumentsFromDocumentList(document_list)
|
||||||
|
logger.info(f"Found {len(chat_documents)} chat documents")
|
||||||
|
|
||||||
|
if not chat_documents:
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={},
|
||||||
|
error="No documents found for the provided documentList reference"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Update original documents list if not provided
|
||||||
|
if not original_documents:
|
||||||
|
original_documents = [doc.filename if hasattr(doc, 'filename') else str(doc.id) for doc in chat_documents]
|
||||||
|
|
||||||
|
# Process each document individually with its own format conversion
|
||||||
|
output_documents = []
|
||||||
|
|
||||||
|
for i, chat_document in enumerate(chat_documents):
|
||||||
|
# Extract content from this document
|
||||||
|
# ChatDocument is just a reference, so we need to get file data using fileId
|
||||||
|
content = ""
|
||||||
|
if hasattr(chat_document, 'fileId') and chat_document.fileId:
|
||||||
|
# Need to get file data
|
||||||
|
file_data = self.service.getFileData(chat_document.fileId)
|
||||||
|
if file_data:
|
||||||
|
if isinstance(file_data, bytes):
|
||||||
|
content = file_data.decode('utf-8', errors='ignore')
|
||||||
|
else:
|
||||||
|
content = str(file_data)
|
||||||
|
else:
|
||||||
|
logger.warning(f"Could not get file data for document {i+1}, skipping")
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
logger.warning(f"Document {i+1} has no fileId, skipping")
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not content:
|
||||||
|
logger.warning(f"Could not extract content from document {i+1}, skipping")
|
||||||
|
continue
|
||||||
|
|
||||||
|
logger.info(f"Extracted content from document {i+1}: {len(content)} characters")
|
||||||
|
|
||||||
|
# Get the expected format for this document (or use default)
|
||||||
|
target_format = None
|
||||||
|
if i < len(expected_document_formats):
|
||||||
|
target_format = expected_document_formats[i]
|
||||||
|
elif len(expected_document_formats) > 0:
|
||||||
|
# If fewer formats than documents, use the last format for remaining documents
|
||||||
|
target_format = expected_document_formats[-1]
|
||||||
|
|
||||||
|
if not target_format:
|
||||||
|
logger.warning(f"No expected format for document {i+1}, skipping")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Use AI to convert format
|
||||||
|
formatted_content = await self._convertContentToFormat(content, target_format)
|
||||||
|
if not formatted_content:
|
||||||
|
logger.warning(f"Failed to format document {i+1}, skipping")
|
||||||
|
continue
|
||||||
|
|
||||||
|
target_extension = target_format.get("extension", ".txt")
|
||||||
|
target_mime_type = target_format.get("mimeType", "text/plain")
|
||||||
|
|
||||||
|
# Create output filename
|
||||||
|
timestamp = datetime.now(UTC).strftime('%Y%m%d_%H%M%S')
|
||||||
|
if i < len(original_documents):
|
||||||
|
base_name = original_documents[i].rsplit('.', 1)[0] if '.' in original_documents[i] else original_documents[i]
|
||||||
|
else:
|
||||||
|
base_name = f"document_{i+1}"
|
||||||
|
output_filename = f"{base_name}_generated_{timestamp}{target_extension}"
|
||||||
|
|
||||||
|
# Create result data
|
||||||
|
result_data = {
|
||||||
|
"documentCount": 1,
|
||||||
|
"content": formatted_content,
|
||||||
|
"outputFormat": target_format,
|
||||||
|
"originalDocument": original_documents[i] if i < len(original_documents) else f"document_{i+1}",
|
||||||
|
"timestamp": datetime.now(UTC).isoformat()
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(f"Generated document: {output_filename} with {len(formatted_content)} characters")
|
||||||
|
|
||||||
|
output_documents.append({
|
||||||
|
"documentName": output_filename,
|
||||||
|
"documentData": result_data,
|
||||||
|
"mimeType": target_mime_type
|
||||||
|
})
|
||||||
|
|
||||||
|
if not output_documents:
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={},
|
||||||
|
error="No documents could be generated"
|
||||||
|
)
|
||||||
|
|
||||||
|
return self._createResult(
|
||||||
|
success=True,
|
||||||
|
data={
|
||||||
|
"documents": output_documents
|
||||||
|
}
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error generating document: {str(e)}")
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={},
|
||||||
|
error=str(e)
|
||||||
|
)
|
||||||
+
+    async def _convertContentToFormat(self, content: str, target_format: Dict[str, Any]) -> str:
+        """
+        Helper function to convert content to the specified format using AI.
+        """
+        try:
+            extension = target_format.get("extension", ".txt")
+            mime_type = target_format.get("mimeType", "text/plain")
+
+            logger.info(f"Converting content to format: {extension} ({mime_type})")
+
+            # Create AI prompt for format conversion
+            format_prompts = {
+                ".csv": f"""
+                Convert the following content into a proper CSV format.
+
+                Requirements:
+                1. Output ONLY the CSV data without any markdown, code blocks, or additional text
+                2. Use appropriate headers based on the content
+                3. Ensure proper CSV formatting with commas and quotes where needed
+                4. Make the data easily readable and importable into spreadsheet applications
+
+                Content to convert:
+                {content}
+
+                Generate ONLY the CSV data:
+                """,
+
+                ".json": f"""
+                Convert the following content into a proper JSON format.
+
+                Requirements:
+                1. Output ONLY the JSON data without any markdown, code blocks, or additional text
+                2. Structure the data logically with appropriate keys and values
+                3. Ensure valid JSON syntax
+                4. Make the data easily parseable and readable
+
+                Content to convert:
+                {content}
+
+                Generate ONLY the JSON data:
+                """,
+
+                ".xml": f"""
+                Convert the following content into a proper XML format.
+
+                Requirements:
+                1. Output ONLY the XML data without any markdown, code blocks, or additional text
+                2. Use appropriate XML tags and structure
+                3. Ensure valid XML syntax
+                4. Make the data easily parseable and readable
+
+                Content to convert:
+                {content}
+
+                Generate ONLY the XML data:
+                """,
+
+                ".html": f"""
+                Convert the following content into a proper HTML format.
+
+                Requirements:
+                1. Output ONLY the HTML data without any markdown, code blocks, or additional text
+                2. Use appropriate HTML tags and structure
+                3. Ensure valid HTML syntax
+                4. Make the data easily readable in web browsers
+
+                Content to convert:
+                {content}
+
+                Generate ONLY the HTML data:
+                """,
+
+                ".md": f"""
+                Convert the following content into a proper Markdown format.
+
+                Requirements:
+                1. Output ONLY the Markdown data without any code blocks or additional text
+                2. Use appropriate Markdown syntax for headers, lists, emphasis, etc.
+                3. Structure the content logically
+                4. Make the data easily readable and convertible to other formats
+
+                Content to convert:
+                {content}
+
+                Generate ONLY the Markdown data:
+                """
+            }
+
+            # Get the appropriate prompt for the target format
+            if extension in format_prompts:
+                ai_prompt = format_prompts[extension]
+            else:
+                # Generic format conversion
+                ai_prompt = f"""
+                Convert the following content into {extension.upper()} format.
+
+                Requirements:
+                1. Output ONLY the {extension.upper()} data without any markdown, code blocks, or additional text
+                2. Use appropriate formatting for {extension.upper()} files
+                3. Ensure the output is valid and usable
+                4. Make the data easily readable and importable
+
+                Content to convert:
+                {content}
+
+                Generate ONLY the {extension.upper()} data:
+                """
+
+            # Call AI to generate the formatted content
+            logger.info(f"Calling AI for {extension} format conversion")
+            formatted_content = await self.service.callAiTextBasic(ai_prompt, content)
+
+            if not formatted_content or formatted_content.strip() == "":
+                logger.warning("AI format conversion failed, using fallback")
+                return self._generateFallbackFormattedContent(content, extension, mime_type)
+
+            # Clean up the AI response
+            formatted_content = formatted_content.strip()
+
+            # Remove markdown code blocks if present
+            if formatted_content.startswith("```") and formatted_content.endswith("```"):
+                lines = formatted_content.split('\n')
+                if len(lines) > 2:
+                    formatted_content = '\n'.join(lines[1:-1])
+
+            return formatted_content
+
+        except Exception as e:
+            logger.error(f"Error in AI format conversion: {str(e)}")
+            return self._generateFallbackFormattedContent(content, extension, mime_type)
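A quick usage sketch for the helper above (the call site and sample values are assumed, not taken from this commit). Note that extension and mime_type are assigned inside the try block, so the except fallback presumes any failure happened after those two .get() calls:

    target_format = {"extension": ".csv", "mimeType": "text/csv"}
    csv_text = await self._convertContentToFormat("name: Ada\nrole: engineer", target_format)
    # empty or blank AI output falls back to _generateFallbackFormattedContent()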
+
+    def _generateFallbackFormattedContent(self, content: str, extension: str, mime_type: str) -> str:
+        """
+        Generate fallback formatted content when AI conversion fails.
+        """
+        try:
+            if extension == ".csv":
+                # Simple CSV fallback - split by lines and create basic CSV
+                lines = content.strip().split('\n')
+                if lines:
+                    # Create a simple CSV with line numbers and content
+                    csv_lines = ["Line,Content"]
+                    for i, line in enumerate(lines, 1):
+                        # Escape quotes and wrap in quotes if comma present
+                        if ',' in line:
+                            line = f'"{line.replace(chr(34), chr(34) + chr(34))}"'
+                        csv_lines.append(f"{i},{line}")
+                    return '\n'.join(csv_lines)
+                return "Line,Content\n1,No content available"
+
+            elif extension == ".json":
+                # Simple JSON fallback
+                content_escaped = content.replace('"', '\\"')
+                timestamp = datetime.now(UTC).isoformat()
+                return f'{{"content": "{content_escaped}", "format": "json", "timestamp": "{timestamp}"}}'
+
+            elif extension == ".xml":
+                # Simple XML fallback
+                timestamp = datetime.now(UTC).isoformat()
+                return f'<?xml version="1.0" encoding="UTF-8"?>\n<document>\n<content>{content}</content>\n<format>xml</format>\n<timestamp>{timestamp}</timestamp>\n</document>'
+
+            elif extension == ".html":
+                # Simple HTML fallback
+                timestamp = datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')
+                return f'<!DOCTYPE html>\n<html>\n<head><meta charset="UTF-8"><title>Generated Document</title></head>\n<body>\n<pre>{content}</pre>\n<p><em>Generated on {timestamp}</em></p>\n</body>\n</html>'
+
+            elif extension == ".md":
+                # Simple Markdown fallback
+                timestamp = datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')
+                return f"# Generated Document\n\n{content}\n\n---\n*Generated on {timestamp}*"
+
+            else:
+                # Generic fallback - return content as-is
+                return content
+
+        except Exception as e:
+            logger.error(f"Error in fallback format conversion: {str(e)}")
+            return content
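Two caveats on the fallbacks above, for reference. The CSV branch applies the standard rule of doubling embedded quotes, e.g. the line say "hi", twice becomes "say ""hi"", twice" — but it only quotes lines that contain a comma, so a line with quotes and no comma passes through unescaped, which strict parsers may reject. The JSON branch escapes only double quotes; content containing backslashes or newlines yields invalid JSON, which json.dumps would avoid (a sketch, not what the commit does):

    import json
    return json.dumps({"content": content, "format": "json",
                       "timestamp": datetime.now(UTC).isoformat()})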

     @action
     async def generateReport(self, parameters: Dict[str, Any]) -> ActionResult:
         """

@@ -209,6 +522,8 @@ class MethodDocument(MethodBase):
             )
 
         chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)
+        logger.info(f"Retrieved {len(chatDocuments)} chat documents for report generation")
+
         if not chatDocuments:
             return self._createResult(
                 success=False,

@@ -261,15 +576,30 @@ class MethodDocument(MethodBase):
 
             for doc in chatDocuments:
                 content = ""
-                if hasattr(doc, 'content') and doc.content:
-                    content = doc.content.strip()
-                elif hasattr(doc, 'data') and doc.data:
-                    content = doc.data.strip()
+                logger.info(f"Processing document: type={type(doc)}")
+
+                # Get actual file content using the fileId reference
+                try:
+                    file_data = self.service.getFileData(doc.fileId)
+                    if file_data:
+                        # Convert bytes to string
+                        if isinstance(file_data, bytes):
+                            content = file_data.decode('utf-8')
+                        else:
+                            content = str(file_data)
+                        logger.info(f" Retrieved content from file: {len(content)} characters")
+                    else:
+                        logger.warning(f" No file data found for fileId: {doc.fileId}")
+                except Exception as e:
+                    logger.error(f" Error retrieving file data: {str(e)}")
 
                 # Skip empty documents
                 if content:
                     validDocuments.append(doc)
                     allContent.append(f"Document: {doc.filename}\n{content}\n")
+                    logger.info(f" Added document to valid documents list")
+                else:
+                    logger.warning(f" Skipping document with no content")
 
             if not validDocuments:
                 # If no valid documents, create a simple report

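A side note on the retrieval logic added in the hunk above: file_data.decode('utf-8') raises UnicodeDecodeError on binary or non-UTF-8 files, so such documents land in the except branch and are then skipped as empty. A more forgiving variant, if lossy text is acceptable (a sketch, not part of the commit):

    content = file_data.decode('utf-8', errors='replace')  # undecodable bytes become U+FFFD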
@@ -354,10 +684,17 @@ class MethodDocument(MethodBase):
 
             # Add document content if available
             content = ""
-            if hasattr(doc, 'content') and doc.content:
-                content = doc.content
-            elif hasattr(doc, 'data') and doc.data:
-                content = doc.data
+            if hasattr(doc, 'fileId') and doc.fileId:
+                # ChatDocument is just a reference, so we need to get file data using fileId
+                try:
+                    file_data = self.service.getFileData(doc.fileId)
+                    if file_data:
+                        if isinstance(file_data, bytes):
+                            content = file_data.decode('utf-8')
+                        else:
+                            content = str(file_data)
+                except Exception as e:
+                    logger.warning(f"Could not retrieve content for document {doc.filename}: {str(e)}")
 
             if content:
                 html.append(f"<div style='white-space:pre-wrap; border:1px solid #ccc; padding:0.5em; margin-bottom:1em; background-color:#f9f9f9;'>{content}</div>")

@@ -26,13 +26,13 @@ class MethodOutlook(MethodBase):
         """Get Microsoft connection from connection reference"""
         try:
             userConnection = self.service.getUserConnectionFromConnectionReference(connectionReference)
-            if not userConnection or userConnection.authority != "msft" or userConnection.status != "active":
+            if not userConnection or userConnection.authority.value != "msft" or userConnection.status.value != "active":
                 return None
 
             # Get the corresponding token for this user and authority
-            token = self.service.interfaceApp.getToken(userConnection.authority)
+            token = self.service.interfaceApp.getToken(userConnection.authority.value)
             if not token:
-                logger.warning(f"No token found for user {userConnection.userId} and authority {userConnection.authority}")
+                logger.warning(f"No token found for user {userConnection.userId} and authority {userConnection.authority.value}")
                 return None
 
             return {
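The .value accessors added in this hunk suggest that authority and status changed from plain strings to Enum members (an AuthAuthority enum is imported elsewhere in this commit). Comparing an Enum member to a string is always False in Python, so the raw value has to be compared instead; a minimal illustration (the member name is illustrative):

    from enum import Enum

    class AuthAuthority(Enum):
        MSFT = "msft"

    AuthAuthority.MSFT == "msft"         # False - Enum member vs str
    AuthAuthority.MSFT.value == "msft"   # True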
@@ -80,24 +80,78 @@ class MethodOutlook(MethodBase):
                     error="No valid Microsoft connection found for the provided connection reference"
                 )
 
-            # Create email reading prompt
-            email_prompt = f"""
-            Simulate reading emails from Microsoft Outlook.
-
-            Connection: {connection['id']}
-            Folder: {folder}
-            Limit: {limit}
-            Filter: {filter or 'None'}
-
-            Please provide:
-            1. List of emails with subject, sender, date, and content
-            2. Summary of email statistics
-            3. Important or urgent emails highlighted
-            4. Email categorization if possible
-            """
-
-            # Use AI to simulate email reading
-            email_data = await self.service.interfaceAiCalls.callAiTextAdvanced(email_prompt)
+            # Read emails using Microsoft Graph API
+            try:
+                import requests
+
+                # Microsoft Graph API endpoint for messages
+                graph_url = "https://graph.microsoft.com/v1.0"
+                headers = {
+                    "Authorization": f"Bearer {connection['accessToken']}",
+                    "Content-Type": "application/json"
+                }
+
+                # Build the API request
+                api_url = f"{graph_url}/me/mailFolders/{folder}/messages"
+                params = {
+                    "$top": limit,
+                    "$orderby": "receivedDateTime desc"
+                }
+                if filter:
+                    params["$filter"] = filter
+
+                # Make the API call
+                response = requests.get(api_url, headers=headers, params=params)
+                response.raise_for_status()
+
+                emails_data = response.json()
+                email_data = {
+                    "emails": emails_data.get("value", []),
+                    "count": len(emails_data.get("value", [])),
+                    "folder": folder,
+                    "filter": filter,
+                    "apiResponse": emails_data
+                }
+
+                logger.info(f"Successfully retrieved {len(emails_data.get('value', []))} emails from {folder}")
+
+            except ImportError:
+                logger.error("requests module not available, falling back to simulation")
+                # Fallback to simulation if requests module is not available
+                email_prompt = f"""
+                Simulate reading emails from Microsoft Outlook.
+
+                Connection: {connection['id']}
+                Folder: {folder}
+                Limit: {limit}
+                Filter: {filter or 'None'}
+
+                Please provide:
+                1. List of emails with subject, sender, date, and content
+                2. Summary of email statistics
+                3. Important or urgent emails highlighted
+                4. Email categorization if possible
+                """
+                email_data = await self.service.interfaceAiCalls.callAiTextAdvanced(email_prompt)
+            except Exception as e:
+                logger.error(f"Error reading emails from Microsoft Graph API: {str(e)}")
+                # Fallback to simulation on API error
+                email_prompt = f"""
+                Simulate reading emails from Microsoft Outlook.
+
+                Connection: {connection['id']}
+                Folder: {folder}
+                Limit: {limit}
+                Filter: {filter or 'None'}
+
+                Please provide:
+                1. List of emails with subject, sender, date, and content
+                2. Summary of email statistics
+                3. Important or urgent emails highlighted
+                4. Email categorization if possible
+                """
+                email_data = await self.service.interfaceAiCalls.callAiTextAdvanced(email_prompt)
 
             # Create result data
             result_data = {
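Two practical notes on the Graph call above (observations, not part of the commit): requests.get is synchronous, so it blocks the event loop of this async method while the HTTP call is in flight, and /messages responses are paged, with further pages reachable via @odata.nextLink. A sketch of draining the pages under those assumptions:

    emails = []
    url = api_url
    while url:
        resp = requests.get(url, headers=headers, params=params)
        resp.raise_for_status()
        page = resp.json()
        emails.extend(page.get("value", []))
        url = page.get("@odata.nextLink")  # absolute URL, already carries the query
        params = None                      # only send params on the first request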
@@ -151,7 +205,7 @@ class MethodOutlook(MethodBase):
     @action
     async def sendEmail(self, parameters: Dict[str, Any]) -> ActionResult:
         """
-        Send email via Outlook
+        Create email draft in Outlook for sending out
 
         Parameters:
             connectionReference (str): Reference to the Microsoft connection

@@ -160,6 +214,7 @@ class MethodOutlook(MethodBase):
             body (str): Email body content
             cc (List[str], optional): CC recipients
             bcc (List[str], optional): BCC recipients
+            attachments (List[str], optional): List of document references to attach
             expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
         """
         try:

@@ -169,6 +224,7 @@ class MethodOutlook(MethodBase):
             body = parameters.get("body")
             cc = parameters.get("cc", [])
             bcc = parameters.get("bcc", [])
+            attachments = parameters.get("attachments", [])
             expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
 
             if not connectionReference or not to or not subject or not body:

@@ -187,26 +243,111 @@ class MethodOutlook(MethodBase):
                     error="No valid Microsoft connection found for the provided connection reference"
                 )
 
-            # Create email sending prompt
-            send_prompt = f"""
-            Simulate sending an email via Microsoft Outlook.
-
-            Connection: {connection['id']}
-            To: {to}
-            Subject: {subject}
-            Body: {body}
-            CC: {cc}
-            BCC: {bcc}
-
-            Please provide:
-            1. Email composition details
-            2. Validation of email addresses
-            3. Email formatting and structure
-            4. Delivery confirmation simulation
-            """
-
-            # Use AI to simulate email sending
-            send_result = await self.service.interfaceAiCalls.callAiTextAdvanced(send_prompt)
+            # Create email draft using Microsoft Graph API
+            try:
+                import requests
+
+                # Microsoft Graph API endpoint for creating draft messages
+                graph_url = "https://graph.microsoft.com/v1.0"
+                headers = {
+                    "Authorization": f"Bearer {connection['accessToken']}",
+                    "Content-Type": "application/json"
+                }
+
+                # Build the email message
+                message = {
+                    "subject": subject,
+                    "body": {
+                        "contentType": "HTML",
+                        "content": body
+                    },
+                    "toRecipients": [{"emailAddress": {"address": email}} for email in to],
+                    "ccRecipients": [{"emailAddress": {"address": email}} for email in cc] if cc else [],
+                    "bccRecipients": [{"emailAddress": {"address": email}} for email in bcc] if bcc else []
+                }
+
+                # Add attachments if provided
+                if attachments:
+                    message["attachments"] = []
+                    for attachment_ref in attachments:
+                        # Get attachment document from service center
+                        attachment_docs = self.service.getChatDocumentsFromDocumentList([attachment_ref])
+                        if attachment_docs:
+                            for doc in attachment_docs:
+                                # Create attachment object for Graph API
+                                attachment = {
+                                    "@odata.type": "#microsoft.graph.fileAttachment",
+                                    "name": doc.filename,
+                                    "contentType": doc.mimeType,
+                                    "contentBytes": doc.data if hasattr(doc, 'data') else ""
+                                }
+                                message["attachments"].append(attachment)
+
+                # Create the draft message
+                api_url = f"{graph_url}/me/messages"
+                response = requests.post(api_url, headers=headers, json=message)
+                response.raise_for_status()
+
+                draft_data = response.json()
+                draft_result = {
+                    "status": "draft_created",
+                    "messageId": draft_data.get("id", "unknown"),
+                    "draftId": draft_data.get("id", "unknown"),
+                    "recipients": to,
+                    "cc": cc,
+                    "bcc": bcc,
+                    "attachments": len(attachments) if attachments else 0,
+                    "draftLocation": "Drafts folder",
+                    "apiResponse": response.status_code,
+                    "draftData": draft_data
+                }
+
+                logger.info(f"Successfully created email draft for {len(to)} recipients with {len(attachments) if attachments else 0} attachments")
+
+            except ImportError:
+                logger.error("requests module not available, falling back to simulation")
+                # Fallback to simulation if requests module is not available
+                send_prompt = f"""
+                Simulate creating an email draft in Microsoft Outlook.
+
+                Connection: {connection['id']}
+                To: {to}
+                Subject: {subject}
+                Body: {body}
+                CC: {cc}
+                BCC: {bcc}
+                Attachments: {attachments if attachments else 'None'}
+
+                Please provide:
+                1. Email composition details
+                2. Validation of email addresses
+                3. Email formatting and structure
+                4. Attachment processing and validation
+                5. Draft creation confirmation
+                """
+                draft_result = await self.service.interfaceAiCalls.callAiTextAdvanced(send_prompt)
+            except Exception as e:
+                logger.error(f"Error creating email draft via Microsoft Graph API: {str(e)}")
+                # Fallback to simulation on API error
+                send_prompt = f"""
+                Simulate creating an email draft in Microsoft Outlook.
+
+                Connection: {connection['id']}
+                To: {to}
+                Subject: {subject}
+                Body: {body}
+                CC: {cc}
+                BCC: {bcc}
+                Attachments: {attachments if attachments else 'None'}
+
+                Please provide:
+                1. Email composition details
+                2. Validation of email addresses
+                3. Email formatting and structure
+                4. Attachment processing and validation
+                5. Draft creation confirmation
+                """
+                draft_result = await self.service.interfaceAiCalls.callAiTextAdvanced(send_prompt)
 
             # Create result data
             result_data = {

@@ -216,7 +357,8 @@ class MethodOutlook(MethodBase):
                 "body": body,
                 "cc": cc,
                 "bcc": bcc,
-                "sendResult": send_result,
+                "attachments": attachments,
+                "draftResult": draft_result,
                 "connection": {
                     "id": connection["id"],
                     "authority": "microsoft",

@@ -243,7 +385,7 @@ class MethodOutlook(MethodBase):
                 data={
                     "documents": [
                         {
-                            "documentName": f"outlook_email_sent_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
+                            "documentName": f"outlook_email_draft_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
                             "documentData": result_data,
                             "mimeType": output_mime_type
                         }

@@ -252,7 +394,7 @@ class MethodOutlook(MethodBase):
             )
 
         except Exception as e:
-            logger.error(f"Error sending email: {str(e)}")
+            logger.error(f"Error creating email draft: {str(e)}")
             return self._createResult(
                 success=False,
                 data={},
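One caveat on the attachment handling above: the Graph fileAttachment resource expects contentBytes to be base64-encoded, while doc.data is passed through as-is. If doc.data holds raw bytes, encoding it first would look like this (a sketch, assuming doc.data is bytes):

    import base64

    attachment["contentBytes"] = base64.b64encode(doc.data).decode("ascii")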
@@ -294,24 +436,81 @@ class MethodOutlook(MethodBase):
                     error="No valid Microsoft connection found for the provided connection reference"
                 )
 
-            # Create email search prompt
-            search_prompt = f"""
-            Simulate searching emails in Microsoft Outlook.
-
-            Connection: {connection['id']}
-            Query: {query}
-            Folder: {folder}
-            Limit: {limit}
-
-            Please provide:
-            1. Search results with relevant emails
-            2. Search statistics and relevance scores
-            3. Email previews and key information
-            4. Search suggestions and refinements
-            """
-
-            # Use AI to simulate email search
-            search_result = await self.service.interfaceAiCalls.callAiTextAdvanced(search_prompt)
+            # Search emails using Microsoft Graph API
+            try:
+                import requests
+
+                # Microsoft Graph API endpoint for searching messages
+                graph_url = "https://graph.microsoft.com/v1.0"
+                headers = {
+                    "Authorization": f"Bearer {connection['accessToken']}",
+                    "Content-Type": "application/json"
+                }
+
+                # Build the search API request
+                api_url = f"{graph_url}/me/messages"
+                params = {
+                    "$top": limit,
+                    "$orderby": "receivedDateTime desc",
+                    "$search": f'"{query}"'
+                }
+
+                # Add folder filter if specified
+                if folder and folder.lower() != "all":
+                    params["$filter"] = f"parentFolderId eq '{folder}'"
+
+                # Make the API call
+                response = requests.get(api_url, headers=headers, params=params)
+                response.raise_for_status()
+
+                search_data = response.json()
+                search_result = {
+                    "query": query,
+                    "results": search_data.get("value", []),
+                    "count": len(search_data.get("value", [])),
+                    "folder": folder,
+                    "limit": limit,
+                    "apiResponse": search_data
+                }
+
+                logger.info(f"Successfully searched emails with query '{query}', found {len(search_data.get('value', []))} results")
+
+            except ImportError:
+                logger.error("requests module not available, falling back to simulation")
+                # Fallback to simulation if requests module is not available
+                search_prompt = f"""
+                Simulate searching emails in Microsoft Outlook.
+
+                Connection: {connection['id']}
+                Query: {query}
+                Folder: {folder}
+                Limit: {limit}
+
+                Please provide:
+                1. Search results with relevant emails
+                2. Search statistics and relevance scores
+                3. Email previews and key information
+                4. Search suggestions and refinements
+                """
+                search_result = await self.service.interfaceAiCalls.callAiTextAdvanced(search_prompt)
+            except Exception as e:
+                logger.error(f"Error searching emails via Microsoft Graph API: {str(e)}")
+                # Fallback to simulation on API error
+                search_prompt = f"""
+                Simulate searching emails in Microsoft Outlook.
+
+                Connection: {connection['id']}
+                Query: {query}
+                Folder: {folder}
+                Limit: {limit}
+
+                Please provide:
+                1. Search results with relevant emails
+                2. Search statistics and relevance scores
+                3. Email previews and key information
+                4. Search suggestions and refinements
+                """
+                search_result = await self.service.interfaceAiCalls.callAiTextAdvanced(search_prompt)
 
             # Create result data
             result_data = {
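A note on the search parameters built above: per the Microsoft Graph documentation, $search on messages cannot be combined with $filter or $orderby (results come back ranked by relevance), so the request as constructed is likely to be rejected with a 400. A variant that respects that constraint (a sketch):

    params = {"$top": limit, "$search": f'"{query}"'}
    # scope to a folder via the path instead of $filter:
    # api_url = f"{graph_url}/me/mailFolders/{folder}/messages"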
@@ -32,9 +32,9 @@ class MethodSharepoint(MethodBase):
                 return None
 
             # Get the corresponding token for this user and authority
-            token = self.service.interfaceApp.getToken(userConnection.authority)
+            token = self.service.interfaceApp.getToken(userConnection.authority.value)
             if not token:
-                logger.warning(f"No token found for user {userConnection.userId} and authority {userConnection.authority}")
+                logger.warning(f"No token found for user {userConnection.userId} and authority {userConnection.authority.value}")
                 return None
 
             return {
@@ -8,7 +8,8 @@ from modules.interfaces.interfaceAppObjects import User
 
 from modules.interfaces.interfaceChatModel import (UserInputRequest, ChatMessage, ChatWorkflow, TaskItem, TaskStatus)
 from modules.interfaces.interfaceChatObjects import ChatObjects
-from modules.chat.managerChat import ChatManager, WorkflowStoppedException
+from modules.chat.managerChat import ChatManager
+from modules.chat.handling.handlingTasks import WorkflowStoppedException
 from modules.interfaces.interfaceChatModel import WorkflowResult
 
 logger = logging.getLogger(__name__)

@@ -52,6 +53,21 @@ class WorkflowManager:
                 "lastActivity": workflow.lastActivity
             })
 
+            # Create final stopped message
+            stopped_message = {
+                "workflowId": workflow.id,
+                "role": "assistant",
+                "message": "🛑 Workflow stopped by user",
+                "status": "last",
+                "sequenceNr": len(workflow.messages) + 1,
+                "publishedAt": datetime.now(UTC).isoformat(),
+                "documentsLabel": "workflow_stopped",
+                "documents": []
+            }
+            message = self.chatInterface.createWorkflowMessage(stopped_message)
+            if message:
+                workflow.messages.append(message)
+
             # Add log entry
             self.chatInterface.createWorkflowLog({
                 "workflowId": workflow.id,

@@ -99,6 +115,8 @@ class WorkflowManager:
     async def _sendFirstMessage(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> ChatMessage:
         """Send first message to start workflow"""
         try:
+            self.chatManager.handlingTasks._checkWorkflowStopped()
+
             # Create initial message using interface
             messageData = {
                 "workflowId": workflow.id,

@@ -130,6 +148,8 @@ class WorkflowManager:
     async def _generateWorkflowFeedback(self, workflow: ChatWorkflow) -> str:
         """Generate feedback message for workflow completion"""
         try:
+            self.chatManager.handlingTasks._checkWorkflowStopped()
+
             # Count messages by role
             user_messages = [msg for msg in workflow.messages if msg.role == 'user']
             assistant_messages = [msg for msg in workflow.messages if msg.role == 'assistant']

@@ -155,9 +175,13 @@ class WorkflowManager:
     async def _sendLastMessage(self, workflow: ChatWorkflow) -> None:
         """Send last message to complete workflow"""
         try:
+            self.chatManager.handlingTasks._checkWorkflowStopped()
+
             # Generate feedback
             feedback = await self._generateWorkflowFeedback(workflow)
 
+            self.chatManager.handlingTasks._checkWorkflowStopped()
+
             # Create last message using interface
             messageData = {
                 "workflowId": workflow.id,

@@ -199,7 +223,60 @@ class WorkflowManager:
     async def _processWorkflowResults(self, workflow: ChatWorkflow, workflow_result: WorkflowResult, initial_message: ChatMessage) -> None:
         """Process workflow results and create appropriate messages"""
         try:
-            if workflow_result.status == 'failed':
+            try:
+                self.chatManager.handlingTasks._checkWorkflowStopped()
+            except WorkflowStoppedException:
+                logger.info(f"Workflow {workflow.id} was stopped during result processing")
+
+                # Create final stopped message
+                stopped_message = {
+                    "workflowId": workflow.id,
+                    "role": "assistant",
+                    "message": "🛑 Workflow stopped by user",
+                    "status": "last",
+                    "sequenceNr": len(workflow.messages) + 1,
+                    "publishedAt": datetime.now(UTC).isoformat(),
+                    "documentsLabel": "workflow_stopped",
+                    "documents": []
+                }
+                message = self.chatInterface.createWorkflowMessage(stopped_message)
+                if message:
+                    workflow.messages.append(message)
+
+                # Update workflow status to stopped
+                workflow.status = "stopped"
+                workflow.lastActivity = datetime.now(UTC).isoformat()
+                self.chatInterface.updateWorkflow(workflow.id, {
+                    "status": "stopped",
+                    "lastActivity": workflow.lastActivity
+                })
+                return
+
+            if workflow_result.status == 'stopped':
+                # Create stopped message
+                stopped_message = {
+                    "workflowId": workflow.id,
+                    "role": "assistant",
+                    "message": "🛑 Workflow stopped by user",
+                    "status": "last",
+                    "sequenceNr": len(workflow.messages) + 1,
+                    "publishedAt": datetime.now(UTC).isoformat(),
+                    "documentsLabel": "workflow_stopped",
+                    "documents": []
+                }
+                message = self.chatInterface.createWorkflowMessage(stopped_message)
+                if message:
+                    workflow.messages.append(message)
+
+                # Update workflow status to stopped
+                workflow.status = "stopped"
+                workflow.lastActivity = datetime.now(UTC).isoformat()
+                self.chatInterface.updateWorkflow(workflow.id, {
+                    "status": "stopped",
+                    "lastActivity": workflow.lastActivity
+                })
+                return
+            elif workflow_result.status == 'failed':
                 # Create error message
                 error_message = {
                     "workflowId": workflow.id,
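The _checkWorkflowStopped() calls threaded through these methods implement cooperative cancellation: each step re-checks a stop flag and raises WorkflowStoppedException to unwind. A minimal sketch of such a checker (the real one lives in modules.chat.handling.handlingTasks and is not shown in this diff, so the body is assumed):

    def _checkWorkflowStopped(self) -> None:
        # Raise if the user has flagged this workflow as stopped.
        if self.workflow.status == "stopped":
            raise WorkflowStoppedException(f"workflow {self.workflow.id} stopped")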
@@ -25,6 +25,7 @@ PyMuPDF>=1.23.7 # Instead of the imprecise 'fitz'
 PyPDF2==3.0.1
 python-docx>=0.8.11 # For Word documents
 openpyxl>=3.1.2 # For Excel files
+python-pptx>=0.6.21 # For PowerPoint files
 
 ## Data Processing & Analysis
 numpy==1.26.3 # Version compatible with pandas and matplotlib

@@ -52,3 +53,14 @@ sortedcontainers>=2.4.0 # Required by trio
 
 ## MSFT Integration
 msal==1.24.1
+
+# Enhanced Office document processing
+python-docx>=0.8.11
+openpyxl>=3.0.9
+python-pptx>=0.6.21
+xlrd>=2.0.1 # For legacy .xls files
+Pillow>=9.0.0 # For image processing
+PyPDF2>=3.0.0
+PyMuPDF>=1.20.0
+beautifulsoup4>=4.11.0
+chardet>=4.0.0 # For encoding detection
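Heads-up on the block just added: python-docx, openpyxl, python-pptx, PyPDF2 and PyMuPDF are already pinned earlier in the file, and pip rejects conflicting duplicates (e.g. PyPDF2==3.0.1 vs PyPDF2>=3.0.0) with a "Double requirement given" error. A deduplicated version of the addition would keep only the genuinely new packages:

    # Enhanced Office document processing
    xlrd>=2.0.1            # For legacy .xls files
    Pillow>=9.0.0          # For image processing
    beautifulsoup4>=4.11.0
    chardet>=4.0.0         # For encoding detection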
test_documentExtraction.py — new file, 855 lines

@@ -0,0 +1,855 @@
+#!/usr/bin/env python3
+"""
+Test script for DocumentExtraction class.
+Processes all files in d:/temp folder and stores extracted content in d:/temp/extracted.
+
+Features:
+- Option to extract content WITH AI processing (default)
+- Option to extract content WITHOUT AI processing (content-only mode)
+- Supports all document types: text, images, PDFs, Office documents, etc.
+- Detailed logging and progress tracking
+- Separate output directories for AI vs content-only modes
+
+Usage:
+- Interactive mode: python test_documentExtraction.py
+- Content-only mode: python test_documentExtraction.py --no-ai
+- Content-only mode: python test_documentExtraction.py --content-only
+- Specify custom input/output: python test_documentExtraction.py --input-dir /path/to/input --output-dir /path/to/output --no-ai
+"""
+
+import os
+import asyncio
+import logging
+import sys
+import argparse
+from pathlib import Path
+from typing import List, Optional
+from datetime import datetime, UTC
+
+# Configure logging
+logging.basicConfig(
+    level=logging.DEBUG,  # Changed from INFO to DEBUG
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Filter out specific unwanted log messages
+class LogFilter(logging.Filter):
+    """Filter to hide specific unwanted log messages."""
+
+    def filter(self, record):
+        # Hide workflow stats update errors
+        if "Workflow" in record.getMessage() and "not found for stats update" in record.getMessage():
+            return False
+
+        # Hide HTTP request info messages
+        if "HTTP Request:" in record.getMessage() and "POST https://api.openai.com" in record.getMessage():
+            return False
+
+        # Hide HTTP response info messages
+        if "HTTP/1.1 200 OK" in record.getMessage():
+            return False
+
+        return True
+
+# Apply the filter to the root logger
+root_logger = logging.getLogger()
+root_logger.addFilter(LogFilter())
+
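One subtlety with the filter hookup above: filters attached to a logger apply only to records created on that logger, not to records propagating up from other loggers (logging filters are not inherited). Since the HTTP noise comes from third-party loggers, attaching the filter to the root handlers is usually what is needed (a sketch of the likely intent):

    for handler in logging.getLogger().handlers:
        handler.addFilter(LogFilter())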
+def check_dependencies():
+    """Check if required dependencies are available and provide installation instructions."""
+    missing_deps = []
+
+    # Check for required dependencies
+    try:
+        import bs4
+        logger.info("✓ beautifulsoup4 is available")
+    except ImportError:
+        missing_deps.append("beautifulsoup4")
+        logger.error("✗ beautifulsoup4 is missing")
+
+    try:
+        import PyPDF2
+        logger.info("✓ PyPDF2 is available")
+    except ImportError:
+        missing_deps.append("PyPDF2")
+        logger.error("✗ PyPDF2 is missing")
+
+    try:
+        import fitz
+        logger.info("✓ PyMuPDF (fitz) is available")
+    except ImportError:
+        missing_deps.append("PyMuPDF")
+        logger.error("✗ PyMuPDF (fitz) is missing")
+
+    try:
+        import docx
+        logger.info("✓ python-docx is available")
+    except ImportError:
+        missing_deps.append("python-docx")
+        logger.error("✗ python-docx is missing")
+
+    try:
+        import openpyxl
+        logger.info("✓ openpyxl is available")
+    except ImportError:
+        missing_deps.append("openpyxl")
+        logger.error("✗ openpyxl is missing")
+
+    try:
+        import pptx
+        logger.info("✓ python-pptx is available")
+    except ImportError:
+        missing_deps.append("python-pptx")
+        logger.error("✗ python-pptx is missing")
+
+    try:
+        from PIL import Image
+        logger.info("✓ Pillow (PIL) is available")
+    except ImportError:
+        missing_deps.append("Pillow")
+        logger.error("✗ Pillow (PIL) is missing")
+
+    if missing_deps:
+        logger.error("\n" + "="*60)
+        logger.error("MISSING DEPENDENCIES DETECTED!")
+        logger.error("="*60)
+        logger.error("The following packages are required but not installed:")
+        for dep in missing_deps:
+            logger.error(f"  - {dep}")
+        logger.error("\nTo install all dependencies, run:")
+        logger.error("pip install -r requirements.txt")
+        logger.error("\nOr install individual packages:")
+        for dep in missing_deps:
+            logger.error(f"  pip install {dep}")
+        logger.error("="*60)
+        return False
+
+    logger.info("✓ All required dependencies are available!")
+    return True
+
+def check_module_imports():
+    """Check if we can import the required modules."""
+    try:
+        # Add the gateway directory to the path so we can import our modules
+        sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..'))
+
+        from modules.chat.documents.documentExtraction import DocumentExtraction
+        from modules.chat.serviceCenter import ServiceCenter
+        from modules.interfaces.interfaceAppModel import User, UserConnection
+        from modules.interfaces.interfaceChatModel import ChatWorkflow, TaskItem
+
+        logger.info("✓ All required modules imported successfully")
+        return True
+    except ImportError as e:
+        logger.error(f"✗ Failed to import required modules: {e}")
+        logger.error("Make sure you're running this script from the gateway directory")
+        return False
+    except Exception as e:
+        logger.error(f"✗ Unexpected error importing modules: {e}")
+        return False
+
+def create_mock_service_center():
+    """Create a proper ServiceCenter for testing purposes with all required fields."""
+    try:
+        from modules.chat.serviceCenter import ServiceCenter
+        from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
+        from modules.interfaces.interfaceChatModel import ChatWorkflow, TaskItem, TaskStatus
+        from modules.interfaces.interfaceChatModel import ChatLog, ChatMessage, ChatStat
+
+        # Create proper user with all required fields
+        mock_user = User(
+            id="test_user_001",
+            username="testuser",
+            email="test@example.com",
+            fullName="Test User",
+            language="en",
+            enabled=True,
+            privilege=UserPrivilege.USER,
+            authenticationAuthority=AuthAuthority.LOCAL,
+            mandateId="test_mandate_001"
+        )
+
+        # Create proper workflow with all required fields
+        current_time = datetime.now(UTC).isoformat()
+        mock_workflow = ChatWorkflow(
+            id="test_workflow_001",
+            mandateId="test_mandate_001",
+            status="active",
+            name="Test Document Extraction Workflow",
+            currentRound=1,
+            lastActivity=current_time,
+            startedAt=current_time,
+            logs=[],
+            messages=[],
+            stats=None,
+            tasks=[]
+        )
+
+        # Create service center
+        service_center = ServiceCenter(mock_user, mock_workflow)
+        logger.info("✓ ServiceCenter created successfully with proper objects")
+        return service_center
+
+    except Exception as e:
+        logger.error(f"✗ Failed to create ServiceCenter: {e}")
+        return None
+
+class DocumentExtractionTester:
+    """Test class for DocumentExtraction functionality."""
+
+    def __init__(self, input_dir: str = "d:/temp/test-extraction", output_dir: str = None, enable_ai: bool = True):
+        """
+        Initialize the tester.
+
+        Args:
+            input_dir: Directory containing files to process
+            output_dir: Directory to store extracted content (auto-generated if None)
+            enable_ai: Whether to enable AI processing (default: True)
+        """
+        self.input_dir = Path(input_dir)
+
+        # Auto-generate output directory if not specified
+        if output_dir is None:
+            if enable_ai:
+                self.output_dir = Path(input_dir) / "extracted"
+            else:
+                self.output_dir = Path(input_dir) / "extracted-raw"
+        else:
+            self.output_dir = Path(output_dir)
+
+        self.extractor = None
+        self.service_center = None
+        self.enable_ai = enable_ai
+
+        if enable_ai:
+            self.prompt = "Make a summary of each sentence for each page or chapter of the document"
+        else:
+            self.prompt = None  # No prompt needed for content-only extraction
+
+        # Track processing results for summary
+        self.processing_results = []
+
+        # Ensure output directory exists
+        logger.info(f"Creating output directory: {self.output_dir}")
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Verify directory was created
+        if self.output_dir.exists():
+            logger.info(f"✓ Output directory created/verified: {self.output_dir}")
+            logger.info(f"Output directory absolute path: {self.output_dir.absolute()}")
+        else:
+            logger.error(f"✗ Failed to create output directory: {self.output_dir}")
+
+        # Log configuration
+        logger.info(f"Configuration: AI processing = {'ENABLED' if self.enable_ai else 'DISABLED'}")
+        logger.info(f"Input directory: {self.input_dir}")
+        logger.info(f"Output directory: {self.output_dir}")
+
+        # Test basic file writing capability
+        test_file = self.output_dir / "test_write_capability.txt"
+        try:
+            logger.info(f"Testing file write capability to: {test_file}")
+            logger.info(f"Absolute path: {test_file.absolute()}")
+
+            with open(test_file, 'w', encoding='utf-8') as f:
+                f.write("Test file to verify write capability")
+
+            if test_file.exists():
+                actual_size = test_file.stat().st_size
+                logger.info(f"✓ Basic file writing test passed: {test_file} (size: {actual_size} bytes)")
+
+                # Test reading the file back
+                with open(test_file, 'r', encoding='utf-8') as f:
+                    content = f.read()
+                logger.info(f"✓ File read test passed: content length = {len(content)}")
+
+                # Clean up test file
+                test_file.unlink()
+                logger.info("✓ Test file cleaned up")
+            else:
+                logger.error(f"✗ Basic file writing test failed: {test_file}")
+        except Exception as e:
+            logger.error(f"✗ Basic file writing test failed with error: {e}")
+            import traceback
+            traceback.print_exc()
+
+        # Supported file extensions for content extraction
+        self.supported_extensions = {
+            # Text and data files
+            '.txt', '.csv', '.json', '.xml', '.html', '.htm', '.svg',
+            '.md', '.markdown', '.rst', '.log', '.ini', '.cfg', '.conf',
+
+            # Programming languages
+            '.js', '.ts', '.jsx', '.tsx', '.py', '.java', '.c', '.cpp', '.cc', '.cxx',
+            '.h', '.hpp', '.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.scala',
+            '.r', '.m', '.pl', '.sh', '.bash', '.zsh', '.fish', '.ps1', '.bat', '.cmd',
+            '.vbs', '.lua', '.sql', '.dart', '.elm', '.clj', '.hs', '.fs', '.ml',
+
+            # Web technologies
+            '.css', '.scss', '.sass', '.less', '.vue', '.svelte', '.astro',
+
+            # Configuration and build files
+            '.yaml', '.yml', '.toml', '.env', '.gitignore', '.dockerfile', '.dockerignore',
+            '.makefile', '.cmake', '.gradle', '.maven', '.pom', '.sln', '.vcxproj',
+            '.csproj', '.fsproj', '.vbproj', '.xcodeproj', '.pbxproj',
+
+            # Documentation and markup
+            '.tex', '.bib', '.adoc', '.asciidoc', '.wiki', '.creole',
+
+            # Images
+            '.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff', '.ico',
+
+            # Documents
+            '.pdf', '.docx', '.xlsx', '.pptx', '.odt', '.ods', '.odp',
+
+            # Legacy Office formats
+            '.doc', '.xls', '.ppt',
+
+            # Archives and binaries
+            '.zip', '.tar', '.gz', '.7z', '.rar', '.exe', '.dll', '.so', '.dylib'
+        }
+
+    def initialize_extractor(self):
+        """Initialize the DocumentExtraction instance with a proper ServiceCenter."""
+        try:
+            # First create the service center
+            self.service_center = create_mock_service_center()
+            if not self.service_center:
+                logger.error("Failed to create ServiceCenter!")
+                return False
+
+            # Now create DocumentExtraction with the service center
+            from modules.chat.documents.documentExtraction import DocumentExtraction
+            self.extractor = DocumentExtraction(self.service_center)
+            logger.info("✓ DocumentExtraction initialized successfully with ServiceCenter")
+            return True
+        except Exception as e:
+            logger.error(f"✗ Failed to initialize DocumentExtraction: {e}")
+            return False
+
+    def get_files_to_process(self) -> List[Path]:
+        """Get list of files to process from input directory."""
+        if not self.input_dir.exists():
+            logger.error(f"Input directory {self.input_dir} does not exist!")
+            logger.info("Creating input directory and adding a test file...")
+            self.input_dir.mkdir(parents=True, exist_ok=True)
+
+            # Create a test file if none exist
+            test_file = self.input_dir / "test.txt"
+            with open(test_file, 'w') as f:
+                f.write("This is a test file for document extraction.\nIt contains multiple lines.\nAnd some special characters: äöüß")
+            logger.info(f"Created test file: {test_file}")
+
+        files = []
+        all_files = list(self.input_dir.iterdir())
+        logger.info(f"All files in directory: {[f.name for f in all_files]}")
+
+        for file_path in all_files:
+            if file_path.is_file():
+                logger.debug(f"Checking file: {file_path.name} (extension: {file_path.suffix})")
+                if file_path.suffix.lower() in self.supported_extensions:
+                    files.append(file_path)
+                    logger.debug(f"Added file: {file_path.name}")
+                else:
+                    logger.debug(f"Skipped file: {file_path.name} (unsupported extension)")
+
+        logger.info(f"Found {len(files)} supported files to process")
+        if files:
+            logger.info(f"Files to process: {[f.name for f in files]}")
+        return files
+
+    async def process_single_file(self, file_path: Path) -> bool:
+        """
+        Process a single file and extract its content.
+
+        Args:
+            file_path: Path to the file to process
+
+        Returns:
+            True if successful, False otherwise
+        """
+        if not self.extractor:
+            logger.error("DocumentExtraction not initialized!")
+            return False
+
+        try:
+            logger.info(f"Processing file: {file_path.name}")
+
+            # Read file data
+            with open(file_path, 'rb') as f:
+                file_data = f.read()
+
+            logger.debug(f"File size: {len(file_data)} bytes")
+
+            # Determine MIME type based on extension
+            mime_type = self._get_mime_type(file_path.suffix)
+            logger.debug(f"MIME type: {mime_type}")
+
+            # Process the file with or without AI based on configuration
+            extracted_content = await self.extractor.processFileData(
+                fileData=file_data,
+                filename=file_path.name,
+                mimeType=mime_type,
+                base64Encoded=False,
+                prompt=self.prompt,
+                enableAI=self.enable_ai
+            )
+
+            logger.debug(f"Extracted {len(extracted_content.contents)} content items")
+
+            # Debug: Show content details
+            for i, content_item in enumerate(extracted_content.contents):
+                logger.debug(f"Content item {i+1}: label='{content_item.label}', has_data={content_item.data is not None}, data_length={len(content_item.data) if content_item.data else 0}")
+
+            # Special logging for JavaScript files
+            if mime_type == "application/javascript":
+                logger.debug(f"JavaScript file detected: {file_path.name}")
+                logger.debug(f"Original file size: {len(file_data)} bytes")
+                for i, content_item in enumerate(extracted_content.contents):
+                    if content_item.data:
+                        content_size = len(content_item.data.encode('utf-8'))
+                        logger.debug(f"JavaScript content item {i+1}: {content_size} bytes")
+                        # Check if content was truncated
+                        if content_size < len(file_data) * 0.9:  # If less than 90% of original
+                            logger.warning(f"JavaScript content may be truncated: {content_size} bytes vs {len(file_data)} bytes original")
+
+            # Track processing result
+            result = {
+                'filename': file_path.name,
+                'status': 'OK',
+                'content_items': 0,
+                'output_files': [],
+                'total_content_size': 0
+            }
+
+            # Save each content item as a separate file
+            if extracted_content.contents:
+                for i, content_item in enumerate(extracted_content.contents):
+                    if content_item.data:
+                        content_size = len(content_item.data.encode('utf-8'))
+                        result['total_content_size'] += content_size
+                        logger.debug(f"Content item {i+1}: {content_item.label}, size: {content_size} bytes")
+
+                        # Generate filename with new naming convention
+                        if len(extracted_content.contents) == 1:
+                            # Single content item
+                            output_filename = f"{file_path.stem} - {content_item.label} 1.txt"
+                        else:
+                            # Multiple content items - add sequence number
+                            output_filename = f"{file_path.stem} - {content_item.label} {i+1}.txt"
+
+                        output_file = self.output_dir / output_filename
+
+                        # Write only the raw extracted content
+                        logger.debug(f"Attempting to write to: {output_file}")
+                        try:
+                            with open(output_file, 'w', encoding='utf-8') as f:
+                                f.write(content_item.data)
+
+                            # Verify file was created
+                            if output_file.exists():
+                                actual_size = output_file.stat().st_size
+                                logger.info(f"✓ File created successfully: {output_filename} (expected: {content_size} bytes, actual: {actual_size} bytes)")
+                            else:
+                                logger.error(f"✗ File was not created: {output_file}")
+
+                            result['output_files'].append(output_filename)
+                            result['content_items'] += 1
+                        except Exception as write_error:
+                            logger.error(f"✗ Error writing file {output_filename}: {write_error}")
+                            import traceback
+                            traceback.print_exc()
+                    else:
+                        logger.warning(f"Content item {i+1} has no data, skipping")
+            else:
+                logger.warning(f"No content extracted from {file_path.name}")
+                result['status'] = 'FAIL'
+                result['error'] = 'No content extracted'
+
+            # Add result to tracking list
+            self.processing_results.append(result)
+
+            logger.info(f"Successfully processed {file_path.name} - Total content: {result['total_content_size']} bytes")
+            return True
+
+        except Exception as e:
+            error_msg = str(e)
+            logger.error(f"Error processing {file_path.name}: {error_msg}")
+
+            # Track failed result
+            result = {
+                'filename': file_path.name,
+                'status': 'FAIL',
+                'content_items': 0,
+                'output_files': [],
+                'error': error_msg,
+                'total_content_size': 0
+            }
+            self.processing_results.append(result)
+
+            return False
+
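The static extension table below could also be backed by the standard library for anything it misses; a fallback sketch using the stdlib mimetypes module (not part of this commit):

    import mimetypes

    def guess_mime(filename: str) -> str:
        mime, _ = mimetypes.guess_type(filename)
        return mime or "application/octet-stream"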

    def _get_mime_type(self, extension: str) -> str:
        """Get MIME type based on file extension."""
        mime_types = {
            # Text and data files
            '.txt': 'text/plain',
            '.csv': 'text/csv',
            '.json': 'application/json',
            '.xml': 'application/xml',
            '.html': 'text/html',
            '.htm': 'text/html',
            '.svg': 'image/svg+xml',
            '.md': 'text/markdown',
            '.markdown': 'text/markdown',
            '.rst': 'text/x-rst',
            '.log': 'text/plain',
            '.ini': 'text/plain',
            '.cfg': 'text/plain',
            '.conf': 'text/plain',

            # Programming languages
            '.js': 'application/javascript',
            '.ts': 'application/typescript',
            '.jsx': 'text/jsx',
            '.tsx': 'text/tsx',
            '.py': 'text/x-python',
            '.java': 'text/x-java-source',
            '.c': 'text/x-c',
            '.cpp': 'text/x-c++src',
            '.cc': 'text/x-c++src',
            '.cxx': 'text/x-c++src',
            '.h': 'text/x-c',
            '.hpp': 'text/x-c++hdr',
            '.cs': 'text/x-csharp',
            '.php': 'application/x-httpd-php',
            '.rb': 'text/x-ruby',
            '.go': 'text/x-go',
            '.rs': 'text/x-rust',
            '.swift': 'text/x-swift',
            '.kt': 'text/x-kotlin',
            '.scala': 'text/x-scala',
            '.r': 'text/x-r',
            '.m': 'text/x-matlab',
            '.pl': 'text/x-perl',
            '.sh': 'application/x-sh',
            '.bash': 'application/x-sh',
            '.zsh': 'application/x-sh',
            '.fish': 'application/x-sh',
            '.ps1': 'application/x-powershell',
            '.bat': 'application/x-msdos-program',
            '.cmd': 'application/x-msdos-program',
            '.vbs': 'text/vbscript',
            '.lua': 'text/x-lua',
            '.sql': 'application/sql',
            '.dart': 'application/dart',
            '.elm': 'text/x-elm',
            '.clj': 'text/x-clojure',
            '.hs': 'text/x-haskell',
            '.fs': 'text/x-fsharp',
            '.ml': 'text/x-ocaml',

            # Web technologies
            '.css': 'text/css',
            '.scss': 'text/x-scss',
            '.sass': 'text/x-sass',
            '.less': 'text/x-less',
            '.vue': 'text/x-vue',
            '.svelte': 'text/x-svelte',
            '.astro': 'text/x-astro',

            # Configuration and build files
            '.yaml': 'application/x-yaml',
            '.yml': 'application/x-yaml',
            '.toml': 'application/toml',
            '.env': 'text/plain',
            '.gitignore': 'text/plain',
            '.dockerfile': 'text/x-dockerfile',
            '.dockerignore': 'text/plain',
            '.makefile': 'text/x-makefile',
            '.cmake': 'text/x-cmake',
            '.gradle': 'text/x-gradle',
            '.maven': 'text/x-maven',
            '.pom': 'application/xml',
            '.sln': 'text/plain',
            '.vcxproj': 'application/xml',
            '.csproj': 'application/xml',
            '.fsproj': 'application/xml',
            '.vbproj': 'application/xml',
            '.xcodeproj': 'text/plain',
            '.pbxproj': 'text/plain',

            # Documentation and markup
            '.tex': 'application/x-tex',
            '.bib': 'text/x-bibtex',
            '.adoc': 'text/asciidoc',
            '.asciidoc': 'text/asciidoc',
            '.wiki': 'text/x-wiki',
            '.creole': 'text/x-wiki',

            # Images
            '.jpg': 'image/jpeg',
            '.jpeg': 'image/jpeg',
            '.png': 'image/png',
            '.gif': 'image/gif',
            '.webp': 'image/webp',
            '.bmp': 'image/bmp',
            '.tiff': 'image/tiff',
            '.ico': 'image/x-icon',

            # Documents
            '.pdf': 'application/pdf',
            '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
            '.odt': 'application/vnd.oasis.opendocument.text',
            '.ods': 'application/vnd.oasis.opendocument.spreadsheet',
            '.odp': 'application/vnd.oasis.opendocument.presentation',

            # Legacy Office formats
            '.doc': 'application/msword',
            '.xls': 'application/vnd.ms-excel',
            '.ppt': 'application/vnd.ms-powerpoint',

            # Archives and binaries (will be processed as binary)
            '.zip': 'application/zip',
            '.tar': 'application/x-tar',
            '.gz': 'application/gzip',
            '.7z': 'application/x-7z-compressed',
            '.rar': 'application/vnd.rar',
            '.exe': 'application/x-msdownload',
            '.dll': 'application/x-msdownload',
            '.so': 'application/x-sharedlib',
            '.dylib': 'application/x-mach-binary'
        }
        return mime_types.get(extension.lower(), 'application/octet-stream')
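
    # Example lookups (illustrative; pass the extension *with* its leading dot,
    # matching is case-insensitive thanks to extension.lower() above):
    #
    #   self._get_mime_type('.XLSX')  # -> 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    #   self._get_mime_type('.py')    # -> 'text/x-python'
    #   self._get_mime_type('.blob')  # -> 'application/octet-stream' (fallback for unknown extensions)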

    async def run_tests(self) -> None:
        """Run the document extraction tests on all files."""
        mode = "WITH AI" if self.enable_ai else "CONTENT ONLY (No AI)"
        logger.info(f"Starting document extraction tests - {mode}")
        logger.info(f"Input directory: {self.input_dir}")
        logger.info(f"Output directory: {self.output_dir}")
        if self.enable_ai:
            logger.info(f"Processing prompt: {self.prompt}")
        else:
            logger.info("AI processing: DISABLED - Raw content extraction only")

        # Initialize the extractor
        if not self.initialize_extractor():
            logger.error("Cannot proceed without DocumentExtraction!")
            return

        # Get the files to process
        files = self.get_files_to_process()

        if not files:
            logger.warning("No files found to process!")
            return

        # Process each file
        successful = 0
        failed = 0

        logger.info(f"Starting to process {len(files)} files...")
        for i, file_path in enumerate(files):
            logger.info(f"Processing file {i+1}/{len(files)}: {file_path.name}")
            try:
                if await self.process_single_file(file_path):
                    successful += 1
                    logger.info(f"✓ File {i+1} processed successfully")
                else:
                    failed += 1
                    logger.error(f"✗ File {i+1} processing failed")
            except Exception as e:
                failed += 1
                logger.error(f"✗ Exception processing file {i+1}: {e}")
                import traceback
                traceback.print_exc()

        # Print a detailed summary (mode was computed at the top of this method)
        logger.info("\n" + "=" * 80)
        logger.info(f"DETAILED TEST SUMMARY - {mode}")
        logger.info("=" * 80)
        logger.info(f"Total files processed: {len(files)}")
        logger.info(f"Successful: {successful}")
        logger.info(f"Failed: {failed}")
        logger.info(f"Output directory: {self.output_dir}")
        if self.enable_ai:
            logger.info("AI processing: ENABLED")
        else:
            logger.info("AI processing: DISABLED")
        logger.info("=" * 80)

        # List all processed documents with their results
        logger.info("\nPROCESSING RESULTS:")
        logger.info("-" * 80)

        for result in self.processing_results:
            status_icon = "✅" if result['status'] == 'OK' else "❌"
            logger.info(f"{status_icon} {result['filename']} - {result['status']}")

            if result['status'] == 'OK':
                if result['content_items'] == 1:
                    logger.info(f"   └─ Generated: {result['output_files'][0]} ({result['total_content_size']} bytes)")
                else:
                    logger.info(f"   └─ Generated {result['content_items']} files ({result['total_content_size']} total bytes):")
                    for output_file in result['output_files']:
                        logger.info(f"      └─ {output_file}")
            else:
                error_msg = result.get('error', 'Unknown error')
                logger.info(f"   └─ Error: {error_msg}")

        logger.info("-" * 80)
        logger.info("=" * 80)
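
# For reference, each entry of self.processing_results consumed by the summary
# loop above has the shape built in process_single_file (the values here are
# illustrative only):
#
#   {
#       'filename': 'report.xlsx',
#       'status': 'OK',                # or 'FAIL'
#       'content_items': 2,
#       'output_files': ['report - Sheet1 1.txt', 'report - Sheet2 2.txt'],
#       'total_content_size': 4096,    # bytes, measured on the UTF-8 encoding
#       'error': '...',                # meaningful only on failure
#   }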

def parse_arguments():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(description='Document Extraction Test Script')
    parser.add_argument('--no-ai', '--content-only', action='store_true',
                        help='Run in content-only mode without AI processing')
    parser.add_argument('--input-dir', type=str, default='d:/temp/test-extraction',
                        help='Input directory containing files to process (default: d:/temp/test-extraction)')
    parser.add_argument('--output-dir', type=str,
                        help='Output directory for extracted content (auto-generated if not specified)')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Enable verbose logging')

    return parser.parse_args()
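
# Example invocations (the script filename is illustrative):
#
#   python test_document_extraction.py --no-ai --input-dir d:/docs --verbose
#   python test_document_extraction.py --output-dir d:/temp/out
#
# Run with no flags to get the interactive mode prompt in main() below.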

async def main():
    """Main function to run the tests."""
    # Parse command line arguments
    args = parse_arguments()

    # Set the logging level based on verbosity
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.getLogger().setLevel(logging.INFO)

    logger.info("DocumentExtraction Test Script")
    logger.info("=" * 50)
    logger.info(f"Source: {args.input_dir}")

    # Determine the output directory
    if args.output_dir:
        output_dir = args.output_dir
    else:
        if args.no_ai:
            output_dir = f"{args.input_dir}/extracted-raw"
        else:
            output_dir = f"{args.input_dir}/extracted"

    logger.info(f"Output: {output_dir}")
    logger.info("=" * 50)

    # Check dependencies first
    if not check_dependencies():
        logger.error("Please install missing dependencies before running tests.")
        return

    # Check module imports
    if not check_module_imports():
        logger.error("Cannot import required modules. Please check your setup.")
        return

    # Determine the mode from the command line arguments
    if args.no_ai:
        enable_ai = False
        logger.info("Running in CONTENT ONLY mode (no AI processing)")
    else:
        # Interactive mode: ask the user for a choice
        print("\n" + "=" * 50)
        print("SELECT EXTRACTION MODE:")
        print("=" * 50)
        print("1. With AI processing (default)")
        print("2. Content only (no AI processing)")
        print("=" * 50)

        try:
            choice = input("Enter your choice (1 or 2, default is 1): ").strip()
            if choice == "2":
                enable_ai = False
                if not args.output_dir:  # keep an explicitly supplied --output-dir
                    output_dir = f"{args.input_dir}/extracted-raw"
                logger.info("Selected: Content only mode (no AI processing)")
            else:
                enable_ai = True
                if not args.output_dir:
                    output_dir = f"{args.input_dir}/extracted"
                logger.info("Selected: AI processing mode")
        except (EOFError, KeyboardInterrupt):
            # Default to AI mode if input fails
            enable_ai = True
            if not args.output_dir:
                output_dir = f"{args.input_dir}/extracted"
            logger.info("Defaulting to AI processing mode")

    # Run the tests with the selected mode
    tester = DocumentExtractionTester(
        input_dir=args.input_dir,
        output_dir=output_dir,
        enable_ai=enable_ai
    )
    await tester.run_tests()

if __name__ == "__main__":
    # main() parses sys.argv itself and falls back to the interactive mode
    # prompt when no flags are given, so a single entry point covers both
    # automated and interactive runs.
    asyncio.run(main())

# Convenience function for easy content-only extraction
async def extract_documents_content_only(input_folder: str, output_folder: str = None):
    """
    Convenience function to extract documents without AI processing.

    Args:
        input_folder: Path to folder containing documents to extract
        output_folder: Path to folder where extracted content will be stored (optional)

    Example:
        # Extract from d:/temp to d:/temp/extracted-raw
        asyncio.run(extract_documents_content_only("d:/temp"))

        # Extract from custom folders
        asyncio.run(extract_documents_content_only("c:/my_docs", "c:/my_docs/extracted"))
    """
    if output_folder is None:
        output_folder = f"{input_folder}/extracted-raw"

    logger.info(f"Running content-only extraction from {input_folder} to {output_folder}")

    # Check dependencies and imports
    if not check_dependencies():
        logger.error("Missing dependencies. Please install required packages.")
        return False

    if not check_module_imports():
        logger.error("Cannot import required modules. Please check your setup.")
        return False

    # Create the tester and run
    tester = DocumentExtractionTester(
        input_dir=input_folder,
        output_dir=output_folder,
        enable_ai=False
    )

    await tester.run_tests()
    return True

# Example usage (uncomment to use):
# if __name__ == "__main__":
#     # For content-only extraction from d:/temp to d:/temp/extracted-raw
#     asyncio.run(extract_documents_content_only("d:/temp"))
test_excel_processing.py (new file, 189 lines)

@@ -0,0 +1,189 @@
#!/usr/bin/env python3
"""
Simple test script for enhanced Excel processing functionality.
This script tests the DocumentExtraction class with Excel files.
"""

import os
import sys
import asyncio
import logging
from pathlib import Path

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Add the gateway directory to the path
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..'))

async def test_excel_processing():
    """Test Excel processing functionality."""
    try:
        # Import required modules
        from modules.chat.documents.documentExtraction import DocumentExtraction
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
        from modules.interfaces.interfaceChatModel import ChatWorkflow
        from datetime import datetime, UTC

        logger.info("Testing Excel processing functionality...")

        # Create a mock service center
        mock_user = User(
            id="test_user_001",
            username="testuser",
            email="test@example.com",
            fullName="Test User",
            language="en",
            enabled=True,
            privilege=UserPrivilege.USER,
            authenticationAuthority=AuthAuthority.LOCAL,
            mandateId="test_mandate_001"
        )

        current_time = datetime.now(UTC).isoformat()
        mock_workflow = ChatWorkflow(
            id="test_workflow_001",
            mandateId="test_mandate_001",
            status="active",
            name="Test Excel Processing Workflow",
            currentRound=1,
            lastActivity=current_time,
            startedAt=current_time,
            logs=[],
            messages=[],
            stats=None,
            tasks=[]
        )

        service_center = ServiceCenter(mock_user, mock_workflow)
        logger.info("✓ ServiceCenter created successfully")

        # Create a DocumentExtraction instance
        extractor = DocumentExtraction(service_center)
        logger.info("✓ DocumentExtraction created successfully")

        # Test with a sample Excel file if available
        test_file_path = "d:/temp/test-extraction/test.xlsx"

        if os.path.exists(test_file_path):
            logger.info(f"Found test file: {test_file_path}")

            # Read the file
            with open(test_file_path, 'rb') as f:
                file_data = f.read()

            logger.info(f"File size: {len(file_data)} bytes")

            # Process the Excel file
            logger.info("Processing Excel file...")
            result = await extractor.processFileData(
                fileData=file_data,
                filename="test.xlsx",
                mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                base64Encoded=False,
                prompt=None,
                enableAI=False
            )

            logger.info("✓ Excel processing completed successfully!")
            logger.info(f"Generated {len(result.contents)} content items:")

            for i, content_item in enumerate(result.contents):
                logger.info(f"  Item {i+1}: {content_item.label}")
                logger.info(f"    MIME type: {content_item.metadata.mimeType}")
                logger.info(f"    Size: {content_item.metadata.size} bytes")
                if content_item.data:
                    logger.info(f"    Data preview: {content_item.data[:100]}...")
                else:
                    logger.info("    Data: None")

        else:
            logger.info("No test Excel file found. Creating a simple test...")

            # Test the openpyxl library directly
            try:
                import openpyxl
                from openpyxl import Workbook

                # Create a test workbook
                wb = Workbook()
                ws = wb.active
                ws.title = "Test Sheet"

                # Add some test data
                ws['A1'] = "Name"
                ws['B1'] = "Age"
                ws['C1'] = "City"
                ws['A2'] = "John Doe"
                ws['B2'] = 30
                ws['C2'] = "New York"
                ws['A3'] = "Jane Smith"
                ws['B3'] = 25
                ws['C3'] = "Los Angeles"

                # Set document properties
                wb.properties.title = "Test Workbook"
                wb.properties.creator = "Test User"
                wb.properties.subject = "Test Subject"

                logger.info("✓ Test workbook created successfully")
                logger.info(f"  Title: {wb.properties.title}")
                logger.info(f"  Creator: {wb.properties.creator}")
                logger.info(f"  Subject: {wb.properties.subject}")
                logger.info(f"  Sheets: {wb.sheetnames}")

                # Test DocumentExtraction with this workbook
                from io import BytesIO

                # Save the workbook to bytes
                buffer = BytesIO()
                wb.save(buffer)
                buffer.seek(0)
                file_data = buffer.getvalue()

                logger.info(f"Test workbook size: {len(file_data)} bytes")

                # Process with DocumentExtraction
                result = await extractor.processFileData(
                    fileData=file_data,
                    filename="test_workbook.xlsx",
                    mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                    base64Encoded=False,
                    prompt=None,
                    enableAI=False
                )

                logger.info("✓ Test workbook processing completed successfully!")
                logger.info(f"Generated {len(result.contents)} content items:")

                for i, content_item in enumerate(result.contents):
                    logger.info(f"  Item {i+1}: {content_item.label}")
                    logger.info(f"    MIME type: {content_item.metadata.mimeType}")
                    logger.info(f"    Size: {content_item.metadata.size} bytes")
                    if content_item.data:
                        logger.info(f"    Data preview: {content_item.data[:200]}...")
                    else:
                        logger.info("    Data: None")

            except ImportError as e:
                logger.error(f"openpyxl not available: {e}")
            except Exception as e:
                logger.error(f"Error testing Excel functionality: {e}")

        logger.info("Excel processing test completed!")

    except ImportError as e:
        logger.error(f"Failed to import required modules: {e}")
        logger.error("Make sure you're running this script from the gateway directory")
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    asyncio.run(test_excel_processing())
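
# Typical invocation (assumed: run from the gateway directory so that the
# relative sys.path entry above resolves the `modules` package):
#
#   python test_excel_processing.py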