stable workflow

parent a0219181e9
commit 8592cdd790

20 changed files with 3701 additions and 3581 deletions
[File diff suppressed because it is too large]
[File diff suppressed because it is too large]
@@ -22,13 +22,11 @@ class DocumentGenerator:
         """
         try:
             documents = action_result.data.get("documents", [])
-            logger.debug(f"Processing {len(documents)} documents from action result")
             processed_documents = []
             for doc in documents:
                 processed_doc = self.processSingleDocument(doc, action)
                 if processed_doc:
                     processed_documents.append(processed_doc)
-            logger.debug(f"Successfully processed {len(processed_documents)} documents")
             return processed_documents
         except Exception as e:
             logger.error(f"Error processing action result documents: {str(e)}")
@@ -54,19 +52,29 @@ class DocumentGenerator:
             # Dictionary format document - handle both 'documentName' and 'filename' keys
             filename = doc.get('documentName', doc.get('filename', \
                 f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"))
-            fileSize = doc.get('fileSize', len(str(doc.get('documentData', ''))))
             mimeType = doc.get('mimeType', 'application/octet-stream')
-            if mimeType == "application/octet-stream":
-                document_data = doc.get('documentData', '')
-                mimeType = detectMimeTypeFromContent(document_data, filename, self.service)

             # Handle documentData structure - it might be a dict with 'content' key or direct content
             document_data = doc.get('documentData', '')
             if isinstance(document_data, dict) and 'content' in document_data:
+                # This is the structure returned by extract action: documentData.content
                 content = document_data['content']
+                # Also check for other potential content fields
+                if not content and 'data' in document_data:
+                    content = document_data['data']
             else:
+                # Direct content (fallback)
                 content = document_data
+
+            # Calculate file size from actual content
+            fileSize = len(str(content)) if content else 0
+
+            # Detect mime type if not specified
+            if mimeType == "application/octet-stream":
+                mimeType = detectMimeTypeFromContent(content, filename, self.service)
+
+            logger.info(f"Processed document: {filename}, content length: {len(str(content))}, mimeType: {mimeType}")

             return {
                 'filename': filename,
                 'fileSize': fileSize,
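Note: a minimal standalone sketch of the two documentData shapes the rewritten block above now handles; the input dicts are hypothetical, the field names come from the diff.

    # Two hypothetical inputs: dict-shaped documentData (extract action) and direct string content
    doc_a = {"documentName": "report.txt", "documentData": {"content": "hello world"}}
    doc_b = {"documentName": "raw.txt", "documentData": "direct string content"}

    for doc in (doc_a, doc_b):
        document_data = doc.get("documentData", "")
        if isinstance(document_data, dict) and "content" in document_data:
            content = document_data["content"]
            if not content and "data" in document_data:
                content = document_data["data"]
        else:
            content = document_data
        # fileSize is now derived from the resolved content, not the raw field
        print(doc["documentName"], "->", len(str(content)) if content else 0)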
@@ -96,20 +104,32 @@ class DocumentGenerator:
         Returns a list of created document objects.
         """
         try:
+            logger.info(f"Creating documents from action result for {action.execMethod}.{action.execAction}")
+            logger.info(f"Action result data keys: {list(action_result.data.keys())}")
+
             processed_docs = self.processActionResultDocuments(action_result, action, workflow)
+            logger.info(f"Processed {len(processed_docs)} documents")
+
             created_documents = []
-            for doc_data in processed_docs:
+            for i, doc_data in enumerate(processed_docs):
                 try:
                     document_name = doc_data['filename']
                     document_data = doc_data['content']
                     mime_type = doc_data['mimeType']
+
+                    logger.info(f"Creating document {i+1}: {document_name} (mime: {mime_type}, content length: {len(str(document_data))})")
+
                     # Convert document data to string content
                     content = convertDocumentDataToString(document_data, getFileExtension(document_name))
+
                     # Skip empty or minimal content
                     minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
                     if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
                         logger.warning(f"Empty or minimal content for document {document_name}, skipping")
                         continue
+
+                    logger.info(f"Document {document_name} has content: {len(content)} characters")
+
                     # Create file in system
                     file_id = self.service.createFile(
                         fileName=document_name,
@@ -120,22 +140,27 @@ class DocumentGenerator:
                     if not file_id:
                         logger.error(f"Failed to create file for document {document_name}")
                         continue
-                    # Create document object
+                    logger.info(f"Created file with ID: {file_id}")
+
+                    # Create document object using existing file ID
                     document = self.service.createDocument(
                         fileName=document_name,
                         mimeType=mime_type,
                         content=content,
-                        base64encoded=False
+                        base64encoded=False,
+                        existing_file_id=file_id
                     )
                     if document:
                         created_documents.append(document)
-                        logger.debug(f"Created document: {document_name} ({len(content)} bytes, {mime_type})")
+                        logger.info(f"Successfully created ChatDocument: {document_name} (ID: {getattr(document, 'id', 'N/A')}, fileId: {getattr(document, 'fileId', 'N/A')})")
                     else:
                         logger.error(f"Failed to create ChatDocument object for {document_name}")
                 except Exception as e:
                     logger.error(f"Error creating document {doc_data.get('filename', 'unknown')}: {str(e)}")
                     continue
-            logger.info(f"Created {len(created_documents)} documents from action result")
+
+            logger.info(f"Successfully created {len(created_documents)} documents")
             return created_documents
         except Exception as e:
             logger.error(f"Error creating documents from action result: {str(e)}")
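Note: the new existing_file_id keyword ties the ChatDocument to the file created just above it, so no second file is created. A sketch of the intended call order, with a hypothetical stub Service; only the call shape is taken from the diff.

    class Service:
        """Hypothetical stand-in for the real service; return values are invented."""
        def createFile(self, fileName, content):
            return "file_001"
        def createDocument(self, fileName, mimeType, content, base64encoded, existing_file_id):
            return {"fileName": fileName, "fileId": existing_file_id}

    service = Service()
    file_id = service.createFile(fileName="report.txt", content="hello")
    document = service.createDocument(
        fileName="report.txt",
        mimeType="text/plain",
        content="hello",
        base64encoded=False,
        existing_file_id=file_id,  # reuse the file created above instead of making a new one
    )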
@@ -16,22 +16,53 @@ from modules.chat.documents.documentGeneration import DocumentGenerator

 logger = logging.getLogger(__name__)

+class WorkflowStoppedException(Exception):
+    """Exception raised when a workflow is stopped by the user."""
+    pass
+
 class HandlingTasks:
     def __init__(self, chatInterface, service, workflow=None):
         self.chatInterface = chatInterface
         self.service = service
         self.workflow = workflow
         self.documentGenerator = DocumentGenerator(service)

+    def _checkWorkflowStopped(self):
+        """
+        Check if workflow has been stopped by user and raise exception if so.
+        This function centralizes all workflow stop checking logic to avoid code duplication.
+        """
+        try:
+            # Get the current workflow status from the database to avoid stale data
+            current_workflow = self.chatInterface.getWorkflow(self.service.workflow.id)
+            if current_workflow and current_workflow.status == "stopped":
+                logger.info("Workflow stopped by user, aborting execution")
+                raise WorkflowStoppedException("Workflow was stopped by user")
+        except WorkflowStoppedException:
+            # Re-raise the WorkflowStoppedException immediately
+            raise
+        except Exception as e:
+            # If we can't get the current status due to other database issues, fall back to the in-memory object
+            logger.warning(f"Could not check current workflow status from database: {str(e)}")
+            if self.service.workflow.status == "stopped":
+                logger.info("Workflow stopped by user (from in-memory object), aborting execution")
+                raise WorkflowStoppedException("Workflow was stopped by user")
+
     async def generateTaskPlan(self, userInput: str, workflow) -> TaskPlan:
         """Generate a high-level task plan for the workflow."""
         try:
+            # Check workflow status before generating task plan
+            self._checkWorkflowStopped()
+
             logger.info(f"Generating task plan for workflow {workflow.id}")
             available_docs = self.service.getAvailableDocuments(workflow)
-            logger.debug(f"Available documents: {available_docs}")
+
+            # Check workflow status before calling AI service
+            self._checkWorkflowStopped()
+
             prompt = await self.service.callAiTextAdvanced(
-                createTaskPlanningPrompt(self, {
+                createTaskPlanningPrompt({
                     'user_request': userInput,
                     'available_documents': available_docs,
                     'workflow_id': workflow.id
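Note: a hypothetical driver loop showing how callers are expected to consume WorkflowStoppedException; executeTask and the exception class come from the diff, the loop itself is illustrative and assumes the exception is importable from this module.

    async def run_all_tasks(handler, tasks, workflow, context):
        results = []
        for i, step in enumerate(tasks, start=1):
            try:
                results.append(await handler.executeTask(
                    step, workflow, context, task_index=i, total_tasks=len(tasks)))
            except WorkflowStoppedException:
                # The user stopped the workflow; abort the remaining tasks cleanly.
                break
        return results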
@@ -53,16 +84,38 @@ class HandlingTasks:

             if not self._validateTaskPlan(task_plan_dict):
                 logger.error("Generated task plan failed validation")
+                logger.error(f"AI Response: {prompt}")
+                logger.error(f"Parsed Task Plan: {json.dumps(task_plan_dict, indent=2)}")
                 raise Exception("AI-generated task plan failed validation - AI is required for task planning")

-            tasks = [TaskStep(**task_dict) for task_dict in task_plan_dict.get('tasks', [])]
+            tasks = []
+            for task_dict in task_plan_dict.get('tasks', []):
+                # Map old 'description' field to new 'objective' field
+                if 'description' in task_dict and 'objective' not in task_dict:
+                    task_dict['objective'] = task_dict.pop('description')
+                tasks.append(TaskStep(**task_dict))
             task_plan = TaskPlan(
                 overview=task_plan_dict.get('overview', ''),
                 tasks=tasks
             )

             logger.info(f"Task plan generated successfully with {len(tasks)} tasks")
-            logger.debug(f"Task plan: {json.dumps(task_plan_dict, indent=2)}")
+            # Log the generated tasks
+            for i, task in enumerate(tasks):
+                logger.info(f"  Task {i+1}: {task.objective}")
+                if hasattr(task, 'success_criteria') and task.success_criteria:
+                    logger.info(f"    Success criteria: {task.success_criteria}")
+
+            # Log the complete task plan
+            logger.info("=== GENERATED TASK PLAN ===")
+            logger.info(f"Overview: {task_plan.overview}")
+            logger.info(f"Total tasks: {len(tasks)}")
+
+            # Log the RAW AI-generated task plan JSON for debugging
+            logger.info("=== RAW AI TASK PLAN JSON ===")
+            logger.info(f"AI Response with task plan: {prompt}")
+            logger.info("=== END RAW AI TASK PLAN JSON ===")
+
             return task_plan
         except Exception as e:
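Note: a standalone illustration of the backward-compatibility mapping above, so plans produced with the old 'description' field still load into TaskStep.

    task_dict = {"id": "task_1", "description": "Draft professional email"}
    if "description" in task_dict and "objective" not in task_dict:
        task_dict["objective"] = task_dict.pop("description")
    assert task_dict == {"id": "task_1", "objective": "Draft professional email"}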
@@ -72,12 +125,14 @@ class HandlingTasks:
     async def generateTaskActions(self, task_step, workflow, previous_results=None, enhanced_context=None) -> List[TaskAction]:
         """Generate actions for a given task step."""
         try:
-            logger.info(f"Generating actions for task: {task_step.description}")
+            # Check workflow status before generating actions
+            self._checkWorkflowStopped()
+
+            logger.info(f"Generating actions for task: {task_step.objective}")
+
             available_docs = self.service.getAvailableDocuments(workflow)
             available_connections = self.service.getConnectionReferenceList()
-            logger.debug(f"Available documents: {available_docs}")
-            logger.debug(f"Available connections: {available_connections}")

             context = enhanced_context or TaskContext(
                 task_step=task_step,
@@ -94,8 +149,11 @@ class HandlingTasks:
                 failed_actions=[],
                 successful_actions=[]
             )
+            # Check workflow status before calling AI service
+            self._checkWorkflowStopped()
+
             prompt = await self.service.callAiTextAdvanced(
-                await createActionDefinitionPrompt(self, context)
+                await createActionDefinitionPrompt(context, self.service)
             )
             # Inline parseActionResponse logic here
             json_start = prompt.find('{')
@@ -126,40 +184,169 @@ class HandlingTasks:
             }) for a in actions]

             valid_actions = [ta for ta in task_actions if ta]
-            logger.info(f"Generated {len(valid_actions)} actions for task: {task_step.description}")
-            logger.debug(f"Task actions plan: {json.dumps(action_data, indent=2)}")
+            logger.info(f"Generated {len(valid_actions)} actions for task: {task_step.objective}")
+            # Log the generated actions
+            for i, action in enumerate(valid_actions):
+                logger.info(f"  Action {i+1}: {action.execMethod}.{action.execAction}")
+                if action.expectedDocumentFormats:
+                    logger.info(f"    Expected formats: {action.expectedDocumentFormats}")
+                if action.execParameters.get('documentList'):
+                    logger.info(f"    Input documents: {action.execParameters['documentList']}")
+
+            # Log the complete action plan
+            logger.info("=== GENERATED ACTION PLAN ===")
+            logger.info(f"Task: {task_step.objective}")
+            logger.info(f"Total actions: {len(valid_actions)}")
+
+            # Log the RAW AI-generated action plan JSON for debugging
+            logger.info("=== RAW AI ACTION PLAN JSON ===")
+            logger.info(f"AI Response with parsed actions: {prompt}")
+            logger.info("=== END RAW AI ACTION PLAN JSON ===")
+
             return valid_actions
         except Exception as e:
             logger.error(f"Error in generateTaskActions: {str(e)}")
             return []

-    async def executeTask(self, task_step, workflow, context) -> TaskResult:
+    async def executeTask(self, task_step, workflow, context, task_index=None, total_tasks=None) -> TaskResult:
         """Execute all actions for a task step, with state management and retries."""
-        logger.info(f"Executing task: {task_step.description}")
+        logger.info(f"=== STARTING TASK {task_index or '?'}: {task_step.objective} ===")
+
+        # Create database log entry for task start in format expected by frontend
+        if task_index is not None:
+            if total_tasks is not None:
+                self.chatInterface.createWorkflowLog({
+                    "workflowId": workflow.id,
+                    "message": f"Executing task {task_index}/{total_tasks}",
+                    "type": "info"
+                })
+            else:
+                self.chatInterface.createWorkflowLog({
+                    "workflowId": workflow.id,
+                    "message": f"Executing task {task_index}/?",
+                    "type": "info"
+                })
+
+            # Create a task start message for the user
+            task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index)
+            task_start_message = {
+                "workflowId": workflow.id,
+                "role": "assistant",
+                "message": f"🚀 Starting Task {task_progress}\n\nObjective: {task_step.objective}",
+                "status": "step",
+                "sequenceNr": len(workflow.messages) + 1,
+                "publishedAt": datetime.now(UTC).isoformat(),
+                "documentsLabel": f"task_{task_index}_start",
+                "documents": []
+            }
+
+            message = self.chatInterface.createWorkflowMessage(task_start_message)
+            if message:
+                workflow.messages.append(message)
+                logger.info(f"Task start message created for task {task_index}")
+
         state = TaskExecutionState(task_step)
         retry_context = context
         max_retries = state.max_retries
         for attempt in range(max_retries):
             logger.info(f"Task execution attempt {attempt+1}/{max_retries}")
+
+            # Check workflow status before starting task execution
+            self._checkWorkflowStopped()
+
             actions = await self.generateTaskActions(task_step, workflow, previous_results=retry_context.previous_results, enhanced_context=retry_context)
             if not actions:
                 logger.error("No actions defined for task step, aborting task execution")
                 break
+
+            # Log total actions count for this task
+            total_actions = len(actions)
+            logger.info(f"Task {task_index or '?'} has {total_actions} actions")
+
             action_results = []
-            for action in actions:
-                result = await self.executeSingleAction(action, workflow)
+            for action_idx, action in enumerate(actions):
+                # Check workflow status before each action execution
+                self._checkWorkflowStopped()
+
+                # Log action start in format expected by frontend
+                action_number = action_idx + 1
+                logger.info(f"Task {task_index} - Starting action {action_number}/{total_actions}")
+
+                # Create database log entry for action start
+                self.chatInterface.createWorkflowLog({
+                    "workflowId": workflow.id,
+                    "message": f"Task {task_index} - Starting action {action_number}/{total_actions}",
+                    "type": "info"
+                })
+
+                # Create an action start message for the user
+                action_start_message = {
+                    "workflowId": workflow.id,
+                    "role": "assistant",
+                    "message": f"⚡ Task {task_index} - Action {action_number}/{total_actions}\n\nMethod: {action.execMethod}.{action.execAction}",
+                    "status": "step",
+                    "sequenceNr": len(workflow.messages) + 1,
+                    "publishedAt": datetime.now(UTC).isoformat(),
+                    "documentsLabel": f"action_{action_number}_start",
+                    "documents": []
+                }
+
+                message = self.chatInterface.createWorkflowMessage(action_start_message)
+                if message:
+                    workflow.messages.append(message)
+                    logger.info(f"Action start message created for action {action_number}")
+
+                # Pass action index to executeSingleAction with task context
+                result = await self.executeSingleAction(action, workflow, task_step, task_index, action_number, total_actions)
                 action_results.append(result)
                 if result.success:
                     state.addSuccessfulAction(result)
                 else:
                     state.addFailedAction(result)
+
+            # Check workflow status before review
+            self._checkWorkflowStopped()
+
             review_result = await self.reviewTaskCompletion(task_step, actions, action_results, workflow)
             success = review_result.status == 'success'
             feedback = review_result.reason
             error = None if success else review_result.reason
             if success:
-                logger.info(f"Task step '{task_step.description}' completed successfully")
+                logger.info(f"=== TASK {task_index or '?'} COMPLETED SUCCESSFULLY: {task_step.objective} ===")
+
+                # Create database log entry for task completion
+                if total_tasks is not None:
+                    self.chatInterface.createWorkflowLog({
+                        "workflowId": workflow.id,
+                        "message": f"🎯 Task {task_index}/{total_tasks} completed",
+                        "type": "success"
+                    })
+                else:
+                    self.chatInterface.createWorkflowLog({
+                        "workflowId": workflow.id,
+                        "message": f"🎯 Task {task_index}/? completed",
+                        "type": "success"
+                    })
+
+                # Create a task completion message for the user
+                task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index)
+                task_completion_message = {
+                    "workflowId": workflow.id,
+                    "role": "assistant",
+                    "message": f"🎯 Task {task_progress} Completed Successfully!\n\nObjective: {task_step.objective}\n\nFeedback: {feedback or 'Task completed successfully'}",
+                    "status": "step",
+                    "sequenceNr": len(workflow.messages) + 1,
+                    "publishedAt": datetime.now(UTC).isoformat(),
+                    "documentsLabel": f"task_{task_index}_completion",
+                    "documents": []
+                }
+
+                message = self.chatInterface.createWorkflowMessage(task_completion_message)
+                if message:
+                    workflow.messages.append(message)
+                    logger.info(f"Task completion message created for task {task_index}")
+
                 return TaskResult(
                     taskId=task_step.id,
                     status=TaskStatus.COMPLETED,
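Note: shape of the createWorkflowLog payloads introduced above, which the diff says match the format expected by the frontend; values here are illustrative.

    log_entry = {
        "workflowId": "wf_123",           # hypothetical workflow ID
        "message": "Executing task 2/4",  # or "🎯 Task 2/4 completed"
        "type": "info",                   # "success" for completion entries
    }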
@@ -168,7 +355,7 @@ class HandlingTasks:
                     error=None
                 )
             elif review_result.status == 'retry' and state.canRetry():
-                logger.warning(f"Task step '{task_step.description}' requires retry: {review_result.improvements}")
+                logger.warning(f"Task step '{task_step.objective}' requires retry: {review_result.improvements}")
                 state.incrementRetryCount()
                 retry_context.retry_count = state.retry_count
                 retry_context.improvements = review_result.improvements
@@ -180,7 +367,7 @@ class HandlingTasks:
                 retry_context.successful_actions = state.successful_actions
                 continue
             else:
-                logger.error(f"Task step '{task_step.description}' failed after {attempt+1} attempts")
+                logger.error(f"=== TASK {task_index or '?'} FAILED: {task_step.objective} after {attempt+1} attempts ===")
                 return TaskResult(
                     taskId=task_step.id,
                     status=TaskStatus.FAILED,
@@ -188,7 +375,7 @@ class HandlingTasks:
                     feedback=feedback,
                     error=error
                 )
-        logger.error(f"Task step '{task_step.description}' failed after all retries")
+        logger.error(f"=== TASK {task_index or '?'} FAILED AFTER ALL RETRIES: {task_step.objective} ===")
         return TaskResult(
             taskId=task_step.id,
             status=TaskStatus.FAILED,
@@ -199,6 +386,9 @@ class HandlingTasks:

     async def reviewTaskCompletion(self, task_step, task_actions, action_results, workflow):
         try:
+            # Check workflow status before reviewing task completion
+            self._checkWorkflowStopped()
+
             review_context = ReviewContext(
                 task_step=task_step,
                 action_results=action_results,
@@ -210,8 +400,11 @@ class HandlingTasks:
                     'errors': [result.error for result in action_results if not result.success]
                 }
             )
+            # Check workflow status before calling AI service
+            self._checkWorkflowStopped()
+
             # Use promptFactory for review prompt
-            prompt = await createResultReviewPrompt(self, review_context)
+            prompt = await createResultReviewPrompt(review_context)
             response = await self.service.callAiTextAdvanced(prompt)
             # Inline parseReviewResponse logic here
             json_start = response.find('{')
@@ -239,10 +432,6 @@ class HandlingTasks:
                 improvements = []

             # Ensure all list fields are properly typed
-            missing_outputs = review.get('missing_outputs', [])
-            if not isinstance(missing_outputs, list):
-                missing_outputs = []
-
             met_criteria = review.get('met_criteria', [])
             if not isinstance(met_criteria, list):
                 met_criteria = []
@@ -256,14 +445,14 @@ class HandlingTasks:
                 reason=review.get('reason', 'No reason provided'),
                 improvements=improvements,
                 quality_score=review.get('quality_score', 5),
-                missing_outputs=missing_outputs,
+                missing_outputs=[],
                 met_criteria=met_criteria,
                 unmet_criteria=unmet_criteria,
                 confidence=review.get('confidence', 0.5)
             )

             # Enhanced validation logging
-            logger.info(f"VALIDATION RESULT - Task: '{task_step.description}' - Status: {review_result.status.upper()}, Quality: {review_result.quality_score}/10")
+            logger.info(f"VALIDATION RESULT - Task: '{task_step.objective}' - Status: {review_result.status.upper()}, Quality: {review_result.quality_score}/10")
             if review_result.status == 'success':
                 logger.info(f"VALIDATION SUCCESS - Task completed successfully")
                 if review_result.met_criteria:
@@ -274,8 +463,6 @@ class HandlingTasks:
                     logger.warning(f"Unmet criteria: {', '.join(review_result.unmet_criteria)}")
             else:
                 logger.error(f"VALIDATION FAILED - Task failed: {review_result.reason}")
-                if review_result.missing_outputs:
-                    logger.error(f"Missing outputs: {', '.join(review_result.missing_outputs)}")

             return review_result
         except Exception as e:
@@ -288,26 +475,22 @@ class HandlingTasks:

     async def prepareTaskHandover(self, task_step, task_actions, review_result, workflow):
         try:
+            # Check workflow status before preparing task handover
+            self._checkWorkflowStopped()
+
             # Log handover status summary
             if hasattr(review_result, 'status'):
                 status = review_result.status
-            if hasattr(review_result, 'missing_outputs'):
-                missing = review_result.missing_outputs
-            else:
-                missing = []
             if hasattr(review_result, 'met_criteria'):
                 met = review_result.met_criteria
             else:
                 met = []

-            logger.debug(f"Task handover status: {status}")
-            logger.debug(f"Promised documents: {task_step.expected_outputs}")
-            logger.debug(f"Delivered documents: {met}")
-            logger.debug(f"Missing documents: {missing}")
-
             handover_data = {
                 'task_id': task_step.id,
-                'task_description': task_step.description,
+                'task_description': task_step.objective,
                 'actions': [action.to_dict() for action in task_actions],
                 'review_result': review_result.to_dict() if hasattr(review_result, 'to_dict') else review_result,
                 'workflow_id': workflow.id,
@@ -321,20 +504,35 @@ class HandlingTasks:

     # --- Helper action handling methods ---

-    async def executeSingleAction(self, action, workflow):
+    async def executeSingleAction(self, action, workflow, task_step, task_index=None, action_index=None, total_actions=None):
         """Execute a single action and return ActionResult with enhanced document processing"""
         try:
-            logger.info(f"Executing action: {action.execMethod}.{action.execAction}")
+            # Check workflow status before executing action
+            self._checkWorkflowStopped()
+
+            # Use passed indices or fallback to '?'
+            task_num = task_index if task_index is not None else '?'
+            action_num = action_index if action_index is not None else '?'
+
+            logger.info(f"=== TASK {task_num} ACTION {action_num}: {action.execMethod}.{action.execAction} ===")
+
-            # Log input documents and connections
+            # Log input parameters
             input_docs = action.execParameters.get('documentList', [])
-            logger.debug(f"Input documents: {input_docs}")
-            logger.debug(f"Input connections: {action.execParameters.get('connections', [])}")
+            input_connections = action.execParameters.get('connections', [])
+            logger.info(f"Input documents: {input_docs} (type: {type(input_docs)})")
+            if input_connections:
+                logger.info(f"Input connections: {input_connections}")
+
+            # Log all action parameters for debugging
+            logger.info(f"All action parameters: {action.execParameters}")
+
             enhanced_parameters = action.execParameters.copy()
             if action.expectedDocumentFormats:
                 enhanced_parameters['expectedDocumentFormats'] = action.expectedDocumentFormats
-                logger.debug(f"Expected document formats: {action.expectedDocumentFormats}")
+                logger.info(f"Expected formats: {action.expectedDocumentFormats}")
+
+            # Check workflow status before executing the action
+            self._checkWorkflowStopped()
+
             result = await self.service.executeAction(
                 methodName=action.execMethod,
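Note: a sketch of a call with the new task-context parameters (signature from the diff; the caller objects are assumed to exist), showing what each index is used for.

    async def run_one(handler, action, workflow, task_step):
        return await handler.executeSingleAction(
            action, workflow, task_step,
            task_index=2,      # which task this action belongs to
            action_index=1,    # 1-based action number within the task
            total_actions=3,   # lets logs read "Action 1/3"
        )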
@@ -350,11 +548,51 @@ class HandlingTasks:
                 action.setSuccess()
                 action.result = result.data.get("result", "")
                 action.execResultLabel = result_label
-                await self.createActionMessage(action, result, workflow, result_label, created_documents)
-                logger.info(f"Action {action.execMethod}.{action.execAction} executed successfully")
+                await self.createActionMessage(action, result, workflow, result_label, created_documents, task_step, task_index)
+
+                # Log action results
+                logger.info(f"✓ Action completed successfully")
+
+                # Create database log entry for action completion
+                if total_actions is not None:
+                    self.chatInterface.createWorkflowLog({
+                        "workflowId": workflow.id,
+                        "message": f"✅ Task {task_num} - Action {action_num}/{total_actions} completed",
+                        "type": "success"
+                    })
+                else:
+                    self.chatInterface.createWorkflowLog({
+                        "workflowId": workflow.id,
+                        "message": f"✅ Task {task_num} - Action {action_num}/? completed",
+                        "type": "success"
+                    })
+
+                if created_documents:
+                    logger.info(f"Output documents ({len(created_documents)}):")
+                    for i, doc in enumerate(created_documents):
+                        if hasattr(doc, 'filename'):
+                            logger.info(f"  {i+1}. {doc.filename}")
+                        elif isinstance(doc, dict) and 'filename' in doc:
+                            logger.info(f"  {i+1}. {doc['filename']}")
+                        else:
+                            logger.info(f"  {i+1}. {type(doc).__name__}")
+
+                    # Log document details for debugging
+                    logger.info("Document details:")
+                    for i, doc in enumerate(created_documents):
+                        if hasattr(doc, 'filename'):
+                            logger.info(f"  Doc {i+1}: filename={doc.filename}, type={type(doc)}")
+                            if hasattr(doc, 'id'):
+                                logger.info(f"    ID: {doc.id}")
+                            if hasattr(doc, 'fileId'):
+                                logger.info(f"    File ID: {doc.fileId}")
+                        elif isinstance(doc, dict):
+                            logger.info(f"  Doc {i+1}: dict with keys: {list(doc.keys())}")
+                else:
+                    logger.info("Output: No documents created")
             else:
                 action.setError(result.error or "Action execution failed")
-                logger.error(f"Action {action.execMethod}.{action.execAction} failed: {result.error}")
+                logger.error(f"✗ Action failed: {result.error}")

             # Extract document filenames for the ActionResult
             document_filenames = []
@@ -367,6 +605,9 @@ class HandlingTasks:
             # Also include the original documents from the service result for validation
             original_documents = result.data.get("documents", [])

+            # Log action summary
+            logger.info(f"=== TASK {task_num} ACTION {action_num} COMPLETED ===")
+
             return ActionResult(
                 success=result.success,
                 data={
@@ -407,9 +648,12 @@ class HandlingTasks:
                 error=str(e)
             )

-    async def createActionMessage(self, action, result, workflow, result_label=None, created_documents=None):
+    async def createActionMessage(self, action, result, workflow, result_label=None, created_documents=None, task_step=None, task_index=None):
         """Create and store a message for the action result in the workflow with enhanced document processing"""
         try:
+            # Check workflow status before creating action message
+            self._checkWorkflowStopped()
+
             if result_label is None:
                 result_label = action.execResultLabel
||||||
|
|
@ -417,25 +661,29 @@ class HandlingTasks:
|
||||||
if created_documents is None:
|
if created_documents is None:
|
||||||
created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow)
|
created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow)
|
||||||
|
|
||||||
# Log delivered documents with sizes
|
# Log delivered documents
|
||||||
if created_documents:
|
if created_documents:
|
||||||
doc_info = []
|
logger.info(f"Result label: {result_label} - {len(created_documents)} documents")
|
||||||
for doc in created_documents:
|
|
||||||
if hasattr(doc, 'filename') and hasattr(doc, 'fileSize'):
|
|
||||||
doc_info.append(f"{doc.filename} ({doc.fileSize} bytes)")
|
|
||||||
elif hasattr(doc, 'filename'):
|
|
||||||
doc_info.append(f"{doc.filename}")
|
|
||||||
else:
|
|
||||||
doc_info.append("unknown document")
|
|
||||||
logger.debug(f"Produced result label: {result_label}")
|
|
||||||
logger.debug(f"Delivered documents: {doc_info}")
|
|
||||||
else:
|
else:
|
||||||
logger.debug(f"Produced result label: {result_label} (no documents)")
|
logger.info(f"Result label: {result_label} - No documents")
|
||||||
|
|
||||||
|
# Create a more meaningful message that includes task context
|
||||||
|
task_objective = task_step.objective if task_step else 'Unknown task'
|
||||||
|
|
||||||
|
# Build a user-friendly message
|
||||||
|
if created_documents and len(created_documents) > 0:
|
||||||
|
doc_names = [doc.filename if hasattr(doc, 'filename') else str(doc) for doc in created_documents[:3]]
|
||||||
|
if len(created_documents) > 3:
|
||||||
|
doc_names.append(f"... and {len(created_documents) - 3} more")
|
||||||
|
|
||||||
|
message_text = f"✅ Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} completed\n\nObjective: {task_objective}\n\nGenerated {len(created_documents)} document(s): {', '.join(doc_names)}"
|
||||||
|
else:
|
||||||
|
message_text = f"✅ Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} completed\n\nObjective: {task_objective}\n\nAction executed successfully"
|
||||||
|
|
||||||
message_data = {
|
message_data = {
|
||||||
"workflowId": workflow.id,
|
"workflowId": workflow.id,
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
"message": f"Executed action {action.execMethod}.{action.execAction}",
|
"message": message_text,
|
||||||
"status": "step",
|
"status": "step",
|
||||||
"sequenceNr": len(workflow.messages) + 1,
|
"sequenceNr": len(workflow.messages) + 1,
|
||||||
"publishedAt": datetime.now(UTC).isoformat(),
|
"publishedAt": datetime.now(UTC).isoformat(),
|
||||||
|
|
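Note: a standalone illustration of the "first three names plus a count" truncation used to build the user-facing message above.

    created = ["a.txt", "b.txt", "c.txt", "d.txt", "e.txt"]
    doc_names = created[:3]
    if len(created) > 3:
        doc_names.append(f"... and {len(created) - 3} more")
    print(", ".join(doc_names))  # a.txt, b.txt, c.txt, ... and 2 more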
@@ -449,7 +697,7 @@ class HandlingTasks:
             message = self.chatInterface.createWorkflowMessage(message_data)
             if message:
                 workflow.messages.append(message)
-                logger.info(f"Created action message for {action.execMethod}.{action.execAction} with {len(created_documents)} documents")
+                logger.info(f"Message created: {action.execMethod}.{action.execAction}")
             else:
                 logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}")
         except Exception as e:
@@ -459,29 +707,59 @@ class HandlingTasks:

     def _validateTaskPlan(self, task_plan: Dict[str, Any]) -> bool:
         try:

             if not isinstance(task_plan, dict):
+                logger.error("Task plan is not a dictionary")
                 return False

             if 'tasks' not in task_plan or not isinstance(task_plan['tasks'], list):
+                logger.error(f"Task plan missing 'tasks' field or not a list. Found: {type(task_plan.get('tasks', 'MISSING'))}")
                 return False

+            # First pass: collect all task IDs to validate dependencies
             task_ids = set()
             for task in task_plan['tasks']:
                 if not isinstance(task, dict):
+                    logger.error(f"Task is not a dictionary: {type(task)}")
                     return False
-                required_fields = ['id', 'description', 'expected_outputs', 'success_criteria']
-                if not all(field in task for field in required_fields):
-                    return False
-                if task['id'] in task_ids:
+                if 'id' not in task:
+                    logger.error(f"Task missing 'id' field: {task}")
                     return False
                 task_ids.add(task['id'])

+            # Second pass: validate each task
+            for i, task in enumerate(task_plan['tasks']):
+                if not isinstance(task, dict):
+                    logger.error(f"Task {i} is not a dictionary: {type(task)}")
+                    return False
+
+                required_fields = ['id', 'objective', 'success_criteria']
+                missing_fields = [field for field in required_fields if field not in task]
+                if missing_fields:
+                    logger.error(f"Task {i} missing required fields: {missing_fields}")
+                    return False
+
+                # Check for duplicate IDs (shouldn't happen after first pass, but safety check)
+                if task['id'] in task_ids and list(task_plan['tasks']).count(task['id']) > 1:
+                    logger.error(f"Task {i} has duplicate ID: {task['id']}")
+                    return False
+
                 dependencies = task.get('dependencies', [])
                 if not isinstance(dependencies, list):
+                    logger.error(f"Task {i} dependencies is not a list: {type(dependencies)}")
                     return False

                 for dep in dependencies:
                     if dep not in task_ids and dep != 'task_0':
+                        logger.error(f"Task {i} has invalid dependency: {dep} (available: {list(task_ids) + ['task_0']})")
                         return False
-                if 'ai_prompt' in task and not isinstance(task['ai_prompt'], str):
-                    return False
+
+            logger.info(f"Task plan validation successful with {len(task_ids)} tasks")
             return True

         except Exception as e:
             logger.error(f"Error validating task plan: {str(e)}")
             return False
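Note: a minimal plan that passes the two-pass validation above; every task carries 'id', 'objective', and 'success_criteria', and dependencies reference only known task IDs or the special 'task_0'. Values are illustrative.

    plan = {
        "overview": "Prepare and send a summary email",
        "tasks": [
            {"id": "task_1", "objective": "Extract key information",
             "success_criteria": ["Key points identified"], "dependencies": ["task_0"]},
            {"id": "task_2", "objective": "Draft and send the email",
             "success_criteria": ["Email sent with confirmation"], "dependencies": ["task_1"]},
        ],
    }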
@@ -7,7 +7,7 @@ from typing import Any, Dict

 # Prompt creation helpers extracted from managerChat.py

-def createTaskPlanningPrompt(self, context: Dict[str, Any]) -> str:
+def createTaskPlanningPrompt(context: Dict[str, Any]) -> str:
     """Create prompt for task planning"""
     return f"""You are a task planning AI that analyzes user requests and creates structured task plans.
||||||
|
|
@ -19,17 +19,16 @@ INSTRUCTIONS:
|
||||||
1. Analyze the user request and available documents
|
1. Analyze the user request and available documents
|
||||||
2. Break down the request into 2-4 meaningful high-level task steps
|
2. Break down the request into 2-4 meaningful high-level task steps
|
||||||
3. Focus on business outcomes, not technical operations
|
3. Focus on business outcomes, not technical operations
|
||||||
4. For document processing, create ONE task with a comprehensive AI prompt rather than multiple granular tasks
|
4. Each task should produce meaningful, usable outputs
|
||||||
5. Each task should produce meaningful, usable outputs
|
5. Ensure proper handover between tasks using result labels
|
||||||
6. Ensure proper handover between tasks using result labels
|
6. Return a JSON object with the exact structure shown below
|
||||||
7. Return a JSON object with the exact structure shown below
|
|
||||||
|
|
||||||
TASK PLANNING PRINCIPLES:
|
TASK PLANNING PRINCIPLES:
|
||||||
- Combine related operations into single tasks (e.g., \"Extract and analyze all candidate profiles\" instead of separate \"read file\" and \"analyze content\" tasks)
|
- Break down complex requests into logical, sequential steps
|
||||||
- Use comprehensive AI prompts for document processing rather than multiple small tasks
|
|
||||||
- Focus on business value and outcomes
|
- Focus on business value and outcomes
|
||||||
- Keep tasks at a meaningful level of abstraction
|
- Keep tasks at a meaningful level of abstraction
|
||||||
- Each task should produce results that can be used by subsequent tasks
|
- Each task should produce results that can be used by subsequent tasks
|
||||||
|
- Ensure clear dependencies and handovers between tasks
|
||||||
|
|
||||||
REQUIRED JSON STRUCTURE:
|
REQUIRED JSON STRUCTURE:
|
||||||
{{
|
{{
|
||||||
|
|
@@ -37,31 +36,34 @@ REQUIRED JSON STRUCTURE:
   \"tasks\": [
     {{
       \"id\": \"task_1\",
-      \"description\": \"Clear description of what this task accomplishes (business outcome)\",
+      \"objective\": \"Clear business objective this task accomplishes\",
       \"dependencies\": [\"task_0\"], // IDs of tasks that must complete first
-      \"expected_outputs\": [\"output1\", \"output2\"],
       \"success_criteria\": [\"criteria1\", \"criteria2\"],
-      \"required_documents\": [\"doc1\", \"doc2\"],
-      \"estimated_complexity\": \"low|medium|high\",
-      \"ai_prompt\": \"Comprehensive AI prompt for document processing tasks (if applicable)\"
+      \"estimated_complexity\": \"low|medium|high\"
     }}
   ]
 }}

-EXAMPLES OF GOOD TASK DESCRIPTIONS:
-- \"Extract and analyze all candidate profiles to identify key qualifications and experience\"
-- \"Create evaluation matrix and rate candidates against product designer criteria\"
-- \"Generate comprehensive PowerPoint presentation for management decision\"
-- \"Store final presentation in SharePoint for specified account\"
+EXAMPLES OF GOOD TASK OBJECTIVES:
+- \"Extract key information from documents for email preparation\"
+- \"Draft professional email incorporating analyzed information\"
+- \"Send email using specified email account\"
+- \"Store email draft and confirmation in system\"

-EXAMPLES OF BAD TASK DESCRIPTIONS:
+EXAMPLES OF GOOD SUCCESS CRITERIA:
+- \"Document analysis completed with key points identified\"
+- \"Email draft created with professional tone and clear structure\"
+- \"Email successfully sent with delivery confirmation\"
+- \"All outputs properly stored and accessible for future use\"
+
+EXAMPLES OF BAD TASK OBJECTIVES:
 - \"Open and read the PDF file\" (too granular)
 - \"Identify table structure\" (technical detail)
 - \"Convert data to CSV format\" (implementation detail)

 NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""

-async def createActionDefinitionPrompt(self, context) -> str:
+async def createActionDefinitionPrompt(context, service) -> str:
     """Create prompt for action generation with enhanced document extraction guidance and retry context"""
     task_step = context.task_step
     workflow = context.workflow
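Note: one concrete instance of the REQUIRED JSON STRUCTURE above, reusing objective and success-criteria phrasing from the prompt's own examples; values are illustrative.

    example_plan = {
        "overview": "Prepare and send a summary email",
        "tasks": [
            {
                "id": "task_1",
                "objective": "Extract key information from documents for email preparation",
                "dependencies": ["task_0"],
                "success_criteria": ["Document analysis completed with key points identified"],
                "estimated_complexity": "medium",
            }
        ],
    }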
@@ -71,23 +73,32 @@ async def createActionDefinitionPrompt(self, context) -> str:
     retry_count = context.retry_count or 0
     previous_action_results = context.previous_action_results or []
     previous_review_result = context.previous_review_result
-    methodList = self.service.getMethodsList()
+    methodList = service.getMethodsList()
     method_actions = {}
     for sig in methodList:
         if '.' in sig:
             method, rest = sig.split('.', 1)
             action = rest.split('(')[0]
             method_actions.setdefault(method, []).append((action, sig))
-    messageSummary = await self.service.summarizeChat(workflow.messages)
-    docRefs = self.service.getDocumentReferenceList()
-    connRefs = self.service.getConnectionReferenceList()
-    all_doc_refs = docRefs.get('chat', []) + docRefs.get('history', [])
+    messageSummary = await service.summarizeChat(workflow.messages)
+    # Get ALL documents from the entire workflow, not just current round
+    docRefs = service.getDocumentReferenceList()
+    connRefs = service.getConnectionReferenceList()
+
+    # Get documents from current round (chat) and entire workflow history
+    current_round_docs = docRefs.get('chat', [])
+    workflow_history_docs = docRefs.get('history', [])
+
+    # Combine all documents, prioritizing current round first, then workflow history
+    all_doc_refs = current_round_docs + workflow_history_docs
+
+    # Log document availability for debugging
+    logging.debug(f"Document references - Current round: {len(current_round_docs)}, Workflow history: {len(workflow_history_docs)}, Total: {len(all_doc_refs)}")
     available_methods_str = ''
     for method, actions in method_actions.items():
         available_methods_str += f"- {method}:\n"
         for action, sig in actions:
             available_methods_str += f"  - {action}: {sig}\n"
-    task_ai_prompt = task_step.ai_prompt or ''
     retry_context = ""
     if retry_count > 0:
         retry_context = f"""
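Note: a standalone illustration of the method-signature parsing above, with a hypothetical signature string.

    sig = "email.send(recipient, subject, body)"
    method, rest = sig.split(".", 1)  # "email", "send(recipient, subject, body)"
    action = rest.split("(")[0]       # "send"
    print(method, action)             # email send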
@ -105,17 +116,36 @@ Previous review feedback:
|
||||||
- Status: {previous_review_result.status or 'unknown'}
|
- Status: {previous_review_result.status or 'unknown'}
|
||||||
- Reason: {previous_review_result.reason or 'No reason provided'}
|
- Reason: {previous_review_result.reason or 'No reason provided'}
|
||||||
- Quality Score: {previous_review_result.quality_score or 0}/10
|
- Quality Score: {previous_review_result.quality_score or 0}/10
|
||||||
- Missing Outputs: {', '.join(previous_review_result.missing_outputs or [])}
|
|
||||||
- Unmet Criteria: {', '.join(previous_review_result.unmet_criteria or [])}
|
- Unmet Criteria: {', '.join(previous_review_result.unmet_criteria or [])}
|
||||||
"""
|
"""
|
||||||
expected_outputs_str = ', '.join(task_step.expected_outputs or [])
|
|
||||||
success_criteria_str = ', '.join(task_step.success_criteria or [])
|
success_criteria_str = ', '.join(task_step.success_criteria or [])
|
||||||
previous_results_str = ', '.join(previous_results) if previous_results else 'None'
|
previous_results_str = ', '.join(previous_results) if previous_results else 'None'
|
||||||
improvements_str = str(improvements) if improvements else 'None'
|
improvements_str = str(improvements) if improvements else 'None'
|
||||||
available_connections_str = '\n'.join(f"- {conn}" for conn in connRefs)
|
available_connections_str = '\n'.join(f"- {conn}" for conn in connRefs)
|
||||||
available_documents_str = '\n'.join(
|
# Build comprehensive document list showing both current round and workflow history
|
||||||
f"- {doc.documentsLabel} contains {', '.join(doc.documents)}" for doc in all_doc_refs
|
if all_doc_refs:
|
||||||
)
|
available_documents_str = "CURRENT ROUND DOCUMENTS:\n"
|
||||||
|
if current_round_docs:
|
||||||
|
for doc in current_round_docs:
|
||||||
|
available_documents_str += f"- {doc.documentsLabel} contains {', '.join(doc.documents)}\n"
|
||||||
|
else:
|
||||||
|
available_documents_str += "- No documents in current round\n"
|
||||||
|
|
||||||
|
available_documents_str += "\nWORKFLOW HISTORY DOCUMENTS:\n"
|
||||||
|
if workflow_history_docs:
|
||||||
|
for doc in workflow_history_docs:
|
||||||
|
available_documents_str += f"- {doc.documentsLabel} contains {', '.join(doc.documents)}\n"
|
||||||
|
else:
|
||||||
|
available_documents_str += "- No documents in workflow history\n"
|
||||||
|
else:
|
||||||
|
available_documents_str = "NO DOCUMENTS AVAILABLE - This workflow has no documents to process."
|
||||||
|
|
||||||
|
# Debug logging for document availability
|
||||||
|
logging.debug(f"Available documents string length: {len(available_documents_str)}")
|
||||||
|
logging.debug(f"Current round docs count: {len(current_round_docs)}")
|
||||||
|
logging.debug(f"Workflow history docs count: {len(workflow_history_docs)}")
|
||||||
|
logging.debug(f"Total doc refs: {len(all_doc_refs)}")
|
||||||
|
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
You are an action generation AI that creates specific actions to accomplish a task step.
|
You are an action generation AI that creates specific actions to accomplish a task step.
|
||||||
|
|
||||||
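With one docList in the current round and one in history, the new branch above renders an AVAILABLE DOCUMENTS section shaped roughly like this (labels invented):

    CURRENT ROUND DOCUMENTS:
    - task1_action2_results contains report_extracted.csv, summary.txt

    WORKFLOW HISTORY DOCUMENTS:
    - task0_action1_results contains source.pdf

If both lists are empty it collapses to the single NO DOCUMENTS AVAILABLE sentence, which the prompt rules in the next hunk key off.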
@@ -130,12 +160,11 @@ CRITICAL DOCUMENT REFERENCE RULES:
 - NEVER invent new labels or use message IDs
 - NEVER use formats like "msg_xxx:documents" or "task_X_results" (these will fail)
 - ONLY use the exact labels shown in AVAILABLE DOCUMENTS
-- **When generating multiple actions, you may only use as input documents those that are already present in AVAILABLE DOCUMENTS or produced by actions that come earlier in the list. Do NOT use as input any document label that will be produced by a later action.**
+- When generating multiple actions, you may only use as input documents those that are already present in AVAILABLE DOCUMENTS or produced by actions that come earlier in the list. Do NOT use as input any document label that will be produced by a later action.
+- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you CANNOT create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed.
 
-TASK STEP: {task_step.description} (ID: {task_step.id})
-EXPECTED OUTPUTS: {expected_outputs_str}
+TASK STEP: {task_step.objective} (ID: {task_step.id})
 SUCCESS CRITERIA: {success_criteria_str}
-TASK AI PROMPT: {task_ai_prompt if task_ai_prompt else 'None provided'}
 
 CONTEXT - Chat History:
 {messageSummary}
 
@@ -180,7 +209,8 @@ ACTION GENERATION PRINCIPLES:
 INSTRUCTIONS:
 - Generate actions to accomplish this task step using available documents, connections, and previous results
 - Use docItem for single documents and docList labels for groups of documents as shown in AVAILABLE DOCUMENTS
-- Always pass documentList as a LIST of references (docItem and/or docList)
+- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you cannot create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed.
+- Always pass documentList as a LIST of references (docItem and/or docList) - this list CANNOT be empty for document extraction actions
 - For resultLabel, use the format: "task{{task_id}}_action{{action_number}}_{{short_label}}" where:
   - {{task_id}} = the current task's id (e.g., 1)
   - {{action_number}} = the sequence number of the action within the task (e.g., 2)
 
@@ -202,8 +232,8 @@ REQUIRED JSON STRUCTURE:
     "resultLabel": "task1_action3_analysis_results",
     "expectedDocumentFormats": [ // OPTIONAL: Specify expected document formats when needed
       {{
-        "extension": ".csv",
-        "mimeType": "text/csv",
+        "extension": ".txt",
+        "mimeType": "text/plain",
         "description": "Structured data output"
       }}
     ],
 
@@ -314,19 +344,33 @@ EXAMPLES OF GOOD ACTIONS:
     ]
   }}
 
-NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
-        logging.debug(f"[ACTION PLAN PROMPT] Available Documents Section:\n{available_documents_str}\nUser Connections Section:\n{available_connections_str}\nAvailable Methods (summarized):\n{', '.join(method_actions.keys())}")
+6. When no documents are available (NO DOCUMENTS AVAILABLE scenario):
+   {{
+     "method": "document",
+     "action": "generateReport",
+     "parameters": {{
+       "documentList": [],
+       "title": "Workflow Status Report"
+     }},
+     "resultLabel": "task1_action1_status_report",
+     "description": "Generate a status report informing the user that no documents are available for processing and requesting document upload or alternative input."
+   }}
+
+IMPORTANT NOTES:
+- Respond with ONLY the JSON object. Do not include any explanatory text.
+- Before creating any document extraction action, verify that AVAILABLE DOCUMENTS contains actual document references.
+- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", use example 6 above to create a status report action instead of document extraction."""
+        logging.debug(f"[ACTION PLAN PROMPT] Available Documents Section:\n{available_documents_str}\nUser Connections Section:\n{available_connections_str}\nAvailable Methods (detailed):\n{available_methods_str}")
         return prompt
 
-    async def createResultReviewPrompt(self, review_context) -> str:
+    async def createResultReviewPrompt(review_context) -> str:
         """Create prompt for result review"""
         task_step = review_context.task_step
         step_result = review_context.step_result or {}
         step_result_serializable = {
             'task_step': {
                 'id': task_step.id,
-                'description': task_step.description,
-                'expected_outputs': task_step.expected_outputs or [],
+                'objective': task_step.objective,
                 'success_criteria': task_step.success_criteria or []
             },
             'action_results': [],
 
@@ -380,12 +424,10 @@ async def createResultReviewPrompt(self, review_context) -> str:
                 }
                 step_result_serializable['action_results'].append(serializable_action_result)
         step_result_json = json.dumps(step_result_serializable, indent=2, ensure_ascii=False)
-        expected_outputs_str = ', '.join(task_step.expected_outputs or [])
         success_criteria_str = ', '.join(task_step.success_criteria or [])
         return f"""You are a result review AI that evaluates task step completion with BASIC validation.
 
-TASK STEP: {task_step.description}
-EXPECTED OUTPUTS: {expected_outputs_str}
+TASK STEP: {task_step.objective}
 SUCCESS CRITERIA: {success_criteria_str}
 
 STEP RESULT: {step_result_json}
 
@@ -402,8 +444,8 @@ VALIDATION PRINCIPLES:
 - Text outputs are SECONDARY indicators
 - Only retry for CLEAR technical issues, not minor imperfections
 - Don't be picky about formatting or minor details
-- Check if ANY documents were produced (documents_count > 0), not specific expected output names
-- If documents were produced, consider it a SUCCESS regardless of expected output names
+- Check if ANY documents were produced (documents_count > 0)
+- If documents were produced, consider it a SUCCESS
 
 EXAMPLES OF SUCCESS:
 - Document extraction produced a file (even if imperfect)
 
@@ -428,7 +470,6 @@ REQUIRED JSON STRUCTURE:
   "reason": "Brief explanation",
   "improvements": ["specific technical fixes only"],
   "quality_score": 1-10,
-  "missing_outputs": [],
   "met_criteria": ["basic functionality achieved"],
   "unmet_criteria": []
 }}
 
@@ -437,6 +478,6 @@ VALIDATION LOGIC:
 - If ANY action has documents_count > 0, mark as SUCCESS
 - If ALL actions have documents_count = 0 AND no meaningful text output, mark as FAILED
 - Only mark as RETRY for clear technical issues that can be fixed
-- Do NOT fail based on expected output name mismatches - focus on actual document production
+- Focus on actual document production and functionality, not specific output names
 
 NOTE: Respond with ONLY the JSON object. Be GENEROUS with success ratings."""
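A reply satisfying the REQUIRED JSON STRUCTURE above might look like this (values illustrative; note that missing_outputs is gone after this change):

    {
      "status": "success",
      "reason": "Extraction produced one output document",
      "improvements": [],
      "quality_score": 8,
      "met_criteria": ["basic functionality achieved"],
      "unmet_criteria": []
    }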
@@ -4,15 +4,7 @@ from modules.interfaces.interfaceAppModel import User
 from modules.interfaces.interfaceChatModel import ChatWorkflow, UserInputRequest, TaskStep, TaskAction, ActionResult, ReviewResult, TaskPlan, WorkflowResult, TaskContext
 from modules.chat.serviceCenter import ServiceCenter
 from modules.interfaces.interfaceChatObjects import ChatObjects
-from .handling.handlingTasks import HandlingTasks
+from .handling.handlingTasks import HandlingTasks, WorkflowStoppedException
 
-logger = logging.getLogger(__name__)
-
-# ===== STATE MANAGEMENT AND VALIDATION CLASSES =====
-
-class WorkflowStoppedException(Exception):
-    """Exception raised when workflow is stopped by user"""
-    pass
-
 logger = logging.getLogger(__name__)
 
@@ -47,16 +39,15 @@ class ChatManager:
             raise Exception("No tasks generated in task plan.")
 
         # Phase 2-5: For each task, execute and get results
-        logger.info(f"Phase 2: Executing {len(task_plan.tasks)} tasks")
+        total_tasks = len(task_plan.tasks)
+        logger.info(f"Phase 2: Executing {total_tasks} tasks")
         all_task_results = []
         previous_results = []
         for idx, task_step in enumerate(task_plan.tasks):
-            logger.info(f"Task {idx+1}/{len(task_plan.tasks)}: {task_step.description}")
+            # Pass task index to executeTask method
+            current_task_index = idx + 1
 
-            # Check if workflow has been stopped before each task
-            if self.service.workflow.status == "stopped":
-                logger.info("Workflow stopped by user, aborting execution")
-                raise WorkflowStoppedException("Workflow was stopped by user")
+            logger.info(f"Task {idx+1}/{total_tasks}: {task_step.objective}")
 
             # Create task context for this task
             task_context = TaskContext(
 
@@ -67,7 +58,7 @@ class ChatManager:
                 previous_results=previous_results
             )
             # Execute task (this handles action generation, execution, and review internally)
-            task_result = await self.handlingTasks.executeTask(task_step, workflow, task_context)
+            task_result = await self.handlingTasks.executeTask(task_step, workflow, task_context, current_task_index, total_tasks)
             # Handover
             handover_data = await self.handlingTasks.prepareTaskHandover(task_step, [], task_result, workflow)
             # Collect results
 
@@ -90,6 +81,15 @@ class ChatManager:
             )
             logger.info(f"Unified workflow execution completed successfully for workflow {workflow.id}")
             return workflow_result
+        except WorkflowStoppedException:
+            logger.info(f"Workflow {workflow.id} was stopped by user")
+            return WorkflowResult(
+                status="stopped",
+                completed_tasks=0,
+                total_tasks=0,
+                execution_time=0.0,
+                final_results_count=0
+            )
         except Exception as e:
             logger.error(f"Error in executeUnifiedWorkflow: {str(e)}")
             return WorkflowResult(
 
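A condensed sketch of the stop path after this refactor, assuming (as the import change suggests) that HandlingTasks.executeTask now performs the status == "stopped" check itself and raises WorkflowStoppedException:

    class WorkflowStoppedException(Exception):
        """Raised by HandlingTasks when it sees status == 'stopped'."""

    def run_tasks(tasks, status_of):
        # Hypothetical outline, not the actual ChatManager code.
        for t in tasks:
            if status_of() == "stopped":     # check now lives inside executeTask
                raise WorkflowStoppedException()
        return "completed"

    try:
        run_tasks(["t1", "t2"], lambda: "stopped")
    except WorkflowStoppedException:
        result_status = "stopped"            # mapped to WorkflowResult(status="stopped")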
@@ -310,54 +310,43 @@ class ServiceCenter:
         chat_exchanges = []
         history_exchanges = []
 
-        # Process messages in reverse order to find current chat round
+        # Process messages in reverse order; "first" marks boundary: include up to and including
+        # the first "first" message in the chat container, older messages in the history container
+        in_current_round = True
         for message in reversed(self.workflow.messages):
-            # Get document references from message
+            is_first = getattr(message, "status", None) == "first"
+
+            # Build a DocumentExchange if message has documents
+            doc_exchange = None
             if message.documents:
-                # For messages with action context, create DocumentExchange with docList reference
                 if message.actionId and message.documentsLabel:
                     doc_ref = self.getDocumentReferenceFromMessage(message)
                     if doc_ref:
-                        # Create DocumentExchange with single docList reference
                         doc_exchange = DocumentExchange(
                             documentsLabel=message.documentsLabel,
                             documents=[doc_ref]
                         )
 
-                        # Add to appropriate list based on message status
-                        if message.status == "first":
-                            chat_exchanges.append(doc_exchange)
-                            break  # Stop after finding first message
-                        elif message.status == "step":
-                            chat_exchanges.append(doc_exchange)
-                        else:
-                            history_exchanges.append(doc_exchange)
-                # For regular messages, create DocumentExchange with individual docItem references
                 else:
                     doc_refs = []
                     for doc in message.documents:
                         doc_ref = self.getDocumentReferenceFromChatDocument(doc)
                         doc_refs.append(doc_ref)
 
                     if doc_refs:
-                        # Create DocumentExchange with individual document references
                         doc_exchange = DocumentExchange(
                             documentsLabel=f"{message.id}:documents",
                             documents=doc_refs
                         )
 
-                        # Add to appropriate list based on message status
-                        if message.status == "first":
-                            chat_exchanges.append(doc_exchange)
-                            break  # Stop after finding first message
-                        elif message.status == "step":
-                            chat_exchanges.append(doc_exchange)
-                        else:
-                            history_exchanges.append(doc_exchange)
 
-            # Stop processing if we hit a first message
-            if message.status == "first":
-                break
+            # Append to appropriate container based on boundary
+            if doc_exchange:
+                if in_current_round:
+                    chat_exchanges.append(doc_exchange)
+                else:
+                    history_exchanges.append(doc_exchange)
+
+            # Flip boundary after including the "first" message in chat
+            if in_current_round and is_first:
+                in_current_round = False
 
         # Sort both lists by datetime in descending order
         chat_exchanges.sort(key=lambda x: x.documentsLabel, reverse=True)
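A worked example of the boundary flip above, reduced to message statuses only:

    # Oldest-first statuses; reversed() walks newest-first.
    statuses_oldest_first = ["first", "step", "first", "step", "assistant"]
    chat, history = [], []
    in_current_round = True
    for status in reversed(statuses_oldest_first):
        (chat if in_current_round else history).append(status)
        if in_current_round and status == "first":
            in_current_round = False
    # chat == ["assistant", "step", "first"]  (current round, incl. its "first")
    # history == ["step", "first"]            (everything older)

Unlike the old version, messages older than the boundary are still collected into the history container instead of being dropped by an early break.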
@@ -393,31 +382,7 @@ class ServiceCenter:
         try:
             # ADDED LOGGING: Print workflow id, message count, and all message labels and document counts
             import logging
-            logger = logging.getLogger(__name__)
-            logger.debug(f"WORKFLOW STATE at getChatDocumentsFromDocumentList: id={id(self.workflow)}, message_count={len(self.workflow.messages) if hasattr(self.workflow, 'messages') else 'N/A'}")
-            for idx, message in enumerate(getattr(self.workflow, 'messages', [])):
-                label = getattr(message, 'documentsLabel', None)
-                docs = getattr(message, 'documents', None)
-                logger.debug(f" Message {idx}: label='{label}', documents_count={len(docs) if docs else 0}")
-            # DEBUG LOGGING: Print all document labels and their documents before extraction
-            import logging
-            logger = logging.getLogger(__name__)
-            logger.info("==== DEBUG: Listing all workflow message document labels and contained documents ====")
-            for message in self.workflow.messages:
-                label = getattr(message, 'documentsLabel', None)
-                docs = getattr(message, 'documents', None)
-                if label is not None:
-                    doc_names = []
-                    if docs:
-                        for doc in docs:
-                            if hasattr(doc, 'filename'):
-                                doc_names.append(doc.filename)
-                            elif isinstance(doc, dict) and 'filename' in doc:
-                                doc_names.append(doc['filename'])
-                            else:
-                                doc_names.append(str(doc))
-                    logger.info(f"Message label: '{label}' | Documents: {doc_names if doc_names else 'None'}")
-            logger.info("==== END DEBUG LIST ====")
             all_documents = []
             for doc_ref in documentList:
                 # Parse reference format
 
@@ -434,12 +399,12 @@ class ServiceCenter:
                         found = True
                         break
                 if not found:
-                    logger.warning(f"No documents found for label: {label}")
+                    logger.debug(f"No documents found for label: {label}")
                     continue
 
                 # Handle structured reference format
                 if len(parts) < 3:
-                    logger.warning(f"Invalid document reference format: {doc_ref}")
+                    logger.debug(f"Invalid document reference format: {doc_ref}")
                     continue
 
                 ref_type = parts[0]
 
@@ -487,7 +452,7 @@ class ServiceCenter:
 
     def getConnectionReferenceFromUserConnection(self, connection: UserConnection) -> str:
         """Get connection reference from UserConnection"""
-        return f"connection:{connection.authority}:{connection.externalUsername}:{connection.id}"
+        return f"connection:{connection.authority.value}:{connection.externalUsername}:{connection.id}"
 
     def getUserConnectionFromConnectionReference(self, connectionReference: str) -> Optional[UserConnection]:
         """Get UserConnection from reference string"""
 
@@ -506,7 +471,7 @@ class ServiceCenter:
 
         # Find matching connection
         for conn in user_connections:
-            if str(conn.id) == conn_id and conn.authority == authority and conn.externalUsername == username:
+            if str(conn.id) == conn_id and conn.authority.value == authority and conn.externalUsername == username:
                 return conn
         return None
 
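Round-tripping a reference through the two helpers above, with authority now serialized via its enum .value (sample values invented):

    ref = "connection:google:alice:42"   # from getConnectionReferenceFromUserConnection
    parts = ref.split(":")               # ["connection", "google", "alice", "42"]
    _, authority, username, conn_id = parts
    # Lookup now compares conn.authority.value == authority, so the enum
    # serializes to the same plain string that was embedded in the reference.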
@@ -700,16 +665,16 @@ Please provide a clear summary of this message."""
     async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
         """Extract content from ChatDocument using prompt"""
         try:
-            # Extract file data from ChatDocument
-            if document.data:
-                fileData = document.data.encode('utf-8') if isinstance(document.data, str) else document.data
-            else:
-                # Try to get file data from service center if document has fileId
-                if hasattr(document, 'fileId') and document.fileId:
-                    fileData = self.getFileData(document.fileId)
-                else:
-                    logger.error(f"No file data available in document: {document}")
-                    raise ValueError("No file data available in document")
+            # ChatDocument is just a reference, so we need to get file data using fileId
+            if not hasattr(document, 'fileId') or not document.fileId:
+                logger.error(f"Document {document.id} has no fileId")
+                raise ValueError("Document has no fileId")
+
+            # Get file data from service center using document's fileId
+            fileData = self.getFileData(document.fileId)
+            if not fileData:
+                logger.error(f"No file data found for fileId: {document.fileId}")
+                raise ValueError("No file data found for document")
 
             # Get filename and mime type from document
             filename = document.filename if hasattr(document, 'filename') else "document"
 
@@ -739,11 +704,11 @@ Please provide a clear summary of this message."""
         """Extract content from file data directly using prompt"""
         try:
             return await self.documentProcessor.processFileData(
-                prompt=prompt,
                 fileData=fileData,
                 filename=filename,
                 mimeType=mimeType,
                 base64Encoded=base64Encoded,
+                prompt=prompt,
                 documentId=documentId
             )
         except Exception as e:
 
@@ -771,15 +736,19 @@ Please provide a clear summary of this message."""
 
         return file_item.id
 
-    def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True) -> ChatDocument:
+    def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, existing_file_id: str = None) -> ChatDocument:
         """Create document from file data object created by AI call"""
-        # First create the file and get its ID
-        file_id = self.createFile(fileName, mimeType, content, base64encoded)
+        # Use existing file ID if provided, otherwise create new file
+        if existing_file_id:
+            file_id = existing_file_id
+        else:
+            # First create the file and get its ID
+            file_id = self.createFile(fileName, mimeType, content, base64encoded)
 
         # Get file info for metadata
         file_info = self.interfaceComponent.getFile(file_id)
 
-        # Create document with file reference
+        # Create document with file reference (ChatDocument is just a reference, not a data container)
         return ChatDocument(
             id=str(uuid.uuid4()),
             fileId=file_id,
 
@@ -807,8 +776,7 @@ Please provide a clear summary of this message."""
                 bytesReceived=bytesReceived
             )
 
-            # Log the stats event
-            logger.debug(f"Workflow stats updated - Event: {eventLabel}, Sent: {bytesSent}, Received: {bytesReceived}, Tokens: {tokenCount}")
 
         except Exception as e:
             logger.error(f"Error updating workflow stats: {str(e)}")
 
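A hedged sketch of the new existing_file_id path, which lets callers wrap an already-stored file in a fresh ChatDocument without re-writing the payload (call shape assumed from the signature above; prior_file_id is hypothetical):

    # Reuse a stored file: createFile is skipped, content is not re-written.
    doc = service.createDocument(
        fileName="report.csv",
        mimeType="text/csv",
        content="",                      # unused when existing_file_id is given
        existing_file_id=prior_file_id,  # e.g. the fileId of a previous result
    )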
@@ -62,33 +62,42 @@ class AiCalls:
         Advanced text processing using Anthropic.
         Fallback to OpenAI if Anthropic is overloaded or rate-limited.
         """
-        messages = []
-        if context:
-            messages.append({
-                "role": "system",
-                "content": context
-            })
-        messages.append({
-            "role": "user",
-            "content": prompt
-        })
-        if hasattr(self, 'userLanguage') and self.userLanguage:
-            ltext = f"Please respond in '{self.userLanguage}' language."
-            if messages and messages[0]["role"] == "system":
-                if "language" not in messages[0]["content"].lower():
-                    messages[0]["content"] = f"{ltext} {messages[0]['content']}"
-            else:
-                messages.insert(0, {
-                    "role": "system",
-                    "content": ltext
-                })
+        # For Anthropic, we need to handle system content differently
+        # Anthropic expects system content in a top-level parameter, not as a message role
         try:
-            response = await self.anthropicService.callAiBasic(messages)
+            # Create messages without system role for Anthropic
+            anthropic_messages = []
+            if hasattr(self, 'userLanguage') and self.userLanguage:
+                ltext = f"Please respond in '{self.userLanguage}' language."
+                if context:
+                    # Combine context and language instruction
+                    full_context = f"{ltext}\n\n{context}"
+                else:
+                    full_context = ltext
+            else:
+                full_context = context
+
+            # Add user message
+            anthropic_messages.append({
+                "role": "user",
+                "content": prompt
+            })
+
+            # Call Anthropic - let the connector handle system content conversion
+            if full_context:
+                # Send context as part of the user message for Anthropic
+                enhanced_prompt = f"Context:\n{full_context}\n\nUser Request:\n{prompt}"
+                response = await self.anthropicService.callAiBasic([
+                    {"role": "user", "content": enhanced_prompt}
+                ])
+            else:
+                response = await self.anthropicService.callAiBasic(anthropic_messages)
 
             return response["choices"][0]["message"]["content"]
         except Exception as e:
             err_str = str(e)
             logger.warning(f"[UI NOTICE] Advanced AI failed, falling back to Basic AI (OpenAI). Reason: {err_str}")
-            # Optionally, you could surface this message to the UI via a return value or error object
+            # Fallback to OpenAI basic
             return await self.callAiTextBasic(prompt, context)
 
     async def callAiImageBasic(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str:
 
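Side by side, the payload change above (sample values invented). Anthropic's Messages API takes system text as a top-level parameter rather than a "system" message role, so the old OpenAI-style list could fail there; the new code folds context into the user turn instead:

    context = "You are a helpful assistant."
    prompt = "Summarize the attached report."
    full_context = context  # plus optional language hint, per the code above

    # Old: OpenAI-style system role, which Anthropic rejects as a message role
    old_messages = [{"role": "system", "content": context},
                    {"role": "user", "content": prompt}]

    # New: a single user turn carrying the context inline
    enhanced_prompt = f"Context:\n{full_context}\n\nUser Request:\n{prompt}"
    new_messages = [{"role": "user", "content": enhanced_prompt}]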
@@ -753,8 +753,8 @@ class AppObjects:
             logger.error(f"Error saving token: {str(e)}")
             raise
 
-    def getToken(self, authority: AuthAuthority) -> Optional[Token]:
-        """Get the latest token for the current user and authority"""
+    def getToken(self, authority: str) -> Optional[Token]:
+        """Get the latest valid token for the current user and authority"""
         try:
             # Get tokens for this user and authority
             tokens = self.db.getRecordset("tokens", recordFilter={
 
@@ -767,13 +767,20 @@ class AppObjects:
 
             # Sort by creation date and get the latest
             tokens.sort(key=lambda x: x.get("createdAt", ""), reverse=True)
-            return Token(**tokens[0])
+            latest_token = Token(**tokens[0])
+
+            # Check if token is expired
+            if latest_token.expiresAt and latest_token.expiresAt < datetime.now().timestamp():
+                logger.warning(f"Token for {authority} is expired (expiresAt: {latest_token.expiresAt})")
+                return None  # Don't return expired tokens
+
+            return latest_token
 
         except Exception as e:
             logger.error(f"Error getting token: {str(e)}")
             return None
 
-    def deleteToken(self, authority: AuthAuthority) -> None:
+    def deleteToken(self, authority: str) -> None:
         """Delete all tokens for the current user and authority"""
         try:
             # Get tokens to delete
 
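The expiry test above in isolation, assuming expiresAt holds a Unix timestamp in seconds:

    from datetime import datetime

    def is_expired(expires_at) -> bool:
        # Mirrors getToken: a missing/zero expiresAt means non-expiring.
        return bool(expires_at) and expires_at < datetime.now().timestamp()

Because both sides are epoch seconds, the comparison is timezone-safe even though datetime.now() is naive.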
@@ -256,6 +256,7 @@ class TaskAction(BaseModel, ModelMixin):
     execResultLabel: Optional[str] = Field(None, description="Label for the set of result documents")
     # NEW: Optional document format specification
     expectedDocumentFormats: Optional[List[Dict[str, str]]] = Field(None, description="Expected document formats (optional)")
+
     status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status")
     error: Optional[str] = Field(None, description="Error message if action failed")
     retryCount: int = Field(default=0, description="Number of retries attempted")
 
@@ -530,13 +531,11 @@ register_model_labels(
 
 class TaskStep(BaseModel, ModelMixin):
     id: str
-    description: str
+    objective: str
     dependencies: Optional[list[str]] = []
-    expected_outputs: Optional[list[str]] = []
     success_criteria: Optional[list[str]] = []
-    required_documents: Optional[list[str]] = []
     estimated_complexity: Optional[str] = None
-    ai_prompt: Optional[str] = None
 
 class TaskContext(BaseModel, ModelMixin):
     task_step: TaskStep
 
@@ -880,7 +880,30 @@ class ChatObjects:
         workflow = self.loadWorkflowState(workflowId)
         if not workflow:
             raise ValueError(f"Workflow {workflowId} not found")
 
+        # Check if workflow is currently running and stop it first
+        if workflow.status == "running":
+            logger.info(f"Stopping running workflow {workflowId} before processing new prompt")
+
+            # Stop the running workflow
+            workflow.status = "stopped"
+            workflow.lastActivity = currentTime
+            self.updateWorkflow(workflowId, {
+                "status": "stopped",
+                "lastActivity": currentTime
+            })
+
+            # Add log entry for workflow stop
+            self.createWorkflowLog({
+                "workflowId": workflowId,
+                "message": "Workflow stopped for new prompt",
+                "type": "info",
+                "status": "stopped",
+                "progress": 100
+            })
+
+            # Wait a moment for any running processes to detect the stop
+            await asyncio.sleep(0.1)
+
         # Update workflow - set status back to running for resumed workflows
         self.updateWorkflow(workflowId, {
 
@@ -5,7 +5,6 @@ Handles document operations using the document service.
 
 import logging
 from typing import Dict, Any, List, Optional
-import uuid
 from datetime import datetime, UTC
 
 from modules.chat.methodBase import MethodBase, ActionResult, action
 
@@ -24,19 +23,19 @@ class MethodDocument(MethodBase):
     @action
     async def extract(self, parameters: Dict[str, Any]) -> ActionResult:
         """
-        Extract specific content from document with ai prompt and return it in the specified format
+        Extract specific content from document with AI prompt and return it in the specified format.
 
         Parameters:
             documentList (str): Reference to the document list to extract content from
             aiPrompt (str): AI prompt for content extraction
-            includeMetadata (bool, optional): Whether to include metadata (default: True)
             expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
+            includeMetadata (bool, optional): Whether to include metadata (default: True)
         """
         try:
             documentList = parameters.get("documentList")
             aiPrompt = parameters.get("aiPrompt")
-            includeMetadata = parameters.get("includeMetadata", True)
             expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
+            includeMetadata = parameters.get("includeMetadata", True)
 
             if not documentList:
                 return self._createResult(
 
@@ -60,32 +59,7 @@ class MethodDocument(MethodBase):
                     error="No documents found for the provided reference"
                 )
 
-            # Determine output format based on expected formats
-            output_extension = ".txt"  # Default
-            output_mime_type = "text/plain"  # Default
-
-            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
-                # Use the first expected format
-                expected_format = expectedDocumentFormats[0]
-                output_extension = expected_format.get("extension", ".txt")
-                output_mime_type = expected_format.get("mimeType", "text/plain")
-                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
-                logger.info(f"Expected document formats: {expectedDocumentFormats}")
-            else:
-                logger.info("No expected format specified, using default .txt format")
-
-            # Enhance AI prompt to specify output format
-            enhanced_prompt = aiPrompt
-            if output_extension == ".csv":
-                enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows. Do not include ```csv or ``` markers."
-            elif output_extension == ".json":
-                enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure JSON data without any markdown formatting, code blocks, or additional text. Output only the JSON content. Do not include ```json or ``` markers."
-            elif output_extension == ".xml":
-                enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure XML data without any markdown formatting, code blocks, or additional text. Output only the XML content. Do not include ```xml or ``` markers."
-            elif output_extension != ".txt":
-                enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure {output_extension.upper()} data without any markdown formatting, code blocks, or additional text. Output only the {output_extension.upper()} content. Do not include any markdown markers."
-
-            # Extract content from all documents
+            # Extract content from all documents using AI
             all_extracted_content = []
             file_infos = []
 
@@ -99,7 +73,7 @@ class MethodDocument(MethodBase):
                     continue
 
                 extracted_content = await self.service.extractContentFromFileData(
-                    prompt=enhanced_prompt,  # Use enhanced prompt instead of original
+                    prompt=aiPrompt,
                     fileData=file_data,
                     filename=file_info.get('name', 'document'),
                     mimeType=file_info.get('mimeType', 'application/octet-stream'),
 
@@ -118,21 +92,7 @@ class MethodDocument(MethodBase):
                     error="No content could be extracted from any documents"
                 )
 
-            # Extract text content from ExtractedContent objects
-            text_contents = []
-            for content_obj in all_extracted_content:
-                if hasattr(content_obj, 'contents') and content_obj.contents:
-                    # Extract text from ContentItem objects
-                    for content_item in content_obj.contents:
-                        if hasattr(content_item, 'data') and content_item.data:
-                            text_contents.append(content_item.data)
-                elif isinstance(content_obj, str):
-                    text_contents.append(content_obj)
-                else:
-                    # Fallback: convert to string representation
-                    text_contents.append(str(content_obj))
-
-            # Process each document individually and create separate output files
+            # Process each document individually with its own format conversion
             output_documents = []
 
             for i, (chatDocument, extracted_content) in enumerate(zip(chatDocuments, all_extracted_content)):
 
@@ -140,36 +100,68 @@ class MethodDocument(MethodBase):
                 text_content = ""
                 if hasattr(extracted_content, 'contents') and extracted_content.contents:
                     # Extract text from ContentItem objects
+                    text_parts = []
                     for content_item in extracted_content.contents:
                         if hasattr(content_item, 'data') and content_item.data:
-                            text_content += content_item.data + "\n"
+                            text_parts.append(content_item.data)
+                    text_content = "\n".join(text_parts)
                 elif isinstance(extracted_content, str):
                     text_content = extracted_content
                 else:
-                    # Fallback: convert to string representation
                     text_content = str(extracted_content)
 
-                # Create output filename based on original filename
+                # Get the expected format for this document (or use default)
+                target_format = None
+                if expectedDocumentFormats and i < len(expectedDocumentFormats):
+                    target_format = expectedDocumentFormats[i]
+                elif expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+                    # If fewer formats than documents, use the last format for remaining documents
+                    target_format = expectedDocumentFormats[-1]
+
+                # Determine output format and filename
+                if target_format:
+                    target_extension = target_format.get("extension", ".txt")
+                    target_mime_type = target_format.get("mimeType", "text/plain")
+
+                    # Check if format conversion is needed
+                    if target_extension not in [".txt", ".text"] or target_mime_type != "text/plain":
+                        logger.info(f"Converting document {i+1} to format: {target_extension} ({target_mime_type})")
+                        # Use AI to convert format
+                        formatted_content = await self._convertContentToFormat(text_content, target_format)
+                        final_content = formatted_content
+                        final_mime_type = target_mime_type
+                        final_extension = target_extension
+                    else:
+                        logger.info(f"Document {i+1}: No format conversion needed, using plain text")
+                        final_content = text_content
+                        final_mime_type = "text/plain"
+                        final_extension = ".txt"
+                else:
+                    logger.info(f"Document {i+1}: No expected format specified, using plain text")
+                    final_content = text_content
+                    final_mime_type = "text/plain"
+                    final_extension = ".txt"
+
+                # Create output filename based on original filename and target format
                 original_filename = chatDocument.filename
                 base_name = original_filename.rsplit('.', 1)[0] if '.' in original_filename else original_filename
-                output_filename = f"{base_name}_extracted_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}"
+                output_filename = f"{base_name}_extracted_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{final_extension}"
 
                 # Create result data for this document
                 result_data = {
                     "documentCount": 1,
-                    "content": text_content,
+                    "content": final_content,
                     "originalFilename": original_filename,
                     "fileInfos": [file_infos[i]] if includeMetadata and i < len(file_infos) else None,
                     "timestamp": datetime.now(UTC).isoformat()
                 }
 
-                logger.info(f"Created output document: {output_filename} with {len(text_content)} characters")
-                logger.info(f"Content preview: {text_content[:200]}...")
+                logger.info(f"Created output document: {output_filename} with {len(final_content)} characters")
 
                 output_documents.append({
                     "documentName": output_filename,
                     "documentData": result_data,
-                    "mimeType": output_mime_type
+                    "mimeType": final_mime_type
                 })
 
             return self._createResult(
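How the index-to-format mapping above behaves when there are fewer formats than documents (values invented):

    formats = [{"extension": ".csv"}, {"extension": ".json"}]
    documents = ["a.pdf", "b.pdf", "c.pdf"]
    for i, _ in enumerate(documents):
        target = formats[i] if i < len(formats) else formats[-1]
    # a.pdf -> .csv, b.pdf -> .json, c.pdf -> .json (last format reused)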
|
|
@ -186,6 +178,327 @@ class MethodDocument(MethodBase):
|
||||||
error=str(e)
|
error=str(e)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@action
|
||||||
|
async def generate(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
|
"""
|
||||||
|
Generate documents in specific formats from document references.
|
||||||
|
This action automatically extracts content from documents and converts it to the specified format.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
documentList (list): List of document references to extract content from
|
||||||
|
expectedDocumentFormats (list): Expected document formats with extension, mimeType, description
|
||||||
|
originalDocuments (list, optional): List of original document names
|
||||||
|
includeMetadata (bool, optional): Whether to include metadata (default: True)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
document_list = parameters.get("documentList", [])
|
||||||
|
expected_document_formats = parameters.get("expectedDocumentFormats", [])
|
||||||
|
original_documents = parameters.get("originalDocuments", [])
|
||||||
|
include_metadata = parameters.get("includeMetadata", True)
|
||||||
|
|
||||||
|
if not document_list:
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={},
|
||||||
|
error="Document list is required for generation"
|
||||||
|
)
|
||||||
|
|
||||||
|
if not expected_document_formats or len(expected_document_formats) == 0:
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={},
|
||||||
|
error="Expected document formats specification is required"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get chat documents for original documents list
|
||||||
|
chat_documents = self.service.getChatDocumentsFromDocumentList(document_list)
|
||||||
|
logger.info(f"Found {len(chat_documents)} chat documents")
|
||||||
|
|
||||||
|
if not chat_documents:
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={},
|
||||||
|
error="No documents found for the provided documentList reference"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Update original documents list if not provided
|
||||||
|
if not original_documents:
|
||||||
|
original_documents = [doc.filename if hasattr(doc, 'filename') else str(doc.id) for doc in chat_documents]
|
||||||
|
|
||||||
|
# Process each document individually with its own format conversion
|
||||||
|
output_documents = []
|
||||||
|
|
||||||
|
for i, chat_document in enumerate(chat_documents):
|
||||||
|
# Extract content from this document
|
||||||
|
# ChatDocument is just a reference, so we need to get file data using fileId
|
||||||
|
content = ""
|
||||||
|
if hasattr(chat_document, 'fileId') and chat_document.fileId:
|
||||||
|
# Need to get file data
|
||||||
|
file_data = self.service.getFileData(chat_document.fileId)
|
||||||
|
if file_data:
|
||||||
|
if isinstance(file_data, bytes):
|
||||||
|
content = file_data.decode('utf-8', errors='ignore')
|
||||||
|
else:
|
||||||
|
content = str(file_data)
|
||||||
|
else:
|
||||||
|
logger.warning(f"Could not get file data for document {i+1}, skipping")
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
logger.warning(f"Document {i+1} has no fileId, skipping")
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not content:
|
||||||
|
logger.warning(f"Could not extract content from document {i+1}, skipping")
|
||||||
|
continue
|
||||||
|
|
||||||
|
logger.info(f"Extracted content from document {i+1}: {len(content)} characters")
|
||||||
|
|
||||||
|
# Get the expected format for this document (or use default)
|
||||||
|
target_format = None
|
||||||
|
if i < len(expected_document_formats):
|
||||||
|
target_format = expected_document_formats[i]
|
||||||
|
elif len(expected_document_formats) > 0:
|
||||||
|
# If fewer formats than documents, use the last format for remaining documents
|
||||||
|
target_format = expected_document_formats[-1]
|
||||||
|
|
||||||
|
if not target_format:
|
||||||
|
logger.warning(f"No expected format for document {i+1}, skipping")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Use AI to convert format
|
||||||
|
formatted_content = await self._convertContentToFormat(content, target_format)
|
||||||
|
if not formatted_content:
|
||||||
|
logger.warning(f"Failed to format document {i+1}, skipping")
|
||||||
|
continue
|
||||||
|
|
||||||
|
target_extension = target_format.get("extension", ".txt")
|
||||||
|
target_mime_type = target_format.get("mimeType", "text/plain")
|
||||||
|
|
||||||
|
# Create output filename
|
||||||
|
timestamp = datetime.now(UTC).strftime('%Y%m%d_%H%M%S')
|
||||||
|
if i < len(original_documents):
|
||||||
|
base_name = original_documents[i].rsplit('.', 1)[0] if '.' in original_documents[i] else original_documents[i]
|
||||||
|
else:
|
||||||
|
base_name = f"document_{i+1}"
|
||||||
|
output_filename = f"{base_name}_generated_{timestamp}{target_extension}"
|
||||||
|
|
||||||
|
# Create result data
|
||||||
|
result_data = {
|
||||||
|
"documentCount": 1,
|
||||||
|
"content": formatted_content,
|
||||||
|
"outputFormat": target_format,
|
||||||
|
"originalDocument": original_documents[i] if i < len(original_documents) else f"document_{i+1}",
|
||||||
|
"timestamp": datetime.now(UTC).isoformat()
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(f"Generated document: {output_filename} with {len(formatted_content)} characters")
|
||||||
|
|
||||||
|
output_documents.append({
|
||||||
|
"documentName": output_filename,
|
||||||
|
"documentData": result_data,
|
||||||
|
"mimeType": target_mime_type
|
||||||
|
})
|
||||||
|
|
||||||
|
if not output_documents:
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={},
|
||||||
|
error="No documents could be generated"
|
||||||
|
)
|
||||||
|
|
||||||
|
return self._createResult(
|
||||||
|
success=True,
|
||||||
|
data={
|
||||||
|
"documents": output_documents
|
||||||
|
}
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error generating document: {str(e)}")
|
||||||
|
return self._createResult(
|
||||||
|
success=False,
|
||||||
|
data={},
|
||||||
|
error=str(e)
|
||||||
|
)
|
||||||
+
+    async def _convertContentToFormat(self, content: str, target_format: Dict[str, Any]) -> str:
+        """
+        Helper function to convert content to the specified format using AI.
+        """
+        try:
+            extension = target_format.get("extension", ".txt")
+            mime_type = target_format.get("mimeType", "text/plain")
+
+            logger.info(f"Converting content to format: {extension} ({mime_type})")
+
+            # Create AI prompt for format conversion
+            format_prompts = {
+                ".csv": f"""
+                Convert the following content into a proper CSV format.
+
+                Requirements:
+                1. Output ONLY the CSV data without any markdown, code blocks, or additional text
+                2. Use appropriate headers based on the content
+                3. Ensure proper CSV formatting with commas and quotes where needed
+                4. Make the data easily readable and importable into spreadsheet applications
+
+                Content to convert:
+                {content}
+
+                Generate ONLY the CSV data:
+                """,
+
+                ".json": f"""
+                Convert the following content into a proper JSON format.
+
+                Requirements:
+                1. Output ONLY the JSON data without any markdown, code blocks, or additional text
+                2. Structure the data logically with appropriate keys and values
+                3. Ensure valid JSON syntax
+                4. Make the data easily parseable and readable
+
+                Content to convert:
+                {content}
+
+                Generate ONLY the JSON data:
+                """,
+
+                ".xml": f"""
+                Convert the following content into a proper XML format.
+
+                Requirements:
+                1. Output ONLY the XML data without any markdown, code blocks, or additional text
+                2. Use appropriate XML tags and structure
+                3. Ensure valid XML syntax
+                4. Make the data easily parseable and readable
+
+                Content to convert:
+                {content}
+
+                Generate ONLY the XML data:
+                """,
+
+                ".html": f"""
+                Convert the following content into a proper HTML format.
+
+                Requirements:
+                1. Output ONLY the HTML data without any markdown, code blocks, or additional text
+                2. Use appropriate HTML tags and structure
+                3. Ensure valid HTML syntax
+                4. Make the data easily readable in web browsers
+
+                Content to convert:
+                {content}
+
+                Generate ONLY the HTML data:
+                """,
+
+                ".md": f"""
+                Convert the following content into a proper Markdown format.
+
+                Requirements:
+                1. Output ONLY the Markdown data without any code blocks or additional text
+                2. Use appropriate Markdown syntax for headers, lists, emphasis, etc.
+                3. Structure the content logically
+                4. Make the data easily readable and convertible to other formats
+
+                Content to convert:
+                {content}
+
+                Generate ONLY the Markdown data:
+                """
+            }
+
+            # Get the appropriate prompt for the target format
+            if extension in format_prompts:
+                ai_prompt = format_prompts[extension]
+            else:
+                # Generic format conversion
+                ai_prompt = f"""
+                Convert the following content into {extension.upper()} format.
+
+                Requirements:
+                1. Output ONLY the {extension.upper()} data without any markdown, code blocks, or additional text
+                2. Use appropriate formatting for {extension.upper()} files
+                3. Ensure the output is valid and usable
+                4. Make the data easily readable and importable
+
+                Content to convert:
+                {content}
+
+                Generate ONLY the {extension.upper()} data:
+                """
+
+            # Call AI to generate the formatted content
+            logger.info(f"Calling AI for {extension} format conversion")
+            formatted_content = await self.service.callAiTextBasic(ai_prompt, content)
+
+            if not formatted_content or formatted_content.strip() == "":
+                logger.warning("AI format conversion failed, using fallback")
+                return self._generateFallbackFormattedContent(content, extension, mime_type)
+
+            # Clean up the AI response
+            formatted_content = formatted_content.strip()
+
+            # Remove markdown code blocks if present
+            if formatted_content.startswith("```") and formatted_content.endswith("```"):
+                lines = formatted_content.split('\n')
+                if len(lines) > 2:
+                    formatted_content = '\n'.join(lines[1:-1])
+
+            return formatted_content
+
+        except Exception as e:
+            logger.error(f"Error in AI format conversion: {str(e)}")
+            return self._generateFallbackFormattedContent(content, extension, mime_type)
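A quick usage sketch for the helper above (the call site and sample values are assumed, not taken from this commit). Note that extension and mime_type are assigned inside the try block, so the except fallback presumes any failure happened after those two .get() calls:

    target_format = {"extension": ".csv", "mimeType": "text/csv"}
    csv_text = await self._convertContentToFormat("name: Ada\nrole: engineer", target_format)
    # empty or blank AI output falls back to _generateFallbackFormattedContent()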
+
+    def _generateFallbackFormattedContent(self, content: str, extension: str, mime_type: str) -> str:
+        """
+        Generate fallback formatted content when AI conversion fails.
+        """
+        try:
+            if extension == ".csv":
+                # Simple CSV fallback - split by lines and create basic CSV
+                lines = content.strip().split('\n')
+                if lines:
+                    # Create a simple CSV with line numbers and content
+                    csv_lines = ["Line,Content"]
+                    for i, line in enumerate(lines, 1):
+                        # Escape quotes and wrap in quotes if comma present
+                        if ',' in line:
+                            line = f'"{line.replace(chr(34), chr(34) + chr(34))}"'
+                        csv_lines.append(f"{i},{line}")
+                    return '\n'.join(csv_lines)
+                return "Line,Content\n1,No content available"
+
+            elif extension == ".json":
+                # Simple JSON fallback
+                content_escaped = content.replace('"', '\\"')
+                timestamp = datetime.now(UTC).isoformat()
+                return f'{{"content": "{content_escaped}", "format": "json", "timestamp": "{timestamp}"}}'
+
+            elif extension == ".xml":
+                # Simple XML fallback
+                timestamp = datetime.now(UTC).isoformat()
+                return f'<?xml version="1.0" encoding="UTF-8"?>\n<document>\n<content>{content}</content>\n<format>xml</format>\n<timestamp>{timestamp}</timestamp>\n</document>'
+
+            elif extension == ".html":
+                # Simple HTML fallback
+                timestamp = datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')
+                return f'<!DOCTYPE html>\n<html>\n<head><meta charset="UTF-8"><title>Generated Document</title></head>\n<body>\n<pre>{content}</pre>\n<p><em>Generated on {timestamp}</em></p>\n</body>\n</html>'
+
+            elif extension == ".md":
+                # Simple Markdown fallback
+                timestamp = datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')
+                return f"# Generated Document\n\n{content}\n\n---\n*Generated on {timestamp}*"
+
+            else:
+                # Generic fallback - return content as-is
+                return content
+
+        except Exception as e:
+            logger.error(f"Error in fallback format conversion: {str(e)}")
+            return content
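Two caveats on the fallbacks above, for reference. The CSV branch applies the standard rule of doubling embedded quotes, e.g. the line say "hi", twice becomes "say ""hi"", twice" — but it only quotes lines that contain a comma, so a line with quotes and no comma passes through unescaped, which strict parsers may reject. The JSON branch escapes only double quotes; content containing backslashes or newlines yields invalid JSON, which json.dumps would avoid (a sketch, not what the commit does):

    import json
    return json.dumps({"content": content, "format": "json",
                       "timestamp": datetime.now(UTC).isoformat()})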

     @action
     async def generateReport(self, parameters: Dict[str, Any]) -> ActionResult:
         """

@@ -209,6 +522,8 @@ class MethodDocument(MethodBase):
             )
 
         chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)
+        logger.info(f"Retrieved {len(chatDocuments)} chat documents for report generation")
+
         if not chatDocuments:
             return self._createResult(
                 success=False,

@@ -261,15 +576,30 @@ class MethodDocument(MethodBase):
 
             for doc in chatDocuments:
                 content = ""
-                if hasattr(doc, 'content') and doc.content:
-                    content = doc.content.strip()
-                elif hasattr(doc, 'data') and doc.data:
-                    content = doc.data.strip()
+                logger.info(f"Processing document: type={type(doc)}")
+
+                # Get actual file content using the fileId reference
+                try:
+                    file_data = self.service.getFileData(doc.fileId)
+                    if file_data:
+                        # Convert bytes to string
+                        if isinstance(file_data, bytes):
+                            content = file_data.decode('utf-8')
+                        else:
+                            content = str(file_data)
+                        logger.info(f" Retrieved content from file: {len(content)} characters")
+                    else:
+                        logger.warning(f" No file data found for fileId: {doc.fileId}")
+                except Exception as e:
+                    logger.error(f" Error retrieving file data: {str(e)}")
 
                 # Skip empty documents
                 if content:
                     validDocuments.append(doc)
                     allContent.append(f"Document: {doc.filename}\n{content}\n")
+                    logger.info(f" Added document to valid documents list")
+                else:
+                    logger.warning(f" Skipping document with no content")
 
             if not validDocuments:
                 # If no valid documents, create a simple report

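A side note on the retrieval logic added in the hunk above: file_data.decode('utf-8') raises UnicodeDecodeError on binary or non-UTF-8 files, so such documents land in the except branch and are then skipped as empty. A more forgiving variant, if lossy text is acceptable (a sketch, not part of the commit):

    content = file_data.decode('utf-8', errors='replace')  # undecodable bytes become U+FFFD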
@@ -354,10 +684,17 @@ class MethodDocument(MethodBase):
 
             # Add document content if available
             content = ""
-            if hasattr(doc, 'content') and doc.content:
-                content = doc.content
-            elif hasattr(doc, 'data') and doc.data:
-                content = doc.data
+            if hasattr(doc, 'fileId') and doc.fileId:
+                # ChatDocument is just a reference, so we need to get file data using fileId
+                try:
+                    file_data = self.service.getFileData(doc.fileId)
+                    if file_data:
+                        if isinstance(file_data, bytes):
+                            content = file_data.decode('utf-8')
+                        else:
+                            content = str(file_data)
+                except Exception as e:
+                    logger.warning(f"Could not retrieve content for document {doc.filename}: {str(e)}")
 
             if content:
                 html.append(f"<div style='white-space:pre-wrap; border:1px solid #ccc; padding:0.5em; margin-bottom:1em; background-color:#f9f9f9;'>{content}</div>")

@@ -26,13 +26,13 @@ class MethodOutlook(MethodBase):
         """Get Microsoft connection from connection reference"""
         try:
             userConnection = self.service.getUserConnectionFromConnectionReference(connectionReference)
-            if not userConnection or userConnection.authority != "msft" or userConnection.status != "active":
+            if not userConnection or userConnection.authority.value != "msft" or userConnection.status.value != "active":
                 return None
 
             # Get the corresponding token for this user and authority
-            token = self.service.interfaceApp.getToken(userConnection.authority)
+            token = self.service.interfaceApp.getToken(userConnection.authority.value)
             if not token:
-                logger.warning(f"No token found for user {userConnection.userId} and authority {userConnection.authority}")
+                logger.warning(f"No token found for user {userConnection.userId} and authority {userConnection.authority.value}")
                 return None
 
             return {
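The .value accessors added in this hunk suggest that authority and status changed from plain strings to Enum members (an AuthAuthority enum is imported elsewhere in this commit). Comparing an Enum member to a string is always False in Python, so the raw value has to be compared instead; a minimal illustration (the member name is illustrative):

    from enum import Enum

    class AuthAuthority(Enum):
        MSFT = "msft"

    AuthAuthority.MSFT == "msft"         # False - Enum member vs str
    AuthAuthority.MSFT.value == "msft"   # True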
@@ -80,24 +80,78 @@ class MethodOutlook(MethodBase):
                     error="No valid Microsoft connection found for the provided connection reference"
                 )
 
-            # Create email reading prompt
-            email_prompt = f"""
-            Simulate reading emails from Microsoft Outlook.
-
-            Connection: {connection['id']}
-            Folder: {folder}
-            Limit: {limit}
-            Filter: {filter or 'None'}
-
-            Please provide:
-            1. List of emails with subject, sender, date, and content
-            2. Summary of email statistics
-            3. Important or urgent emails highlighted
-            4. Email categorization if possible
-            """
-
-            # Use AI to simulate email reading
-            email_data = await self.service.interfaceAiCalls.callAiTextAdvanced(email_prompt)
+            # Read emails using Microsoft Graph API
+            try:
+                import requests
+
+                # Microsoft Graph API endpoint for messages
+                graph_url = "https://graph.microsoft.com/v1.0"
+                headers = {
+                    "Authorization": f"Bearer {connection['accessToken']}",
+                    "Content-Type": "application/json"
+                }
+
+                # Build the API request
+                api_url = f"{graph_url}/me/mailFolders/{folder}/messages"
+                params = {
+                    "$top": limit,
+                    "$orderby": "receivedDateTime desc"
+                }
+                if filter:
+                    params["$filter"] = filter
+
+                # Make the API call
+                response = requests.get(api_url, headers=headers, params=params)
+                response.raise_for_status()
+
+                emails_data = response.json()
+                email_data = {
+                    "emails": emails_data.get("value", []),
+                    "count": len(emails_data.get("value", [])),
+                    "folder": folder,
+                    "filter": filter,
+                    "apiResponse": emails_data
+                }
+
+                logger.info(f"Successfully retrieved {len(emails_data.get('value', []))} emails from {folder}")
+
+            except ImportError:
+                logger.error("requests module not available, falling back to simulation")
+                # Fallback to simulation if requests module is not available
+                email_prompt = f"""
+                Simulate reading emails from Microsoft Outlook.
+
+                Connection: {connection['id']}
+                Folder: {folder}
+                Limit: {limit}
+                Filter: {filter or 'None'}
+
+                Please provide:
+                1. List of emails with subject, sender, date, and content
+                2. Summary of email statistics
+                3. Important or urgent emails highlighted
+                4. Email categorization if possible
+                """
+                email_data = await self.service.interfaceAiCalls.callAiTextAdvanced(email_prompt)
+            except Exception as e:
+                logger.error(f"Error reading emails from Microsoft Graph API: {str(e)}")
+                # Fallback to simulation on API error
+                email_prompt = f"""
+                Simulate reading emails from Microsoft Outlook.
+
+                Connection: {connection['id']}
+                Folder: {folder}
+                Limit: {limit}
+                Filter: {filter or 'None'}
+
+                Please provide:
+                1. List of emails with subject, sender, date, and content
+                2. Summary of email statistics
+                3. Important or urgent emails highlighted
+                4. Email categorization if possible
+                """
+                email_data = await self.service.interfaceAiCalls.callAiTextAdvanced(email_prompt)
 
             # Create result data
             result_data = {
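Two practical notes on the Graph call above (observations, not part of the commit): requests.get is synchronous, so it blocks the event loop of this async method while the HTTP call is in flight, and /messages responses are paged, with further pages reachable via @odata.nextLink. A sketch of draining the pages under those assumptions:

    emails = []
    url = api_url
    while url:
        resp = requests.get(url, headers=headers, params=params)
        resp.raise_for_status()
        page = resp.json()
        emails.extend(page.get("value", []))
        url = page.get("@odata.nextLink")  # absolute URL, already carries the query
        params = None                      # only send params on the first request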
@@ -151,7 +205,7 @@ class MethodOutlook(MethodBase):
     @action
     async def sendEmail(self, parameters: Dict[str, Any]) -> ActionResult:
         """
-        Send email via Outlook
+        Create email draft in Outlook for sending out
 
         Parameters:
             connectionReference (str): Reference to the Microsoft connection

@@ -160,6 +214,7 @@ class MethodOutlook(MethodBase):
             body (str): Email body content
             cc (List[str], optional): CC recipients
             bcc (List[str], optional): BCC recipients
+            attachments (List[str], optional): List of document references to attach
             expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
         """
         try:

@@ -169,6 +224,7 @@ class MethodOutlook(MethodBase):
             body = parameters.get("body")
             cc = parameters.get("cc", [])
             bcc = parameters.get("bcc", [])
+            attachments = parameters.get("attachments", [])
             expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
 
             if not connectionReference or not to or not subject or not body:

@@ -187,26 +243,111 @@ class MethodOutlook(MethodBase):
                     error="No valid Microsoft connection found for the provided connection reference"
                 )
 
-            # Create email sending prompt
-            send_prompt = f"""
-            Simulate sending an email via Microsoft Outlook.
-
-            Connection: {connection['id']}
-            To: {to}
-            Subject: {subject}
-            Body: {body}
-            CC: {cc}
-            BCC: {bcc}
-
-            Please provide:
-            1. Email composition details
-            2. Validation of email addresses
-            3. Email formatting and structure
-            4. Delivery confirmation simulation
-            """
-
-            # Use AI to simulate email sending
-            send_result = await self.service.interfaceAiCalls.callAiTextAdvanced(send_prompt)
+            # Create email draft using Microsoft Graph API
+            try:
+                import requests
+
+                # Microsoft Graph API endpoint for creating draft messages
+                graph_url = "https://graph.microsoft.com/v1.0"
+                headers = {
+                    "Authorization": f"Bearer {connection['accessToken']}",
+                    "Content-Type": "application/json"
+                }
+
+                # Build the email message
+                message = {
+                    "subject": subject,
+                    "body": {
+                        "contentType": "HTML",
+                        "content": body
+                    },
+                    "toRecipients": [{"emailAddress": {"address": email}} for email in to],
+                    "ccRecipients": [{"emailAddress": {"address": email}} for email in cc] if cc else [],
+                    "bccRecipients": [{"emailAddress": {"address": email}} for email in bcc] if bcc else []
+                }
+
+                # Add attachments if provided
+                if attachments:
+                    message["attachments"] = []
+                    for attachment_ref in attachments:
+                        # Get attachment document from service center
+                        attachment_docs = self.service.getChatDocumentsFromDocumentList([attachment_ref])
+                        if attachment_docs:
+                            for doc in attachment_docs:
+                                # Create attachment object for Graph API
+                                attachment = {
+                                    "@odata.type": "#microsoft.graph.fileAttachment",
+                                    "name": doc.filename,
+                                    "contentType": doc.mimeType,
+                                    "contentBytes": doc.data if hasattr(doc, 'data') else ""
+                                }
+                                message["attachments"].append(attachment)
+
+                # Create the draft message
+                api_url = f"{graph_url}/me/messages"
+                response = requests.post(api_url, headers=headers, json=message)
+                response.raise_for_status()
+
+                draft_data = response.json()
+                draft_result = {
+                    "status": "draft_created",
+                    "messageId": draft_data.get("id", "unknown"),
+                    "draftId": draft_data.get("id", "unknown"),
+                    "recipients": to,
+                    "cc": cc,
+                    "bcc": bcc,
+                    "attachments": len(attachments) if attachments else 0,
+                    "draftLocation": "Drafts folder",
+                    "apiResponse": response.status_code,
+                    "draftData": draft_data
+                }
+
+                logger.info(f"Successfully created email draft for {len(to)} recipients with {len(attachments) if attachments else 0} attachments")
+
+            except ImportError:
+                logger.error("requests module not available, falling back to simulation")
+                # Fallback to simulation if requests module is not available
+                send_prompt = f"""
+                Simulate creating an email draft in Microsoft Outlook.
+
+                Connection: {connection['id']}
+                To: {to}
+                Subject: {subject}
+                Body: {body}
+                CC: {cc}
+                BCC: {bcc}
+                Attachments: {attachments if attachments else 'None'}
+
+                Please provide:
+                1. Email composition details
+                2. Validation of email addresses
+                3. Email formatting and structure
+                4. Attachment processing and validation
+                5. Draft creation confirmation
+                """
+                draft_result = await self.service.interfaceAiCalls.callAiTextAdvanced(send_prompt)
+            except Exception as e:
+                logger.error(f"Error creating email draft via Microsoft Graph API: {str(e)}")
+                # Fallback to simulation on API error
+                send_prompt = f"""
+                Simulate creating an email draft in Microsoft Outlook.
+
+                Connection: {connection['id']}
+                To: {to}
+                Subject: {subject}
+                Body: {body}
+                CC: {cc}
+                BCC: {bcc}
+                Attachments: {attachments if attachments else 'None'}
+
+                Please provide:
+                1. Email composition details
+                2. Validation of email addresses
+                3. Email formatting and structure
+                4. Attachment processing and validation
+                5. Draft creation confirmation
+                """
+                draft_result = await self.service.interfaceAiCalls.callAiTextAdvanced(send_prompt)
 
             # Create result data
             result_data = {

@@ -216,7 +357,8 @@ class MethodOutlook(MethodBase):
                 "body": body,
                 "cc": cc,
                 "bcc": bcc,
-                "sendResult": send_result,
+                "attachments": attachments,
+                "draftResult": draft_result,
                 "connection": {
                     "id": connection["id"],
                     "authority": "microsoft",

@@ -243,7 +385,7 @@ class MethodOutlook(MethodBase):
                 data={
                     "documents": [
                         {
-                            "documentName": f"outlook_email_sent_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
+                            "documentName": f"outlook_email_draft_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
                             "documentData": result_data,
                             "mimeType": output_mime_type
                         }

@@ -252,7 +394,7 @@ class MethodOutlook(MethodBase):
             )
 
         except Exception as e:
-            logger.error(f"Error sending email: {str(e)}")
+            logger.error(f"Error creating email draft: {str(e)}")
             return self._createResult(
                 success=False,
                 data={},
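One caveat on the attachment handling above: the Graph fileAttachment resource expects contentBytes to be base64-encoded, while doc.data is passed through as-is. If doc.data holds raw bytes, encoding it first would look like this (a sketch, assuming doc.data is bytes):

    import base64

    attachment["contentBytes"] = base64.b64encode(doc.data).decode("ascii")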
@@ -294,24 +436,81 @@ class MethodOutlook(MethodBase):
                     error="No valid Microsoft connection found for the provided connection reference"
                 )
 
-            # Create email search prompt
-            search_prompt = f"""
-            Simulate searching emails in Microsoft Outlook.
-
-            Connection: {connection['id']}
-            Query: {query}
-            Folder: {folder}
-            Limit: {limit}
-
-            Please provide:
-            1. Search results with relevant emails
-            2. Search statistics and relevance scores
-            3. Email previews and key information
-            4. Search suggestions and refinements
-            """
-
-            # Use AI to simulate email search
-            search_result = await self.service.interfaceAiCalls.callAiTextAdvanced(search_prompt)
+            # Search emails using Microsoft Graph API
+            try:
+                import requests
+
+                # Microsoft Graph API endpoint for searching messages
+                graph_url = "https://graph.microsoft.com/v1.0"
+                headers = {
+                    "Authorization": f"Bearer {connection['accessToken']}",
+                    "Content-Type": "application/json"
+                }
+
+                # Build the search API request
+                api_url = f"{graph_url}/me/messages"
+                params = {
+                    "$top": limit,
+                    "$orderby": "receivedDateTime desc",
+                    "$search": f'"{query}"'
+                }
+
+                # Add folder filter if specified
+                if folder and folder.lower() != "all":
+                    params["$filter"] = f"parentFolderId eq '{folder}'"
+
+                # Make the API call
+                response = requests.get(api_url, headers=headers, params=params)
+                response.raise_for_status()
+
+                search_data = response.json()
+                search_result = {
+                    "query": query,
+                    "results": search_data.get("value", []),
+                    "count": len(search_data.get("value", [])),
+                    "folder": folder,
+                    "limit": limit,
+                    "apiResponse": search_data
+                }
+
+                logger.info(f"Successfully searched emails with query '{query}', found {len(search_data.get('value', []))} results")
+
+            except ImportError:
+                logger.error("requests module not available, falling back to simulation")
+                # Fallback to simulation if requests module is not available
+                search_prompt = f"""
+                Simulate searching emails in Microsoft Outlook.
+
+                Connection: {connection['id']}
+                Query: {query}
+                Folder: {folder}
+                Limit: {limit}
+
+                Please provide:
+                1. Search results with relevant emails
+                2. Search statistics and relevance scores
+                3. Email previews and key information
+                4. Search suggestions and refinements
+                """
+                search_result = await self.service.interfaceAiCalls.callAiTextAdvanced(search_prompt)
+            except Exception as e:
+                logger.error(f"Error searching emails via Microsoft Graph API: {str(e)}")
+                # Fallback to simulation on API error
+                search_prompt = f"""
+                Simulate searching emails in Microsoft Outlook.
+
+                Connection: {connection['id']}
+                Query: {query}
+                Folder: {folder}
+                Limit: {limit}
+
+                Please provide:
+                1. Search results with relevant emails
+                2. Search statistics and relevance scores
+                3. Email previews and key information
+                4. Search suggestions and refinements
+                """
+                search_result = await self.service.interfaceAiCalls.callAiTextAdvanced(search_prompt)
 
             # Create result data
             result_data = {
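A note on the search parameters built above: per the Microsoft Graph documentation, $search on messages cannot be combined with $filter or $orderby (results come back ranked by relevance), so the request as constructed is likely to be rejected with a 400. A variant that respects that constraint (a sketch):

    params = {"$top": limit, "$search": f'"{query}"'}
    # scope to a folder via the path instead of $filter:
    # api_url = f"{graph_url}/me/mailFolders/{folder}/messages"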
@@ -32,9 +32,9 @@ class MethodSharepoint(MethodBase):
                 return None
 
             # Get the corresponding token for this user and authority
-            token = self.service.interfaceApp.getToken(userConnection.authority)
+            token = self.service.interfaceApp.getToken(userConnection.authority.value)
             if not token:
-                logger.warning(f"No token found for user {userConnection.userId} and authority {userConnection.authority}")
+                logger.warning(f"No token found for user {userConnection.userId} and authority {userConnection.authority.value}")
                 return None
 
             return {
@@ -8,7 +8,8 @@ from modules.interfaces.interfaceAppObjects import User
 
 from modules.interfaces.interfaceChatModel import (UserInputRequest, ChatMessage, ChatWorkflow, TaskItem, TaskStatus)
 from modules.interfaces.interfaceChatObjects import ChatObjects
-from modules.chat.managerChat import ChatManager, WorkflowStoppedException
+from modules.chat.managerChat import ChatManager
+from modules.chat.handling.handlingTasks import WorkflowStoppedException
 from modules.interfaces.interfaceChatModel import WorkflowResult
 
 logger = logging.getLogger(__name__)

@@ -52,6 +53,21 @@ class WorkflowManager:
                 "lastActivity": workflow.lastActivity
             })
 
+            # Create final stopped message
+            stopped_message = {
+                "workflowId": workflow.id,
+                "role": "assistant",
+                "message": "🛑 Workflow stopped by user",
+                "status": "last",
+                "sequenceNr": len(workflow.messages) + 1,
+                "publishedAt": datetime.now(UTC).isoformat(),
+                "documentsLabel": "workflow_stopped",
+                "documents": []
+            }
+            message = self.chatInterface.createWorkflowMessage(stopped_message)
+            if message:
+                workflow.messages.append(message)
+
             # Add log entry
             self.chatInterface.createWorkflowLog({
                 "workflowId": workflow.id,

@@ -99,6 +115,8 @@ class WorkflowManager:
     async def _sendFirstMessage(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> ChatMessage:
         """Send first message to start workflow"""
         try:
+            self.chatManager.handlingTasks._checkWorkflowStopped()
+
             # Create initial message using interface
             messageData = {
                 "workflowId": workflow.id,

@@ -130,6 +148,8 @@ class WorkflowManager:
     async def _generateWorkflowFeedback(self, workflow: ChatWorkflow) -> str:
         """Generate feedback message for workflow completion"""
         try:
+            self.chatManager.handlingTasks._checkWorkflowStopped()
+
             # Count messages by role
             user_messages = [msg for msg in workflow.messages if msg.role == 'user']
             assistant_messages = [msg for msg in workflow.messages if msg.role == 'assistant']

@@ -155,9 +175,13 @@ class WorkflowManager:
     async def _sendLastMessage(self, workflow: ChatWorkflow) -> None:
         """Send last message to complete workflow"""
         try:
+            self.chatManager.handlingTasks._checkWorkflowStopped()
+
             # Generate feedback
             feedback = await self._generateWorkflowFeedback(workflow)
 
+            self.chatManager.handlingTasks._checkWorkflowStopped()
+
             # Create last message using interface
             messageData = {
                 "workflowId": workflow.id,

@@ -199,7 +223,60 @@ class WorkflowManager:
     async def _processWorkflowResults(self, workflow: ChatWorkflow, workflow_result: WorkflowResult, initial_message: ChatMessage) -> None:
         """Process workflow results and create appropriate messages"""
         try:
-            if workflow_result.status == 'failed':
+            try:
+                self.chatManager.handlingTasks._checkWorkflowStopped()
+            except WorkflowStoppedException:
+                logger.info(f"Workflow {workflow.id} was stopped during result processing")
+
+                # Create final stopped message
+                stopped_message = {
+                    "workflowId": workflow.id,
+                    "role": "assistant",
+                    "message": "🛑 Workflow stopped by user",
+                    "status": "last",
+                    "sequenceNr": len(workflow.messages) + 1,
+                    "publishedAt": datetime.now(UTC).isoformat(),
+                    "documentsLabel": "workflow_stopped",
+                    "documents": []
+                }
+                message = self.chatInterface.createWorkflowMessage(stopped_message)
+                if message:
+                    workflow.messages.append(message)
+
+                # Update workflow status to stopped
+                workflow.status = "stopped"
+                workflow.lastActivity = datetime.now(UTC).isoformat()
+                self.chatInterface.updateWorkflow(workflow.id, {
+                    "status": "stopped",
+                    "lastActivity": workflow.lastActivity
+                })
+                return
+
+            if workflow_result.status == 'stopped':
+                # Create stopped message
+                stopped_message = {
+                    "workflowId": workflow.id,
+                    "role": "assistant",
+                    "message": "🛑 Workflow stopped by user",
+                    "status": "last",
+                    "sequenceNr": len(workflow.messages) + 1,
+                    "publishedAt": datetime.now(UTC).isoformat(),
+                    "documentsLabel": "workflow_stopped",
+                    "documents": []
+                }
+                message = self.chatInterface.createWorkflowMessage(stopped_message)
+                if message:
+                    workflow.messages.append(message)
+
+                # Update workflow status to stopped
+                workflow.status = "stopped"
+                workflow.lastActivity = datetime.now(UTC).isoformat()
+                self.chatInterface.updateWorkflow(workflow.id, {
+                    "status": "stopped",
+                    "lastActivity": workflow.lastActivity
+                })
+                return
+            elif workflow_result.status == 'failed':
                 # Create error message
                 error_message = {
                     "workflowId": workflow.id,
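The _checkWorkflowStopped() calls threaded through these methods implement cooperative cancellation: each step re-checks a stop flag and raises WorkflowStoppedException to unwind. A minimal sketch of such a checker (the real one lives in modules.chat.handling.handlingTasks and is not shown in this diff, so the body is assumed):

    def _checkWorkflowStopped(self) -> None:
        # Raise if the user has flagged this workflow as stopped.
        if self.workflow.status == "stopped":
            raise WorkflowStoppedException(f"workflow {self.workflow.id} stopped")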
@@ -25,6 +25,7 @@ PyMuPDF>=1.23.7 # Instead of the imprecise 'fitz'
 PyPDF2==3.0.1
 python-docx>=0.8.11 # For Word documents
 openpyxl>=3.1.2 # For Excel files
+python-pptx>=0.6.21 # For PowerPoint files
 
 ## Data Processing & Analysis
 numpy==1.26.3 # Version compatible with pandas and matplotlib

@@ -52,3 +53,14 @@ sortedcontainers>=2.4.0 # Required by trio
 
 ## MSFT Integration
 msal==1.24.1
+
+# Enhanced Office document processing
+python-docx>=0.8.11
+openpyxl>=3.0.9
+python-pptx>=0.6.21
+xlrd>=2.0.1 # For legacy .xls files
+Pillow>=9.0.0 # For image processing
+PyPDF2>=3.0.0
+PyMuPDF>=1.20.0
+beautifulsoup4>=4.11.0
+chardet>=4.0.0 # For encoding detection
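Heads-up on the block just added: python-docx, openpyxl, python-pptx, PyPDF2 and PyMuPDF are already pinned earlier in the file, and pip rejects conflicting duplicates (e.g. PyPDF2==3.0.1 vs PyPDF2>=3.0.0) with a "Double requirement given" error. A deduplicated version of the addition would keep only the genuinely new packages:

    # Enhanced Office document processing
    xlrd>=2.0.1            # For legacy .xls files
    Pillow>=9.0.0          # For image processing
    beautifulsoup4>=4.11.0
    chardet>=4.0.0         # For encoding detection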
test_documentExtraction.py — new file, 855 lines

@@ -0,0 +1,855 @@
+#!/usr/bin/env python3
+"""
+Test script for DocumentExtraction class.
+Processes all files in d:/temp folder and stores extracted content in d:/temp/extracted.
+
+Features:
+- Option to extract content WITH AI processing (default)
+- Option to extract content WITHOUT AI processing (content-only mode)
+- Supports all document types: text, images, PDFs, Office documents, etc.
+- Detailed logging and progress tracking
+- Separate output directories for AI vs content-only modes
+
+Usage:
+- Interactive mode: python test_documentExtraction.py
+- Content-only mode: python test_documentExtraction.py --no-ai
+- Content-only mode: python test_documentExtraction.py --content-only
+- Specify custom input/output: python test_documentExtraction.py --input-dir /path/to/input --output-dir /path/to/output --no-ai
+"""
+
+import os
+import asyncio
+import logging
+import sys
+import argparse
+from pathlib import Path
+from typing import List, Optional
+from datetime import datetime, UTC
+
+# Configure logging
+logging.basicConfig(
+    level=logging.DEBUG,  # Changed from INFO to DEBUG
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Filter out specific unwanted log messages
+class LogFilter(logging.Filter):
+    """Filter to hide specific unwanted log messages."""
+
+    def filter(self, record):
+        # Hide workflow stats update errors
+        if "Workflow" in record.getMessage() and "not found for stats update" in record.getMessage():
+            return False
+
+        # Hide HTTP request info messages
+        if "HTTP Request:" in record.getMessage() and "POST https://api.openai.com" in record.getMessage():
+            return False
+
+        # Hide HTTP response info messages
+        if "HTTP/1.1 200 OK" in record.getMessage():
+            return False
+
+        return True
+
+# Apply the filter to the root logger
+root_logger = logging.getLogger()
+root_logger.addFilter(LogFilter())
+
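One subtlety with the filter hookup above: filters attached to a logger apply only to records created on that logger, not to records propagating up from other loggers (logging filters are not inherited). Since the HTTP noise comes from third-party loggers, attaching the filter to the root handlers is usually what is needed (a sketch of the likely intent):

    for handler in logging.getLogger().handlers:
        handler.addFilter(LogFilter())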
+def check_dependencies():
+    """Check if required dependencies are available and provide installation instructions."""
+    missing_deps = []
+
+    # Check for required dependencies
+    try:
+        import bs4
+        logger.info("✓ beautifulsoup4 is available")
+    except ImportError:
+        missing_deps.append("beautifulsoup4")
+        logger.error("✗ beautifulsoup4 is missing")
+
+    try:
+        import PyPDF2
+        logger.info("✓ PyPDF2 is available")
+    except ImportError:
+        missing_deps.append("PyPDF2")
+        logger.error("✗ PyPDF2 is missing")
+
+    try:
+        import fitz
+        logger.info("✓ PyMuPDF (fitz) is available")
+    except ImportError:
+        missing_deps.append("PyMuPDF")
+        logger.error("✗ PyMuPDF (fitz) is missing")
+
+    try:
+        import docx
+        logger.info("✓ python-docx is available")
+    except ImportError:
+        missing_deps.append("python-docx")
+        logger.error("✗ python-docx is missing")
+
+    try:
+        import openpyxl
+        logger.info("✓ openpyxl is available")
+    except ImportError:
+        missing_deps.append("openpyxl")
+        logger.error("✗ openpyxl is missing")
+
+    try:
+        import pptx
+        logger.info("✓ python-pptx is available")
+    except ImportError:
+        missing_deps.append("python-pptx")
+        logger.error("✗ python-pptx is missing")
+
+    try:
+        from PIL import Image
+        logger.info("✓ Pillow (PIL) is available")
+    except ImportError:
+        missing_deps.append("Pillow")
+        logger.error("✗ Pillow (PIL) is missing")
+
+    if missing_deps:
+        logger.error("\n" + "="*60)
+        logger.error("MISSING DEPENDENCIES DETECTED!")
+        logger.error("="*60)
+        logger.error("The following packages are required but not installed:")
+        for dep in missing_deps:
+            logger.error(f"  - {dep}")
+        logger.error("\nTo install all dependencies, run:")
+        logger.error("pip install -r requirements.txt")
+        logger.error("\nOr install individual packages:")
+        for dep in missing_deps:
+            logger.error(f"  pip install {dep}")
+        logger.error("="*60)
+        return False
+
+    logger.info("✓ All required dependencies are available!")
+    return True
+
+def check_module_imports():
+    """Check if we can import the required modules."""
+    try:
+        # Add the gateway directory to the path so we can import our modules
+        sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..'))
+
+        from modules.chat.documents.documentExtraction import DocumentExtraction
+        from modules.chat.serviceCenter import ServiceCenter
+        from modules.interfaces.interfaceAppModel import User, UserConnection
+        from modules.interfaces.interfaceChatModel import ChatWorkflow, TaskItem
+
+        logger.info("✓ All required modules imported successfully")
+        return True
+    except ImportError as e:
+        logger.error(f"✗ Failed to import required modules: {e}")
+        logger.error("Make sure you're running this script from the gateway directory")
+        return False
+    except Exception as e:
+        logger.error(f"✗ Unexpected error importing modules: {e}")
+        return False
+
+def create_mock_service_center():
+    """Create a proper ServiceCenter for testing purposes with all required fields."""
+    try:
+        from modules.chat.serviceCenter import ServiceCenter
+        from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
+        from modules.interfaces.interfaceChatModel import ChatWorkflow, TaskItem, TaskStatus
+        from modules.interfaces.interfaceChatModel import ChatLog, ChatMessage, ChatStat
+
+        # Create proper user with all required fields
+        mock_user = User(
+            id="test_user_001",
+            username="testuser",
+            email="test@example.com",
+            fullName="Test User",
+            language="en",
+            enabled=True,
+            privilege=UserPrivilege.USER,
+            authenticationAuthority=AuthAuthority.LOCAL,
+            mandateId="test_mandate_001"
+        )
+
+        # Create proper workflow with all required fields
+        current_time = datetime.now(UTC).isoformat()
+        mock_workflow = ChatWorkflow(
+            id="test_workflow_001",
+            mandateId="test_mandate_001",
+            status="active",
+            name="Test Document Extraction Workflow",
+            currentRound=1,
+            lastActivity=current_time,
+            startedAt=current_time,
+            logs=[],
+            messages=[],
+            stats=None,
+            tasks=[]
+        )
+
+        # Create service center
+        service_center = ServiceCenter(mock_user, mock_workflow)
+        logger.info("✓ ServiceCenter created successfully with proper objects")
+        return service_center
+
+    except Exception as e:
+        logger.error(f"✗ Failed to create ServiceCenter: {e}")
+        return None
+
+class DocumentExtractionTester:
+    """Test class for DocumentExtraction functionality."""
+
+    def __init__(self, input_dir: str = "d:/temp/test-extraction", output_dir: str = None, enable_ai: bool = True):
+        """
+        Initialize the tester.
+
+        Args:
+            input_dir: Directory containing files to process
+            output_dir: Directory to store extracted content (auto-generated if None)
+            enable_ai: Whether to enable AI processing (default: True)
+        """
+        self.input_dir = Path(input_dir)
+
+        # Auto-generate output directory if not specified
+        if output_dir is None:
+            if enable_ai:
+                self.output_dir = Path(input_dir) / "extracted"
+            else:
+                self.output_dir = Path(input_dir) / "extracted-raw"
+        else:
+            self.output_dir = Path(output_dir)
+
+        self.extractor = None
+        self.service_center = None
+        self.enable_ai = enable_ai
+
+        if enable_ai:
+            self.prompt = "Make a summary of each sentence for each page or chapter of the document"
+        else:
+            self.prompt = None  # No prompt needed for content-only extraction
+
+        # Track processing results for summary
+        self.processing_results = []
+
+        # Ensure output directory exists
+        logger.info(f"Creating output directory: {self.output_dir}")
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Verify directory was created
+        if self.output_dir.exists():
+            logger.info(f"✓ Output directory created/verified: {self.output_dir}")
+            logger.info(f"Output directory absolute path: {self.output_dir.absolute()}")
+        else:
+            logger.error(f"✗ Failed to create output directory: {self.output_dir}")
+
+        # Log configuration
+        logger.info(f"Configuration: AI processing = {'ENABLED' if self.enable_ai else 'DISABLED'}")
+        logger.info(f"Input directory: {self.input_dir}")
+        logger.info(f"Output directory: {self.output_dir}")
+
+        # Test basic file writing capability
+        test_file = self.output_dir / "test_write_capability.txt"
+        try:
+            logger.info(f"Testing file write capability to: {test_file}")
+            logger.info(f"Absolute path: {test_file.absolute()}")
+
+            with open(test_file, 'w', encoding='utf-8') as f:
+                f.write("Test file to verify write capability")
+
+            if test_file.exists():
+                actual_size = test_file.stat().st_size
+                logger.info(f"✓ Basic file writing test passed: {test_file} (size: {actual_size} bytes)")
+
+                # Test reading the file back
+                with open(test_file, 'r', encoding='utf-8') as f:
+                    content = f.read()
+                logger.info(f"✓ File read test passed: content length = {len(content)}")
+
+                # Clean up test file
+                test_file.unlink()
+                logger.info("✓ Test file cleaned up")
+            else:
+                logger.error(f"✗ Basic file writing test failed: {test_file}")
+        except Exception as e:
+            logger.error(f"✗ Basic file writing test failed with error: {e}")
+            import traceback
+            traceback.print_exc()
+
+        # Supported file extensions for content extraction
+        self.supported_extensions = {
+            # Text and data files
+            '.txt', '.csv', '.json', '.xml', '.html', '.htm', '.svg',
+            '.md', '.markdown', '.rst', '.log', '.ini', '.cfg', '.conf',
+
+            # Programming languages
+            '.js', '.ts', '.jsx', '.tsx', '.py', '.java', '.c', '.cpp', '.cc', '.cxx',
+            '.h', '.hpp', '.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.scala',
+            '.r', '.m', '.pl', '.sh', '.bash', '.zsh', '.fish', '.ps1', '.bat', '.cmd',
+            '.vbs', '.lua', '.sql', '.dart', '.elm', '.clj', '.hs', '.fs', '.ml',
+
+            # Web technologies
+            '.css', '.scss', '.sass', '.less', '.vue', '.svelte', '.astro',
+
+            # Configuration and build files
+            '.yaml', '.yml', '.toml', '.env', '.gitignore', '.dockerfile', '.dockerignore',
+            '.makefile', '.cmake', '.gradle', '.maven', '.pom', '.sln', '.vcxproj',
+            '.csproj', '.fsproj', '.vbproj', '.xcodeproj', '.pbxproj',
+
+            # Documentation and markup
+            '.tex', '.bib', '.adoc', '.asciidoc', '.wiki', '.creole',
+
+            # Images
+            '.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff', '.ico',
+
+            # Documents
+            '.pdf', '.docx', '.xlsx', '.pptx', '.odt', '.ods', '.odp',
+
+            # Legacy Office formats
+            '.doc', '.xls', '.ppt',
+
+            # Archives and binaries
+            '.zip', '.tar', '.gz', '.7z', '.rar', '.exe', '.dll', '.so', '.dylib'
+        }
+
+    def initialize_extractor(self):
+        """Initialize the DocumentExtraction instance with a proper ServiceCenter."""
+        try:
+            # First create the service center
+            self.service_center = create_mock_service_center()
+            if not self.service_center:
+                logger.error("Failed to create ServiceCenter!")
+                return False
+
+            # Now create DocumentExtraction with the service center
+            from modules.chat.documents.documentExtraction import DocumentExtraction
+            self.extractor = DocumentExtraction(self.service_center)
+            logger.info("✓ DocumentExtraction initialized successfully with ServiceCenter")
+            return True
+        except Exception as e:
+            logger.error(f"✗ Failed to initialize DocumentExtraction: {e}")
+            return False
+
+    def get_files_to_process(self) -> List[Path]:
+        """Get list of files to process from input directory."""
+        if not self.input_dir.exists():
+            logger.error(f"Input directory {self.input_dir} does not exist!")
+            logger.info("Creating input directory and adding a test file...")
+            self.input_dir.mkdir(parents=True, exist_ok=True)
+
+            # Create a test file if none exist
+            test_file = self.input_dir / "test.txt"
+            with open(test_file, 'w') as f:
+                f.write("This is a test file for document extraction.\nIt contains multiple lines.\nAnd some special characters: äöüß")
+            logger.info(f"Created test file: {test_file}")
+
+        files = []
+        all_files = list(self.input_dir.iterdir())
+        logger.info(f"All files in directory: {[f.name for f in all_files]}")
+
+        for file_path in all_files:
+            if file_path.is_file():
+                logger.debug(f"Checking file: {file_path.name} (extension: {file_path.suffix})")
+                if file_path.suffix.lower() in self.supported_extensions:
+                    files.append(file_path)
+                    logger.debug(f"Added file: {file_path.name}")
+                else:
+                    logger.debug(f"Skipped file: {file_path.name} (unsupported extension)")
+
+        logger.info(f"Found {len(files)} supported files to process")
+        if files:
+            logger.info(f"Files to process: {[f.name for f in files]}")
+        return files
+
+    async def process_single_file(self, file_path: Path) -> bool:
+        """
+        Process a single file and extract its content.
+
+        Args:
+            file_path: Path to the file to process
+
+        Returns:
+            True if successful, False otherwise
+        """
+        if not self.extractor:
+            logger.error("DocumentExtraction not initialized!")
+            return False
+
+        try:
+            logger.info(f"Processing file: {file_path.name}")
+
+            # Read file data
+            with open(file_path, 'rb') as f:
+                file_data = f.read()
+
+            logger.debug(f"File size: {len(file_data)} bytes")
+
+            # Determine MIME type based on extension
+            mime_type = self._get_mime_type(file_path.suffix)
+            logger.debug(f"MIME type: {mime_type}")
+
+            # Process the file with or without AI based on configuration
+            extracted_content = await self.extractor.processFileData(
+                fileData=file_data,
+                filename=file_path.name,
+                mimeType=mime_type,
+                base64Encoded=False,
+                prompt=self.prompt,
+                enableAI=self.enable_ai
+            )
+
+            logger.debug(f"Extracted {len(extracted_content.contents)} content items")
+
+            # Debug: Show content details
+            for i, content_item in enumerate(extracted_content.contents):
+                logger.debug(f"Content item {i+1}: label='{content_item.label}', has_data={content_item.data is not None}, data_length={len(content_item.data) if content_item.data else 0}")
+
+            # Special logging for JavaScript files
+            if mime_type == "application/javascript":
+                logger.debug(f"JavaScript file detected: {file_path.name}")
+                logger.debug(f"Original file size: {len(file_data)} bytes")
+                for i, content_item in enumerate(extracted_content.contents):
+                    if content_item.data:
+                        content_size = len(content_item.data.encode('utf-8'))
+                        logger.debug(f"JavaScript content item {i+1}: {content_size} bytes")
+                        # Check if content was truncated
+                        if content_size < len(file_data) * 0.9:  # If less than 90% of original
+                            logger.warning(f"JavaScript content may be truncated: {content_size} bytes vs {len(file_data)} bytes original")
+
+            # Track processing result
+            result = {
+                'filename': file_path.name,
+                'status': 'OK',
+                'content_items': 0,
+                'output_files': [],
+                'total_content_size': 0
+            }
+
+            # Save each content item as a separate file
+            if extracted_content.contents:
+                for i, content_item in enumerate(extracted_content.contents):
+                    if content_item.data:
+                        content_size = len(content_item.data.encode('utf-8'))
+                        result['total_content_size'] += content_size
+                        logger.debug(f"Content item {i+1}: {content_item.label}, size: {content_size} bytes")
+
+                        # Generate filename with new naming convention
+                        if len(extracted_content.contents) == 1:
+                            # Single content item
+                            output_filename = f"{file_path.stem} - {content_item.label} 1.txt"
+                        else:
+                            # Multiple content items - add sequence number
+                            output_filename = f"{file_path.stem} - {content_item.label} {i+1}.txt"
+
+                        output_file = self.output_dir / output_filename
+
+                        # Write only the raw extracted content
+                        logger.debug(f"Attempting to write to: {output_file}")
+                        try:
+                            with open(output_file, 'w', encoding='utf-8') as f:
+                                f.write(content_item.data)
+
+                            # Verify file was created
+                            if output_file.exists():
+                                actual_size = output_file.stat().st_size
+                                logger.info(f"✓ File created successfully: {output_filename} (expected: {content_size} bytes, actual: {actual_size} bytes)")
+                            else:
+                                logger.error(f"✗ File was not created: {output_file}")
+
+                            result['output_files'].append(output_filename)
+                            result['content_items'] += 1
+                        except Exception as write_error:
+                            logger.error(f"✗ Error writing file {output_filename}: {write_error}")
+                            import traceback
+                            traceback.print_exc()
+                    else:
+                        logger.warning(f"Content item {i+1} has no data, skipping")
+            else:
+                logger.warning(f"No content extracted from {file_path.name}")
+                result['status'] = 'FAIL'
+                result['error'] = 'No content extracted'
+
+            # Add result to tracking list
+            self.processing_results.append(result)
+
+            logger.info(f"Successfully processed {file_path.name} - Total content: {result['total_content_size']} bytes")
+            return True
+
+        except Exception as e:
+            error_msg = str(e)
+            logger.error(f"Error processing {file_path.name}: {error_msg}")
+
+            # Track failed result
+            result = {
+                'filename': file_path.name,
+                'status': 'FAIL',
+                'content_items': 0,
+                'output_files': [],
+                'error': error_msg,
+                'total_content_size': 0
+            }
+            self.processing_results.append(result)
+
+            return False
+
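The static extension table below could also be backed by the standard library for anything it misses; a fallback sketch using the stdlib mimetypes module (not part of this commit):

    import mimetypes

    def guess_mime(filename: str) -> str:
        mime, _ = mimetypes.guess_type(filename)
        return mime or "application/octet-stream"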

    def _get_mime_type(self, extension: str) -> str:
        """Get MIME type based on file extension."""
        mime_types = {
            # Text and data files
            '.txt': 'text/plain',
            '.csv': 'text/csv',
            '.json': 'application/json',
            '.xml': 'application/xml',
            '.html': 'text/html',
            '.htm': 'text/html',
            '.svg': 'image/svg+xml',
            '.md': 'text/markdown',
            '.markdown': 'text/markdown',
            '.rst': 'text/x-rst',
            '.log': 'text/plain',
            '.ini': 'text/plain',
            '.cfg': 'text/plain',
            '.conf': 'text/plain',

            # Programming languages
            '.js': 'application/javascript',
            '.ts': 'application/typescript',
            '.jsx': 'text/jsx',
            '.tsx': 'text/tsx',
            '.py': 'text/x-python',
            '.java': 'text/x-java-source',
            '.c': 'text/x-c',
            '.cpp': 'text/x-c++src',
            '.cc': 'text/x-c++src',
            '.cxx': 'text/x-c++src',
            '.h': 'text/x-c',
            '.hpp': 'text/x-c++hdr',
            '.cs': 'text/x-csharp',
            '.php': 'application/x-httpd-php',
            '.rb': 'text/x-ruby',
            '.go': 'text/x-go',
            '.rs': 'text/x-rust',
            '.swift': 'text/x-swift',
            '.kt': 'text/x-kotlin',
            '.scala': 'text/x-scala',
            '.r': 'text/x-r',
            '.m': 'text/x-matlab',
            '.pl': 'text/x-perl',
            '.sh': 'application/x-sh',
            '.bash': 'application/x-sh',
            '.zsh': 'application/x-sh',
            '.fish': 'application/x-sh',
            '.ps1': 'application/x-powershell',
            '.bat': 'application/x-msdos-program',
            '.cmd': 'application/x-msdos-program',
            '.vbs': 'text/vbscript',
            '.lua': 'text/x-lua',
            '.sql': 'application/sql',
            '.dart': 'application/dart',
            '.elm': 'text/x-elm',
            '.clj': 'text/x-clojure',
            '.hs': 'text/x-haskell',
            '.fs': 'text/x-fsharp',
            '.ml': 'text/x-ocaml',

            # Web technologies
            '.css': 'text/css',
            '.scss': 'text/x-scss',
            '.sass': 'text/x-sass',
            '.less': 'text/x-less',
            '.vue': 'text/x-vue',
            '.svelte': 'text/x-svelte',
            '.astro': 'text/x-astro',

            # Configuration and build files
            '.yaml': 'application/x-yaml',
            '.yml': 'application/x-yaml',
            '.toml': 'application/toml',
            '.env': 'text/plain',
            '.gitignore': 'text/plain',
            '.dockerfile': 'text/x-dockerfile',
            '.dockerignore': 'text/plain',
            '.makefile': 'text/x-makefile',
            '.cmake': 'text/x-cmake',
            '.gradle': 'text/x-gradle',
            '.maven': 'text/x-maven',
            '.pom': 'application/xml',
            '.sln': 'text/plain',
            '.vcxproj': 'application/xml',
            '.csproj': 'application/xml',
            '.fsproj': 'application/xml',
            '.vbproj': 'application/xml',
            '.xcodeproj': 'text/plain',
            '.pbxproj': 'text/plain',

            # Documentation and markup
            '.tex': 'application/x-tex',
            '.bib': 'text/x-bibtex',
            '.adoc': 'text/asciidoc',
            '.asciidoc': 'text/asciidoc',
            '.wiki': 'text/x-wiki',
            '.creole': 'text/x-wiki',

            # Images
            '.jpg': 'image/jpeg',
            '.jpeg': 'image/jpeg',
            '.png': 'image/png',
            '.gif': 'image/gif',
            '.webp': 'image/webp',
            '.bmp': 'image/bmp',
            '.tiff': 'image/tiff',
            '.ico': 'image/x-icon',

            # Documents
            '.pdf': 'application/pdf',
            '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
            '.odt': 'application/vnd.oasis.opendocument.text',
            '.ods': 'application/vnd.oasis.opendocument.spreadsheet',
            '.odp': 'application/vnd.oasis.opendocument.presentation',

            # Legacy Office formats
            '.doc': 'application/msword',
            '.xls': 'application/vnd.ms-excel',
            '.ppt': 'application/vnd.ms-powerpoint',

            # Archives and binaries (will be processed as binary)
            '.zip': 'application/zip',
            '.tar': 'application/x-tar',
            '.gz': 'application/gzip',
            '.7z': 'application/x-7z-compressed',
            '.rar': 'application/vnd.rar',
            '.exe': 'application/x-msdownload',
            '.dll': 'application/x-msdownload',
            '.so': 'application/x-sharedlib',
            '.dylib': 'application/x-mach-binary'
        }
        return mime_types.get(extension.lower(), 'application/octet-stream')
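
    # Example lookups (illustrative; pass the extension *with* its leading dot,
    # matching is case-insensitive thanks to extension.lower() above):
    #
    #   self._get_mime_type('.XLSX')  # -> 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    #   self._get_mime_type('.py')    # -> 'text/x-python'
    #   self._get_mime_type('.blob')  # -> 'application/octet-stream' (fallback for unknown extensions)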

    async def run_tests(self) -> None:
        """Run the document extraction tests on all files."""
        mode = "WITH AI" if self.enable_ai else "CONTENT ONLY (No AI)"
        logger.info(f"Starting document extraction tests - {mode}")
        logger.info(f"Input directory: {self.input_dir}")
        logger.info(f"Output directory: {self.output_dir}")
        if self.enable_ai:
            logger.info(f"Processing prompt: {self.prompt}")
        else:
            logger.info("AI processing: DISABLED - Raw content extraction only")

        # Initialize the extractor
        if not self.initialize_extractor():
            logger.error("Cannot proceed without DocumentExtraction!")
            return

        # Get the files to process
        files = self.get_files_to_process()

        if not files:
            logger.warning("No files found to process!")
            return

        # Process each file
        successful = 0
        failed = 0

        logger.info(f"Starting to process {len(files)} files...")
        for i, file_path in enumerate(files):
            logger.info(f"Processing file {i+1}/{len(files)}: {file_path.name}")
            try:
                if await self.process_single_file(file_path):
                    successful += 1
                    logger.info(f"✓ File {i+1} processed successfully")
                else:
                    failed += 1
                    logger.error(f"✗ File {i+1} processing failed")
            except Exception as e:
                failed += 1
                logger.error(f"✗ Exception processing file {i+1}: {e}")
                import traceback
                traceback.print_exc()

        # Print a detailed summary (mode was computed at the top of this method)
        logger.info("\n" + "=" * 80)
        logger.info(f"DETAILED TEST SUMMARY - {mode}")
        logger.info("=" * 80)
        logger.info(f"Total files processed: {len(files)}")
        logger.info(f"Successful: {successful}")
        logger.info(f"Failed: {failed}")
        logger.info(f"Output directory: {self.output_dir}")
        if self.enable_ai:
            logger.info("AI processing: ENABLED")
        else:
            logger.info("AI processing: DISABLED")
        logger.info("=" * 80)

        # List all processed documents with their results
        logger.info("\nPROCESSING RESULTS:")
        logger.info("-" * 80)

        for result in self.processing_results:
            status_icon = "✅" if result['status'] == 'OK' else "❌"
            logger.info(f"{status_icon} {result['filename']} - {result['status']}")

            if result['status'] == 'OK':
                if result['content_items'] == 1:
                    logger.info(f"   └─ Generated: {result['output_files'][0]} ({result['total_content_size']} bytes)")
                else:
                    logger.info(f"   └─ Generated {result['content_items']} files ({result['total_content_size']} total bytes):")
                    for output_file in result['output_files']:
                        logger.info(f"      └─ {output_file}")
            else:
                error_msg = result.get('error', 'Unknown error')
                logger.info(f"   └─ Error: {error_msg}")

        logger.info("-" * 80)
        logger.info("=" * 80)
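
# For reference, each entry of self.processing_results consumed by the summary
# loop above has the shape built in process_single_file (the values here are
# illustrative only):
#
#   {
#       'filename': 'report.xlsx',
#       'status': 'OK',                # or 'FAIL'
#       'content_items': 2,
#       'output_files': ['report - Sheet1 1.txt', 'report - Sheet2 2.txt'],
#       'total_content_size': 4096,    # bytes, measured on the UTF-8 encoding
#       'error': '...',                # meaningful only on failure
#   }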

def parse_arguments():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(description='Document Extraction Test Script')
    parser.add_argument('--no-ai', '--content-only', action='store_true',
                        help='Run in content-only mode without AI processing')
    parser.add_argument('--input-dir', type=str, default='d:/temp/test-extraction',
                        help='Input directory containing files to process (default: d:/temp/test-extraction)')
    parser.add_argument('--output-dir', type=str,
                        help='Output directory for extracted content (auto-generated if not specified)')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Enable verbose logging')

    return parser.parse_args()
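
# Example invocations (the script filename is illustrative):
#
#   python test_document_extraction.py --no-ai --input-dir d:/docs --verbose
#   python test_document_extraction.py --output-dir d:/temp/out
#
# Run with no flags to get the interactive mode prompt in main() below.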

async def main():
    """Main function to run the tests."""
    # Parse command line arguments
    args = parse_arguments()

    # Set the logging level based on verbosity
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.getLogger().setLevel(logging.INFO)

    logger.info("DocumentExtraction Test Script")
    logger.info("=" * 50)
    logger.info(f"Source: {args.input_dir}")

    # Determine the output directory
    if args.output_dir:
        output_dir = args.output_dir
    else:
        if args.no_ai:
            output_dir = f"{args.input_dir}/extracted-raw"
        else:
            output_dir = f"{args.input_dir}/extracted"

    logger.info(f"Output: {output_dir}")
    logger.info("=" * 50)

    # Check dependencies first
    if not check_dependencies():
        logger.error("Please install missing dependencies before running tests.")
        return

    # Check module imports
    if not check_module_imports():
        logger.error("Cannot import required modules. Please check your setup.")
        return

    # Determine the mode from the command line arguments
    if args.no_ai:
        enable_ai = False
        logger.info("Running in CONTENT ONLY mode (no AI processing)")
    else:
        # Interactive mode: ask the user for a choice
        print("\n" + "=" * 50)
        print("SELECT EXTRACTION MODE:")
        print("=" * 50)
        print("1. With AI processing (default)")
        print("2. Content only (no AI processing)")
        print("=" * 50)

        try:
            choice = input("Enter your choice (1 or 2, default is 1): ").strip()
            if choice == "2":
                enable_ai = False
                if not args.output_dir:  # keep an explicitly supplied --output-dir
                    output_dir = f"{args.input_dir}/extracted-raw"
                logger.info("Selected: Content only mode (no AI processing)")
            else:
                enable_ai = True
                if not args.output_dir:
                    output_dir = f"{args.input_dir}/extracted"
                logger.info("Selected: AI processing mode")
        except (EOFError, KeyboardInterrupt):
            # Default to AI mode if input fails
            enable_ai = True
            if not args.output_dir:
                output_dir = f"{args.input_dir}/extracted"
            logger.info("Defaulting to AI processing mode")

    # Run the tests with the selected mode
    tester = DocumentExtractionTester(
        input_dir=args.input_dir,
        output_dir=output_dir,
        enable_ai=enable_ai
    )
    await tester.run_tests()

if __name__ == "__main__":
    # main() parses sys.argv itself and falls back to the interactive mode
    # prompt when no flags are given, so a single entry point covers both
    # automated and interactive runs.
    asyncio.run(main())

# Convenience function for easy content-only extraction
async def extract_documents_content_only(input_folder: str, output_folder: str = None):
    """
    Convenience function to extract documents without AI processing.

    Args:
        input_folder: Path to folder containing documents to extract
        output_folder: Path to folder where extracted content will be stored (optional)

    Example:
        # Extract from d:/temp to d:/temp/extracted-raw
        asyncio.run(extract_documents_content_only("d:/temp"))

        # Extract from custom folders
        asyncio.run(extract_documents_content_only("c:/my_docs", "c:/my_docs/extracted"))
    """
    if output_folder is None:
        output_folder = f"{input_folder}/extracted-raw"

    logger.info(f"Running content-only extraction from {input_folder} to {output_folder}")

    # Check dependencies and imports
    if not check_dependencies():
        logger.error("Missing dependencies. Please install required packages.")
        return False

    if not check_module_imports():
        logger.error("Cannot import required modules. Please check your setup.")
        return False

    # Create the tester and run
    tester = DocumentExtractionTester(
        input_dir=input_folder,
        output_dir=output_folder,
        enable_ai=False
    )

    await tester.run_tests()
    return True

# Example usage (uncomment to use):
# if __name__ == "__main__":
#     # For content-only extraction from d:/temp to d:/temp/extracted-raw
#     asyncio.run(extract_documents_content_only("d:/temp"))
test_excel_processing.py (new file, 189 lines)

@@ -0,0 +1,189 @@
#!/usr/bin/env python3
"""
Simple test script for enhanced Excel processing functionality.
This script tests the DocumentExtraction class with Excel files.
"""

import os
import sys
import asyncio
import logging
from pathlib import Path

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Add the gateway directory to the path
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..'))

async def test_excel_processing():
    """Test Excel processing functionality."""
    try:
        # Import required modules
        from modules.chat.documents.documentExtraction import DocumentExtraction
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
        from modules.interfaces.interfaceChatModel import ChatWorkflow
        from datetime import datetime, UTC

        logger.info("Testing Excel processing functionality...")

        # Create a mock service center
        mock_user = User(
            id="test_user_001",
            username="testuser",
            email="test@example.com",
            fullName="Test User",
            language="en",
            enabled=True,
            privilege=UserPrivilege.USER,
            authenticationAuthority=AuthAuthority.LOCAL,
            mandateId="test_mandate_001"
        )

        current_time = datetime.now(UTC).isoformat()
        mock_workflow = ChatWorkflow(
            id="test_workflow_001",
            mandateId="test_mandate_001",
            status="active",
            name="Test Excel Processing Workflow",
            currentRound=1,
            lastActivity=current_time,
            startedAt=current_time,
            logs=[],
            messages=[],
            stats=None,
            tasks=[]
        )

        service_center = ServiceCenter(mock_user, mock_workflow)
        logger.info("✓ ServiceCenter created successfully")

        # Create a DocumentExtraction instance
        extractor = DocumentExtraction(service_center)
        logger.info("✓ DocumentExtraction created successfully")

        # Test with a sample Excel file if available
        test_file_path = "d:/temp/test-extraction/test.xlsx"

        if os.path.exists(test_file_path):
            logger.info(f"Found test file: {test_file_path}")

            # Read the file
            with open(test_file_path, 'rb') as f:
                file_data = f.read()

            logger.info(f"File size: {len(file_data)} bytes")

            # Process the Excel file
            logger.info("Processing Excel file...")
            result = await extractor.processFileData(
                fileData=file_data,
                filename="test.xlsx",
                mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                base64Encoded=False,
                prompt=None,
                enableAI=False
            )

            logger.info("✓ Excel processing completed successfully!")
            logger.info(f"Generated {len(result.contents)} content items:")

            for i, content_item in enumerate(result.contents):
                logger.info(f"  Item {i+1}: {content_item.label}")
                logger.info(f"    MIME type: {content_item.metadata.mimeType}")
                logger.info(f"    Size: {content_item.metadata.size} bytes")
                if content_item.data:
                    logger.info(f"    Data preview: {content_item.data[:100]}...")
                else:
                    logger.info("    Data: None")

        else:
            logger.info("No test Excel file found. Creating a simple test...")

            # Test the openpyxl library directly
            try:
                import openpyxl
                from openpyxl import Workbook

                # Create a test workbook
                wb = Workbook()
                ws = wb.active
                ws.title = "Test Sheet"

                # Add some test data
                ws['A1'] = "Name"
                ws['B1'] = "Age"
                ws['C1'] = "City"
                ws['A2'] = "John Doe"
                ws['B2'] = 30
                ws['C2'] = "New York"
                ws['A3'] = "Jane Smith"
                ws['B3'] = 25
                ws['C3'] = "Los Angeles"

                # Set document properties
                wb.properties.title = "Test Workbook"
                wb.properties.creator = "Test User"
                wb.properties.subject = "Test Subject"

                logger.info("✓ Test workbook created successfully")
                logger.info(f"  Title: {wb.properties.title}")
                logger.info(f"  Creator: {wb.properties.creator}")
                logger.info(f"  Subject: {wb.properties.subject}")
                logger.info(f"  Sheets: {wb.sheetnames}")

                # Test DocumentExtraction with this workbook
                from io import BytesIO

                # Save the workbook to bytes
                buffer = BytesIO()
                wb.save(buffer)
                buffer.seek(0)
                file_data = buffer.getvalue()

                logger.info(f"Test workbook size: {len(file_data)} bytes")

                # Process with DocumentExtraction
                result = await extractor.processFileData(
                    fileData=file_data,
                    filename="test_workbook.xlsx",
                    mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                    base64Encoded=False,
                    prompt=None,
                    enableAI=False
                )

                logger.info("✓ Test workbook processing completed successfully!")
                logger.info(f"Generated {len(result.contents)} content items:")

                for i, content_item in enumerate(result.contents):
                    logger.info(f"  Item {i+1}: {content_item.label}")
                    logger.info(f"    MIME type: {content_item.metadata.mimeType}")
                    logger.info(f"    Size: {content_item.metadata.size} bytes")
                    if content_item.data:
                        logger.info(f"    Data preview: {content_item.data[:200]}...")
                    else:
                        logger.info("    Data: None")

            except ImportError as e:
                logger.error(f"openpyxl not available: {e}")
            except Exception as e:
                logger.error(f"Error testing Excel functionality: {e}")

        logger.info("Excel processing test completed!")

    except ImportError as e:
        logger.error(f"Failed to import required modules: {e}")
        logger.error("Make sure you're running this script from the gateway directory")
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    asyncio.run(test_excel_processing())
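
# Typical invocation (assumed: run from the gateway directory so that the
# relative sys.path entry above resolves the `modules` package):
#
#   python test_excel_processing.py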