Refactor document handling

This commit is contained in:
ValueOn AG 2025-07-22 18:15:02 +02:00
parent 02d34b914e
commit 4e15be8296
9 changed files with 483 additions and 493 deletions

View file

@ -1,124 +0,0 @@
# Contains all document creation functions extracted from managerChat.py
import logging
import json
from typing import Dict, Any, Optional, List, Union
from datetime import datetime, UTC
class DocumentCreator:
    """Document helper methods extracted from managerChat.py.

    Wraps a service-center object that provides file-extension lookup,
    MIME-type mapping and content-type sniffing.
    """

    def __init__(self, service):
        # Service center providing getFileExtension / getMimeTypeFromExtension /
        # detectContentTypeFromData.
        self.service = service

    def getFileExtension(self, filename: str) -> str:
        """Extract file extension from filename (delegates to the service)."""
        return self.service.getFileExtension(filename)

    def getMimeType(self, extension: str) -> str:
        """Get MIME type based on file extension (delegates to the service)."""
        return self.service.getMimeTypeFromExtension(extension)

    def detectMimeTypeFromContent(self, content: Any, filename: str) -> str:
        """
        Detect MIME type from content and filename using service center.

        Strings are encoded as UTF-8, dicts are JSON-serialized, anything else
        is stringified before sniffing. Returns 'application/octet-stream' on
        any detection error.
        """
        try:
            if isinstance(content, str):
                file_bytes = content.encode('utf-8')
            elif isinstance(content, dict):
                file_bytes = json.dumps(content, ensure_ascii=False).encode('utf-8')
            else:
                file_bytes = str(content).encode('utf-8')
            # detectContentTypeFromData's result can be returned directly: the
            # old "if != octet-stream return it, else return octet-stream"
            # branching was a no-op.
            return self.service.detectContentTypeFromData(file_bytes, filename)
        except Exception as e:
            # Fixed: log the actual filename instead of a "(unknown)" placeholder.
            logging.warning(f"Error in MIME type detection for {filename}: {str(e)}")
            return 'application/octet-stream'

    def detectMimeTypeFromDocument(self, document: Any, filename: str) -> str:
        """
        Detect MIME type from document object using service center.

        Reads the document's 'content' attribute (defaulting to '') and sniffs
        it. Returns 'application/octet-stream' on any detection error.
        """
        try:
            content = getattr(document, 'content', '')
            if isinstance(content, str):
                file_bytes = content.encode('utf-8')
            else:
                file_bytes = str(content).encode('utf-8')
            # See detectMimeTypeFromContent: passing the result through is
            # equivalent to the previous redundant branching.
            return self.service.detectContentTypeFromData(file_bytes, filename)
        except Exception as e:
            # Fixed: log the actual filename instead of a "(unknown)" placeholder.
            logging.warning(f"Error in MIME type detection for document {filename}: {str(e)}")
            return 'application/octet-stream'

    @staticmethod
    def _rowsToCsv(rows: List[Any]) -> str:
        """Serialize a list of dicts (DictWriter, header from the first row)
        or a list of lists (plain writer) to CSV text."""
        import csv
        import io
        output = io.StringIO()
        if rows and isinstance(rows[0], dict):
            writer = csv.DictWriter(output, fieldnames=rows[0].keys())
            writer.writeheader()
            writer.writerows(rows)
        else:
            writer = csv.writer(output)
            writer.writerows(rows)
        return output.getvalue()

    def convertDocumentDataToString(self, document_data: Any, file_extension: str) -> str:
        """Convert document data to string content based on file type.

        - None -> ""
        - str  -> returned unchanged
        - dict -> JSON, a well-known text field, or CSV depending on extension
        - list -> CSV for 'csv', JSON otherwise
        - anything else -> str(...)
        On any error, falls back to str(document_data).
        """
        try:
            if document_data is None:
                return ""
            if isinstance(document_data, str):
                return document_data
            if isinstance(document_data, dict):
                if file_extension == 'json':
                    return json.dumps(document_data, indent=2, ensure_ascii=False)
                elif file_extension in ['txt', 'md', 'html', 'css', 'js', 'py']:
                    # Prefer a well-known text-bearing field over dumping the
                    # whole dict.
                    text_fields = ['content', 'text', 'data', 'result', 'summary', 'extracted_content', 'table_data']
                    for field in text_fields:
                        if field in document_data:
                            content = document_data[field]
                            if isinstance(content, str):
                                return content
                            elif isinstance(content, (dict, list)):
                                return json.dumps(content, indent=2, ensure_ascii=False)
                    return json.dumps(document_data, indent=2, ensure_ascii=False)
                elif file_extension == 'csv':
                    csv_fields = ['table_data', 'csv_data', 'rows', 'data', 'content', 'text']
                    for field in csv_fields:
                        if field in document_data:
                            content = document_data[field]
                            if isinstance(content, str):
                                return content
                            # Only rows of dicts/lists are CSV-serializable;
                            # other list shapes fall through to the next field.
                            elif isinstance(content, list) and content and isinstance(content[0], (list, dict)):
                                return self._rowsToCsv(content)
                    return json.dumps(document_data, indent=2, ensure_ascii=False)
                else:
                    return json.dumps(document_data, indent=2, ensure_ascii=False)
            elif isinstance(document_data, list):
                if file_extension == 'csv':
                    return self._rowsToCsv(document_data)
                else:
                    return json.dumps(document_data, indent=2, ensure_ascii=False)
            else:
                return str(document_data)
        except Exception as e:
            logging.error(f"Error converting document data to string: {str(e)}")
            return str(document_data)

View file

@ -9,6 +9,13 @@ from pathlib import Path
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
import uuid
from .documentUtility import (
getFileExtension,
getMimeTypeFromExtension,
detectMimeTypeFromContent,
detectMimeTypeFromData,
convertDocumentDataToString
)
from modules.interfaces.interfaceChatModel import (
ExtractedContent,
@ -29,7 +36,7 @@ class FileProcessingError(Exception):
"""Custom exception for file processing errors."""
pass
class DocumentProcessor:
class DocumentExtraction:
"""Processor for handling document operations and content extraction."""
def __init__(self, serviceCenter=None):
@ -133,17 +140,13 @@ class DocumentProcessor:
# Decode base64 if needed
if base64Encoded:
fileData = base64.b64decode(fileData)
# Detect content type if needed
# Use documentUtility for mime type detection
if mimeType == "application/octet-stream":
mimeType = self._serviceCenter.detectContentTypeFromData(fileData, filename)
mimeType = detectMimeTypeFromData(fileData, filename, self._serviceCenter)
# Process document based on type
if mimeType not in self.supportedTypes:
# Fallback to binary processing
contentItems = await self._processBinary(fileData, filename, mimeType)
else:
# Process document based on type
processor = self.supportedTypes[mimeType]
contentItems = await processor(fileData, filename, mimeType)
@ -171,13 +174,15 @@ class DocumentProcessor:
"""Process text document"""
try:
content = fileData.decode('utf-8')
# Use documentUtility for mime type
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
return [ContentItem(
label="main",
data=content,
metadata=ContentMetadata(
size=len(content.encode('utf-8')),
pages=1,
mimeType="text/plain",
mimeType=mime_type,
base64Encoded=False
)
)]
@ -189,13 +194,14 @@ class DocumentProcessor:
"""Process CSV document"""
try:
content = fileData.decode('utf-8')
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
return [ContentItem(
label="main",
data=content,
metadata=ContentMetadata(
size=len(content.encode('utf-8')),
pages=1,
mimeType="text/csv",
mimeType=mime_type,
base64Encoded=False
)
)]
@ -207,16 +213,15 @@ class DocumentProcessor:
"""Process JSON document"""
try:
content = fileData.decode('utf-8')
# Parse JSON to validate
jsonData = json.loads(content)
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
return [ContentItem(
label="main",
data=content,
metadata=ContentMetadata(
size=len(content.encode('utf-8')),
pages=1,
mimeType="application/json",
mimeType=mime_type,
base64Encoded=False
)
)]
@ -228,13 +233,14 @@ class DocumentProcessor:
"""Process XML document"""
try:
content = fileData.decode('utf-8')
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
return [ContentItem(
label="main",
data=content,
metadata=ContentMetadata(
size=len(content.encode('utf-8')),
pages=1,
mimeType="application/xml",
mimeType=mime_type,
base64Encoded=False
)
)]
@ -246,13 +252,14 @@ class DocumentProcessor:
"""Process HTML document"""
try:
content = fileData.decode('utf-8')
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
return [ContentItem(
label="main",
data=content,
metadata=ContentMetadata(
size=len(content.encode('utf-8')),
pages=1,
mimeType="text/html",
mimeType=mime_type,
base64Encoded=False
)
)]
@ -264,15 +271,14 @@ class DocumentProcessor:
"""Process SVG document"""
try:
content = fileData.decode('utf-8')
# Check if it's actually SVG
isSvg = "<svg" in content.lower()
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
return [ContentItem(
label="main",
data=content if isSvg else None,
metadata=ContentMetadata(
size=len(content.encode('utf-8')),
mimeType="image/svg+xml",
mimeType=mime_type,
base64Encoded=False,
error=None if isSvg else "Invalid SVG content"
)

View file

@ -0,0 +1,163 @@
import logging
from typing import Any, Dict, List, Optional
from datetime import datetime, UTC
from .documentUtility import (
getFileExtension,
getMimeTypeFromExtension,
detectMimeTypeFromContent,
detectMimeTypeFromData,
convertDocumentDataToString
)
logger = logging.getLogger(__name__)
class DocumentGenerator:
def __init__(self, service):
self.service = service
def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]:
"""
Main function to process documents from an action result.
Returns a list of processed document dictionaries.
"""
try:
documents = action_result.data.get("documents", [])
processed_documents = []
for doc in documents:
processed_doc = self.processSingleDocument(doc, action)
if processed_doc:
processed_documents.append(processed_doc)
return processed_documents
except Exception as e:
logger.error(f"Error processing action result documents: {str(e)}")
return []
def processSingleDocument(self, doc: Any, action) -> Optional[Dict[str, Any]]:
"""Process a single document from action result"""
try:
if hasattr(doc, 'filename') and doc.filename:
# Document object with filename attribute
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
if mime_type == "application/octet-stream":
content = getattr(doc, 'content', '')
mime_type = detectMimeTypeFromContent(content, doc.filename, self.service)
return {
'filename': doc.filename,
'fileSize': getattr(doc, 'fileSize', 0),
'mimeType': mime_type,
'content': getattr(doc, 'content', ''),
'document': doc
}
elif isinstance(doc, dict):
# Dictionary format document
filename = doc.get('documentName', doc.get('filename', \
f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"))
fileSize = doc.get('fileSize', len(str(doc.get('documentData', ''))))
mimeType = doc.get('mimeType', 'application/octet-stream')
if mimeType == "application/octet-stream":
document_data = doc.get('documentData', '')
mimeType = detectMimeTypeFromContent(document_data, filename, self.service)
return {
'filename': filename,
'fileSize': fileSize,
'mimeType': mimeType,
'content': doc.get('documentData', ''),
'document': doc
}
else:
# Unknown document type
logger.warning(f"Unknown document type for action {action.execMethod}.{action.execAction}: {type(doc)}")
filename = f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"
mimeType = detectMimeTypeFromContent(doc, filename, self.service)
return {
'filename': filename,
'fileSize': 0,
'mimeType': mimeType,
'content': str(doc),
'document': doc
}
except Exception as e:
logger.error(f"Error processing single document: {str(e)}")
return None
def createDocumentsFromActionResult(self, action_result, action, workflow) -> List[Any]:
"""
Create actual document objects from action result and store them in the system.
Returns a list of created document objects.
"""
try:
processed_docs = self.processActionResultDocuments(action_result, action, workflow)
created_documents = []
for doc_data in processed_docs:
try:
document_name = doc_data['filename']
document_data = doc_data['content']
mime_type = doc_data['mimeType']
# Convert document data to string content
content = convertDocumentDataToString(document_data, getFileExtension(document_name))
# Skip empty or minimal content
minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
logger.warning(f"Empty or minimal content for document {document_name}, skipping")
continue
# Create file in system
file_id = self.service.createFile(
fileName=document_name,
mimeType=mime_type,
content=content,
base64encoded=False
)
if not file_id:
logger.error(f"Failed to create file for document {document_name}")
continue
# Create document object
document = self.service.createDocument(
fileName=document_name,
mimeType=mime_type,
content=content,
base64encoded=False
)
if document:
created_documents.append(document)
logger.info(f"Created document: {document_name} with file ID: {file_id} and MIME type: {mime_type}")
else:
logger.error(f"Failed to create ChatDocument object for {document_name}")
except Exception as e:
logger.error(f"Error creating document {doc_data.get('filename', 'unknown')}: {str(e)}")
continue
return created_documents
except Exception as e:
logger.error(f"Error creating documents from action result: {str(e)}")
return []
@staticmethod
def get_delivered_files_and_formats(documents):
delivered_files = []
delivered_formats = []
for doc in documents:
if hasattr(doc, 'filename'):
delivered_files.append(doc.filename)
file_extension = getFileExtension(doc.filename)
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
delivered_formats.append({
'filename': doc.filename,
'extension': file_extension,
'mimeType': mime_type
})
elif isinstance(doc, dict) and 'filename' in doc:
delivered_files.append(doc['filename'])
file_extension = getFileExtension(doc['filename'])
mime_type = doc.get('mimeType', 'application/octet-stream')
delivered_formats.append({
'filename': doc['filename'],
'extension': file_extension,
'mimeType': mime_type
})
else:
delivered_files.append(f"document_{len(delivered_files)}")
delivered_formats.append({
'filename': f"document_{len(delivered_files)}",
'extension': 'unknown',
'mimeType': 'application/octet-stream'
})
return delivered_files, delivered_formats

View file

@ -0,0 +1,132 @@
import json
import logging
from typing import Any, Dict
logger = logging.getLogger(__name__)
def getFileExtension(filename: str) -> str:
    """Return the lowercased extension of *filename*, or '' when it has no dot."""
    dot_pos = filename.rfind('.')
    if dot_pos == -1:
        return ''
    return filename[dot_pos + 1:].lower()
def getMimeTypeFromExtension(extension: str, service=None) -> str:
    """Get MIME type based on file extension. Optionally use a service for mapping."""
    if service:
        # A service center takes precedence over the built-in table.
        return service.getMimeTypeFromExtension(extension)
    # Built-in fallback table for common document/image extensions.
    fallback_table = {
        'txt': 'text/plain',
        'md': 'text/markdown',
        'html': 'text/html',
        'css': 'text/css',
        'js': 'application/javascript',
        'json': 'application/json',
        'csv': 'text/csv',
        'xml': 'application/xml',
        'py': 'text/x-python',
        'pdf': 'application/pdf',
        'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'png': 'image/png',
        'jpg': 'image/jpeg',
        'jpeg': 'image/jpeg',
        'gif': 'image/gif',
        'svg': 'image/svg+xml',
    }
    normalized = extension.lower()
    return fallback_table.get(normalized, 'application/octet-stream')
def detectMimeTypeFromData(file_bytes: bytes, filename: str, service=None) -> str:
    """Detect MIME type from file bytes and filename using a service if provided.

    Tries the service's content sniffing first; when that yields nothing better
    than 'application/octet-stream', falls back to guessing from the filename's
    extension. Returns 'application/octet-stream' on any error.
    """
    try:
        if service:
            detected = service.detectContentTypeFromData(file_bytes, filename)
            if detected and detected != 'application/octet-stream':
                return detected
        # Fallback: guess from extension
        ext = getFileExtension(filename)
        return getMimeTypeFromExtension(ext, service)
    except Exception as e:
        # Fixed: log the actual filename instead of the "(unknown)" placeholder
        # the original message contained even though filename was in scope.
        logger.warning(f"Error in MIME type detection for {filename}: {str(e)}")
        return 'application/octet-stream'
def detectMimeTypeFromContent(content: Any, filename: str, service=None) -> str:
    """Detect MIME type from content and filename using a service if provided.

    Strings are UTF-8 encoded, dicts are JSON-serialized, anything else is
    stringified, then delegated to detectMimeTypeFromData. Returns
    'application/octet-stream' on any error.
    """
    try:
        if isinstance(content, str):
            file_bytes = content.encode('utf-8')
        elif isinstance(content, dict):
            file_bytes = json.dumps(content, ensure_ascii=False).encode('utf-8')
        else:
            file_bytes = str(content).encode('utf-8')
        return detectMimeTypeFromData(file_bytes, filename, service)
    except Exception as e:
        # Fixed: log the actual filename instead of the "(unknown)" placeholder
        # the original message contained even though filename was in scope.
        logger.warning(f"Error in MIME type detection for {filename}: {str(e)}")
        return 'application/octet-stream'
def convertDocumentDataToString(document_data: Any, file_extension: str) -> str:
"""Convert document data to string content based on file type with enhanced processing."""
try:
if document_data is None:
return ""
if isinstance(document_data, str):
return document_data
if isinstance(document_data, dict):
if file_extension == 'json':
return json.dumps(document_data, indent=2, ensure_ascii=False)
elif file_extension in ['txt', 'md', 'html', 'css', 'js', 'py']:
text_fields = ['content', 'text', 'data', 'result', 'summary', 'extracted_content', 'table_data']
for field in text_fields:
if field in document_data:
content = document_data[field]
if isinstance(content, str):
return content
elif isinstance(content, (dict, list)):
return json.dumps(content, indent=2, ensure_ascii=False)
return json.dumps(document_data, indent=2, ensure_ascii=False)
elif file_extension == 'csv':
csv_fields = ['table_data', 'csv_data', 'rows', 'data', 'content', 'text']
for field in csv_fields:
if field in document_data:
content = document_data[field]
if isinstance(content, str):
return content
elif isinstance(content, list):
if content and isinstance(content[0], (list, dict)):
import csv
import io
output = io.StringIO()
if isinstance(content[0], dict):
if content:
fieldnames = content[0].keys()
writer = csv.DictWriter(output, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(content)
else:
writer = csv.writer(output)
writer.writerows(content)
return output.getvalue()
return json.dumps(document_data, indent=2, ensure_ascii=False)
else:
return json.dumps(document_data, indent=2, ensure_ascii=False)
elif isinstance(document_data, list):
if file_extension == 'csv':
import csv
import io
output = io.StringIO()
if document_data and isinstance(document_data[0], dict):
fieldnames = document_data[0].keys()
writer = csv.DictWriter(output, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(document_data)
else:
writer = csv.writer(output)
writer.writerows(document_data)
return output.getvalue()
else:
return json.dumps(document_data, indent=2, ensure_ascii=False)
else:
return str(document_data)
except Exception as e:
logger.error(f"Error converting document data to string: {str(e)}")
return str(document_data)

View file

@ -7,8 +7,8 @@ import time
from typing import Dict, Any, Optional, List, Union
from datetime import datetime, UTC
from modules.interfaces.interfaceChatModel import ReviewResult, ActionResult
from modules.chat.documents.documentCreation import DocumentCreator
from .promptFactory import createResultReviewPrompt
from modules.chat.documents.documentGeneration import DocumentGenerator
logger = logging.getLogger(__name__)
@ -16,12 +16,72 @@ class HandlingActions:
def __init__(self, service, chatInterface):
self.service = service
self.chatInterface = chatInterface
self.documentCreator = DocumentCreator(self.service)
self.documentGenerator = DocumentGenerator(service)
async def executeSingleAction(self, action, workflow):
    """Execute a single action and return ActionResult with enhanced document processing"""
    try:
        # Copy so the action's own parameter dict is not mutated by the
        # format enrichment below.
        enhanced_parameters = action.execParameters.copy()
        if action.expectedDocumentFormats:
            enhanced_parameters['expectedDocumentFormats'] = action.expectedDocumentFormats
            logger.info(f"Action {action.execMethod}.{action.execAction} expects formats: {action.expectedDocumentFormats}")
        result = await self.service.executeAction(
            methodName=action.execMethod,
            actionName=action.execAction,
            parameters=enhanced_parameters
        )
        result_label = action.execResultLabel
        if result.success:
            action.setSuccess()
            action.result = result.data.get("result", "")
            action.execResultLabel = result_label
            # Records the result (and any created documents) as a workflow
            # message before the ActionResult is built.
            await self.createActionMessage(action, result, workflow, result_label)
        else:
            action.setError(result.error or "Action execution failed")
        # Normalize raw result documents into plain dicts via the generator.
        processed_documents = self.documentGenerator.processActionResultDocuments(result, action, workflow)
        return ActionResult(
            success=result.success,
            data={
                "result": result.data.get("result", ""),
                "documents": processed_documents,
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "resultLabel": result_label
            },
            metadata={
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "resultLabel": result_label
            },
            validation=[],
            error=result.error or ""
        )
    except Exception as e:
        # Any unexpected failure is converted into a failed ActionResult so
        # callers always receive a result object rather than an exception.
        logger.error(f"Error executing single action: {str(e)}")
        action.setError(str(e))
        return ActionResult(
            success=False,
            data={
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "documents": []
            },
            metadata={
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction
            },
            validation=[],
            error=str(e)
        )
async def validateActionResult(self, action_result, action, context) -> dict:
try:
prompt = self._createGenericValidationPrompt(action_result, action, context)
response = await self._callAIWithCircuitBreaker(prompt, "action_validation")
response = await self.service.callAiTextAdvanced(prompt, "action_validation")
validation = self._parseValidationResponse(response)
validation['action_id'] = action.id
validation['action_method'] = action.execMethod
@ -41,6 +101,73 @@ class HandlingActions:
'result_label': action.execResultLabel
}
async def createActionMessage(self, action, result, workflow, result_label=None):
    """Create and store a message for the action result in the workflow with enhanced document processing"""
    try:
        if result_label is None:
            result_label = action.execResultLabel
        message_data = {
            "workflowId": workflow.id,
            "role": "assistant",
            "message": f"Executed action {action.execMethod}.{action.execAction}",
            "status": "step",
            # Sequence number is 1-based relative to existing messages.
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": datetime.now(UTC).isoformat(),
            "actionId": action.id,
            "actionMethod": action.execMethod,
            "actionName": action.execAction,
            "documentsLabel": result_label,
            "documents": []
        }
        # Use the local createDocumentsFromActionResult method
        created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow)
        message_data["documents"] = created_documents
        message = self.chatInterface.createWorkflowMessage(message_data)
        if message:
            workflow.messages.append(message)
            logger.info(f"Created action message for {action.execMethod}.{action.execAction} with {len(created_documents)} documents")
            # Debug dump of the workflow's message/document state after append.
            logger.debug(f"WORKFLOW STATE after createActionMessage: id={id(workflow)}, message_count={len(workflow.messages)}")
            for idx, msg in enumerate(workflow.messages):
                label = getattr(msg, 'documentsLabel', None)
                docs = getattr(msg, 'documents', None)
                logger.debug(f" Message {idx}: label='{label}', documents_count={len(docs) if docs else 0}")
        else:
            logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}")
    except Exception as e:
        # Message creation is best-effort: failures are logged, not raised.
        logger.error(f"Error creating action message: {str(e)}")
def parseActionResponse(self, response: str) -> list:
    """Extract the 'actions' list from a JSON object embedded in an AI response.

    Locates the outermost '{'...'}' span, parses it as JSON and returns the
    'actions' field. Returns [] (and logs) on any parse/shape failure.
    """
    try:
        # Outermost braces bound the JSON payload inside free-form text.
        json_start = response.find('{')
        json_end = response.rfind('}') + 1
        if json_start == -1 or json_end == 0:
            raise ValueError("No JSON found in response")
        json_str = response[json_start:json_end]
        action_data = json.loads(json_str)
        if 'actions' not in action_data:
            raise ValueError("Action response missing 'actions' field")
        return action_data['actions']
    except Exception as e:
        logger.error(f"Error parsing action response: {str(e)}")
        return []
def parseReviewResponse(self, response: str) -> dict:
    """Parse a review JSON object embedded in an AI response.

    Locates the outermost '{'...'}' span and parses it; the result must carry
    a 'status' field. On failure, returns a synthetic failed-review dict.
    """
    try:
        # Outermost braces bound the JSON payload inside free-form text.
        json_start = response.find('{')
        json_end = response.rfind('}') + 1
        if json_start == -1 or json_end == 0:
            raise ValueError("No JSON found in response")
        json_str = response[json_start:json_end]
        review = json.loads(json_str)
        if 'status' not in review:
            raise ValueError("Review response missing 'status' field")
        return review
    except Exception as e:
        logger.error(f"Error parsing review response: {str(e)}")
        return {'status': 'failed', 'reason': f'Parse error: {str(e)}'}
# Internal helper methods
def _createGenericValidationPrompt(self, action_result, action, context) -> str:
success = action_result.success
result_data = action_result.data
@ -54,35 +181,9 @@ class HandlingActions:
expected_document_formats = action.expectedDocumentFormats or []
actual_result_label = result_data.get("resultLabel", "") if isinstance(result_data, dict) else ""
result_label_match = actual_result_label == expected_result_label
delivered_files = []
delivered_formats = []
# Use DocumentGenerator for file/format extraction
delivered_files, delivered_formats = DocumentGenerator.get_delivered_files_and_formats(documents)
content_items = []
for doc in documents:
if hasattr(doc, 'filename'):
delivered_files.append(doc.filename)
file_extension = self._getFileExtension(doc.filename)
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
delivered_formats.append({
'filename': doc.filename,
'extension': file_extension,
'mimeType': mime_type
})
elif isinstance(doc, dict) and 'filename' in doc:
delivered_files.append(doc['filename'])
file_extension = self._getFileExtension(doc['filename'])
mime_type = doc.get('mimeType', 'application/octet-stream')
delivered_formats.append({
'filename': doc['filename'],
'extension': file_extension,
'mimeType': mime_type
})
else:
delivered_files.append(f"document_{len(delivered_files)}")
delivered_formats.append({
'filename': f"document_{len(delivered_files)}",
'extension': 'unknown',
'mimeType': 'application/octet-stream'
})
if isinstance(result_data, dict):
if 'extractedContent' in result_data:
extracted_content = result_data['extractedContent']
@ -128,305 +229,4 @@ class HandlingActions:
'quality_score': 5,
'missing_elements': [],
'suggested_retry_approach': ''
}
async def executeSingleAction(self, action, workflow):
"""Execute a single action and return ActionResult with enhanced document processing"""
try:
# Use DocumentCreator methods
# Enhance parameters with expected document formats if specified
enhanced_parameters = action.execParameters.copy()
if action.expectedDocumentFormats:
enhanced_parameters['expectedDocumentFormats'] = action.expectedDocumentFormats
logger.info(f"Action {action.execMethod}.{action.execAction} expects formats: {action.expectedDocumentFormats}")
result = await self.service.executeAction(
methodName=action.execMethod,
actionName=action.execAction,
parameters=enhanced_parameters
)
result_label = action.execResultLabel
if result.success:
action.setSuccess()
action.result = result.data.get("result", "")
action.execResultLabel = result_label
await self.createActionMessage(action, result, workflow, result_label)
else:
action.setError(result.error or "Action execution failed")
documents = result.data.get("documents", [])
processed_documents = []
for doc in documents:
if hasattr(doc, 'filename') and doc.filename:
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
if mime_type == "application/octet-stream":
mime_type = self.documentCreator.detectMimeTypeFromDocument(doc, doc.filename)
processed_documents.append({
'filename': doc.filename,
'fileSize': getattr(doc, 'fileSize', 0),
'mimeType': mime_type,
'content': getattr(doc, 'content', ''),
'document': doc
})
elif isinstance(doc, dict):
filename = doc.get('documentName', doc.get('filename', f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"))
fileSize = doc.get('fileSize', len(str(doc.get('documentData', ''))))
mimeType = doc.get('mimeType', 'application/octet-stream')
if mimeType == "application/octet-stream":
document_data = doc.get('documentData', '')
mimeType = self.documentCreator.detectMimeTypeFromContent(document_data, filename)
processed_documents.append({
'filename': filename,
'fileSize': fileSize,
'mimeType': mimeType,
'content': doc.get('documentData', ''),
'document': doc
})
else:
logger.warning(f"Unknown document type for action {action.execMethod}.{action.execAction}: {type(doc)}")
filename = f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"
mimeType = 'application/octet-stream'
mimeType = self.documentCreator.detectMimeTypeFromContent(doc, filename)
processed_documents.append({
'filename': filename,
'fileSize': 0,
'mimeType': mimeType,
'content': str(doc),
'document': doc
})
return ActionResult(
success=result.success,
data={
"result": result.data.get("result", ""),
"documents": processed_documents,
"actionId": action.id,
"actionMethod": action.execMethod,
"actionName": action.execAction,
"resultLabel": result_label
},
metadata={
"actionId": action.id,
"actionMethod": action.execMethod,
"actionName": action.execAction,
"resultLabel": result_label
},
validation=[],
error=result.error or ""
)
except Exception as e:
logger.error(f"Error executing single action: {str(e)}")
action.setError(str(e))
return ActionResult(
success=False,
data={
"actionId": action.id,
"actionMethod": action.execMethod,
"actionName": action.execAction,
"documents": []
},
metadata={
"actionId": action.id,
"actionMethod": action.execMethod,
"actionName": action.execAction
},
validation=[],
error=str(e)
)
async def createActionMessage(self, action, result, workflow, result_label=None):
"""Create and store a message for the action result in the workflow with enhanced document processing"""
try:
# Use DocumentCreator methods
result_data = result.data if hasattr(result, 'data') else {}
documents_data = result_data.get("documents", [])
if result_label is None:
result_label = action.execResultLabel
message_data = {
"workflowId": workflow.id,
"role": "assistant",
"message": f"Executed action {action.execMethod}.{action.execAction}",
"status": "step",
"sequenceNr": len(workflow.messages) + 1,
"publishedAt": datetime.now(UTC).isoformat(),
"actionId": action.id,
"actionMethod": action.execMethod,
"actionName": action.execAction,
"documentsLabel": result_label, # Use intent label from action definition
"documents": []
}
if documents_data:
processed_documents = []
for doc_data in documents_data:
try:
if isinstance(doc_data, dict):
document_name = doc_data.get("documentName", doc_data.get("filename", f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"))
document_data = doc_data.get("documentData", {})
file_size = doc_data.get("fileSize", 0)
mime_type = doc_data.get("mimeType", "application/octet-stream")
elif hasattr(doc_data, 'filename'):
document_name = doc_data.filename
document_data = getattr(doc_data, 'content', {})
file_size = getattr(doc_data, 'fileSize', 0)
mime_type = getattr(doc_data, 'mimeType', "application/octet-stream")
else:
document_name = f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"
document_data = doc_data
file_size = len(str(doc_data))
mime_type = "application/octet-stream"
if mime_type == "application/octet-stream":
mime_type = self.documentCreator.detectMimeTypeFromContent(document_data, document_name)
content = self.documentCreator.convertDocumentDataToString(document_data, self.documentCreator.getFileExtension(document_name))
minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
logger.warning(f"Empty or minimal content for document {document_name}, skipping")
continue
file_id = self.service.createFile(
fileName=document_name,
mimeType=mime_type,
content=content,
base64encoded=False
)
if not file_id:
logger.error(f"Failed to create file for document {document_name}")
continue
document = self.service.createDocument(
fileName=document_name,
mimeType=mime_type,
content=content,
base64encoded=False
)
if document:
processed_documents.append(document)
logger.info(f"Created document: {document_name} with file ID: {file_id} and MIME type: {mime_type}")
else:
logger.error(f"Failed to create ChatDocument object for {document_name}")
except Exception as e:
logger.error(f"Error processing document {getattr(doc_data, 'documentName', 'unknown') if isinstance(doc_data, dict) else 'unknown'}: {str(e)}")
continue
message_data["documents"] = processed_documents
message = self.chatInterface.createWorkflowMessage(message_data)
if message:
workflow.messages.append(message)
logger.info(f"Created action message for {action.execMethod}.{action.execAction} with {len(message_data.get('documents', []))} documents")
logger.debug(f"WORKFLOW STATE after createActionMessage: id={id(workflow)}, message_count={len(workflow.messages)}")
for idx, msg in enumerate(workflow.messages):
label = getattr(msg, 'documentsLabel', None)
docs = getattr(msg, 'documents', None)
logger.debug(f" Message {idx}: label='{label}', documents_count={len(docs) if docs else 0}")
else:
logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}")
except Exception as e:
logger.error(f"Error creating action message: {str(e)}")
async def performTaskReview(self, review_context) -> 'ReviewResult':
    """Perform AI-based task review with enhanced retry logic.

    Builds a review prompt, calls the AI through the circuit breaker, and
    parses the response into a review dict. When the AI requests a retry,
    the successful action results are inspected for two common problems —
    empty extraction output and incomplete document metadata — to enrich
    the retry guidance and lower the quality score.

    Args:
        review_context: Review context; `action_results` is read here and
            the whole object is forwarded to the prompt builder.

    Returns:
        ReviewResult: Parsed review. On any internal failure a 'success'
        result is returned so the workflow is not blocked.
    """
    try:
        # Prepare prompt for result review
        prompt = await createResultReviewPrompt(self, review_context)
        # Call AI with circuit breaker
        response = await self._callAIWithCircuitBreaker(prompt, "result_review")
        # Parse review result
        # NOTE(review): the sibling parser in this file is named
        # parseReviewResponse (no leading underscore) — confirm that
        # _parseReviewResponse actually exists on this class.
        review_dict = self._parseReviewResponse(response)
        # Add default values for missing fields
        review_dict.setdefault('status', 'unknown')
        review_dict.setdefault('reason', 'No reason provided')
        review_dict.setdefault('quality_score', 5)
        # Enhanced retry logic based on result quality
        if review_dict.get('status') == 'retry':
            # Analyze the specific issues for better retry guidance
            action_results = review_context.action_results or []
            if action_results:
                # Only consider empty results a problem if there are no
                # documents produced. (The original repeated the
                # `not result.data.get('documents')` check twice — a no-op
                # duplicate; it is checked once here.)
                has_empty_results = any(
                    not result.data.get('result', '').strip() and
                    not result.data.get('documents')
                    for result in action_results
                    if result.success
                )
                has_incomplete_metadata = any(
                    any(doc.get('filename') == 'unknown' for doc in result.data.get('documents', []) or [])
                    for result in action_results
                    if result.success
                )
                # NOTE(review): 'improvements' is concatenated as a string
                # here but defaults to a list when building ReviewResult
                # below — confirm which type the parser actually emits.
                if has_empty_results:
                    review_dict['improvements'] = (review_dict.get('improvements', '') +
                        " Ensure the document extraction returns actual content, not empty results. " +
                        "Check if the AI prompt is specific enough to extract meaningful data.")
                if has_incomplete_metadata:
                    review_dict['improvements'] = (review_dict.get('improvements', '') +
                        " Ensure proper document metadata is extracted including filename, size, and mime type. " +
                        "The document processing should provide complete file information.")
                # If we have specific issues, adjust quality score
                if has_empty_results or has_incomplete_metadata:
                    review_dict['quality_score'] = max(1, review_dict.get('quality_score', 5) - 2)
        # Create ReviewResult model
        return ReviewResult(
            status=review_dict.get('status', 'unknown'),
            reason=review_dict.get('reason', 'No reason provided'),
            improvements=review_dict.get('improvements', []),
            quality_score=review_dict.get('quality_score', 5),
            missing_outputs=review_dict.get('missing_outputs', []),
            met_criteria=review_dict.get('met_criteria', []),
            unmet_criteria=review_dict.get('unmet_criteria', []),
            confidence=review_dict.get('confidence', 0.5)
        )
    except Exception as e:
        logger.error(f"Error performing task review: {str(e)}")
        return ReviewResult(
            status='success',  # Default to success to avoid blocking workflow
            reason=f'Review failed: {str(e)}',
            quality_score=5,
            confidence=0.5
        )
def parseActionResponse(self, response: str) -> list:
    """Extract the 'actions' list from a JSON object embedded in an AI response.

    Args:
        response: Raw AI response text expected to contain a JSON object.

    Returns:
        list: The parsed actions, or an empty list when parsing fails.
    """
    try:
        start = response.find('{')
        stop = response.rfind('}') + 1
        if start == -1 or stop == 0:
            raise ValueError("No JSON found in response")
        parsed = json.loads(response[start:stop])
        if 'actions' not in parsed:
            raise ValueError("Action response missing 'actions' field")
        return parsed['actions']
    except Exception as e:
        logger.error(f"Error parsing action response: {str(e)}")
        return []
def parseReviewResponse(self, response: str) -> dict:
    """Extract the review dict from a JSON object embedded in an AI response.

    Args:
        response: Raw AI response text expected to contain a JSON object
            with at least a 'status' field.

    Returns:
        dict: The parsed review, or a {'status': 'failed', ...} fallback
        when parsing fails.
    """
    try:
        start = response.find('{')
        stop = response.rfind('}') + 1
        if start == -1 or stop == 0:
            raise ValueError("No JSON found in response")
        parsed = json.loads(response[start:stop])
        if 'status' not in parsed:
            raise ValueError("Review response missing 'status' field")
        return parsed
    except Exception as e:
        logger.error(f"Error parsing review response: {str(e)}")
        return {'status': 'failed', 'reason': f'Parse error: {str(e)}'}
# Utility method for file extension
def _getFileExtension(self, filename):
if '.' in filename:
return filename.rsplit('.', 1)[-1].lower()
return ''
# Placeholder methods for AI and prompt logic (to be implemented or injected)
async def _callAIWithCircuitBreaker(self, prompt, purpose):
    """Send *prompt* to the AI backend, guarded by a circuit breaker.

    Placeholder: always raises; a subclass or injected implementation
    must override this method.

    Args:
        prompt: Prompt text to send to the AI.
        purpose: Label for the call (e.g. "result_review"); how it is used
            depends on the concrete implementation.

    Raises:
        NotImplementedError: Always, until overridden.
    """
    raise NotImplementedError("_callAIWithCircuitBreaker must be implemented in the subclass or injected.")
}

View file

@ -30,7 +30,7 @@ class HandlingTasks:
prompt = await self.service.callAiTextAdvanced(
createTaskPlanningPrompt(self, {
'user_request': userInput,
'available_documents': self._getAvailableDocuments(workflow),
'available_documents': self.service.getAvailableDocuments(workflow),
'workflow_id': workflow.id
})
)
@ -55,7 +55,7 @@ class HandlingTasks:
task_step=task_step,
workflow=workflow,
workflow_id=workflow.id,
available_documents=self._getAvailableDocuments(workflow),
available_documents=self.service.getAvailableDocuments(workflow),
previous_results=previous_results or [],
improvements=[],
retry_count=0,
@ -205,13 +205,7 @@ class HandlingTasks:
return {'error': str(e)}
# --- Helper and validation methods (unchanged, but can be inlined or made private) ---
def _getAvailableDocuments(self, workflow):
documents = []
for message in workflow.messages:
for doc in message.documents:
documents.append(doc.filename)
return documents
def _parseTaskPlanResponse(self, response: str) -> dict:
try:
json_start = response.find('{')

View file

@ -13,7 +13,7 @@ from modules.interfaces.interfaceChatObjects import getInterface as getChatObjec
from modules.interfaces.interfaceChatModel import ActionResult
from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
from gateway.modules.chat.documents.documentProcessing import DocumentProcessor
from modules.chat.documents.documentExtraction import DocumentExtraction
from modules.chat.methodBase import MethodBase
import uuid
@ -37,7 +37,7 @@ class ServiceCenter:
self.interfaceComponent = getComponentObjects(currentUser)
self.interfaceApp = getAppObjects(currentUser)
self.interfaceAiCalls = AiCalls()
self.documentProcessor = DocumentProcessor(self)
self.documentProcessor = DocumentExtraction(self)
# Initialize methods catalog
self.methods = {}
@ -259,6 +259,15 @@ class ServiceCenter:
return filename.split('.')[-1].lower()
return "txt" # Default to text
def getFileExtension(self, filename):
    """
    Extract file extension from filename (without dot, lowercased).
    Returns empty string if no extension is found.
    """
    dot = filename.rfind('.')
    return filename[dot + 1:].lower() if dot != -1 else ''
# ===== Functions =====
def extractContent(self, prompt: str, document: ChatDocument) -> ExtractedContent:
@ -859,6 +868,22 @@ Please provide a clear summary of this message."""
logger.error(f"Error calculating user input size: {str(e)}")
return 0
def getAvailableDocuments(self, workflow) -> List[str]:
    """
    Get list of available document filenames from workflow.
    Args:
        workflow: ChatWorkflow object
    Returns:
        List[str]: List of document filenames
    """
    return [
        doc.filename
        for message in workflow.messages
        for doc in message.documents
    ]
async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
"""Execute a method action"""
try:

View file

@ -1,11 +1,5 @@
TODO
- refactoring of the chat manager
- move the document modules into documents/ --> creation, extraction -> adapt references via a global search
- activate the neutralizer AND restore the placeholders in the returned data
- reference handling and authentication for connections in the method actions
- check methods

View file

@ -177,7 +177,7 @@ class ServiceCenter:
self.tasks: Dict[str, AgentTask] = {}
self.promptManager = AIPromptManager()
self.taskStateManager = TaskStateManager()
self.documentProcessor = DocumentProcessor()
self.documentProcessor = DocumentExtraction()
async def execute_task(self, task: AgentTask) -> None:
"""Execute task with improved error handling and timeout"""
@ -304,7 +304,7 @@ class DocumentContext(BaseModel):
relevantSections: List[str]
processingStatus: Dict[str, str]
class DocumentProcessor:
class DocumentExtraction:
"""Processes documents with context awareness"""
def process_with_context(self, doc: Dict, context: DocumentContext) -> Dict: