start testing with backend running
This commit is contained in:
parent
cbea086f91
commit
501cebe342
23 changed files with 618 additions and 2427 deletions
|
|
@ -1,21 +1,19 @@
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
from dataclasses import dataclass, field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
class ContentPart(BaseModel):
|
||||||
class ContentPart:
|
id: str = Field(description="Unique content part identifier")
|
||||||
id: str
|
parentId: Optional[str] = Field(default=None, description="Optional parent content part id")
|
||||||
parentId: Optional[str]
|
label: str = Field(description="Human readable label of the part")
|
||||||
label: str
|
typeGroup: str = Field(description="Logical type group: text, table, structure, binary, ...")
|
||||||
typeGroup: str
|
mimeType: str = Field(description="MIME type of the part payload")
|
||||||
mimeType: str
|
data: str = Field(default="", description="Primary data payload, often extracted text")
|
||||||
data: str
|
metadata: Dict[str, Any] = Field(default_factory=dict, description="Arbitrary metadata for the part")
|
||||||
metadata: Dict[str, Any] = field(default_factory=dict)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
class ExtractedContent(BaseModel):
|
||||||
class ExtractedContent:
|
id: str = Field(description="Extraction id or source document id")
|
||||||
id: str
|
parts: List[ContentPart] = Field(default_factory=list, description="List of extracted parts")
|
||||||
parts: List[ContentPart]
|
summary: Optional[Dict[str, Any]] = Field(default=None, description="Optional extraction summary")
|
||||||
summary: Optional[Dict[str, Any]] = None
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -55,11 +55,11 @@ class Services:
|
||||||
|
|
||||||
# Initialize service packages
|
# Initialize service packages
|
||||||
|
|
||||||
from .serviceDocument.mainServiceDocumentExtraction import DocumentExtractionService
|
from .serviceExtraction.mainServiceExtraction import ExtractionService
|
||||||
self.documentExtraction = PublicService(DocumentExtractionService(self))
|
self.extraction = PublicService(ExtractionService(self))
|
||||||
|
|
||||||
from .serviceDocument.mainServiceDocumentGeneration import DocumentGenerationService
|
from .serviceGeneration.mainServiceGeneration import GenerationService
|
||||||
self.documentGeneration = PublicService(DocumentGenerationService(self))
|
self.generation = PublicService(GenerationService(self))
|
||||||
|
|
||||||
from .serviceNeutralization.mainServiceNeutralization import NeutralizationService
|
from .serviceNeutralization.mainServiceNeutralization import NeutralizationService
|
||||||
self.neutralization = PublicService(NeutralizationService(self))
|
self.neutralization = PublicService(NeutralizationService(self))
|
||||||
|
|
@ -76,14 +76,9 @@ class Services:
|
||||||
from .serviceWorkflow.mainServiceWorkflow import WorkflowService
|
from .serviceWorkflow.mainServiceWorkflow import WorkflowService
|
||||||
self.workflow = PublicService(WorkflowService(self))
|
self.workflow = PublicService(WorkflowService(self))
|
||||||
|
|
||||||
from .serviceWeb.mainServiceWeb import WebService
|
|
||||||
self.web = PublicService(WebService(self))
|
|
||||||
|
|
||||||
from .serviceUtils.mainServiceUtils import UtilsService
|
from .serviceUtils.mainServiceUtils import UtilsService
|
||||||
self.utils = PublicService(UtilsService(self))
|
self.utils = PublicService(UtilsService(self))
|
||||||
|
|
||||||
async def extractContentFromDocument(self, prompt, document):
|
|
||||||
return await self.services.documentExtraction.extractContentFromDocument(prompt, document)
|
|
||||||
|
|
||||||
def getInterface(user: User, workflow: ChatWorkflow) -> Services:
|
def getInterface(user: User, workflow: ChatWorkflow) -> Services:
|
||||||
return Services(user, workflow)
|
return Services(user, workflow)
|
||||||
|
|
|
||||||
|
|
@ -196,7 +196,7 @@ class AiService:
|
||||||
processedContents: List[str] = []
|
processedContents: List[str] = []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
extractionResult = self.extractionService.extractDocuments(documentList, extractionOptions)
|
extractionResult = self.extractionService.extractContent(documentList, extractionOptions)
|
||||||
|
|
||||||
def _partsToText(parts) -> str:
|
def _partsToText(parts) -> str:
|
||||||
lines: List[str] = []
|
lines: List[str] = []
|
||||||
|
|
@ -205,7 +205,7 @@ class AiService:
|
||||||
lines.append(p.data)
|
lines.append(p.data)
|
||||||
return "\n\n".join(lines)
|
return "\n\n".join(lines)
|
||||||
|
|
||||||
if processIndividually and isinstance(extractionResult, list):
|
if isinstance(extractionResult, list):
|
||||||
for i, ec in enumerate(extractionResult):
|
for i, ec in enumerate(extractionResult):
|
||||||
try:
|
try:
|
||||||
contentText = _partsToText(ec.parts)
|
contentText = _partsToText(ec.parts)
|
||||||
|
|
@ -216,9 +216,8 @@ class AiService:
|
||||||
logger.warning(f"Error aggregating extracted content: {str(e)}")
|
logger.warning(f"Error aggregating extracted content: {str(e)}")
|
||||||
processedContents.append("[Error aggregating content]")
|
processedContents.append("[Error aggregating content]")
|
||||||
else:
|
else:
|
||||||
# pooled mode returns dict
|
# Fallback: no content
|
||||||
parts = extractionResult.get("parts", []) if isinstance(extractionResult, dict) else []
|
contentText = ""
|
||||||
contentText = _partsToText(parts)
|
|
||||||
if compressDocuments and len(contentText.encode("utf-8")) > 10000:
|
if compressDocuments and len(contentText.encode("utf-8")) > 10000:
|
||||||
contentText = await self._compressContent(contentText, 10000, "document")
|
contentText = await self._compressContent(contentText, 10000, "document")
|
||||||
processedContents.append(contentText)
|
processedContents.append(contentText)
|
||||||
|
|
@ -359,7 +358,7 @@ class AiService:
|
||||||
"mimeType": d.mimeType
|
"mimeType": d.mimeType
|
||||||
} for d in documents]
|
} for d in documents]
|
||||||
|
|
||||||
extracted_content = await self.extractionService.extractDocuments(
|
extracted_content = await self.extractionService.extractContent(
|
||||||
documentList=documentList,
|
documentList=documentList,
|
||||||
options={
|
options={
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
|
|
@ -371,8 +370,15 @@ class AiService:
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get text content from extracted parts using typeGroup-aware processing
|
# Build context from list of ExtractedContent
|
||||||
context = self._extractTextFromContentParts(extracted_content)
|
if isinstance(extracted_content, list):
|
||||||
|
context = "\n\n---\n\n".join([
|
||||||
|
"\n\n".join([
|
||||||
|
p.data for p in ec.parts if p.typeGroup in ["text", "table", "structure"] and p.data
|
||||||
|
]) for ec in extracted_content
|
||||||
|
])
|
||||||
|
else:
|
||||||
|
context = ""
|
||||||
|
|
||||||
# Check size and reduce if needed
|
# Check size and reduce if needed
|
||||||
full_prompt = prompt + "\n\n" + context if context else prompt
|
full_prompt = prompt + "\n\n" + context if context else prompt
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,7 +1,7 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from ..types import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..subRegistry import Chunker
|
from ..subRegistry import Chunker
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
from ..types import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..subRegistry import Chunker
|
from ..subRegistry import Chunker
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
from ..types import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..subRegistry import Chunker
|
from ..subRegistry import Chunker
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional, Union
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from .subRegistry import ExtractorRegistry, ChunkerRegistry
|
from .subRegistry import ExtractorRegistry, ChunkerRegistry
|
||||||
|
|
@ -7,51 +7,50 @@ from modules.datamodels.datamodelExtraction import ExtractedContent, ContentPart
|
||||||
|
|
||||||
|
|
||||||
class ExtractionService:
|
class ExtractionService:
|
||||||
def __init__(self):
|
def __init__(self, services: Optional[Any] = None):
|
||||||
|
self.services = services
|
||||||
self._extractorRegistry = ExtractorRegistry()
|
self._extractorRegistry = ExtractorRegistry()
|
||||||
self._chunkerRegistry = ChunkerRegistry()
|
self._chunkerRegistry = ChunkerRegistry()
|
||||||
|
|
||||||
def extractDocuments(self, documentList: List[Dict[str, Any]], options: Dict[str, Any]) -> Any:
|
def extractContent(self, documentList: List[Dict[str, Any]], options: Dict[str, Any]) -> List[ExtractedContent]:
|
||||||
processIndividually = options.get("processDocumentsIndividually", True)
|
results: List[ExtractedContent] = []
|
||||||
|
for doc in documentList:
|
||||||
|
ec = runExtraction(
|
||||||
|
extractorRegistry=self._extractorRegistry,
|
||||||
|
chunkerRegistry=self._chunkerRegistry,
|
||||||
|
documentBytes=doc.get("bytes"),
|
||||||
|
fileName=doc.get("fileName"),
|
||||||
|
mimeType=doc.get("mimeType"),
|
||||||
|
options=options
|
||||||
|
)
|
||||||
|
# Attach document id to parts if missing
|
||||||
|
for p in ec.parts:
|
||||||
|
if "documentId" not in p.metadata:
|
||||||
|
p.metadata["documentId"] = doc.get("id") or str(uuid.uuid4())
|
||||||
|
ec = applyAiIfRequested(ec, options)
|
||||||
|
results.append(ec)
|
||||||
|
return results
|
||||||
|
|
||||||
if processIndividually:
|
async def extractContentFromDocument(self, prompt: str, documents: List[Dict[str, Any]], options: Optional[Dict[str, Any]] = None) -> List[ExtractedContent]:
|
||||||
results: List[ExtractedContent] = []
|
"""
|
||||||
for doc in documentList:
|
Batch extract content from multiple documents.
|
||||||
ec = runExtraction(
|
|
||||||
extractorRegistry=self._extractorRegistry,
|
|
||||||
chunkerRegistry=self._chunkerRegistry,
|
|
||||||
documentBytes=doc.get("bytes"),
|
|
||||||
fileName=doc.get("fileName"),
|
|
||||||
mimeType=doc.get("mimeType"),
|
|
||||||
options=options
|
|
||||||
)
|
|
||||||
ec = applyAiIfRequested(ec, options)
|
|
||||||
results.append(ec)
|
|
||||||
return results
|
|
||||||
else:
|
|
||||||
allParts: List[ContentPart] = []
|
|
||||||
for doc in documentList:
|
|
||||||
ec = runExtraction(
|
|
||||||
extractorRegistry=self._extractorRegistry,
|
|
||||||
chunkerRegistry=self._chunkerRegistry,
|
|
||||||
documentBytes=doc.get("bytes"),
|
|
||||||
fileName=doc.get("fileName"),
|
|
||||||
mimeType=doc.get("mimeType"),
|
|
||||||
options=options
|
|
||||||
)
|
|
||||||
for p in ec.parts:
|
|
||||||
if "documentId" not in p.metadata:
|
|
||||||
p.metadata["documentId"] = doc.get("id") or str(uuid.uuid4())
|
|
||||||
allParts.extend(ec.parts)
|
|
||||||
|
|
||||||
pooled = poolAndLimit(allParts, self._chunkerRegistry, options)
|
Args:
|
||||||
# In pooled mode we return a dict containing pooled parts and an optional AI output
|
prompt: Instructional prompt for optional AI post-processing/selection.
|
||||||
pooledResult: Dict[str, Any] = {
|
documents: List of dicts with keys: id, bytes, fileName, mimeType.
|
||||||
"parts": pooled,
|
options: Optional extraction options. "ai" config may be provided.
|
||||||
"summary": {"documents": len(documentList)}
|
|
||||||
}
|
Returns:
|
||||||
aiOut = applyAiIfRequested(ExtractedContent(id=str(uuid.uuid4()), parts=pooled, summary=None), options)
|
List[ExtractedContent]: one per input document in order.
|
||||||
pooledResult["ai"] = aiOut.summary if isinstance(aiOut, ExtractedContent) else aiOut
|
"""
|
||||||
return pooledResult
|
# Build options safely and inject prompt for downstream AI selection if desired
|
||||||
|
effectiveOptions: Dict[str, Any] = options.copy() if options else {}
|
||||||
|
aiCfg = effectiveOptions.get("ai") or {}
|
||||||
|
if prompt:
|
||||||
|
aiCfg["prompt"] = prompt
|
||||||
|
effectiveOptions["ai"] = aiCfg
|
||||||
|
|
||||||
|
# Delegate to existing synchronous pipeline
|
||||||
|
return self.extractContent(documents, effectiveOptions)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
from .types import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
|
|
||||||
|
|
||||||
class Extractor:
|
class Extractor:
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,11 @@
|
||||||
import logging
|
import logging
|
||||||
|
import uuid
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
import re
|
import re
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||||
from modules.services.serviceDocument.subDocumentUtility import (
|
from modules.datamodels.datamodelChat import ChatDocument
|
||||||
|
from modules.services.serviceGeneration.subDocumentUtility import (
|
||||||
getFileExtension,
|
getFileExtension,
|
||||||
getMimeTypeFromExtension,
|
getMimeTypeFromExtension,
|
||||||
detectMimeTypeFromContent,
|
detectMimeTypeFromContent,
|
||||||
|
|
@ -13,9 +15,13 @@ from modules.services.serviceDocument.subDocumentUtility import (
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class DocumentGenerationService:
|
class GenerationService:
|
||||||
def __init__(self, service):
|
def __init__(self, serviceCenter=None):
|
||||||
self.service = service
|
# Directly use interfaces from the provided service center (no self.service calls)
|
||||||
|
self.serviceCenter = serviceCenter
|
||||||
|
self.interfaceDbComponent = getattr(serviceCenter, 'interfaceDbComponent', None) if serviceCenter else None
|
||||||
|
self.interfaceDbChat = getattr(serviceCenter, 'interfaceDbChat', None) if serviceCenter else None
|
||||||
|
self.workflow = getattr(serviceCenter, 'workflow', None) if serviceCenter else None
|
||||||
|
|
||||||
def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]:
|
def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
|
|
@ -53,7 +59,8 @@ class DocumentGenerationService:
|
||||||
mime_type = doc.mimeType
|
mime_type = doc.mimeType
|
||||||
if mime_type == "application/octet-stream":
|
if mime_type == "application/octet-stream":
|
||||||
content = doc.documentData
|
content = doc.documentData
|
||||||
mime_type = detectMimeTypeFromContent(content, doc.documentName, self.service)
|
# Detect MIME without relying on a service center
|
||||||
|
mime_type = detectMimeTypeFromContent(content, doc.documentName)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'fileName': doc.documentName,
|
'fileName': doc.documentName,
|
||||||
|
|
@ -98,8 +105,8 @@ class DocumentGenerationService:
|
||||||
|
|
||||||
logger.info(f"Document {document_name} has content: {len(content)} characters")
|
logger.info(f"Document {document_name} has content: {len(content)} characters")
|
||||||
|
|
||||||
# Create document with file in one step
|
# Create document with file in one step using interfaces directly
|
||||||
document = self.service.createDocument(
|
document = self._createDocument(
|
||||||
fileName=document_name,
|
fileName=document_name,
|
||||||
mimeType=mime_type,
|
mimeType=mime_type,
|
||||||
content=content,
|
content=content,
|
||||||
|
|
@ -126,9 +133,9 @@ class DocumentGenerationService:
|
||||||
def _setDocumentWorkflowContext(self, document, action, workflow):
|
def _setDocumentWorkflowContext(self, document, action, workflow):
|
||||||
"""Set workflow context on a document for proper routing and labeling"""
|
"""Set workflow context on a document for proper routing and labeling"""
|
||||||
try:
|
try:
|
||||||
# Get current workflow context from service center
|
# Get current workflow context directly from workflow object
|
||||||
workflow_context = self.service.getWorkflowContext()
|
workflow_context = self._getWorkflowContext(workflow)
|
||||||
workflow_stats = self.service.getWorkflowStats()
|
workflow_stats = self._getWorkflowStats(workflow)
|
||||||
|
|
||||||
current_round = workflow_context.get('currentRound', 0)
|
current_round = workflow_context.get('currentRound', 0)
|
||||||
current_task = workflow_context.get('currentTask', 0)
|
current_task = workflow_context.get('currentTask', 0)
|
||||||
|
|
@ -155,3 +162,99 @@ class DocumentGenerationService:
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Could not set workflow context on document: {str(e)}")
|
logger.warning(f"Could not set workflow context on document: {str(e)}")
|
||||||
|
|
||||||
|
def _createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: str = None) -> Optional[ChatDocument]:
|
||||||
|
"""Create file and ChatDocument using interfaces without service indirection."""
|
||||||
|
try:
|
||||||
|
if not self.interfaceDbComponent:
|
||||||
|
logger.error("Component interface not available for document creation")
|
||||||
|
return None
|
||||||
|
# Convert content to bytes
|
||||||
|
if base64encoded:
|
||||||
|
import base64
|
||||||
|
content_bytes = base64.b64decode(content)
|
||||||
|
else:
|
||||||
|
content_bytes = content.encode('utf-8')
|
||||||
|
# Create file and store data
|
||||||
|
file_item = self.interfaceDbComponent.createFile(
|
||||||
|
name=fileName,
|
||||||
|
mimeType=mimeType,
|
||||||
|
content=content_bytes
|
||||||
|
)
|
||||||
|
self.interfaceDbComponent.createFileData(file_item.id, content_bytes)
|
||||||
|
# Collect file info
|
||||||
|
file_info = self._getFileInfo(file_item.id)
|
||||||
|
if not file_info:
|
||||||
|
logger.error(f"Could not get file info for fileId: {file_item.id}")
|
||||||
|
return None
|
||||||
|
# Build ChatDocument
|
||||||
|
document = ChatDocument(
|
||||||
|
id=str(uuid.uuid4()),
|
||||||
|
messageId=messageId or "",
|
||||||
|
fileId=file_item.id,
|
||||||
|
fileName=file_info.get("fileName", fileName),
|
||||||
|
fileSize=file_info.get("size", 0),
|
||||||
|
mimeType=file_info.get("mimeType", mimeType)
|
||||||
|
)
|
||||||
|
# Ensure document can access component interface later
|
||||||
|
if hasattr(document, 'setComponentInterface') and self.interfaceDbComponent:
|
||||||
|
try:
|
||||||
|
document.setComponentInterface(self.interfaceDbComponent)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return document
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error creating document: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _getFileInfo(self, fileId: str) -> Optional[Dict[str, Any]]:
|
||||||
|
try:
|
||||||
|
if not self.interfaceDbComponent:
|
||||||
|
return None
|
||||||
|
file_item = self.interfaceDbComponent.getFile(fileId)
|
||||||
|
if file_item:
|
||||||
|
return {
|
||||||
|
"id": file_item.id,
|
||||||
|
"fileName": file_item.fileName,
|
||||||
|
"size": file_item.fileSize,
|
||||||
|
"mimeType": file_item.mimeType,
|
||||||
|
"fileHash": getattr(file_item, 'fileHash', None),
|
||||||
|
"creationDate": getattr(file_item, 'creationDate', None)
|
||||||
|
}
|
||||||
|
return None
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting file info for {fileId}: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _getWorkflowContext(self, workflow) -> Dict[str, int]:
|
||||||
|
try:
|
||||||
|
return {
|
||||||
|
'currentRound': getattr(workflow, 'currentRound', 0),
|
||||||
|
'currentTask': getattr(workflow, 'currentTask', 0),
|
||||||
|
'currentAction': getattr(workflow, 'currentAction', 0)
|
||||||
|
}
|
||||||
|
except Exception:
|
||||||
|
return {'currentRound': 0, 'currentTask': 0, 'currentAction': 0}
|
||||||
|
|
||||||
|
def _getWorkflowStats(self, workflow) -> Dict[str, Any]:
|
||||||
|
try:
|
||||||
|
context = self._getWorkflowContext(workflow)
|
||||||
|
return {
|
||||||
|
'currentRound': context['currentRound'],
|
||||||
|
'currentTask': context['currentTask'],
|
||||||
|
'currentAction': context['currentAction'],
|
||||||
|
'totalTasks': getattr(workflow, 'totalTasks', 0),
|
||||||
|
'totalActions': getattr(workflow, 'totalActions', 0),
|
||||||
|
'workflowStatus': getattr(workflow, 'status', 'unknown'),
|
||||||
|
'workflowId': getattr(workflow, 'id', 'unknown')
|
||||||
|
}
|
||||||
|
except Exception:
|
||||||
|
return {
|
||||||
|
'currentRound': 0,
|
||||||
|
'currentTask': 0,
|
||||||
|
'currentAction': 0,
|
||||||
|
'totalTasks': 0,
|
||||||
|
'totalActions': 0,
|
||||||
|
'workflowStatus': 'unknown',
|
||||||
|
'workflowId': 'unknown'
|
||||||
|
}
|
||||||
|
|
@ -4,8 +4,8 @@ from typing import Dict, Any, List, Optional
|
||||||
from modules.datamodels.datamodelUam import User, UserConnection
|
from modules.datamodels.datamodelUam import User, UserConnection
|
||||||
from modules.datamodels.datamodelChat import ChatDocument, ChatMessage
|
from modules.datamodels.datamodelChat import ChatDocument, ChatMessage
|
||||||
from modules.datamodels.datamodelChat import ExtractedContent
|
from modules.datamodels.datamodelChat import ExtractedContent
|
||||||
from modules.services.serviceDocument.mainServiceDocumentExtraction import DocumentExtractionService
|
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
||||||
from modules.services.serviceDocument.subDocumentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
|
from modules.services.serviceGeneration.subDocumentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||||
from modules.services.serviceAi.mainServiceAi import AiService
|
from modules.services.serviceAi.mainServiceAi import AiService
|
||||||
from modules.security.tokenManager import TokenManager
|
from modules.security.tokenManager import TokenManager
|
||||||
|
|
@ -312,80 +312,6 @@ class WorkflowService:
|
||||||
def getFileData(self, fileId: str) -> bytes:
|
def getFileData(self, fileId: str) -> bytes:
|
||||||
"""Get file data by ID"""
|
"""Get file data by ID"""
|
||||||
return self.interfaceDbComponent.getFileData(fileId)
|
return self.interfaceDbComponent.getFileData(fileId)
|
||||||
|
|
||||||
async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
|
|
||||||
"""Extract content from ChatDocument using prompt"""
|
|
||||||
try:
|
|
||||||
# ChatDocument is just a reference, so we need to get file data using fileId
|
|
||||||
if not hasattr(document, 'fileId') or not document.fileId:
|
|
||||||
logger.error(f"Document {document.id} has no fileId")
|
|
||||||
raise ValueError("Document has no fileId")
|
|
||||||
|
|
||||||
# Get file data from service center using document's fileId
|
|
||||||
fileData = self.getFileData(document.fileId)
|
|
||||||
if not fileData:
|
|
||||||
logger.error(f"No file data found for fileId: {document.fileId}")
|
|
||||||
raise ValueError("No file data found for document")
|
|
||||||
|
|
||||||
# Get fileName and mime type from document properties
|
|
||||||
try:
|
|
||||||
fileName = document.fileName
|
|
||||||
mimeType = document.mimeType
|
|
||||||
except Exception as e:
|
|
||||||
# Try to diagnose and recover the issue
|
|
||||||
diagnosis = self._diagnoseDocumentAccess(document)
|
|
||||||
logger.error(f"Critical error: Cannot access document properties for document {document.id}. Diagnosis: {diagnosis}")
|
|
||||||
|
|
||||||
# Attempt recovery
|
|
||||||
if self._recoverDocumentAccess(document):
|
|
||||||
try:
|
|
||||||
fileName = document.fileName
|
|
||||||
mimeType = document.mimeType
|
|
||||||
logger.info(f"Document access recovered for {document.id} - proceeding with AI extraction")
|
|
||||||
except Exception as recovery_error:
|
|
||||||
logger.error(f"Recovery failed for document {document.id}: {str(recovery_error)}")
|
|
||||||
raise RuntimeError(f"Document {document.id} properties are permanently inaccessible after recovery attempt - cannot proceed with AI extraction: {str(recovery_error)}")
|
|
||||||
else:
|
|
||||||
# Recovery failed - don't continue with invalid data
|
|
||||||
raise RuntimeError(f"Document {document.id} properties are inaccessible and recovery failed. Diagnosis: {diagnosis}")
|
|
||||||
|
|
||||||
# Process with DocumentExtractionService directly (no circular dependency)
|
|
||||||
docService = DocumentExtractionService(None) # Pass None to avoid circular dependency
|
|
||||||
content_items = await docService.processFileData(
|
|
||||||
fileData=fileData,
|
|
||||||
fileName=fileName,
|
|
||||||
mimeType=mimeType,
|
|
||||||
base64Encoded=False,
|
|
||||||
prompt=prompt,
|
|
||||||
enableAI=True
|
|
||||||
)
|
|
||||||
|
|
||||||
# Convert ContentItem list to ExtractedContent
|
|
||||||
contents = []
|
|
||||||
for item in content_items:
|
|
||||||
contents.append({
|
|
||||||
'label': item.label,
|
|
||||||
'data': item.data,
|
|
||||||
'metadata': {
|
|
||||||
'mimeType': item.metadata.mimeType if hasattr(item.metadata, 'mimeType') else mimeType,
|
|
||||||
'size': item.metadata.size if hasattr(item.metadata, 'size') else len(fileData),
|
|
||||||
'base64Encoded': item.metadata.base64Encoded if hasattr(item.metadata, 'base64Encoded') else False
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
extractedContent = ExtractedContent(
|
|
||||||
id=document.id,
|
|
||||||
contents=contents
|
|
||||||
)
|
|
||||||
|
|
||||||
# Note: ExtractedContent model only has 'id' and 'contents' fields
|
|
||||||
# No need to set objectId or objectType as they don't exist in the model
|
|
||||||
|
|
||||||
return extractedContent
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error extracting from document: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
|
def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
|
|
@ -456,43 +382,6 @@ class WorkflowService:
|
||||||
logger.error(f"Error during document access recovery for {document.id}: {str(e)}")
|
logger.error(f"Error during document access recovery for {document.id}: {str(e)}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: str = None) -> ChatDocument:
|
|
||||||
"""Create document with file in one step - handles file creation internally"""
|
|
||||||
# Convert content to bytes based on base64 flag
|
|
||||||
if base64encoded:
|
|
||||||
import base64
|
|
||||||
content_bytes = base64.b64decode(content)
|
|
||||||
else:
|
|
||||||
content_bytes = content.encode('utf-8')
|
|
||||||
|
|
||||||
# Create the file (hash and size are computed inside interfaceDbComponent)
|
|
||||||
file_item = self.interfaceDbComponent.createFile(
|
|
||||||
name=fileName,
|
|
||||||
mimeType=mimeType,
|
|
||||||
content=content_bytes
|
|
||||||
)
|
|
||||||
|
|
||||||
# Then store the file data
|
|
||||||
self.interfaceDbComponent.createFileData(file_item.id, content_bytes)
|
|
||||||
|
|
||||||
# Get file info to copy attributes
|
|
||||||
file_info = self.getFileInfo(file_item.id)
|
|
||||||
if not file_info:
|
|
||||||
logger.error(f"Could not get file info for fileId: {file_item.id}")
|
|
||||||
raise ValueError(f"File info not found for fileId: {file_item.id}")
|
|
||||||
|
|
||||||
# Create document with all file attributes copied
|
|
||||||
document = ChatDocument(
|
|
||||||
id=str(uuid.uuid4()),
|
|
||||||
messageId=messageId or "", # Use provided messageId or empty string as fallback
|
|
||||||
fileId=file_item.id,
|
|
||||||
fileName=file_info.get("fileName", fileName),
|
|
||||||
fileSize=file_info.get("size", 0),
|
|
||||||
mimeType=file_info.get("mimeType", mimeType)
|
|
||||||
)
|
|
||||||
|
|
||||||
return document
|
|
||||||
|
|
||||||
def calculateObjectSize(self, obj: Any) -> int:
|
def calculateObjectSize(self, obj: Any) -> int:
|
||||||
"""
|
"""
|
||||||
Calculate the size of an object in bytes.
|
Calculate the size of an object in bytes.
|
||||||
|
|
|
||||||
226
modules/services/test_all_services.py
Normal file
226
modules/services/test_all_services.py
Normal file
|
|
@ -0,0 +1,226 @@
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
|
||||||
|
# Ensure relative imports work when running directly
|
||||||
|
CURRENT_DIR = os.path.dirname(__file__)
|
||||||
|
GATEWAY_DIR = os.path.dirname(os.path.dirname(CURRENT_DIR))
|
||||||
|
if GATEWAY_DIR not in sys.path:
|
||||||
|
sys.path.append(GATEWAY_DIR)
|
||||||
|
|
||||||
|
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
||||||
|
from modules.services.serviceGeneration.mainServiceGeneration import DocumentGenerationService
|
||||||
|
from modules.datamodels.datamodelWorkflow import ActionResult, ActionDocument
|
||||||
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
|
||||||
|
from modules.services.serviceAi.mainServiceAi import AiService
|
||||||
|
|
||||||
|
|
||||||
|
TESTDATA_DIR = os.path.join(GATEWAY_DIR, "testdata")
|
||||||
|
|
||||||
|
|
||||||
|
def _read_test_files() -> List[Dict[str, Any]]:
|
||||||
|
files = []
|
||||||
|
for name in os.listdir(TESTDATA_DIR):
|
||||||
|
path = os.path.join(TESTDATA_DIR, name)
|
||||||
|
if not os.path.isfile(path):
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
with open(path, "rb") as f:
|
||||||
|
data = f.read()
|
||||||
|
mime = _guess_mime(name)
|
||||||
|
files.append({
|
||||||
|
"id": name,
|
||||||
|
"bytes": data,
|
||||||
|
"fileName": name,
|
||||||
|
"mimeType": mime,
|
||||||
|
})
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return files
|
||||||
|
|
||||||
|
|
||||||
|
def _guess_mime(name: str) -> str:
|
||||||
|
lower = name.lower()
|
||||||
|
if lower.endswith(".pdf"):
|
||||||
|
return "application/pdf"
|
||||||
|
if lower.endswith(".xlsx"):
|
||||||
|
return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||||
|
if lower.endswith(".jpg") or lower.endswith(".jpeg"):
|
||||||
|
return "image/jpeg"
|
||||||
|
if lower.endswith(".png"):
|
||||||
|
return "image/png"
|
||||||
|
return "application/octet-stream"
|
||||||
|
|
||||||
|
|
||||||
|
def run_extraction_1000_bytes() -> None:
|
||||||
|
svc = ExtractionService()
|
||||||
|
docs = _read_test_files()
|
||||||
|
options = {
|
||||||
|
# cap total pooled size per document set
|
||||||
|
"maxSize": 1000,
|
||||||
|
# allow chunking to respect the cap across parts
|
||||||
|
"chunkAllowed": True,
|
||||||
|
# chunk sizes for different content types to help fit under the cap
|
||||||
|
"textChunkSize": 500,
|
||||||
|
"tableChunkSize": 500,
|
||||||
|
"structureChunkSize": 500,
|
||||||
|
# simple merge strategy if supported
|
||||||
|
"mergeStrategy": {},
|
||||||
|
}
|
||||||
|
results = svc.extractContent(docs, options)
|
||||||
|
print("[extraction] documents:", len(docs), "results:", len(results))
|
||||||
|
for i, ec in enumerate(results):
|
||||||
|
total = sum(int(p.metadata.get("size", 0) or 0) for p in ec.parts)
|
||||||
|
print(f" - doc[{i}] parts={len(ec.parts)} pooledBytes={total}")
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Drive the three service smoke tests: extraction, generation, AI calls."""
    # Extraction leg is synchronous; the remaining legs are coroutines.
    print("=== serviceExtraction: compress to 1000 bytes ===")
    run_extraction_1000_bytes()

    async_legs = (
        ("\n=== serviceGeneration: create ActionResult and write output to testdata ===", run_generation_write_file),
        ("\n=== serviceAi: planning call + image + pdf extraction ===", run_ai_tests),
    )
    for banner, leg in async_legs:
        print(banner)
        await leg()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Entry point: run the full async smoke-test suite.
    # NOTE(review): in this view the guard appears *before* the definitions of
    # run_generation_write_file/run_ai_tests; if the real module executes in
    # this order, main() would raise NameError when it reaches them — confirm
    # the guard actually sits at the end of the file.
    asyncio.run(main())
|
||||||
|
|
||||||
|
async def run_generation_write_file() -> None:
    """Smoke-test DocumentGenerationService end to end.

    Builds an ActionResult carrying one small text document, hands it to the
    generation service through minimal in-memory stubs of the component /
    service-center interfaces, and lets the service persist the generated file
    into TESTDATA_DIR (via the stub's createFileData).
    """

    # Minimal stubs for interfaces expected by DocumentGenerationService
    class _FileItem:
        # Lightweight record mirroring the file attributes the service reads.
        def __init__(self, file_id: str, file_name: str, mime_type: str, content: bytes):
            self.id = file_id
            self.fileName = file_name
            self.mimeType = mime_type
            self.fileSize = len(content)

    class _ComponentInterface:
        # In-memory file store standing in for the DB component interface.
        def __init__(self):
            self._files = {}

        def createFile(self, name: str, mimeType: str, content: bytes):
            # Sequential ids keep the stub deterministic within a single run.
            fid = f"test_{len(self._files)+1}"
            item = _FileItem(fid, name, mimeType, content)
            self._files[fid] = item
            return item

        def createFileData(self, fileId: str, content: bytes):
            # Persist into testdata directory as requested
            item = self._files[fileId]
            out_path = os.path.join(TESTDATA_DIR, f"output_{fileId}_{item.fileName}")
            with open(out_path, "wb") as f:
                f.write(content)

        def getFile(self, fileId: str):
            return self._files.get(fileId)

    class _ServiceCenter:
        # Bare service-center shim: only the attributes the generator touches.
        def __init__(self, comp):
            self.interfaceDbComponent = comp
            self.interfaceDbChat = None
            # Anonymous workflow object carrying the counters/status fields the
            # generation service is expected to read.
            self.workflow = type("_Wf", (), {"id": "wf_test", "currentRound": 1, "currentTask": 1, "currentAction": 1, "status": "running", "totalTasks": 1, "totalActions": 1})()

    component = _ComponentInterface()
    center = _ServiceCenter(component)
    gen = DocumentGenerationService(center)

    # Build a fake action and ActionResult with a small text document
    class _Action:
        def __init__(self):
            self.id = "action_test"
            self.execMethod = "document"
            self.execAction = "generate"
            self.execParameters = {}
            self.execResultLabel = "round1_task1_action1_results"
    action = _Action()

    content = "This is a generated test file from serviceGeneration test."
    action_doc = ActionDocument(documentName="test_generated.txt", documentData=content, mimeType="text/plain")
    action_result = ActionResult(success=True, documents=[action_doc])

    # NOTE(review): createDocumentsFromActionResult is invoked synchronously even
    # though this coroutine awaits nothing — confirm the service method is not
    # itself a coroutine.
    docs = gen.createDocumentsFromActionResult(action_result, action, center.workflow, message_id="msg_test")
    print("[generation] created documents:", len(docs))
|
||||||
|
|
||||||
|
|
||||||
|
async def run_ai_tests() -> None:
    """Smoke-test AiService with three legs: a planning text call, an image
    analysis call on a fixture JPEG, and a document-context call on a fixture
    PDF.

    The image and PDF legs are skipped (with a message) when the fixture files
    are missing, and any per-leg exception is printed rather than propagated so
    the remaining legs still run.
    """
    # Create AiService instance (uses internal default model registry; no external creds required for this test)
    ai = await AiService.create()

    # Planning AI call (like in handlingTasks.generateTaskPlan)
    plan_options = AiCallOptions(
        operationType=OperationType.GENERATE_PLAN,
        priority=Priority.QUALITY,
        compressPrompt=False,
        compressContext=False,
        processingMode=ProcessingMode.DETAILED,
        maxCost=0.05,
        maxProcessingTime=10,
    )
    plan_prompt = """
You are a planning assistant. Return a compact JSON with fields: tasks:[{id, objective, success_criteria:[]}], languageUserDetected:"en".
Create exactly one simple task id:"task_1" objective:"Test planning" success_criteria:["done"].
""".strip()
    plan_resp = await ai.callAi(prompt=plan_prompt, placeholders=None, options=plan_options)
    print("[ai] planning response length:", len(plan_resp) if plan_resp else 0)

    # Image content extraction prompt using test JPEG
    img_path = os.path.join(TESTDATA_DIR, "00Untitled.jpg")
    img_resp = None
    if os.path.exists(img_path):
        try:
            with open(img_path, "rb") as f:
                img_bytes = f.read()
            img_options = AiCallOptions(
                operationType=OperationType.ANALYSE_CONTENT,
                priority=Priority.BALANCED,
                compressPrompt=True,
                compressContext=False,
                processingMode=ProcessingMode.ADVANCED,
                maxCost=0.02,
                maxProcessingTime=10,
            )
            img_resp = await ai.callAiImage(
                prompt="Describe the content of this image succinctly.",
                imageData=img_bytes,
                mimeType="image/jpeg",
                options=img_options,
            )
            print("[ai] image analysis response length:", len(img_resp) if img_resp else 0)
        except Exception as e:
            print("[ai] image analysis error:", str(e))
    else:
        print("[ai] image test file not found; skipping")

    # PDF extraction prompt: emulate text call with document context built via ExtractionService
    pdf_path = os.path.join(TESTDATA_DIR, "diagramm_komponenten.pdf")
    if os.path.exists(pdf_path):
        try:
            # Build a minimal ChatDocument-like shim that AiService._callAiText expects via extraction
            class _Doc:
                def __init__(self, file_path: str, mime: str):
                    self.id = "doc_pdf"
                    self.fileName = os.path.basename(file_path)
                    self.mimeType = mime
                    # Load the bytes eagerly; presumably the AI service reads
                    # .fileData directly — verify against AiService internals.
                    with open(file_path, "rb") as f:
                        self.fileData = f.read()
            pdf_doc = _Doc(pdf_path, "application/pdf")

            pdf_options = AiCallOptions(
                operationType=OperationType.ANALYSE_CONTENT,
                priority=Priority.BALANCED,
                compressPrompt=True,
                compressContext=True,
                processingMode=ProcessingMode.ADVANCED,
                # Tiny context cap with chunking on, to exercise compression.
                maxContextBytes=1000,
                chunkAllowed=True,
                maxCost=0.02,
                maxProcessingTime=10,
            )
            pdf_prompt = "Extract key information from the attached PDF."
            pdf_resp = await ai.callAi(prompt=pdf_prompt, documents=[pdf_doc], options=pdf_options)
            print("[ai] pdf extraction response length:", len(pdf_resp) if pdf_resp else 0)
        except Exception as e:
            print("[ai] pdf extraction error:", str(e))
    else:
        print("[ai] pdf test file not found; skipping")
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -76,66 +76,70 @@ class MethodAi(MethodBase):
|
||||||
chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
|
chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
|
||||||
if chatDocuments:
|
if chatDocuments:
|
||||||
context_parts = []
|
context_parts = []
|
||||||
|
# Build batch payload for extraction
|
||||||
|
batch_docs = []
|
||||||
for doc in chatDocuments:
|
for doc in chatDocuments:
|
||||||
file_info = self.services.workflow.getFileInfo(doc.fileId)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Use the document content extraction service with the specific AI prompt context
|
fileBytes = self.services.workflow.getFileData(doc.fileId) if hasattr(doc, 'fileId') else None
|
||||||
# This tells the extraction engine exactly what and how to extract
|
except Exception:
|
||||||
extraction_prompt = f"""
|
fileBytes = None
|
||||||
Extract content from this document for AI processing context.
|
batch_docs.append({
|
||||||
|
"id": getattr(doc, 'id', None),
|
||||||
AI Task: {aiPrompt}
|
"bytes": fileBytes or b"",
|
||||||
Processing Mode: {processingMode}
|
"fileName": getattr(doc, 'fileName', 'unknown'),
|
||||||
Expected Output: {output_extension.upper()} format
|
"mimeType": getattr(doc, 'mimeType', None) or "application/octet-stream"
|
||||||
|
})
|
||||||
Requirements:
|
|
||||||
1. Extract the most relevant text content that would be useful for the AI task
|
|
||||||
2. Focus on content that directly relates to: {aiPrompt}
|
|
||||||
3. Include key information, data, and insights that the AI needs
|
|
||||||
4. Provide clean, readable text without formatting artifacts
|
|
||||||
|
|
||||||
Document: {doc.fileName}
|
|
||||||
"""
|
|
||||||
|
|
||||||
logger.debug(f"Extracting content from {doc.fileName} with task-specific prompt: {extraction_prompt[:100]}...")
|
|
||||||
|
|
||||||
extracted_content = await self.services.documentExtraction.extractContentFromDocument(
|
|
||||||
prompt=extraction_prompt.strip(),
|
|
||||||
document=doc
|
|
||||||
)
|
|
||||||
|
|
||||||
if extracted_content and extracted_content.contents:
|
|
||||||
# Get the first content item's data
|
|
||||||
content = ""
|
|
||||||
for content_item in extracted_content.contents:
|
|
||||||
if hasattr(content_item, 'data') and content_item.data:
|
|
||||||
content += content_item.data + " "
|
|
||||||
|
|
||||||
|
extraction_prompt = (
|
||||||
if content.strip():
|
f"Extract content for AI task context. Task: {aiPrompt}. Mode: {processingMode}."
|
||||||
metadata_info = ""
|
)
|
||||||
if file_info and includeMetadata:
|
try:
|
||||||
metadata_info = f" (Size: {file_info.get('fileSize', 'unknown')}, Type: {file_info.get('mimeType', 'unknown')})"
|
extracted_list = await self.services.extraction.extractContentFromDocuments(
|
||||||
|
prompt=extraction_prompt,
|
||||||
# Adjust context length based on processing mode and AI task relevance
|
documents=batch_docs,
|
||||||
base_length = 5000 if processingMode == "detailed" else 3000 if processingMode == "advanced" else 2000
|
options={"ai": {"enabled": False}, "mergeStrategy": {}}
|
||||||
|
)
|
||||||
# For detailed mode, include more context
|
except Exception:
|
||||||
if processingMode == "detailed":
|
extracted_list = []
|
||||||
context_parts.append(f"Document: {doc.fileName}{metadata_info}\nRelevance to AI Task: This document contains content directly related to '{aiPrompt[:100]}...'\nContent:\n{content[:base_length]}...")
|
|
||||||
else:
|
# Helper to aggregate readable text from parts
|
||||||
context_parts.append(f"Document: {doc.fileName}{metadata_info}\nContent:\n{content[:base_length]}...")
|
def _partsToText(parts) -> str:
|
||||||
else:
|
lines: List[str] = []
|
||||||
context_parts.append(f"Document: {doc.fileName} [No readable text content - binary file]")
|
for p in (parts or []):
|
||||||
|
try:
|
||||||
|
if getattr(p, 'typeGroup', '') in ("text", "table", "structure") and getattr(p, 'data', None):
|
||||||
|
lines.append(p.data)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return "\n\n".join(lines)
|
||||||
|
|
||||||
|
for i, doc in enumerate(chatDocuments):
|
||||||
|
file_info = self.services.workflow.getFileInfo(doc.fileId)
|
||||||
|
content = ""
|
||||||
|
try:
|
||||||
|
ec = extracted_list[i] if i < len(extracted_list) else None
|
||||||
|
if ec:
|
||||||
|
content = _partsToText(getattr(ec, 'parts', []))
|
||||||
|
except Exception:
|
||||||
|
content = ""
|
||||||
|
|
||||||
|
if content.strip():
|
||||||
|
metadata_info = ""
|
||||||
|
if file_info and includeMetadata:
|
||||||
|
metadata_info = f" (Size: {file_info.get('fileSize', 'unknown')}, Type: {file_info.get('mimeType', 'unknown')})"
|
||||||
|
base_length = 5000 if processingMode == "detailed" else 3000 if processingMode == "advanced" else 2000
|
||||||
|
if processingMode == "detailed":
|
||||||
|
context_parts.append(
|
||||||
|
f"Document: {doc.fileName}{metadata_info}\nRelevance to AI Task: This document contains content directly related to '{aiPrompt[:100]}...'\nContent:\n{content[:base_length]}..."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
context_parts.append(f"Document: {doc.fileName} [No readable text content - binary file]")
|
context_parts.append(
|
||||||
|
f"Document: {doc.fileName}{metadata_info}\nContent:\n{content[:base_length]}..."
|
||||||
except Exception as extract_error:
|
)
|
||||||
context_parts.append(f"Document: {doc.fileName} [Could not extract content - binary file]")
|
else:
|
||||||
|
context_parts.append(f"Document: {doc.fileName} [No readable text content - binary file]")
|
||||||
|
|
||||||
if context_parts:
|
if context_parts:
|
||||||
# Add a summary header to help the AI understand the context
|
|
||||||
context_header = f"""
|
context_header = f"""
|
||||||
=== DOCUMENT CONTEXT FOR AI PROCESSING ===
|
=== DOCUMENT CONTEXT FOR AI PROCESSING ===
|
||||||
AI Task: {aiPrompt[:100]}...
|
AI Task: {aiPrompt[:100]}...
|
||||||
|
|
@ -147,7 +151,6 @@ class MethodAi(MethodBase):
|
||||||
Use this information to provide the most accurate and helpful response.
|
Use this information to provide the most accurate and helpful response.
|
||||||
================================================
|
================================================
|
||||||
"""
|
"""
|
||||||
|
|
||||||
context = context_header + "\n\n" + "\n\n".join(context_parts)
|
context = context_header + "\n\n" + "\n\n".join(context_parts)
|
||||||
logger.info(f"Included {len(chatDocuments)} documents in AI context with task-specific extraction")
|
logger.info(f"Included {len(chatDocuments)} documents in AI context with task-specific extraction")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -62,32 +62,36 @@ class MethodDocument(MethodBase):
|
||||||
error="No documents found for the provided reference"
|
error="No documents found for the provided reference"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Extract content from all documents using AI
|
# Batch extract content from all documents at once
|
||||||
all_extracted_content = []
|
all_extracted_content = []
|
||||||
file_infos = []
|
file_infos = []
|
||||||
|
batch_docs = []
|
||||||
for chatDocument in chatDocuments:
|
for chatDocument in chatDocuments:
|
||||||
file_info = self.services.workflow.getFileInfo(chatDocument.fileId)
|
file_info = self.services.workflow.getFileInfo(chatDocument.fileId)
|
||||||
|
if includeMetadata:
|
||||||
|
file_infos.append(file_info)
|
||||||
try:
|
try:
|
||||||
# Use the document content extraction service with the specific AI prompt
|
data = self.services.workflow.getFileData(chatDocument.fileId) if hasattr(chatDocument, 'fileId') else None
|
||||||
# This handles all document types (text, binary, image, etc.) intelligently
|
except Exception:
|
||||||
extracted_content = await self.services.documentExtraction.extractContentFromDocument(
|
data = None
|
||||||
prompt=aiPrompt,
|
batch_docs.append({
|
||||||
document=chatDocument
|
"id": getattr(chatDocument, 'id', None),
|
||||||
)
|
"bytes": data or b"",
|
||||||
|
"fileName": getattr(chatDocument, 'fileName', 'unknown'),
|
||||||
if extracted_content and extracted_content.contents:
|
"mimeType": getattr(chatDocument, 'mimeType', None) or "application/octet-stream"
|
||||||
all_extracted_content.append(extracted_content)
|
})
|
||||||
if includeMetadata:
|
|
||||||
file_infos.append(file_info)
|
try:
|
||||||
logger.info(f"Successfully extracted content from {chatDocument.fileName}")
|
extracted_list = await self.services.extraction.extractContentFromDocuments(
|
||||||
else:
|
prompt=aiPrompt,
|
||||||
logger.warning(f"No content extracted from {chatDocument.fileName}")
|
documents=batch_docs,
|
||||||
|
options={"ai": {"enabled": False}}
|
||||||
except Exception as e:
|
)
|
||||||
logger.error(f"Error extracting content from {chatDocument.fileName}: {str(e)}")
|
except Exception as e:
|
||||||
continue
|
logger.error(f"Batch extraction failed: {str(e)}")
|
||||||
|
extracted_list = []
|
||||||
|
|
||||||
|
all_extracted_content = extracted_list or []
|
||||||
|
|
||||||
if not all_extracted_content:
|
if not all_extracted_content:
|
||||||
return ActionResult.isFailure(
|
return ActionResult.isFailure(
|
||||||
|
|
@ -97,20 +101,24 @@ class MethodDocument(MethodBase):
|
||||||
# Process each document individually with its own format conversion
|
# Process each document individually with its own format conversion
|
||||||
output_documents = []
|
output_documents = []
|
||||||
|
|
||||||
for i, (chatDocument, extracted_content) in enumerate(zip(chatDocuments, all_extracted_content)):
|
for i, chatDocument in enumerate(chatDocuments):
|
||||||
# Extract text content from this document
|
# Extract text content from this document
|
||||||
text_content = ""
|
text_content = ""
|
||||||
if hasattr(extracted_content, 'contents') and extracted_content.contents:
|
try:
|
||||||
# Extract text from ContentItem objects
|
ec = all_extracted_content[i] if i < len(all_extracted_content) else None
|
||||||
text_parts = []
|
if ec and hasattr(ec, 'parts'):
|
||||||
for content_item in extracted_content.contents:
|
text_parts = []
|
||||||
if hasattr(content_item, 'data') and content_item.data:
|
for part in getattr(ec, 'parts', []):
|
||||||
text_parts.append(content_item.data)
|
try:
|
||||||
text_content = "\n".join(text_parts)
|
if getattr(part, 'typeGroup', '') in ("text", "table", "structure") and getattr(part, 'data', None):
|
||||||
elif isinstance(extracted_content, str):
|
text_parts.append(part.data)
|
||||||
text_content = extracted_content
|
except Exception:
|
||||||
else:
|
continue
|
||||||
text_content = str(extracted_content)
|
text_content = "\n".join(text_parts)
|
||||||
|
else:
|
||||||
|
text_content = ""
|
||||||
|
except Exception:
|
||||||
|
text_content = ""
|
||||||
|
|
||||||
# Get the expected format for this document (or use default)
|
# Get the expected format for this document (or use default)
|
||||||
target_format = None
|
target_format = None
|
||||||
|
|
@ -692,27 +700,38 @@ class MethodDocument(MethodBase):
|
||||||
content = ""
|
content = ""
|
||||||
logger.info(f"Processing document: type={type(doc)}")
|
logger.info(f"Processing document: type={type(doc)}")
|
||||||
|
|
||||||
# Get actual file content using the document content extraction service
|
# Batch extraction approach: prepare one doc payload and call extractor
|
||||||
|
try:
|
||||||
try:
|
try:
|
||||||
extracted_content = await self.services.documentExtraction.extractContentFromDocument(
|
data = self.services.workflow.getFileData(doc.fileId) if hasattr(doc, 'fileId') else None
|
||||||
prompt="Extract readable text content for HTML report generation",
|
except Exception:
|
||||||
document=doc
|
data = None
|
||||||
)
|
extracted_list = await self.services.extraction.extractContentFromDocuments(
|
||||||
|
prompt="Extract readable text content for HTML report generation",
|
||||||
if extracted_content and extracted_content.contents:
|
documents=[{
|
||||||
# Get the first content item's data
|
"id": getattr(doc, 'id', None),
|
||||||
for content_item in extracted_content.contents:
|
"bytes": data or b"",
|
||||||
if hasattr(content_item, 'data') and content_item.data:
|
"fileName": getattr(doc, 'fileName', 'unknown'),
|
||||||
content += content_item.data + " "
|
"mimeType": getattr(doc, 'mimeType', None) or "application/octet-stream"
|
||||||
|
}],
|
||||||
if content.strip():
|
options={"ai": {"enabled": False}}
|
||||||
logger.info(f" Retrieved content from file: {len(content)} characters")
|
)
|
||||||
else:
|
ec = extracted_list[0] if extracted_list else None
|
||||||
logger.info(f" No readable text content found (binary file)")
|
if ec and hasattr(ec, 'parts'):
|
||||||
|
for part in getattr(ec, 'parts', []):
|
||||||
|
try:
|
||||||
|
if getattr(part, 'typeGroup', '') in ("text", "table", "structure") and getattr(part, 'data', None):
|
||||||
|
content += part.data + " "
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
if content.strip():
|
||||||
|
logger.info(f" Retrieved content from file: {len(content)} characters")
|
||||||
else:
|
else:
|
||||||
logger.info(f" No content extracted (binary file)")
|
logger.info(f" No readable text content found (binary file)")
|
||||||
except Exception as e:
|
else:
|
||||||
logger.info(f" Could not extract content (binary file): {str(e)}")
|
logger.info(f" No content extracted (binary file)")
|
||||||
|
except Exception as e:
|
||||||
|
logger.info(f" Could not extract content (binary file): {str(e)}")
|
||||||
|
|
||||||
# Skip empty documents
|
# Skip empty documents
|
||||||
if content and content.strip():
|
if content and content.strip():
|
||||||
|
|
|
||||||
|
|
@ -1392,45 +1392,53 @@ class MethodOutlook(MethodBase):
|
||||||
composition_documents = []
|
composition_documents = []
|
||||||
|
|
||||||
if documentList:
|
if documentList:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Get document content from service center
|
|
||||||
docs = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
|
docs = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
|
||||||
if docs:
|
if docs:
|
||||||
|
composition_documents.extend(docs)
|
||||||
|
# Batch extract summaries for AI context
|
||||||
|
batch_docs = []
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
composition_documents.append(doc)
|
|
||||||
|
|
||||||
# Extract content for AI context using proper document service
|
|
||||||
try:
|
try:
|
||||||
if hasattr(doc, 'fileId') and doc.fileId:
|
data = self.services.workflow.getFileData(doc.fileId) if hasattr(doc, 'fileId') else None
|
||||||
# Use the document content extraction service instead of raw file reading
|
except Exception:
|
||||||
try:
|
data = None
|
||||||
extracted_content = await self.services.documentExtraction.extractContentFromDocument(
|
batch_docs.append({
|
||||||
prompt="Extract readable text content for email composition",
|
"id": getattr(doc, 'id', None),
|
||||||
document=doc
|
"bytes": data or b"",
|
||||||
)
|
"fileName": getattr(doc, 'fileName', 'unknown'),
|
||||||
|
"mimeType": getattr(doc, 'mimeType', None) or "application/octet-stream"
|
||||||
if extracted_content and extracted_content.contents:
|
})
|
||||||
# Get the first content item's data
|
|
||||||
content_text = ""
|
|
||||||
for content_item in extracted_content.contents:
|
|
||||||
if hasattr(content_item, 'data') and content_item.data:
|
|
||||||
content_text += content_item.data + " "
|
|
||||||
|
|
||||||
|
try:
|
||||||
if content_text.strip():
|
extracted_list = await self.services.extraction.extractContentFromDocuments(
|
||||||
# Truncate content for AI context (avoid token limits)
|
prompt="Extract readable text content for email composition",
|
||||||
content_preview = content_text[:1000] + "..." if len(content_text) > 1000 else content_text
|
documents=batch_docs,
|
||||||
document_content_summary += f"\nDocument: {doc.fileName}\nContent Preview: {content_preview}\n"
|
options={"ai": {"enabled": False}}
|
||||||
# No content to extract
|
)
|
||||||
|
except Exception:
|
||||||
except Exception as extract_error:
|
extracted_list = []
|
||||||
# Content extraction failed (normal for binary files)
|
|
||||||
pass
|
# Aggregate previews
|
||||||
else:
|
def _partsToText(parts) -> str:
|
||||||
logger.warning(f"Document {doc.fileName} has no fileId")
|
lines: List[str] = []
|
||||||
except Exception as e:
|
for p in (parts or []):
|
||||||
logger.warning(f"Error processing document {doc.fileName}: {str(e)}")
|
try:
|
||||||
|
if getattr(p, 'typeGroup', '') in ("text", "table", "structure") and getattr(p, 'data', None):
|
||||||
|
lines.append(p.data)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return "\n\n".join(lines)
|
||||||
|
|
||||||
|
for i, doc in enumerate(docs):
|
||||||
|
try:
|
||||||
|
ec = extracted_list[i] if i < len(extracted_list) else None
|
||||||
|
content_text = _partsToText(getattr(ec, 'parts', [])) if ec else ""
|
||||||
|
if content_text.strip():
|
||||||
|
content_preview = content_text[:1000] + "..." if len(content_text) > 1000 else content_text
|
||||||
|
document_content_summary += f"\nDocument: {doc.fileName}\nContent Preview: {content_preview}\n"
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
else:
|
else:
|
||||||
logger.warning("No documents found from documentList")
|
logger.warning("No documents found from documentList")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ class MethodSharepoint(MethodBase):
|
||||||
def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
|
def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
|
||||||
"""Get Microsoft connection from connection reference and configure SharePoint service"""
|
"""Get Microsoft connection from connection reference and configure SharePoint service"""
|
||||||
try:
|
try:
|
||||||
userConnection = self.service.getUserConnectionFromConnectionReference(connectionReference)
|
userConnection = self.services.workflow.getUserConnectionFromConnectionReference(connectionReference)
|
||||||
if not userConnection:
|
if not userConnection:
|
||||||
logger.warning(f"No user connection found for reference: {connectionReference}")
|
logger.warning(f"No user connection found for reference: {connectionReference}")
|
||||||
return None
|
return None
|
||||||
|
|
@ -48,7 +48,7 @@ class MethodSharepoint(MethodBase):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Configure SharePoint service with the UserConnection
|
# Configure SharePoint service with the UserConnection
|
||||||
if not self.service.sharepoint.setAccessTokenFromConnection(userConnection):
|
if not self.services.sharepoint.setAccessTokenFromConnection(userConnection):
|
||||||
logger.warning(f"Failed to configure SharePoint service with connection {userConnection.id}")
|
logger.warning(f"Failed to configure SharePoint service with connection {userConnection.id}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -363,11 +363,11 @@ class MethodSharepoint(MethodBase):
|
||||||
async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
|
async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
|
||||||
"""Make a Microsoft Graph API call with timeout and detailed logging"""
|
"""Make a Microsoft Graph API call with timeout and detailed logging"""
|
||||||
try:
|
try:
|
||||||
if not hasattr(self.service, 'sharepoint') or not self.service.sharepoint._target.access_token:
|
if not hasattr(self.services, 'sharepoint') or not self.services.sharepoint._target.access_token:
|
||||||
return {"error": "SharePoint service not configured with access token"}
|
return {"error": "SharePoint service not configured with access token"}
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"Authorization": f"Bearer {self.service.sharepoint._target.access_token}",
|
"Authorization": f"Bearer {self.services.sharepoint._target.access_token}",
|
||||||
"Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
|
"Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1014,7 +1014,7 @@ class MethodSharepoint(MethodBase):
|
||||||
# For content download, we need to handle binary data
|
# For content download, we need to handle binary data
|
||||||
try:
|
try:
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
headers = {"Authorization": f"Bearer {self.service.sharepoint._target.access_token}"}
|
headers = {"Authorization": f"Bearer {self.services.sharepoint._target.access_token}"}
|
||||||
async with session.get(f"https://graph.microsoft.com/v1.0/{content_endpoint}", headers=headers) as response:
|
async with session.get(f"https://graph.microsoft.com/v1.0/{content_endpoint}", headers=headers) as response:
|
||||||
if response.status == 200:
|
if response.status == 200:
|
||||||
content = await response.text()
|
content = await response.text()
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,8 @@ from modules.datamodels.datamodelWorkflow import (
|
||||||
TaskResult,
|
TaskResult,
|
||||||
ReviewContext,
|
ReviewContext,
|
||||||
TaskStatus,
|
TaskStatus,
|
||||||
ActionResult
|
ActionResult,
|
||||||
|
TaskAction
|
||||||
)
|
)
|
||||||
from modules.datamodels.datamodelChat import (
|
from modules.datamodels.datamodelChat import (
|
||||||
WorkflowResult,
|
WorkflowResult,
|
||||||
|
|
@ -47,7 +48,6 @@ from modules.workflows.processing.promptFactoryPlaceholders import (
|
||||||
extractUserLanguage,
|
extractUserLanguage,
|
||||||
extractReviewContent
|
extractReviewContent
|
||||||
)
|
)
|
||||||
from modules.services.serviceDocument.mainServiceDocumentGeneration import DocumentGenerationService
|
|
||||||
from modules.workflows.processing.promptFactory import methods
|
from modules.workflows.processing.promptFactory import methods
|
||||||
from modules.workflows.processing.executionState import should_continue
|
from modules.workflows.processing.executionState import should_continue
|
||||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
|
||||||
|
|
@ -62,7 +62,6 @@ class HandlingTasks:
|
||||||
def __init__(self, services, workflow=None):
|
def __init__(self, services, workflow=None):
|
||||||
self.services = services
|
self.services = services
|
||||||
self.workflow = workflow
|
self.workflow = workflow
|
||||||
self.documentGenerator = DocumentGenerationService(self.services.center)
|
|
||||||
|
|
||||||
def _checkWorkflowStopped(self):
|
def _checkWorkflowStopped(self):
|
||||||
"""
|
"""
|
||||||
|
|
@ -71,7 +70,7 @@ class HandlingTasks:
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Get the current workflow status from the database to avoid stale data
|
# Get the current workflow status from the database to avoid stale data
|
||||||
current_workflow = services.chatInterface.getWorkflow(self.service.workflow.id)
|
current_workflow = self.services.chatInterface.getWorkflow(self.workflow.id)
|
||||||
if current_workflow and current_workflow.status == "stopped":
|
if current_workflow and current_workflow.status == "stopped":
|
||||||
logger.info("Workflow stopped by user, aborting execution")
|
logger.info("Workflow stopped by user, aborting execution")
|
||||||
raise WorkflowStoppedException("Workflow was stopped by user")
|
raise WorkflowStoppedException("Workflow was stopped by user")
|
||||||
|
|
@ -81,7 +80,7 @@ class HandlingTasks:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# If we can't get the current status due to other database issues, fall back to the in-memory object
|
# If we can't get the current status due to other database issues, fall back to the in-memory object
|
||||||
logger.warning(f"Could not check current workflow status from database: {str(e)}")
|
logger.warning(f"Could not check current workflow status from database: {str(e)}")
|
||||||
if self.service.workflow.status == "stopped":
|
if self.workflow and self.workflow.status == "stopped":
|
||||||
logger.info("Workflow stopped by user (from in-memory object), aborting execution")
|
logger.info("Workflow stopped by user (from in-memory object), aborting execution")
|
||||||
raise WorkflowStoppedException("Workflow was stopped by user")
|
raise WorkflowStoppedException("Workflow was stopped by user")
|
||||||
|
|
||||||
|
|
@ -137,7 +136,7 @@ class HandlingTasks:
|
||||||
# Extract content for placeholders
|
# Extract content for placeholders
|
||||||
user_prompt = extractUserPrompt(task_planning_context)
|
user_prompt = extractUserPrompt(task_planning_context)
|
||||||
available_documents = extractAvailableDocuments(task_planning_context)
|
available_documents = extractAvailableDocuments(task_planning_context)
|
||||||
workflow_history = extractWorkflowHistory(self.service, task_planning_context)
|
workflow_history = extractWorkflowHistory(self.services, task_planning_context)
|
||||||
|
|
||||||
# Create placeholders dictionary
|
# Create placeholders dictionary
|
||||||
placeholders = {
|
placeholders = {
|
||||||
|
|
@ -206,7 +205,7 @@ class HandlingTasks:
|
||||||
# LANGUAGE DETECTION: Determine user language once for the entire workflow
|
# LANGUAGE DETECTION: Determine user language once for the entire workflow
|
||||||
# Priority: 1. languageUserDetected from AI response, 2. service.user.language, 3. "en"
|
# Priority: 1. languageUserDetected from AI response, 2. service.user.language, 3. "en"
|
||||||
detected_language = task_plan_dict.get('languageUserDetected', '').strip()
|
detected_language = task_plan_dict.get('languageUserDetected', '').strip()
|
||||||
service_user_language = getattr(self.service.user, 'language', '') if self.service and self.service.user else ''
|
service_user_language = getattr(self.services.user, 'language', '') if self.services and self.services.user else ''
|
||||||
|
|
||||||
if detected_language and len(detected_language) == 2: # Valid language code like "en", "de", "fr"
|
if detected_language and len(detected_language) == 2: # Valid language code like "en", "de", "fr"
|
||||||
user_language = detected_language
|
user_language = detected_language
|
||||||
|
|
@ -219,8 +218,8 @@ class HandlingTasks:
|
||||||
logger.info(f"Using default language: {user_language}")
|
logger.info(f"Using default language: {user_language}")
|
||||||
|
|
||||||
# Set the detected language in the service for use throughout the workflow
|
# Set the detected language in the service for use throughout the workflow
|
||||||
if self.service and self.service.user:
|
if self.services and self.services.user:
|
||||||
self.service.user.language = user_language
|
self.services.user.language = user_language
|
||||||
logger.info(f"Set workflow user language to: {user_language}")
|
logger.info(f"Set workflow user language to: {user_language}")
|
||||||
|
|
||||||
tasks = []
|
tasks = []
|
||||||
|
|
@ -414,9 +413,9 @@ class HandlingTasks:
|
||||||
# Extract content for placeholders
|
# Extract content for placeholders
|
||||||
user_prompt = extractUserPrompt(action_context)
|
user_prompt = extractUserPrompt(action_context)
|
||||||
available_documents = extractAvailableDocuments(action_context)
|
available_documents = extractAvailableDocuments(action_context)
|
||||||
workflow_history = extractWorkflowHistory(self.service, action_context)
|
workflow_history = extractWorkflowHistory(self.services, action_context)
|
||||||
available_methods = extractAvailableMethods(self.service)
|
available_methods = extractAvailableMethods(self.services)
|
||||||
user_language = extractUserLanguage(self.service)
|
user_language = extractUserLanguage(self.services)
|
||||||
|
|
||||||
# Create placeholders dictionary
|
# Create placeholders dictionary
|
||||||
placeholders = {
|
placeholders = {
|
||||||
|
|
@ -527,7 +526,7 @@ class HandlingTasks:
|
||||||
# Extract content for placeholders
|
# Extract content for placeholders
|
||||||
user_prompt = extractUserPrompt(context)
|
user_prompt = extractUserPrompt(context)
|
||||||
available_documents = extractAvailableDocuments(context)
|
available_documents = extractAvailableDocuments(context)
|
||||||
user_language = extractUserLanguage(self.service)
|
user_language = extractUserLanguage(self.services)
|
||||||
available_methods = extractAvailableMethods(self.service)
|
available_methods = extractAvailableMethods(self.service)
|
||||||
|
|
||||||
# Create placeholders dictionary
|
# Create placeholders dictionary
|
||||||
|
|
@ -581,7 +580,7 @@ class HandlingTasks:
|
||||||
method = action.get('method', '')
|
method = action.get('method', '')
|
||||||
name = action.get('name', '')
|
name = action.get('name', '')
|
||||||
action_signature = ""
|
action_signature = ""
|
||||||
if self.service and method in methods:
|
if self.services and method in methods:
|
||||||
method_instance = methods[method]['instance']
|
method_instance = methods[method]['instance']
|
||||||
action_signature = method_instance.getActionSignature(name)
|
action_signature = method_instance.getActionSignature(name)
|
||||||
|
|
||||||
|
|
@ -624,8 +623,8 @@ class HandlingTasks:
|
||||||
parameters = param_obj.get('parameters', {}) if isinstance(param_obj, dict) else {}
|
parameters = param_obj.get('parameters', {}) if isinstance(param_obj, dict) else {}
|
||||||
|
|
||||||
# Apply minimal defaults in-code (language)
|
# Apply minimal defaults in-code (language)
|
||||||
if 'language' not in parameters and hasattr(self.service, 'user') and getattr(self.service.user, 'language', None):
|
if 'language' not in parameters and hasattr(self.services, 'user') and getattr(self.services.user, 'language', None):
|
||||||
parameters['language'] = self.service.user.language
|
parameters['language'] = self.services.user.language
|
||||||
|
|
||||||
# Build a synthetic TaskAction for execution routing and labels
|
# Build a synthetic TaskAction for execution routing and labels
|
||||||
current_round = getattr(self.workflow, 'currentRound', 0)
|
current_round = getattr(self.workflow, 'currentRound', 0)
|
||||||
|
|
@ -718,7 +717,7 @@ class HandlingTasks:
|
||||||
|
|
||||||
# Update workflow context for this task
|
# Update workflow context for this task
|
||||||
if task_index is not None:
|
if task_index is not None:
|
||||||
self.service.setWorkflowContext(task_number=task_index)
|
self.services.setWorkflowContext(task_number=task_index)
|
||||||
# Remove the increment call that causes double-increment bug
|
# Remove the increment call that causes double-increment bug
|
||||||
|
|
||||||
# Create database log entry for task start in format expected by frontend
|
# Create database log entry for task start in format expected by frontend
|
||||||
|
|
@ -765,7 +764,7 @@ class HandlingTasks:
|
||||||
self._checkWorkflowStopped()
|
self._checkWorkflowStopped()
|
||||||
# Update workflow[currentAction] for UI
|
# Update workflow[currentAction] for UI
|
||||||
self.updateWorkflowBeforeExecutingAction(step)
|
self.updateWorkflowBeforeExecutingAction(step)
|
||||||
self.service.setWorkflowContext(action_number=step)
|
self.services.setWorkflowContext(action_number=step)
|
||||||
try:
|
try:
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
selection = await self.plan_select(context)
|
selection = await self.plan_select(context)
|
||||||
|
|
@ -864,7 +863,7 @@ class HandlingTasks:
|
||||||
self.updateWorkflowBeforeExecutingAction(action_number)
|
self.updateWorkflowBeforeExecutingAction(action_number)
|
||||||
|
|
||||||
# Update workflow context for this action
|
# Update workflow context for this action
|
||||||
self.service.setWorkflowContext(action_number=action_number)
|
self.services.setWorkflowContext(action_number=action_number)
|
||||||
# Remove the increment call that causes double-increment bug
|
# Remove the increment call that causes double-increment bug
|
||||||
|
|
||||||
# Log action start in format expected by frontend
|
# Log action start in format expected by frontend
|
||||||
|
|
@ -1483,7 +1482,7 @@ class HandlingTasks:
|
||||||
message = await self.createActionMessage(action, result, workflow, message_result_label, [], task_step, task_index)
|
message = await self.createActionMessage(action, result, workflow, message_result_label, [], task_step, task_index)
|
||||||
if message:
|
if message:
|
||||||
# Now create documents with the messageId
|
# Now create documents with the messageId
|
||||||
created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow, message.id)
|
created_documents = self.services.generation.createDocumentsFromActionResult(result, action, workflow, message.id)
|
||||||
# Update the message with the created documents
|
# Update the message with the created documents
|
||||||
if created_documents:
|
if created_documents:
|
||||||
message.documents = created_documents
|
message.documents = created_documents
|
||||||
|
|
@ -1562,8 +1561,8 @@ class HandlingTasks:
|
||||||
logger.info(f"Result label: {result_label} - No documents")
|
logger.info(f"Result label: {result_label} - No documents")
|
||||||
|
|
||||||
# Get current workflow context and stats
|
# Get current workflow context and stats
|
||||||
workflow_context = self.service.getWorkflowContext()
|
workflow_context = self.services.getWorkflowContext()
|
||||||
workflow_stats = self.service.getWorkflowStats()
|
workflow_stats = self.services.getWorkflowStats()
|
||||||
|
|
||||||
# Create a more meaningful message that includes task context
|
# Create a more meaningful message that includes task context
|
||||||
task_objective = task_step.objective if task_step else 'Unknown task'
|
task_objective = task_step.objective if task_step else 'Unknown task'
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ import inspect
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
from modules.datamodels.datamodelWorkflow import TaskContext, ReviewContext, DocumentExchange
|
from modules.datamodels.datamodelWorkflow import TaskContext, ReviewContext, DocumentExchange
|
||||||
from modules.datamodels.datamodelChat import ChatDocument
|
from modules.datamodels.datamodelChat import ChatDocument
|
||||||
from modules.services.serviceDocument.subDocumentUtility import getFileExtension
|
from modules.services.serviceGeneration.subDocumentUtility import getFileExtension
|
||||||
from modules.workflows.methods.methodBase import MethodBase
|
from modules.workflows.methods.methodBase import MethodBase
|
||||||
|
|
||||||
# Set up logger
|
# Set up logger
|
||||||
|
|
|
||||||
|
|
@ -625,8 +625,8 @@ class WorkflowManager:
|
||||||
documents = []
|
documents = []
|
||||||
for fileId in fileIds:
|
for fileId in fileIds:
|
||||||
try:
|
try:
|
||||||
# Get file info from service
|
# Get file info from unified workflow service
|
||||||
fileInfo = self.handlingTasks.service.methodService.getFileInfo(fileId)
|
fileInfo = self.services.workflow.getFileInfo(fileId)
|
||||||
if fileInfo:
|
if fileInfo:
|
||||||
# Create document directly with all file attributes
|
# Create document directly with all file attributes
|
||||||
document = ChatDocument(
|
document = ChatDocument(
|
||||||
|
|
@ -647,4 +647,4 @@ class WorkflowManager:
|
||||||
|
|
||||||
def _setUserLanguage(self, language: str) -> None:
|
def _setUserLanguage(self, language: str) -> None:
|
||||||
"""Set user language for the service center"""
|
"""Set user language for the service center"""
|
||||||
self.handlingTasks.service.user.language = language
|
self.services.user.language = language
|
||||||
|
|
|
||||||
BIN
testdata/00Untitled.jpg
vendored
Normal file
BIN
testdata/00Untitled.jpg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 407 KiB |
BIN
testdata/Muster_Kundenliste_Test1.xlsx
vendored
Normal file
BIN
testdata/Muster_Kundenliste_Test1.xlsx
vendored
Normal file
Binary file not shown.
BIN
testdata/diagramm_komponenten.pdf
vendored
Normal file
BIN
testdata/diagramm_komponenten.pdf
vendored
Normal file
Binary file not shown.
Loading…
Reference in a new issue