Start testing with the backend running

This commit is contained in:
ValueOn AG 2025-09-30 18:30:33 +02:00
parent cbea086f91
commit 501cebe342
23 changed files with 618 additions and 2427 deletions

View file

@ -1,21 +1,19 @@
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field


class ContentPart(BaseModel):
    """A single extracted piece of a document (text, table, structure, binary, ...)."""
    id: str = Field(description="Unique content part identifier")
    parentId: Optional[str] = Field(default=None, description="Optional parent content part id")
    label: str = Field(description="Human readable label of the part")
    typeGroup: str = Field(description="Logical type group: text, table, structure, binary, ...")
    mimeType: str = Field(description="MIME type of the part payload")
    data: str = Field(default="", description="Primary data payload, often extracted text")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Arbitrary metadata for the part")


class ExtractedContent(BaseModel):
    """Extraction result for one document: its parts plus an optional summary."""
    id: str = Field(description="Extraction id or source document id")
    parts: List[ContentPart] = Field(default_factory=list, description="List of extracted parts")
    summary: Optional[Dict[str, Any]] = Field(default=None, description="Optional extraction summary")

View file

@ -55,11 +55,11 @@ class Services:
# Initialize service packages # Initialize service packages
from .serviceDocument.mainServiceDocumentExtraction import DocumentExtractionService from .serviceExtraction.mainServiceExtraction import ExtractionService
self.documentExtraction = PublicService(DocumentExtractionService(self)) self.extraction = PublicService(ExtractionService(self))
from .serviceDocument.mainServiceDocumentGeneration import DocumentGenerationService from .serviceGeneration.mainServiceGeneration import GenerationService
self.documentGeneration = PublicService(DocumentGenerationService(self)) self.generation = PublicService(GenerationService(self))
from .serviceNeutralization.mainServiceNeutralization import NeutralizationService from .serviceNeutralization.mainServiceNeutralization import NeutralizationService
self.neutralization = PublicService(NeutralizationService(self)) self.neutralization = PublicService(NeutralizationService(self))
@ -76,14 +76,9 @@ class Services:
from .serviceWorkflow.mainServiceWorkflow import WorkflowService from .serviceWorkflow.mainServiceWorkflow import WorkflowService
self.workflow = PublicService(WorkflowService(self)) self.workflow = PublicService(WorkflowService(self))
from .serviceWeb.mainServiceWeb import WebService
self.web = PublicService(WebService(self))
from .serviceUtils.mainServiceUtils import UtilsService from .serviceUtils.mainServiceUtils import UtilsService
self.utils = PublicService(UtilsService(self)) self.utils = PublicService(UtilsService(self))
async def extractContentFromDocument(self, prompt, document):
return await self.services.documentExtraction.extractContentFromDocument(prompt, document)
def getInterface(user: User, workflow: ChatWorkflow) -> Services: def getInterface(user: User, workflow: ChatWorkflow) -> Services:
return Services(user, workflow) return Services(user, workflow)

View file

@ -196,7 +196,7 @@ class AiService:
processedContents: List[str] = [] processedContents: List[str] = []
try: try:
extractionResult = self.extractionService.extractDocuments(documentList, extractionOptions) extractionResult = self.extractionService.extractContent(documentList, extractionOptions)
def _partsToText(parts) -> str: def _partsToText(parts) -> str:
lines: List[str] = [] lines: List[str] = []
@ -205,7 +205,7 @@ class AiService:
lines.append(p.data) lines.append(p.data)
return "\n\n".join(lines) return "\n\n".join(lines)
if processIndividually and isinstance(extractionResult, list): if isinstance(extractionResult, list):
for i, ec in enumerate(extractionResult): for i, ec in enumerate(extractionResult):
try: try:
contentText = _partsToText(ec.parts) contentText = _partsToText(ec.parts)
@ -216,9 +216,8 @@ class AiService:
logger.warning(f"Error aggregating extracted content: {str(e)}") logger.warning(f"Error aggregating extracted content: {str(e)}")
processedContents.append("[Error aggregating content]") processedContents.append("[Error aggregating content]")
else: else:
# pooled mode returns dict # Fallback: no content
parts = extractionResult.get("parts", []) if isinstance(extractionResult, dict) else [] contentText = ""
contentText = _partsToText(parts)
if compressDocuments and len(contentText.encode("utf-8")) > 10000: if compressDocuments and len(contentText.encode("utf-8")) > 10000:
contentText = await self._compressContent(contentText, 10000, "document") contentText = await self._compressContent(contentText, 10000, "document")
processedContents.append(contentText) processedContents.append(contentText)
@ -359,7 +358,7 @@ class AiService:
"mimeType": d.mimeType "mimeType": d.mimeType
} for d in documents] } for d in documents]
extracted_content = await self.extractionService.extractDocuments( extracted_content = await self.extractionService.extractContent(
documentList=documentList, documentList=documentList,
options={ options={
"prompt": prompt, "prompt": prompt,
@ -371,8 +370,15 @@ class AiService:
} }
) )
# Get text content from extracted parts using typeGroup-aware processing # Build context from list of ExtractedContent
context = self._extractTextFromContentParts(extracted_content) if isinstance(extracted_content, list):
context = "\n\n---\n\n".join([
"\n\n".join([
p.data for p in ec.parts if p.typeGroup in ["text", "table", "structure"] and p.data
]) for ec in extracted_content
])
else:
context = ""
# Check size and reduce if needed # Check size and reduce if needed
full_prompt = prompt + "\n\n" + context if context else prompt full_prompt = prompt + "\n\n" + context if context else prompt

View file

@ -1,7 +1,7 @@
from typing import Any, Dict, List from typing import Any, Dict, List
import json import json
from ..types import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Chunker from ..subRegistry import Chunker

View file

@ -1,6 +1,6 @@
from typing import Any, Dict, List from typing import Any, Dict, List
from ..types import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Chunker from ..subRegistry import Chunker

View file

@ -1,6 +1,6 @@
from typing import Any, Dict, List from typing import Any, Dict, List
from ..types import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Chunker from ..subRegistry import Chunker

View file

@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional, Union
import uuid import uuid
from .subRegistry import ExtractorRegistry, ChunkerRegistry from .subRegistry import ExtractorRegistry, ChunkerRegistry
@ -7,14 +7,12 @@ from modules.datamodels.datamodelExtraction import ExtractedContent, ContentPart
class ExtractionService: class ExtractionService:
def __init__(self): def __init__(self, services: Optional[Any] = None):
self.services = services
self._extractorRegistry = ExtractorRegistry() self._extractorRegistry = ExtractorRegistry()
self._chunkerRegistry = ChunkerRegistry() self._chunkerRegistry = ChunkerRegistry()
def extractDocuments(self, documentList: List[Dict[str, Any]], options: Dict[str, Any]) -> Any: def extractContent(self, documentList: List[Dict[str, Any]], options: Dict[str, Any]) -> List[ExtractedContent]:
processIndividually = options.get("processDocumentsIndividually", True)
if processIndividually:
results: List[ExtractedContent] = [] results: List[ExtractedContent] = []
for doc in documentList: for doc in documentList:
ec = runExtraction( ec = runExtraction(
@ -25,33 +23,34 @@ class ExtractionService:
mimeType=doc.get("mimeType"), mimeType=doc.get("mimeType"),
options=options options=options
) )
ec = applyAiIfRequested(ec, options) # Attach document id to parts if missing
results.append(ec)
return results
else:
allParts: List[ContentPart] = []
for doc in documentList:
ec = runExtraction(
extractorRegistry=self._extractorRegistry,
chunkerRegistry=self._chunkerRegistry,
documentBytes=doc.get("bytes"),
fileName=doc.get("fileName"),
mimeType=doc.get("mimeType"),
options=options
)
for p in ec.parts: for p in ec.parts:
if "documentId" not in p.metadata: if "documentId" not in p.metadata:
p.metadata["documentId"] = doc.get("id") or str(uuid.uuid4()) p.metadata["documentId"] = doc.get("id") or str(uuid.uuid4())
allParts.extend(ec.parts) ec = applyAiIfRequested(ec, options)
results.append(ec)
return results
pooled = poolAndLimit(allParts, self._chunkerRegistry, options) async def extractContentFromDocument(self, prompt: str, documents: List[Dict[str, Any]], options: Optional[Dict[str, Any]] = None) -> List[ExtractedContent]:
# In pooled mode we return a dict containing pooled parts and an optional AI output """
pooledResult: Dict[str, Any] = { Batch extract content from multiple documents.
"parts": pooled,
"summary": {"documents": len(documentList)} Args:
} prompt: Instructional prompt for optional AI post-processing/selection.
aiOut = applyAiIfRequested(ExtractedContent(id=str(uuid.uuid4()), parts=pooled, summary=None), options) documents: List of dicts with keys: id, bytes, fileName, mimeType.
pooledResult["ai"] = aiOut.summary if isinstance(aiOut, ExtractedContent) else aiOut options: Optional extraction options. "ai" config may be provided.
return pooledResult
Returns:
List[ExtractedContent]: one per input document in order.
"""
# Build options safely and inject prompt for downstream AI selection if desired
effectiveOptions: Dict[str, Any] = options.copy() if options else {}
aiCfg = effectiveOptions.get("ai") or {}
if prompt:
aiCfg["prompt"] = prompt
effectiveOptions["ai"] = aiCfg
# Delegate to existing synchronous pipeline
return self.extractContent(documents, effectiveOptions)

View file

@ -1,6 +1,6 @@
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
from .types import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
class Extractor: class Extractor:

View file

@ -1,9 +1,11 @@
import logging import logging
import uuid
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from datetime import datetime, UTC from datetime import datetime, UTC
import re import re
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import get_utc_timestamp
from modules.services.serviceDocument.subDocumentUtility import ( from modules.datamodels.datamodelChat import ChatDocument
from modules.services.serviceGeneration.subDocumentUtility import (
getFileExtension, getFileExtension,
getMimeTypeFromExtension, getMimeTypeFromExtension,
detectMimeTypeFromContent, detectMimeTypeFromContent,
@ -13,9 +15,13 @@ from modules.services.serviceDocument.subDocumentUtility import (
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class DocumentGenerationService: class GenerationService:
def __init__(self, service): def __init__(self, serviceCenter=None):
self.service = service # Directly use interfaces from the provided service center (no self.service calls)
self.serviceCenter = serviceCenter
self.interfaceDbComponent = getattr(serviceCenter, 'interfaceDbComponent', None) if serviceCenter else None
self.interfaceDbChat = getattr(serviceCenter, 'interfaceDbChat', None) if serviceCenter else None
self.workflow = getattr(serviceCenter, 'workflow', None) if serviceCenter else None
def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]: def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]:
""" """
@ -53,7 +59,8 @@ class DocumentGenerationService:
mime_type = doc.mimeType mime_type = doc.mimeType
if mime_type == "application/octet-stream": if mime_type == "application/octet-stream":
content = doc.documentData content = doc.documentData
mime_type = detectMimeTypeFromContent(content, doc.documentName, self.service) # Detect MIME without relying on a service center
mime_type = detectMimeTypeFromContent(content, doc.documentName)
return { return {
'fileName': doc.documentName, 'fileName': doc.documentName,
@ -98,8 +105,8 @@ class DocumentGenerationService:
logger.info(f"Document {document_name} has content: {len(content)} characters") logger.info(f"Document {document_name} has content: {len(content)} characters")
# Create document with file in one step # Create document with file in one step using interfaces directly
document = self.service.createDocument( document = self._createDocument(
fileName=document_name, fileName=document_name,
mimeType=mime_type, mimeType=mime_type,
content=content, content=content,
@ -126,9 +133,9 @@ class DocumentGenerationService:
def _setDocumentWorkflowContext(self, document, action, workflow): def _setDocumentWorkflowContext(self, document, action, workflow):
"""Set workflow context on a document for proper routing and labeling""" """Set workflow context on a document for proper routing and labeling"""
try: try:
# Get current workflow context from service center # Get current workflow context directly from workflow object
workflow_context = self.service.getWorkflowContext() workflow_context = self._getWorkflowContext(workflow)
workflow_stats = self.service.getWorkflowStats() workflow_stats = self._getWorkflowStats(workflow)
current_round = workflow_context.get('currentRound', 0) current_round = workflow_context.get('currentRound', 0)
current_task = workflow_context.get('currentTask', 0) current_task = workflow_context.get('currentTask', 0)
@ -155,3 +162,99 @@ class DocumentGenerationService:
except Exception as e: except Exception as e:
logger.warning(f"Could not set workflow context on document: {str(e)}") logger.warning(f"Could not set workflow context on document: {str(e)}")
def _createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: str = None) -> Optional[ChatDocument]:
    """Create a stored file plus its ChatDocument wrapper via the component interface.

    Returns None (after logging) when the component interface is missing or
    any persistence step fails.
    """
    try:
        if not self.interfaceDbComponent:
            logger.error("Component interface not available for document creation")
            return None
        # Turn the incoming content into raw bytes
        if base64encoded:
            import base64
            payload = base64.b64decode(content)
        else:
            payload = content.encode('utf-8')
        # Persist the file record, then its binary data
        stored = self.interfaceDbComponent.createFile(
            name=fileName,
            mimeType=mimeType,
            content=payload
        )
        self.interfaceDbComponent.createFileData(stored.id, payload)
        # Read the stored metadata back so the document mirrors it
        info = self._getFileInfo(stored.id)
        if not info:
            logger.error(f"Could not get file info for fileId: {stored.id}")
            return None
        chat_doc = ChatDocument(
            id=str(uuid.uuid4()),
            messageId=messageId or "",
            fileId=stored.id,
            fileName=info.get("fileName", fileName),
            fileSize=info.get("size", 0),
            mimeType=info.get("mimeType", mimeType)
        )
        # Best-effort: hand the component interface to the document for later use
        if hasattr(chat_doc, 'setComponentInterface') and self.interfaceDbComponent:
            try:
                chat_doc.setComponentInterface(self.interfaceDbComponent)
            except Exception:
                pass
        return chat_doc
    except Exception as e:
        logger.error(f"Error creating document: {str(e)}")
        return None
def _getFileInfo(self, fileId: str) -> Optional[Dict[str, Any]]:
    """Fetch stored-file metadata as a plain dict, or None when unavailable."""
    try:
        if not self.interfaceDbComponent:
            return None
        record = self.interfaceDbComponent.getFile(fileId)
        if not record:
            return None
        return {
            "id": record.id,
            "fileName": record.fileName,
            "size": record.fileSize,
            "mimeType": record.mimeType,
            # Optional attributes: absent on some file records
            "fileHash": getattr(record, 'fileHash', None),
            "creationDate": getattr(record, 'creationDate', None)
        }
    except Exception as e:
        logger.error(f"Error getting file info for {fileId}: {str(e)}")
        return None
def _getWorkflowContext(self, workflow) -> Dict[str, int]:
    """Read round/task/action counters off a workflow object, defaulting to 0."""
    fallback = {'currentRound': 0, 'currentTask': 0, 'currentAction': 0}
    try:
        return {key: getattr(workflow, key, 0) for key in fallback}
    except Exception:
        return fallback
def _getWorkflowStats(self, workflow) -> Dict[str, Any]:
    """Combine workflow counters with totals/status/id into one stats dict."""
    try:
        stats: Dict[str, Any] = dict(self._getWorkflowContext(workflow))
        stats.update({
            'totalTasks': getattr(workflow, 'totalTasks', 0),
            'totalActions': getattr(workflow, 'totalActions', 0),
            'workflowStatus': getattr(workflow, 'status', 'unknown'),
            'workflowId': getattr(workflow, 'id', 'unknown'),
        })
        return stats
    except Exception:
        # Safe defaults when the workflow object is unusable
        return {
            'currentRound': 0,
            'currentTask': 0,
            'currentAction': 0,
            'totalTasks': 0,
            'totalActions': 0,
            'workflowStatus': 'unknown',
            'workflowId': 'unknown'
        }

View file

@ -4,8 +4,8 @@ from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelUam import User, UserConnection from modules.datamodels.datamodelUam import User, UserConnection
from modules.datamodels.datamodelChat import ChatDocument, ChatMessage from modules.datamodels.datamodelChat import ChatDocument, ChatMessage
from modules.datamodels.datamodelChat import ExtractedContent from modules.datamodels.datamodelChat import ExtractedContent
from modules.services.serviceDocument.mainServiceDocumentExtraction import DocumentExtractionService from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
from modules.services.serviceDocument.subDocumentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData from modules.services.serviceGeneration.subDocumentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import get_utc_timestamp
from modules.services.serviceAi.mainServiceAi import AiService from modules.services.serviceAi.mainServiceAi import AiService
from modules.security.tokenManager import TokenManager from modules.security.tokenManager import TokenManager
@ -313,80 +313,6 @@ class WorkflowService:
"""Get file data by ID""" """Get file data by ID"""
return self.interfaceDbComponent.getFileData(fileId) return self.interfaceDbComponent.getFileData(fileId)
async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
"""Extract content from ChatDocument using prompt"""
try:
# ChatDocument is just a reference, so we need to get file data using fileId
if not hasattr(document, 'fileId') or not document.fileId:
logger.error(f"Document {document.id} has no fileId")
raise ValueError("Document has no fileId")
# Get file data from service center using document's fileId
fileData = self.getFileData(document.fileId)
if not fileData:
logger.error(f"No file data found for fileId: {document.fileId}")
raise ValueError("No file data found for document")
# Get fileName and mime type from document properties
try:
fileName = document.fileName
mimeType = document.mimeType
except Exception as e:
# Try to diagnose and recover the issue
diagnosis = self._diagnoseDocumentAccess(document)
logger.error(f"Critical error: Cannot access document properties for document {document.id}. Diagnosis: {diagnosis}")
# Attempt recovery
if self._recoverDocumentAccess(document):
try:
fileName = document.fileName
mimeType = document.mimeType
logger.info(f"Document access recovered for {document.id} - proceeding with AI extraction")
except Exception as recovery_error:
logger.error(f"Recovery failed for document {document.id}: {str(recovery_error)}")
raise RuntimeError(f"Document {document.id} properties are permanently inaccessible after recovery attempt - cannot proceed with AI extraction: {str(recovery_error)}")
else:
# Recovery failed - don't continue with invalid data
raise RuntimeError(f"Document {document.id} properties are inaccessible and recovery failed. Diagnosis: {diagnosis}")
# Process with DocumentExtractionService directly (no circular dependency)
docService = DocumentExtractionService(None) # Pass None to avoid circular dependency
content_items = await docService.processFileData(
fileData=fileData,
fileName=fileName,
mimeType=mimeType,
base64Encoded=False,
prompt=prompt,
enableAI=True
)
# Convert ContentItem list to ExtractedContent
contents = []
for item in content_items:
contents.append({
'label': item.label,
'data': item.data,
'metadata': {
'mimeType': item.metadata.mimeType if hasattr(item.metadata, 'mimeType') else mimeType,
'size': item.metadata.size if hasattr(item.metadata, 'size') else len(fileData),
'base64Encoded': item.metadata.base64Encoded if hasattr(item.metadata, 'base64Encoded') else False
}
})
extractedContent = ExtractedContent(
id=document.id,
contents=contents
)
# Note: ExtractedContent model only has 'id' and 'contents' fields
# No need to set objectId or objectType as they don't exist in the model
return extractedContent
except Exception as e:
logger.error(f"Error extracting from document: {str(e)}")
raise
def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]: def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
""" """
Diagnose document access issues and provide recovery information. Diagnose document access issues and provide recovery information.
@ -456,43 +382,6 @@ class WorkflowService:
logger.error(f"Error during document access recovery for {document.id}: {str(e)}") logger.error(f"Error during document access recovery for {document.id}: {str(e)}")
return False return False
def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: str = None) -> ChatDocument:
"""Create document with file in one step - handles file creation internally"""
# Convert content to bytes based on base64 flag
if base64encoded:
import base64
content_bytes = base64.b64decode(content)
else:
content_bytes = content.encode('utf-8')
# Create the file (hash and size are computed inside interfaceDbComponent)
file_item = self.interfaceDbComponent.createFile(
name=fileName,
mimeType=mimeType,
content=content_bytes
)
# Then store the file data
self.interfaceDbComponent.createFileData(file_item.id, content_bytes)
# Get file info to copy attributes
file_info = self.getFileInfo(file_item.id)
if not file_info:
logger.error(f"Could not get file info for fileId: {file_item.id}")
raise ValueError(f"File info not found for fileId: {file_item.id}")
# Create document with all file attributes copied
document = ChatDocument(
id=str(uuid.uuid4()),
messageId=messageId or "", # Use provided messageId or empty string as fallback
fileId=file_item.id,
fileName=file_info.get("fileName", fileName),
fileSize=file_info.get("size", 0),
mimeType=file_info.get("mimeType", mimeType)
)
return document
def calculateObjectSize(self, obj: Any) -> int: def calculateObjectSize(self, obj: Any) -> int:
""" """
Calculate the size of an object in bytes. Calculate the size of an object in bytes.

View file

@ -0,0 +1,226 @@
import asyncio
import os
import sys
from typing import List, Dict, Any
# Ensure relative imports work when running directly
CURRENT_DIR = os.path.dirname(__file__)
GATEWAY_DIR = os.path.dirname(os.path.dirname(CURRENT_DIR))
if GATEWAY_DIR not in sys.path:
sys.path.append(GATEWAY_DIR)
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
from modules.services.serviceGeneration.mainServiceGeneration import DocumentGenerationService
from modules.datamodels.datamodelWorkflow import ActionResult, ActionDocument
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
from modules.services.serviceAi.mainServiceAi import AiService
TESTDATA_DIR = os.path.join(GATEWAY_DIR, "testdata")
def _read_test_files() -> List[Dict[str, Any]]:
    """Load every regular file in TESTDATA_DIR as an extraction payload dict."""
    payloads: List[Dict[str, Any]] = []
    for entry in os.listdir(TESTDATA_DIR):
        full_path = os.path.join(TESTDATA_DIR, entry)
        if not os.path.isfile(full_path):
            continue
        try:
            with open(full_path, "rb") as handle:
                raw = handle.read()
        except Exception:
            # Unreadable files are skipped silently (best-effort test data load)
            continue
        payloads.append({
            "id": entry,
            "bytes": raw,
            "fileName": entry,
            "mimeType": _guess_mime(entry),
        })
    return payloads
def _guess_mime(name: str) -> str:
    """Map a file name to a MIME type by extension; octet-stream as fallback."""
    suffix_to_mime = {
        ".pdf": "application/pdf",
        ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
    }
    lowered = name.lower()
    for suffix, mime in suffix_to_mime.items():
        if lowered.endswith(suffix):
            return mime
    return "application/octet-stream"
def run_extraction_1000_bytes() -> None:
    """Run the extraction pipeline over testdata files with a 1000-byte pooling cap."""
    documents = _read_test_files()
    extraction_options = {
        # cap total pooled size per document set
        "maxSize": 1000,
        # allow chunking to respect the cap across parts
        "chunkAllowed": True,
        # chunk sizes for different content types to help fit under the cap
        "textChunkSize": 500,
        "tableChunkSize": 500,
        "structureChunkSize": 500,
        # simple merge strategy if supported
        "mergeStrategy": {},
    }
    service = ExtractionService()
    extracted = service.extractContent(documents, extraction_options)
    print("[extraction] documents:", len(documents), "results:", len(extracted))
    for index, ec in enumerate(extracted):
        pooled_bytes = sum(int(part.metadata.get("size", 0) or 0) for part in ec.parts)
        print(f" - doc[{index}] parts={len(ec.parts)} pooledBytes={pooled_bytes}")
async def main():
    """Run the three service smoke tests in sequence: extraction, generation, AI."""
    print("=== serviceExtraction: compress to 1000 bytes ===")
    run_extraction_1000_bytes()

    print("\n=== serviceGeneration: create ActionResult and write output to testdata ===")
    await run_generation_write_file()

    print("\n=== serviceAi: planning call + image + pdf extraction ===")
    await run_ai_tests()
# NOTE(review): this guard executes during module import, but main() awaits
# run_generation_write_file() and run_ai_tests(), which are defined *below*
# this point — running the script as-is raises NameError. The guard should be
# moved to the end of the file, after all definitions. TODO confirm layout.
if __name__ == "__main__":
    asyncio.run(main())
async def run_generation_write_file() -> None:
    """Exercise the generation service with in-memory stubs; write outputs to testdata."""

    class _StubFile:
        # Mirrors the attributes the generation service reads off a file record
        def __init__(self, file_id: str, file_name: str, mime_type: str, content: bytes):
            self.id = file_id
            self.fileName = file_name
            self.mimeType = mime_type
            self.fileSize = len(content)

    class _StubComponent:
        # Minimal stand-in for the component DB interface
        def __init__(self):
            self._files = {}

        def createFile(self, name: str, mimeType: str, content: bytes):
            file_id = f"test_{len(self._files) + 1}"
            record = _StubFile(file_id, name, mimeType, content)
            self._files[file_id] = record
            return record

        def createFileData(self, fileId: str, content: bytes):
            # Persist into testdata directory as requested
            record = self._files[fileId]
            destination = os.path.join(TESTDATA_DIR, f"output_{fileId}_{record.fileName}")
            with open(destination, "wb") as handle:
                handle.write(content)

        def getFile(self, fileId: str):
            return self._files.get(fileId)

    class _StubCenter:
        def __init__(self, comp):
            self.interfaceDbComponent = comp
            self.interfaceDbChat = None
            self.workflow = type("_Wf", (), {"id": "wf_test", "currentRound": 1, "currentTask": 1, "currentAction": 1, "status": "running", "totalTasks": 1, "totalActions": 1})()

    class _StubAction:
        def __init__(self):
            self.id = "action_test"
            self.execMethod = "document"
            self.execAction = "generate"
            self.execParameters = {}
            self.execResultLabel = "round1_task1_action1_results"

    center = _StubCenter(_StubComponent())
    # NOTE(review): the top-of-file import is DocumentGenerationService, but the
    # generation module appears to define GenerationService — confirm the name.
    gen = DocumentGenerationService(center)

    # Build a fake action and ActionResult with a small text document
    action = _StubAction()
    text = "This is a generated test file from serviceGeneration test."
    action_doc = ActionDocument(documentName="test_generated.txt", documentData=text, mimeType="text/plain")
    action_result = ActionResult(success=True, documents=[action_doc])
    docs = gen.createDocumentsFromActionResult(action_result, action, center.workflow, message_id="msg_test")
    print("[generation] created documents:", len(docs))
async def run_ai_tests() -> None:
    """Smoke-test AiService: a planning call, an image-analysis call, and a
    PDF-context call. All three are best-effort; failures are printed, not raised."""
    # Create AiService instance (uses internal default model registry; no external creds required for this test)
    ai = await AiService.create()

    # Planning AI call (like in handlingTasks.generateTaskPlan)
    plan_options = AiCallOptions(
        operationType=OperationType.GENERATE_PLAN,
        priority=Priority.QUALITY,
        compressPrompt=False,
        compressContext=False,
        processingMode=ProcessingMode.DETAILED,
        maxCost=0.05,
        maxProcessingTime=10,
    )
    plan_prompt = """
You are a planning assistant. Return a compact JSON with fields: tasks:[{id, objective, success_criteria:[]}], languageUserDetected:"en".
Create exactly one simple task id:"task_1" objective:"Test planning" success_criteria:["done"].
""".strip()
    plan_resp = await ai.callAi(prompt=plan_prompt, placeholders=None, options=plan_options)
    print("[ai] planning response length:", len(plan_resp) if plan_resp else 0)

    # Image content extraction prompt using test JPEG
    img_path = os.path.join(TESTDATA_DIR, "00Untitled.jpg")
    img_resp = None
    if os.path.exists(img_path):
        try:
            with open(img_path, "rb") as f:
                img_bytes = f.read()
            img_options = AiCallOptions(
                operationType=OperationType.ANALYSE_CONTENT,
                priority=Priority.BALANCED,
                compressPrompt=True,
                compressContext=False,
                processingMode=ProcessingMode.ADVANCED,
                maxCost=0.02,
                maxProcessingTime=10,
            )
            img_resp = await ai.callAiImage(
                prompt="Describe the content of this image succinctly.",
                imageData=img_bytes,
                mimeType="image/jpeg",
                options=img_options,
            )
            print("[ai] image analysis response length:", len(img_resp) if img_resp else 0)
        except Exception as e:
            # Best-effort: report and continue with the PDF test
            print("[ai] image analysis error:", str(e))
    else:
        print("[ai] image test file not found; skipping")

    # PDF extraction prompt: emulate text call with document context built via ExtractionService
    pdf_path = os.path.join(TESTDATA_DIR, "diagramm_komponenten.pdf")
    if os.path.exists(pdf_path):
        try:
            # Build a minimal ChatDocument-like shim that AiService._callAiText expects via extraction
            class _Doc:
                def __init__(self, file_path: str, mime: str):
                    self.id = "doc_pdf"
                    self.fileName = os.path.basename(file_path)
                    self.mimeType = mime
                    # Reads the whole file into memory; test PDFs are assumed small
                    with open(file_path, "rb") as f:
                        self.fileData = f.read()

            pdf_doc = _Doc(pdf_path, "application/pdf")
            pdf_options = AiCallOptions(
                operationType=OperationType.ANALYSE_CONTENT,
                priority=Priority.BALANCED,
                compressPrompt=True,
                compressContext=True,
                processingMode=ProcessingMode.ADVANCED,
                maxContextBytes=1000,
                chunkAllowed=True,
                maxCost=0.02,
                maxProcessingTime=10,
            )
            pdf_prompt = "Extract key information from the attached PDF."
            pdf_resp = await ai.callAi(prompt=pdf_prompt, documents=[pdf_doc], options=pdf_options)
            print("[ai] pdf extraction response length:", len(pdf_resp) if pdf_resp else 0)
        except Exception as e:
            print("[ai] pdf extraction error:", str(e))
    else:
        print("[ai] pdf test file not found; skipping")

View file

@ -76,66 +76,70 @@ class MethodAi(MethodBase):
chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList) chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
if chatDocuments: if chatDocuments:
context_parts = [] context_parts = []
# Build batch payload for extraction
batch_docs = []
for doc in chatDocuments: for doc in chatDocuments:
file_info = self.services.workflow.getFileInfo(doc.fileId)
try: try:
# Use the document content extraction service with the specific AI prompt context fileBytes = self.services.workflow.getFileData(doc.fileId) if hasattr(doc, 'fileId') else None
# This tells the extraction engine exactly what and how to extract except Exception:
extraction_prompt = f""" fileBytes = None
Extract content from this document for AI processing context. batch_docs.append({
"id": getattr(doc, 'id', None),
"bytes": fileBytes or b"",
"fileName": getattr(doc, 'fileName', 'unknown'),
"mimeType": getattr(doc, 'mimeType', None) or "application/octet-stream"
})
AI Task: {aiPrompt} extraction_prompt = (
Processing Mode: {processingMode} f"Extract content for AI task context. Task: {aiPrompt}. Mode: {processingMode}."
Expected Output: {output_extension.upper()} format
Requirements:
1. Extract the most relevant text content that would be useful for the AI task
2. Focus on content that directly relates to: {aiPrompt}
3. Include key information, data, and insights that the AI needs
4. Provide clean, readable text without formatting artifacts
Document: {doc.fileName}
"""
logger.debug(f"Extracting content from {doc.fileName} with task-specific prompt: {extraction_prompt[:100]}...")
extracted_content = await self.services.documentExtraction.extractContentFromDocument(
prompt=extraction_prompt.strip(),
document=doc
) )
try:
extracted_list = await self.services.extraction.extractContentFromDocuments(
prompt=extraction_prompt,
documents=batch_docs,
options={"ai": {"enabled": False}, "mergeStrategy": {}}
)
except Exception:
extracted_list = []
if extracted_content and extracted_content.contents: # Helper to aggregate readable text from parts
# Get the first content item's data def _partsToText(parts) -> str:
lines: List[str] = []
for p in (parts or []):
try:
if getattr(p, 'typeGroup', '') in ("text", "table", "structure") and getattr(p, 'data', None):
lines.append(p.data)
except Exception:
continue
return "\n\n".join(lines)
for i, doc in enumerate(chatDocuments):
file_info = self.services.workflow.getFileInfo(doc.fileId)
content = ""
try:
ec = extracted_list[i] if i < len(extracted_list) else None
if ec:
content = _partsToText(getattr(ec, 'parts', []))
except Exception:
content = "" content = ""
for content_item in extracted_content.contents:
if hasattr(content_item, 'data') and content_item.data:
content += content_item.data + " "
if content.strip(): if content.strip():
metadata_info = "" metadata_info = ""
if file_info and includeMetadata: if file_info and includeMetadata:
metadata_info = f" (Size: {file_info.get('fileSize', 'unknown')}, Type: {file_info.get('mimeType', 'unknown')})" metadata_info = f" (Size: {file_info.get('fileSize', 'unknown')}, Type: {file_info.get('mimeType', 'unknown')})"
# Adjust context length based on processing mode and AI task relevance
base_length = 5000 if processingMode == "detailed" else 3000 if processingMode == "advanced" else 2000 base_length = 5000 if processingMode == "detailed" else 3000 if processingMode == "advanced" else 2000
# For detailed mode, include more context
if processingMode == "detailed": if processingMode == "detailed":
context_parts.append(f"Document: {doc.fileName}{metadata_info}\nRelevance to AI Task: This document contains content directly related to '{aiPrompt[:100]}...'\nContent:\n{content[:base_length]}...") context_parts.append(
f"Document: {doc.fileName}{metadata_info}\nRelevance to AI Task: This document contains content directly related to '{aiPrompt[:100]}...'\nContent:\n{content[:base_length]}..."
)
else: else:
context_parts.append(f"Document: {doc.fileName}{metadata_info}\nContent:\n{content[:base_length]}...") context_parts.append(
f"Document: {doc.fileName}{metadata_info}\nContent:\n{content[:base_length]}..."
)
else: else:
context_parts.append(f"Document: {doc.fileName} [No readable text content - binary file]") context_parts.append(f"Document: {doc.fileName} [No readable text content - binary file]")
else:
context_parts.append(f"Document: {doc.fileName} [No readable text content - binary file]")
except Exception as extract_error:
context_parts.append(f"Document: {doc.fileName} [Could not extract content - binary file]")
if context_parts: if context_parts:
# Add a summary header to help the AI understand the context
context_header = f""" context_header = f"""
=== DOCUMENT CONTEXT FOR AI PROCESSING === === DOCUMENT CONTEXT FOR AI PROCESSING ===
AI Task: {aiPrompt[:100]}... AI Task: {aiPrompt[:100]}...
@ -147,7 +151,6 @@ class MethodAi(MethodBase):
Use this information to provide the most accurate and helpful response. Use this information to provide the most accurate and helpful response.
================================================ ================================================
""" """
context = context_header + "\n\n" + "\n\n".join(context_parts) context = context_header + "\n\n" + "\n\n".join(context_parts)
logger.info(f"Included {len(chatDocuments)} documents in AI context with task-specific extraction") logger.info(f"Included {len(chatDocuments)} documents in AI context with task-specific extraction")

View file

@ -62,32 +62,36 @@ class MethodDocument(MethodBase):
error="No documents found for the provided reference" error="No documents found for the provided reference"
) )
# Extract content from all documents using AI # Batch extract content from all documents at once
all_extracted_content = [] all_extracted_content = []
file_infos = [] file_infos = []
batch_docs = []
for chatDocument in chatDocuments: for chatDocument in chatDocuments:
file_info = self.services.workflow.getFileInfo(chatDocument.fileId) file_info = self.services.workflow.getFileInfo(chatDocument.fileId)
try:
# Use the document content extraction service with the specific AI prompt
# This handles all document types (text, binary, image, etc.) intelligently
extracted_content = await self.services.documentExtraction.extractContentFromDocument(
prompt=aiPrompt,
document=chatDocument
)
if extracted_content and extracted_content.contents:
all_extracted_content.append(extracted_content)
if includeMetadata: if includeMetadata:
file_infos.append(file_info) file_infos.append(file_info)
logger.info(f"Successfully extracted content from {chatDocument.fileName}") try:
else: data = self.services.workflow.getFileData(chatDocument.fileId) if hasattr(chatDocument, 'fileId') else None
logger.warning(f"No content extracted from {chatDocument.fileName}") except Exception:
data = None
batch_docs.append({
"id": getattr(chatDocument, 'id', None),
"bytes": data or b"",
"fileName": getattr(chatDocument, 'fileName', 'unknown'),
"mimeType": getattr(chatDocument, 'mimeType', None) or "application/octet-stream"
})
try:
extracted_list = await self.services.extraction.extractContentFromDocuments(
prompt=aiPrompt,
documents=batch_docs,
options={"ai": {"enabled": False}}
)
except Exception as e: except Exception as e:
logger.error(f"Error extracting content from {chatDocument.fileName}: {str(e)}") logger.error(f"Batch extraction failed: {str(e)}")
continue extracted_list = []
all_extracted_content = extracted_list or []
if not all_extracted_content: if not all_extracted_content:
return ActionResult.isFailure( return ActionResult.isFailure(
@ -97,20 +101,24 @@ class MethodDocument(MethodBase):
# Process each document individually with its own format conversion # Process each document individually with its own format conversion
output_documents = [] output_documents = []
for i, (chatDocument, extracted_content) in enumerate(zip(chatDocuments, all_extracted_content)): for i, chatDocument in enumerate(chatDocuments):
# Extract text content from this document # Extract text content from this document
text_content = "" text_content = ""
if hasattr(extracted_content, 'contents') and extracted_content.contents: try:
# Extract text from ContentItem objects ec = all_extracted_content[i] if i < len(all_extracted_content) else None
if ec and hasattr(ec, 'parts'):
text_parts = [] text_parts = []
for content_item in extracted_content.contents: for part in getattr(ec, 'parts', []):
if hasattr(content_item, 'data') and content_item.data: try:
text_parts.append(content_item.data) if getattr(part, 'typeGroup', '') in ("text", "table", "structure") and getattr(part, 'data', None):
text_parts.append(part.data)
except Exception:
continue
text_content = "\n".join(text_parts) text_content = "\n".join(text_parts)
elif isinstance(extracted_content, str):
text_content = extracted_content
else: else:
text_content = str(extracted_content) text_content = ""
except Exception:
text_content = ""
# Get the expected format for this document (or use default) # Get the expected format for this document (or use default)
target_format = None target_format = None
@ -692,19 +700,30 @@ class MethodDocument(MethodBase):
content = "" content = ""
logger.info(f"Processing document: type={type(doc)}") logger.info(f"Processing document: type={type(doc)}")
# Get actual file content using the document content extraction service # Batch extraction approach: prepare one doc payload and call extractor
try: try:
extracted_content = await self.services.documentExtraction.extractContentFromDocument( try:
data = self.services.workflow.getFileData(doc.fileId) if hasattr(doc, 'fileId') else None
except Exception:
data = None
extracted_list = await self.services.extraction.extractContentFromDocuments(
prompt="Extract readable text content for HTML report generation", prompt="Extract readable text content for HTML report generation",
document=doc documents=[{
"id": getattr(doc, 'id', None),
"bytes": data or b"",
"fileName": getattr(doc, 'fileName', 'unknown'),
"mimeType": getattr(doc, 'mimeType', None) or "application/octet-stream"
}],
options={"ai": {"enabled": False}}
) )
ec = extracted_list[0] if extracted_list else None
if extracted_content and extracted_content.contents: if ec and hasattr(ec, 'parts'):
# Get the first content item's data for part in getattr(ec, 'parts', []):
for content_item in extracted_content.contents: try:
if hasattr(content_item, 'data') and content_item.data: if getattr(part, 'typeGroup', '') in ("text", "table", "structure") and getattr(part, 'data', None):
content += content_item.data + " " content += part.data + " "
except Exception:
continue
if content.strip(): if content.strip():
logger.info(f" Retrieved content from file: {len(content)} characters") logger.info(f" Retrieved content from file: {len(content)} characters")
else: else:

View file

@ -1392,45 +1392,53 @@ class MethodOutlook(MethodBase):
composition_documents = [] composition_documents = []
if documentList: if documentList:
try: try:
# Get document content from service center
docs = self.services.workflow.getChatDocumentsFromDocumentList(documentList) docs = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
if docs: if docs:
composition_documents.extend(docs)
# Batch extract summaries for AI context
batch_docs = []
for doc in docs: for doc in docs:
composition_documents.append(doc) try:
data = self.services.workflow.getFileData(doc.fileId) if hasattr(doc, 'fileId') else None
except Exception:
data = None
batch_docs.append({
"id": getattr(doc, 'id', None),
"bytes": data or b"",
"fileName": getattr(doc, 'fileName', 'unknown'),
"mimeType": getattr(doc, 'mimeType', None) or "application/octet-stream"
})
# Extract content for AI context using proper document service
try: try:
if hasattr(doc, 'fileId') and doc.fileId: extracted_list = await self.services.extraction.extractContentFromDocuments(
# Use the document content extraction service instead of raw file reading
try:
extracted_content = await self.services.documentExtraction.extractContentFromDocument(
prompt="Extract readable text content for email composition", prompt="Extract readable text content for email composition",
document=doc documents=batch_docs,
options={"ai": {"enabled": False}}
) )
except Exception:
extracted_list = []
if extracted_content and extracted_content.contents: # Aggregate previews
# Get the first content item's data def _partsToText(parts) -> str:
content_text = "" lines: List[str] = []
for content_item in extracted_content.contents: for p in (parts or []):
if hasattr(content_item, 'data') and content_item.data: try:
content_text += content_item.data + " " if getattr(p, 'typeGroup', '') in ("text", "table", "structure") and getattr(p, 'data', None):
lines.append(p.data)
except Exception:
continue
return "\n\n".join(lines)
for i, doc in enumerate(docs):
try:
ec = extracted_list[i] if i < len(extracted_list) else None
content_text = _partsToText(getattr(ec, 'parts', [])) if ec else ""
if content_text.strip(): if content_text.strip():
# Truncate content for AI context (avoid token limits)
content_preview = content_text[:1000] + "..." if len(content_text) > 1000 else content_text content_preview = content_text[:1000] + "..." if len(content_text) > 1000 else content_text
document_content_summary += f"\nDocument: {doc.fileName}\nContent Preview: {content_preview}\n" document_content_summary += f"\nDocument: {doc.fileName}\nContent Preview: {content_preview}\n"
# No content to extract except Exception:
continue
except Exception as extract_error:
# Content extraction failed (normal for binary files)
pass
else:
logger.warning(f"Document {doc.fileName} has no fileId")
except Exception as e:
logger.warning(f"Error processing document {doc.fileName}: {str(e)}")
else: else:
logger.warning("No documents found from documentList") logger.warning("No documents found from documentList")
except Exception as e: except Exception as e:

View file

@ -33,7 +33,7 @@ class MethodSharepoint(MethodBase):
def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]: def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
"""Get Microsoft connection from connection reference and configure SharePoint service""" """Get Microsoft connection from connection reference and configure SharePoint service"""
try: try:
userConnection = self.service.getUserConnectionFromConnectionReference(connectionReference) userConnection = self.services.workflow.getUserConnectionFromConnectionReference(connectionReference)
if not userConnection: if not userConnection:
logger.warning(f"No user connection found for reference: {connectionReference}") logger.warning(f"No user connection found for reference: {connectionReference}")
return None return None
@ -48,7 +48,7 @@ class MethodSharepoint(MethodBase):
return None return None
# Configure SharePoint service with the UserConnection # Configure SharePoint service with the UserConnection
if not self.service.sharepoint.setAccessTokenFromConnection(userConnection): if not self.services.sharepoint.setAccessTokenFromConnection(userConnection):
logger.warning(f"Failed to configure SharePoint service with connection {userConnection.id}") logger.warning(f"Failed to configure SharePoint service with connection {userConnection.id}")
return None return None
@ -363,11 +363,11 @@ class MethodSharepoint(MethodBase):
async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]: async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
"""Make a Microsoft Graph API call with timeout and detailed logging""" """Make a Microsoft Graph API call with timeout and detailed logging"""
try: try:
if not hasattr(self.service, 'sharepoint') or not self.service.sharepoint._target.access_token: if not hasattr(self.services, 'sharepoint') or not self.services.sharepoint._target.access_token:
return {"error": "SharePoint service not configured with access token"} return {"error": "SharePoint service not configured with access token"}
headers = { headers = {
"Authorization": f"Bearer {self.service.sharepoint._target.access_token}", "Authorization": f"Bearer {self.services.sharepoint._target.access_token}",
"Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json" "Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
} }
@ -1014,7 +1014,7 @@ class MethodSharepoint(MethodBase):
# For content download, we need to handle binary data # For content download, we need to handle binary data
try: try:
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
headers = {"Authorization": f"Bearer {self.service.sharepoint._target.access_token}"} headers = {"Authorization": f"Bearer {self.services.sharepoint._target.access_token}"}
async with session.get(f"https://graph.microsoft.com/v1.0/{content_endpoint}", headers=headers) as response: async with session.get(f"https://graph.microsoft.com/v1.0/{content_endpoint}", headers=headers) as response:
if response.status == 200: if response.status == 200:
content = await response.text() content = await response.text()

View file

@ -16,7 +16,8 @@ from modules.datamodels.datamodelWorkflow import (
TaskResult, TaskResult,
ReviewContext, ReviewContext,
TaskStatus, TaskStatus,
ActionResult ActionResult,
TaskAction
) )
from modules.datamodels.datamodelChat import ( from modules.datamodels.datamodelChat import (
WorkflowResult, WorkflowResult,
@ -47,7 +48,6 @@ from modules.workflows.processing.promptFactoryPlaceholders import (
extractUserLanguage, extractUserLanguage,
extractReviewContent extractReviewContent
) )
from modules.services.serviceDocument.mainServiceDocumentGeneration import DocumentGenerationService
from modules.workflows.processing.promptFactory import methods from modules.workflows.processing.promptFactory import methods
from modules.workflows.processing.executionState import should_continue from modules.workflows.processing.executionState import should_continue
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
@ -62,7 +62,6 @@ class HandlingTasks:
def __init__(self, services, workflow=None): def __init__(self, services, workflow=None):
self.services = services self.services = services
self.workflow = workflow self.workflow = workflow
self.documentGenerator = DocumentGenerationService(self.services.center)
def _checkWorkflowStopped(self): def _checkWorkflowStopped(self):
""" """
@ -71,7 +70,7 @@ class HandlingTasks:
""" """
try: try:
# Get the current workflow status from the database to avoid stale data # Get the current workflow status from the database to avoid stale data
current_workflow = services.chatInterface.getWorkflow(self.service.workflow.id) current_workflow = self.services.chatInterface.getWorkflow(self.workflow.id)
if current_workflow and current_workflow.status == "stopped": if current_workflow and current_workflow.status == "stopped":
logger.info("Workflow stopped by user, aborting execution") logger.info("Workflow stopped by user, aborting execution")
raise WorkflowStoppedException("Workflow was stopped by user") raise WorkflowStoppedException("Workflow was stopped by user")
@ -81,7 +80,7 @@ class HandlingTasks:
except Exception as e: except Exception as e:
# If we can't get the current status due to other database issues, fall back to the in-memory object # If we can't get the current status due to other database issues, fall back to the in-memory object
logger.warning(f"Could not check current workflow status from database: {str(e)}") logger.warning(f"Could not check current workflow status from database: {str(e)}")
if self.service.workflow.status == "stopped": if self.workflow and self.workflow.status == "stopped":
logger.info("Workflow stopped by user (from in-memory object), aborting execution") logger.info("Workflow stopped by user (from in-memory object), aborting execution")
raise WorkflowStoppedException("Workflow was stopped by user") raise WorkflowStoppedException("Workflow was stopped by user")
@ -137,7 +136,7 @@ class HandlingTasks:
# Extract content for placeholders # Extract content for placeholders
user_prompt = extractUserPrompt(task_planning_context) user_prompt = extractUserPrompt(task_planning_context)
available_documents = extractAvailableDocuments(task_planning_context) available_documents = extractAvailableDocuments(task_planning_context)
workflow_history = extractWorkflowHistory(self.service, task_planning_context) workflow_history = extractWorkflowHistory(self.services, task_planning_context)
# Create placeholders dictionary # Create placeholders dictionary
placeholders = { placeholders = {
@ -206,7 +205,7 @@ class HandlingTasks:
# LANGUAGE DETECTION: Determine user language once for the entire workflow # LANGUAGE DETECTION: Determine user language once for the entire workflow
# Priority: 1. languageUserDetected from AI response, 2. service.user.language, 3. "en" # Priority: 1. languageUserDetected from AI response, 2. service.user.language, 3. "en"
detected_language = task_plan_dict.get('languageUserDetected', '').strip() detected_language = task_plan_dict.get('languageUserDetected', '').strip()
service_user_language = getattr(self.service.user, 'language', '') if self.service and self.service.user else '' service_user_language = getattr(self.services.user, 'language', '') if self.services and self.services.user else ''
if detected_language and len(detected_language) == 2: # Valid language code like "en", "de", "fr" if detected_language and len(detected_language) == 2: # Valid language code like "en", "de", "fr"
user_language = detected_language user_language = detected_language
@ -219,8 +218,8 @@ class HandlingTasks:
logger.info(f"Using default language: {user_language}") logger.info(f"Using default language: {user_language}")
# Set the detected language in the service for use throughout the workflow # Set the detected language in the service for use throughout the workflow
if self.service and self.service.user: if self.services and self.services.user:
self.service.user.language = user_language self.services.user.language = user_language
logger.info(f"Set workflow user language to: {user_language}") logger.info(f"Set workflow user language to: {user_language}")
tasks = [] tasks = []
@ -414,9 +413,9 @@ class HandlingTasks:
# Extract content for placeholders # Extract content for placeholders
user_prompt = extractUserPrompt(action_context) user_prompt = extractUserPrompt(action_context)
available_documents = extractAvailableDocuments(action_context) available_documents = extractAvailableDocuments(action_context)
workflow_history = extractWorkflowHistory(self.service, action_context) workflow_history = extractWorkflowHistory(self.services, action_context)
available_methods = extractAvailableMethods(self.service) available_methods = extractAvailableMethods(self.services)
user_language = extractUserLanguage(self.service) user_language = extractUserLanguage(self.services)
# Create placeholders dictionary # Create placeholders dictionary
placeholders = { placeholders = {
@ -527,7 +526,7 @@ class HandlingTasks:
# Extract content for placeholders # Extract content for placeholders
user_prompt = extractUserPrompt(context) user_prompt = extractUserPrompt(context)
available_documents = extractAvailableDocuments(context) available_documents = extractAvailableDocuments(context)
user_language = extractUserLanguage(self.service) user_language = extractUserLanguage(self.services)
available_methods = extractAvailableMethods(self.service) available_methods = extractAvailableMethods(self.service)
# Create placeholders dictionary # Create placeholders dictionary
@ -581,7 +580,7 @@ class HandlingTasks:
method = action.get('method', '') method = action.get('method', '')
name = action.get('name', '') name = action.get('name', '')
action_signature = "" action_signature = ""
if self.service and method in methods: if self.services and method in methods:
method_instance = methods[method]['instance'] method_instance = methods[method]['instance']
action_signature = method_instance.getActionSignature(name) action_signature = method_instance.getActionSignature(name)
@ -624,8 +623,8 @@ class HandlingTasks:
parameters = param_obj.get('parameters', {}) if isinstance(param_obj, dict) else {} parameters = param_obj.get('parameters', {}) if isinstance(param_obj, dict) else {}
# Apply minimal defaults in-code (language) # Apply minimal defaults in-code (language)
if 'language' not in parameters and hasattr(self.service, 'user') and getattr(self.service.user, 'language', None): if 'language' not in parameters and hasattr(self.services, 'user') and getattr(self.services.user, 'language', None):
parameters['language'] = self.service.user.language parameters['language'] = self.services.user.language
# Build a synthetic TaskAction for execution routing and labels # Build a synthetic TaskAction for execution routing and labels
current_round = getattr(self.workflow, 'currentRound', 0) current_round = getattr(self.workflow, 'currentRound', 0)
@ -718,7 +717,7 @@ class HandlingTasks:
# Update workflow context for this task # Update workflow context for this task
if task_index is not None: if task_index is not None:
self.service.setWorkflowContext(task_number=task_index) self.services.setWorkflowContext(task_number=task_index)
# Remove the increment call that causes double-increment bug # Remove the increment call that causes double-increment bug
# Create database log entry for task start in format expected by frontend # Create database log entry for task start in format expected by frontend
@ -765,7 +764,7 @@ class HandlingTasks:
self._checkWorkflowStopped() self._checkWorkflowStopped()
# Update workflow[currentAction] for UI # Update workflow[currentAction] for UI
self.updateWorkflowBeforeExecutingAction(step) self.updateWorkflowBeforeExecutingAction(step)
self.service.setWorkflowContext(action_number=step) self.services.setWorkflowContext(action_number=step)
try: try:
t0 = time.time() t0 = time.time()
selection = await self.plan_select(context) selection = await self.plan_select(context)
@ -864,7 +863,7 @@ class HandlingTasks:
self.updateWorkflowBeforeExecutingAction(action_number) self.updateWorkflowBeforeExecutingAction(action_number)
# Update workflow context for this action # Update workflow context for this action
self.service.setWorkflowContext(action_number=action_number) self.services.setWorkflowContext(action_number=action_number)
# Remove the increment call that causes double-increment bug # Remove the increment call that causes double-increment bug
# Log action start in format expected by frontend # Log action start in format expected by frontend
@ -1483,7 +1482,7 @@ class HandlingTasks:
message = await self.createActionMessage(action, result, workflow, message_result_label, [], task_step, task_index) message = await self.createActionMessage(action, result, workflow, message_result_label, [], task_step, task_index)
if message: if message:
# Now create documents with the messageId # Now create documents with the messageId
created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow, message.id) created_documents = self.services.generation.createDocumentsFromActionResult(result, action, workflow, message.id)
# Update the message with the created documents # Update the message with the created documents
if created_documents: if created_documents:
message.documents = created_documents message.documents = created_documents
@ -1562,8 +1561,8 @@ class HandlingTasks:
logger.info(f"Result label: {result_label} - No documents") logger.info(f"Result label: {result_label} - No documents")
# Get current workflow context and stats # Get current workflow context and stats
workflow_context = self.service.getWorkflowContext() workflow_context = self.services.getWorkflowContext()
workflow_stats = self.service.getWorkflowStats() workflow_stats = self.services.getWorkflowStats()
# Create a more meaningful message that includes task context # Create a more meaningful message that includes task context
task_objective = task_step.objective if task_step else 'Unknown task' task_objective = task_step.objective if task_step else 'Unknown task'

View file

@ -9,7 +9,7 @@ import inspect
from typing import Any, Dict, List from typing import Any, Dict, List
from modules.datamodels.datamodelWorkflow import TaskContext, ReviewContext, DocumentExchange from modules.datamodels.datamodelWorkflow import TaskContext, ReviewContext, DocumentExchange
from modules.datamodels.datamodelChat import ChatDocument from modules.datamodels.datamodelChat import ChatDocument
from modules.services.serviceDocument.subDocumentUtility import getFileExtension from modules.services.serviceGeneration.subDocumentUtility import getFileExtension
from modules.workflows.methods.methodBase import MethodBase from modules.workflows.methods.methodBase import MethodBase
# Set up logger # Set up logger

View file

@ -625,8 +625,8 @@ class WorkflowManager:
documents = [] documents = []
for fileId in fileIds: for fileId in fileIds:
try: try:
# Get file info from service # Get file info from unified workflow service
fileInfo = self.handlingTasks.service.methodService.getFileInfo(fileId) fileInfo = self.services.workflow.getFileInfo(fileId)
if fileInfo: if fileInfo:
# Create document directly with all file attributes # Create document directly with all file attributes
document = ChatDocument( document = ChatDocument(
@ -647,4 +647,4 @@ class WorkflowManager:
def _setUserLanguage(self, language: str) -> None: def _setUserLanguage(self, language: str) -> None:
"""Set user language for the service center""" """Set user language for the service center"""
self.handlingTasks.service.user.language = language self.services.user.language = language

BIN
testdata/00Untitled.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 407 KiB

BIN
testdata/Muster_Kundenliste_Test1.xlsx vendored Normal file

Binary file not shown.

BIN
testdata/diagramm_komponenten.pdf vendored Normal file

Binary file not shown.