start testing with backend running
This commit is contained in:
parent
cbea086f91
commit
501cebe342
23 changed files with 618 additions and 2427 deletions
|
|
@ -1,21 +1,19 @@
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
from dataclasses import dataclass, field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
class ContentPart(BaseModel):
|
||||||
class ContentPart:
|
id: str = Field(description="Unique content part identifier")
|
||||||
id: str
|
parentId: Optional[str] = Field(default=None, description="Optional parent content part id")
|
||||||
parentId: Optional[str]
|
label: str = Field(description="Human readable label of the part")
|
||||||
label: str
|
typeGroup: str = Field(description="Logical type group: text, table, structure, binary, ...")
|
||||||
typeGroup: str
|
mimeType: str = Field(description="MIME type of the part payload")
|
||||||
mimeType: str
|
data: str = Field(default="", description="Primary data payload, often extracted text")
|
||||||
data: str
|
metadata: Dict[str, Any] = Field(default_factory=dict, description="Arbitrary metadata for the part")
|
||||||
metadata: Dict[str, Any] = field(default_factory=dict)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
class ExtractedContent(BaseModel):
|
||||||
class ExtractedContent:
|
id: str = Field(description="Extraction id or source document id")
|
||||||
id: str
|
parts: List[ContentPart] = Field(default_factory=list, description="List of extracted parts")
|
||||||
parts: List[ContentPart]
|
summary: Optional[Dict[str, Any]] = Field(default=None, description="Optional extraction summary")
|
||||||
summary: Optional[Dict[str, Any]] = None
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -55,11 +55,11 @@ class Services:
|
||||||
|
|
||||||
# Initialize service packages
|
# Initialize service packages
|
||||||
|
|
||||||
from .serviceDocument.mainServiceDocumentExtraction import DocumentExtractionService
|
from .serviceExtraction.mainServiceExtraction import ExtractionService
|
||||||
self.documentExtraction = PublicService(DocumentExtractionService(self))
|
self.extraction = PublicService(ExtractionService(self))
|
||||||
|
|
||||||
from .serviceDocument.mainServiceDocumentGeneration import DocumentGenerationService
|
from .serviceGeneration.mainServiceGeneration import GenerationService
|
||||||
self.documentGeneration = PublicService(DocumentGenerationService(self))
|
self.generation = PublicService(GenerationService(self))
|
||||||
|
|
||||||
from .serviceNeutralization.mainServiceNeutralization import NeutralizationService
|
from .serviceNeutralization.mainServiceNeutralization import NeutralizationService
|
||||||
self.neutralization = PublicService(NeutralizationService(self))
|
self.neutralization = PublicService(NeutralizationService(self))
|
||||||
|
|
@ -76,14 +76,9 @@ class Services:
|
||||||
from .serviceWorkflow.mainServiceWorkflow import WorkflowService
|
from .serviceWorkflow.mainServiceWorkflow import WorkflowService
|
||||||
self.workflow = PublicService(WorkflowService(self))
|
self.workflow = PublicService(WorkflowService(self))
|
||||||
|
|
||||||
from .serviceWeb.mainServiceWeb import WebService
|
|
||||||
self.web = PublicService(WebService(self))
|
|
||||||
|
|
||||||
from .serviceUtils.mainServiceUtils import UtilsService
|
from .serviceUtils.mainServiceUtils import UtilsService
|
||||||
self.utils = PublicService(UtilsService(self))
|
self.utils = PublicService(UtilsService(self))
|
||||||
|
|
||||||
async def extractContentFromDocument(self, prompt, document):
|
|
||||||
return await self.services.documentExtraction.extractContentFromDocument(prompt, document)
|
|
||||||
|
|
||||||
def getInterface(user: User, workflow: ChatWorkflow) -> Services:
|
def getInterface(user: User, workflow: ChatWorkflow) -> Services:
|
||||||
return Services(user, workflow)
|
return Services(user, workflow)
|
||||||
|
|
|
||||||
|
|
@ -196,7 +196,7 @@ class AiService:
|
||||||
processedContents: List[str] = []
|
processedContents: List[str] = []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
extractionResult = self.extractionService.extractDocuments(documentList, extractionOptions)
|
extractionResult = self.extractionService.extractContent(documentList, extractionOptions)
|
||||||
|
|
||||||
def _partsToText(parts) -> str:
|
def _partsToText(parts) -> str:
|
||||||
lines: List[str] = []
|
lines: List[str] = []
|
||||||
|
|
@ -205,7 +205,7 @@ class AiService:
|
||||||
lines.append(p.data)
|
lines.append(p.data)
|
||||||
return "\n\n".join(lines)
|
return "\n\n".join(lines)
|
||||||
|
|
||||||
if processIndividually and isinstance(extractionResult, list):
|
if isinstance(extractionResult, list):
|
||||||
for i, ec in enumerate(extractionResult):
|
for i, ec in enumerate(extractionResult):
|
||||||
try:
|
try:
|
||||||
contentText = _partsToText(ec.parts)
|
contentText = _partsToText(ec.parts)
|
||||||
|
|
@ -216,9 +216,8 @@ class AiService:
|
||||||
logger.warning(f"Error aggregating extracted content: {str(e)}")
|
logger.warning(f"Error aggregating extracted content: {str(e)}")
|
||||||
processedContents.append("[Error aggregating content]")
|
processedContents.append("[Error aggregating content]")
|
||||||
else:
|
else:
|
||||||
# pooled mode returns dict
|
# Fallback: no content
|
||||||
parts = extractionResult.get("parts", []) if isinstance(extractionResult, dict) else []
|
contentText = ""
|
||||||
contentText = _partsToText(parts)
|
|
||||||
if compressDocuments and len(contentText.encode("utf-8")) > 10000:
|
if compressDocuments and len(contentText.encode("utf-8")) > 10000:
|
||||||
contentText = await self._compressContent(contentText, 10000, "document")
|
contentText = await self._compressContent(contentText, 10000, "document")
|
||||||
processedContents.append(contentText)
|
processedContents.append(contentText)
|
||||||
|
|
@ -359,7 +358,7 @@ class AiService:
|
||||||
"mimeType": d.mimeType
|
"mimeType": d.mimeType
|
||||||
} for d in documents]
|
} for d in documents]
|
||||||
|
|
||||||
extracted_content = await self.extractionService.extractDocuments(
|
extracted_content = await self.extractionService.extractContent(
|
||||||
documentList=documentList,
|
documentList=documentList,
|
||||||
options={
|
options={
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
|
|
@ -371,8 +370,15 @@ class AiService:
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get text content from extracted parts using typeGroup-aware processing
|
# Build context from list of ExtractedContent
|
||||||
context = self._extractTextFromContentParts(extracted_content)
|
if isinstance(extracted_content, list):
|
||||||
|
context = "\n\n---\n\n".join([
|
||||||
|
"\n\n".join([
|
||||||
|
p.data for p in ec.parts if p.typeGroup in ["text", "table", "structure"] and p.data
|
||||||
|
]) for ec in extracted_content
|
||||||
|
])
|
||||||
|
else:
|
||||||
|
context = ""
|
||||||
|
|
||||||
# Check size and reduce if needed
|
# Check size and reduce if needed
|
||||||
full_prompt = prompt + "\n\n" + context if context else prompt
|
full_prompt = prompt + "\n\n" + context if context else prompt
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,7 +1,7 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from ..types import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..subRegistry import Chunker
|
from ..subRegistry import Chunker
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
from ..types import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..subRegistry import Chunker
|
from ..subRegistry import Chunker
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
from ..types import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..subRegistry import Chunker
|
from ..subRegistry import Chunker
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional, Union
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from .subRegistry import ExtractorRegistry, ChunkerRegistry
|
from .subRegistry import ExtractorRegistry, ChunkerRegistry
|
||||||
|
|
@ -7,51 +7,50 @@ from modules.datamodels.datamodelExtraction import ExtractedContent, ContentPart
|
||||||
|
|
||||||
|
|
||||||
class ExtractionService:
|
class ExtractionService:
|
||||||
def __init__(self):
|
def __init__(self, services: Optional[Any] = None):
|
||||||
|
self.services = services
|
||||||
self._extractorRegistry = ExtractorRegistry()
|
self._extractorRegistry = ExtractorRegistry()
|
||||||
self._chunkerRegistry = ChunkerRegistry()
|
self._chunkerRegistry = ChunkerRegistry()
|
||||||
|
|
||||||
def extractDocuments(self, documentList: List[Dict[str, Any]], options: Dict[str, Any]) -> Any:
|
def extractContent(self, documentList: List[Dict[str, Any]], options: Dict[str, Any]) -> List[ExtractedContent]:
|
||||||
processIndividually = options.get("processDocumentsIndividually", True)
|
results: List[ExtractedContent] = []
|
||||||
|
for doc in documentList:
|
||||||
|
ec = runExtraction(
|
||||||
|
extractorRegistry=self._extractorRegistry,
|
||||||
|
chunkerRegistry=self._chunkerRegistry,
|
||||||
|
documentBytes=doc.get("bytes"),
|
||||||
|
fileName=doc.get("fileName"),
|
||||||
|
mimeType=doc.get("mimeType"),
|
||||||
|
options=options
|
||||||
|
)
|
||||||
|
# Attach document id to parts if missing
|
||||||
|
for p in ec.parts:
|
||||||
|
if "documentId" not in p.metadata:
|
||||||
|
p.metadata["documentId"] = doc.get("id") or str(uuid.uuid4())
|
||||||
|
ec = applyAiIfRequested(ec, options)
|
||||||
|
results.append(ec)
|
||||||
|
return results
|
||||||
|
|
||||||
if processIndividually:
|
async def extractContentFromDocument(self, prompt: str, documents: List[Dict[str, Any]], options: Optional[Dict[str, Any]] = None) -> List[ExtractedContent]:
|
||||||
results: List[ExtractedContent] = []
|
"""
|
||||||
for doc in documentList:
|
Batch extract content from multiple documents.
|
||||||
ec = runExtraction(
|
|
||||||
extractorRegistry=self._extractorRegistry,
|
|
||||||
chunkerRegistry=self._chunkerRegistry,
|
|
||||||
documentBytes=doc.get("bytes"),
|
|
||||||
fileName=doc.get("fileName"),
|
|
||||||
mimeType=doc.get("mimeType"),
|
|
||||||
options=options
|
|
||||||
)
|
|
||||||
ec = applyAiIfRequested(ec, options)
|
|
||||||
results.append(ec)
|
|
||||||
return results
|
|
||||||
else:
|
|
||||||
allParts: List[ContentPart] = []
|
|
||||||
for doc in documentList:
|
|
||||||
ec = runExtraction(
|
|
||||||
extractorRegistry=self._extractorRegistry,
|
|
||||||
chunkerRegistry=self._chunkerRegistry,
|
|
||||||
documentBytes=doc.get("bytes"),
|
|
||||||
fileName=doc.get("fileName"),
|
|
||||||
mimeType=doc.get("mimeType"),
|
|
||||||
options=options
|
|
||||||
)
|
|
||||||
for p in ec.parts:
|
|
||||||
if "documentId" not in p.metadata:
|
|
||||||
p.metadata["documentId"] = doc.get("id") or str(uuid.uuid4())
|
|
||||||
allParts.extend(ec.parts)
|
|
||||||
|
|
||||||
pooled = poolAndLimit(allParts, self._chunkerRegistry, options)
|
Args:
|
||||||
# In pooled mode we return a dict containing pooled parts and an optional AI output
|
prompt: Instructional prompt for optional AI post-processing/selection.
|
||||||
pooledResult: Dict[str, Any] = {
|
documents: List of dicts with keys: id, bytes, fileName, mimeType.
|
||||||
"parts": pooled,
|
options: Optional extraction options. "ai" config may be provided.
|
||||||
"summary": {"documents": len(documentList)}
|
|
||||||
}
|
Returns:
|
||||||
aiOut = applyAiIfRequested(ExtractedContent(id=str(uuid.uuid4()), parts=pooled, summary=None), options)
|
List[ExtractedContent]: one per input document in order.
|
||||||
pooledResult["ai"] = aiOut.summary if isinstance(aiOut, ExtractedContent) else aiOut
|
"""
|
||||||
return pooledResult
|
# Build options safely and inject prompt for downstream AI selection if desired
|
||||||
|
effectiveOptions: Dict[str, Any] = options.copy() if options else {}
|
||||||
|
aiCfg = effectiveOptions.get("ai") or {}
|
||||||
|
if prompt:
|
||||||
|
aiCfg["prompt"] = prompt
|
||||||
|
effectiveOptions["ai"] = aiCfg
|
||||||
|
|
||||||
|
# Delegate to existing synchronous pipeline
|
||||||
|
return self.extractContent(documents, effectiveOptions)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
from .types import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
|
|
||||||
|
|
||||||
class Extractor:
|
class Extractor:
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,11 @@
|
||||||
import logging
|
import logging
|
||||||
|
import uuid
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
import re
|
import re
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||||
from modules.services.serviceDocument.subDocumentUtility import (
|
from modules.datamodels.datamodelChat import ChatDocument
|
||||||
|
from modules.services.serviceGeneration.subDocumentUtility import (
|
||||||
getFileExtension,
|
getFileExtension,
|
||||||
getMimeTypeFromExtension,
|
getMimeTypeFromExtension,
|
||||||
detectMimeTypeFromContent,
|
detectMimeTypeFromContent,
|
||||||
|
|
@ -13,9 +15,13 @@ from modules.services.serviceDocument.subDocumentUtility import (
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class DocumentGenerationService:
|
class GenerationService:
|
||||||
def __init__(self, service):
|
def __init__(self, serviceCenter=None):
|
||||||
self.service = service
|
# Directly use interfaces from the provided service center (no self.service calls)
|
||||||
|
self.serviceCenter = serviceCenter
|
||||||
|
self.interfaceDbComponent = getattr(serviceCenter, 'interfaceDbComponent', None) if serviceCenter else None
|
||||||
|
self.interfaceDbChat = getattr(serviceCenter, 'interfaceDbChat', None) if serviceCenter else None
|
||||||
|
self.workflow = getattr(serviceCenter, 'workflow', None) if serviceCenter else None
|
||||||
|
|
||||||
def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]:
|
def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
|
|
@ -53,7 +59,8 @@ class DocumentGenerationService:
|
||||||
mime_type = doc.mimeType
|
mime_type = doc.mimeType
|
||||||
if mime_type == "application/octet-stream":
|
if mime_type == "application/octet-stream":
|
||||||
content = doc.documentData
|
content = doc.documentData
|
||||||
mime_type = detectMimeTypeFromContent(content, doc.documentName, self.service)
|
# Detect MIME without relying on a service center
|
||||||
|
mime_type = detectMimeTypeFromContent(content, doc.documentName)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'fileName': doc.documentName,
|
'fileName': doc.documentName,
|
||||||
|
|
@ -98,8 +105,8 @@ class DocumentGenerationService:
|
||||||
|
|
||||||
logger.info(f"Document {document_name} has content: {len(content)} characters")
|
logger.info(f"Document {document_name} has content: {len(content)} characters")
|
||||||
|
|
||||||
# Create document with file in one step
|
# Create document with file in one step using interfaces directly
|
||||||
document = self.service.createDocument(
|
document = self._createDocument(
|
||||||
fileName=document_name,
|
fileName=document_name,
|
||||||
mimeType=mime_type,
|
mimeType=mime_type,
|
||||||
content=content,
|
content=content,
|
||||||
|
|
@ -126,9 +133,9 @@ class DocumentGenerationService:
|
||||||
def _setDocumentWorkflowContext(self, document, action, workflow):
|
def _setDocumentWorkflowContext(self, document, action, workflow):
|
||||||
"""Set workflow context on a document for proper routing and labeling"""
|
"""Set workflow context on a document for proper routing and labeling"""
|
||||||
try:
|
try:
|
||||||
# Get current workflow context from service center
|
# Get current workflow context directly from workflow object
|
||||||
workflow_context = self.service.getWorkflowContext()
|
workflow_context = self._getWorkflowContext(workflow)
|
||||||
workflow_stats = self.service.getWorkflowStats()
|
workflow_stats = self._getWorkflowStats(workflow)
|
||||||
|
|
||||||
current_round = workflow_context.get('currentRound', 0)
|
current_round = workflow_context.get('currentRound', 0)
|
||||||
current_task = workflow_context.get('currentTask', 0)
|
current_task = workflow_context.get('currentTask', 0)
|
||||||
|
|
@ -155,3 +162,99 @@ class DocumentGenerationService:
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Could not set workflow context on document: {str(e)}")
|
logger.warning(f"Could not set workflow context on document: {str(e)}")
|
||||||
|
|
||||||
|
def _createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: str = None) -> Optional[ChatDocument]:
|
||||||
|
"""Create file and ChatDocument using interfaces without service indirection."""
|
||||||
|
try:
|
||||||
|
if not self.interfaceDbComponent:
|
||||||
|
logger.error("Component interface not available for document creation")
|
||||||
|
return None
|
||||||
|
# Convert content to bytes
|
||||||
|
if base64encoded:
|
||||||
|
import base64
|
||||||
|
content_bytes = base64.b64decode(content)
|
||||||
|
else:
|
||||||
|
content_bytes = content.encode('utf-8')
|
||||||
|
# Create file and store data
|
||||||
|
file_item = self.interfaceDbComponent.createFile(
|
||||||
|
name=fileName,
|
||||||
|
mimeType=mimeType,
|
||||||
|
content=content_bytes
|
||||||
|
)
|
||||||
|
self.interfaceDbComponent.createFileData(file_item.id, content_bytes)
|
||||||
|
# Collect file info
|
||||||
|
file_info = self._getFileInfo(file_item.id)
|
||||||
|
if not file_info:
|
||||||
|
logger.error(f"Could not get file info for fileId: {file_item.id}")
|
||||||
|
return None
|
||||||
|
# Build ChatDocument
|
||||||
|
document = ChatDocument(
|
||||||
|
id=str(uuid.uuid4()),
|
||||||
|
messageId=messageId or "",
|
||||||
|
fileId=file_item.id,
|
||||||
|
fileName=file_info.get("fileName", fileName),
|
||||||
|
fileSize=file_info.get("size", 0),
|
||||||
|
mimeType=file_info.get("mimeType", mimeType)
|
||||||
|
)
|
||||||
|
# Ensure document can access component interface later
|
||||||
|
if hasattr(document, 'setComponentInterface') and self.interfaceDbComponent:
|
||||||
|
try:
|
||||||
|
document.setComponentInterface(self.interfaceDbComponent)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return document
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error creating document: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _getFileInfo(self, fileId: str) -> Optional[Dict[str, Any]]:
|
||||||
|
try:
|
||||||
|
if not self.interfaceDbComponent:
|
||||||
|
return None
|
||||||
|
file_item = self.interfaceDbComponent.getFile(fileId)
|
||||||
|
if file_item:
|
||||||
|
return {
|
||||||
|
"id": file_item.id,
|
||||||
|
"fileName": file_item.fileName,
|
||||||
|
"size": file_item.fileSize,
|
||||||
|
"mimeType": file_item.mimeType,
|
||||||
|
"fileHash": getattr(file_item, 'fileHash', None),
|
||||||
|
"creationDate": getattr(file_item, 'creationDate', None)
|
||||||
|
}
|
||||||
|
return None
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting file info for {fileId}: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _getWorkflowContext(self, workflow) -> Dict[str, int]:
|
||||||
|
try:
|
||||||
|
return {
|
||||||
|
'currentRound': getattr(workflow, 'currentRound', 0),
|
||||||
|
'currentTask': getattr(workflow, 'currentTask', 0),
|
||||||
|
'currentAction': getattr(workflow, 'currentAction', 0)
|
||||||
|
}
|
||||||
|
except Exception:
|
||||||
|
return {'currentRound': 0, 'currentTask': 0, 'currentAction': 0}
|
||||||
|
|
||||||
|
def _getWorkflowStats(self, workflow) -> Dict[str, Any]:
|
||||||
|
try:
|
||||||
|
context = self._getWorkflowContext(workflow)
|
||||||
|
return {
|
||||||
|
'currentRound': context['currentRound'],
|
||||||
|
'currentTask': context['currentTask'],
|
||||||
|
'currentAction': context['currentAction'],
|
||||||
|
'totalTasks': getattr(workflow, 'totalTasks', 0),
|
||||||
|
'totalActions': getattr(workflow, 'totalActions', 0),
|
||||||
|
'workflowStatus': getattr(workflow, 'status', 'unknown'),
|
||||||
|
'workflowId': getattr(workflow, 'id', 'unknown')
|
||||||
|
}
|
||||||
|
except Exception:
|
||||||
|
return {
|
||||||
|
'currentRound': 0,
|
||||||
|
'currentTask': 0,
|
||||||
|
'currentAction': 0,
|
||||||
|
'totalTasks': 0,
|
||||||
|
'totalActions': 0,
|
||||||
|
'workflowStatus': 'unknown',
|
||||||
|
'workflowId': 'unknown'
|
||||||
|
}
|
||||||
|
|
@ -4,8 +4,8 @@ from typing import Dict, Any, List, Optional
|
||||||
from modules.datamodels.datamodelUam import User, UserConnection
|
from modules.datamodels.datamodelUam import User, UserConnection
|
||||||
from modules.datamodels.datamodelChat import ChatDocument, ChatMessage
|
from modules.datamodels.datamodelChat import ChatDocument, ChatMessage
|
||||||
from modules.datamodels.datamodelChat import ExtractedContent
|
from modules.datamodels.datamodelChat import ExtractedContent
|
||||||
from modules.services.serviceDocument.mainServiceDocumentExtraction import DocumentExtractionService
|
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
||||||
from modules.services.serviceDocument.subDocumentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
|
from modules.services.serviceGeneration.subDocumentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||||
from modules.services.serviceAi.mainServiceAi import AiService
|
from modules.services.serviceAi.mainServiceAi import AiService
|
||||||
from modules.security.tokenManager import TokenManager
|
from modules.security.tokenManager import TokenManager
|
||||||
|
|
@ -312,80 +312,6 @@ class WorkflowService:
|
||||||
def getFileData(self, fileId: str) -> bytes:
|
def getFileData(self, fileId: str) -> bytes:
|
||||||
"""Get file data by ID"""
|
"""Get file data by ID"""
|
||||||
return self.interfaceDbComponent.getFileData(fileId)
|
return self.interfaceDbComponent.getFileData(fileId)
|
||||||
|
|
||||||
async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
|
|
||||||
"""Extract content from ChatDocument using prompt"""
|
|
||||||
try:
|
|
||||||
# ChatDocument is just a reference, so we need to get file data using fileId
|
|
||||||
if not hasattr(document, 'fileId') or not document.fileId:
|
|
||||||
logger.error(f"Document {document.id} has no fileId")
|
|
||||||
raise ValueError("Document has no fileId")
|
|
||||||
|
|
||||||
# Get file data from service center using document's fileId
|
|
||||||
fileData = self.getFileData(document.fileId)
|
|
||||||
if not fileData:
|
|
||||||
logger.error(f"No file data found for fileId: {document.fileId}")
|
|
||||||
raise ValueError("No file data found for document")
|
|
||||||
|
|
||||||
# Get fileName and mime type from document properties
|
|
||||||
try:
|
|
||||||
fileName = document.fileName
|
|
||||||
mimeType = document.mimeType
|
|
||||||
except Exception as e:
|
|
||||||
# Try to diagnose and recover the issue
|
|
||||||
diagnosis = self._diagnoseDocumentAccess(document)
|
|
||||||
logger.error(f"Critical error: Cannot access document properties for document {document.id}. Diagnosis: {diagnosis}")
|
|
||||||
|
|
||||||
# Attempt recovery
|
|
||||||
if self._recoverDocumentAccess(document):
|
|
||||||
try:
|
|
||||||
fileName = document.fileName
|
|
||||||
mimeType = document.mimeType
|
|
||||||
logger.info(f"Document access recovered for {document.id} - proceeding with AI extraction")
|
|
||||||
except Exception as recovery_error:
|
|
||||||
logger.error(f"Recovery failed for document {document.id}: {str(recovery_error)}")
|
|
||||||
raise RuntimeError(f"Document {document.id} properties are permanently inaccessible after recovery attempt - cannot proceed with AI extraction: {str(recovery_error)}")
|
|
||||||
else:
|
|
||||||
# Recovery failed - don't continue with invalid data
|
|
||||||
raise RuntimeError(f"Document {document.id} properties are inaccessible and recovery failed. Diagnosis: {diagnosis}")
|
|
||||||
|
|
||||||
# Process with DocumentExtractionService directly (no circular dependency)
|
|
||||||
docService = DocumentExtractionService(None) # Pass None to avoid circular dependency
|
|
||||||
content_items = await docService.processFileData(
|
|
||||||
fileData=fileData,
|
|
||||||
fileName=fileName,
|
|
||||||
mimeType=mimeType,
|
|
||||||
base64Encoded=False,
|
|
||||||
prompt=prompt,
|
|
||||||
enableAI=True
|
|
||||||
)
|
|
||||||
|
|
||||||
# Convert ContentItem list to ExtractedContent
|
|
||||||
contents = []
|
|
||||||
for item in content_items:
|
|
||||||
contents.append({
|
|
||||||
'label': item.label,
|
|
||||||
'data': item.data,
|
|
||||||
'metadata': {
|
|
||||||
'mimeType': item.metadata.mimeType if hasattr(item.metadata, 'mimeType') else mimeType,
|
|
||||||
'size': item.metadata.size if hasattr(item.metadata, 'size') else len(fileData),
|
|
||||||
'base64Encoded': item.metadata.base64Encoded if hasattr(item.metadata, 'base64Encoded') else False
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
extractedContent = ExtractedContent(
|
|
||||||
id=document.id,
|
|
||||||
contents=contents
|
|
||||||
)
|
|
||||||
|
|
||||||
# Note: ExtractedContent model only has 'id' and 'contents' fields
|
|
||||||
# No need to set objectId or objectType as they don't exist in the model
|
|
||||||
|
|
||||||
return extractedContent
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error extracting from document: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
|
def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
|
|
@ -456,43 +382,6 @@ class WorkflowService:
|
||||||
logger.error(f"Error during document access recovery for {document.id}: {str(e)}")
|
logger.error(f"Error during document access recovery for {document.id}: {str(e)}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: str = None) -> ChatDocument:
|
|
||||||
"""Create document with file in one step - handles file creation internally"""
|
|
||||||
# Convert content to bytes based on base64 flag
|
|
||||||
if base64encoded:
|
|
||||||
import base64
|
|
||||||
content_bytes = base64.b64decode(content)
|
|
||||||
else:
|
|
||||||
content_bytes = content.encode('utf-8')
|
|
||||||
|
|
||||||
# Create the file (hash and size are computed inside interfaceDbComponent)
|
|
||||||
file_item = self.interfaceDbComponent.createFile(
|
|
||||||
name=fileName,
|
|
||||||
mimeType=mimeType,
|
|
||||||
content=content_bytes
|
|
||||||
)
|
|
||||||
|
|
||||||
# Then store the file data
|
|
||||||
self.interfaceDbComponent.createFileData(file_item.id, content_bytes)
|
|
||||||
|
|
||||||
# Get file info to copy attributes
|
|
||||||
file_info = self.getFileInfo(file_item.id)
|
|
||||||
if not file_info:
|
|
||||||
logger.error(f"Could not get file info for fileId: {file_item.id}")
|
|
||||||
raise ValueError(f"File info not found for fileId: {file_item.id}")
|
|
||||||
|
|
||||||
# Create document with all file attributes copied
|
|
||||||
document = ChatDocument(
|
|
||||||
id=str(uuid.uuid4()),
|
|
||||||
messageId=messageId or "", # Use provided messageId or empty string as fallback
|
|
||||||
fileId=file_item.id,
|
|
||||||
fileName=file_info.get("fileName", fileName),
|
|
||||||
fileSize=file_info.get("size", 0),
|
|
||||||
mimeType=file_info.get("mimeType", mimeType)
|
|
||||||
)
|
|
||||||
|
|
||||||
return document
|
|
||||||
|
|
||||||
def calculateObjectSize(self, obj: Any) -> int:
|
def calculateObjectSize(self, obj: Any) -> int:
|
||||||
"""
|
"""
|
||||||
Calculate the size of an object in bytes.
|
Calculate the size of an object in bytes.
|
||||||
|
|
|
||||||
226
modules/services/test_all_services.py
Normal file
226
modules/services/test_all_services.py
Normal file
|
|
@ -0,0 +1,226 @@
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
|
||||||
|
# Ensure relative imports work when running directly
|
||||||
|
CURRENT_DIR = os.path.dirname(__file__)
|
||||||
|
GATEWAY_DIR = os.path.dirname(os.path.dirname(CURRENT_DIR))
|
||||||
|
if GATEWAY_DIR not in sys.path:
|
||||||
|
sys.path.append(GATEWAY_DIR)
|
||||||
|
|
||||||
|
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
||||||
|
from modules.services.serviceGeneration.mainServiceGeneration import DocumentGenerationService
|
||||||
|
from modules.datamodels.datamodelWorkflow import ActionResult, ActionDocument
|
||||||
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
|
||||||
|
from modules.services.serviceAi.mainServiceAi import AiService
|
||||||
|
|
||||||
|
|
||||||
|
TESTDATA_DIR = os.path.join(GATEWAY_DIR, "testdata")
|
||||||
|
|
||||||
|
|
||||||
|
def _read_test_files() -> List[Dict[str, Any]]:
|
||||||
|
files = []
|
||||||
|
for name in os.listdir(TESTDATA_DIR):
|
||||||
|
path = os.path.join(TESTDATA_DIR, name)
|
||||||
|
if not os.path.isfile(path):
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
with open(path, "rb") as f:
|
||||||
|
data = f.read()
|
||||||
|
mime = _guess_mime(name)
|
||||||
|
files.append({
|
||||||
|
"id": name,
|
||||||
|
"bytes": data,
|
||||||
|
"fileName": name,
|
||||||
|
"mimeType": mime,
|
||||||
|
})
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return files
|
||||||
|
|
||||||
|
|
||||||
|
def _guess_mime(name: str) -> str:
|
||||||
|
lower = name.lower()
|
||||||
|
if lower.endswith(".pdf"):
|
||||||
|
return "application/pdf"
|
||||||
|
if lower.endswith(".xlsx"):
|
||||||
|
return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||||
|
if lower.endswith(".jpg") or lower.endswith(".jpeg"):
|
||||||
|
return "image/jpeg"
|
||||||
|
if lower.endswith(".png"):
|
||||||
|
return "image/png"
|
||||||
|
return "application/octet-stream"
|
||||||
|
|
||||||
|
|
||||||
|
def run_extraction_1000_bytes() -> None:
|
||||||
|
svc = ExtractionService()
|
||||||
|
docs = _read_test_files()
|
||||||
|
options = {
|
||||||
|
# cap total pooled size per document set
|
||||||
|
"maxSize": 1000,
|
||||||
|
# allow chunking to respect the cap across parts
|
||||||
|
"chunkAllowed": True,
|
||||||
|
# chunk sizes for different content types to help fit under the cap
|
||||||
|
"textChunkSize": 500,
|
||||||
|
"tableChunkSize": 500,
|
||||||
|
"structureChunkSize": 500,
|
||||||
|
# simple merge strategy if supported
|
||||||
|
"mergeStrategy": {},
|
||||||
|
}
|
||||||
|
results = svc.extractContent(docs, options)
|
||||||
|
print("[extraction] documents:", len(docs), "results:", len(results))
|
||||||
|
for i, ec in enumerate(results):
|
||||||
|
total = sum(int(p.metadata.get("size", 0) or 0) for p in ec.parts)
|
||||||
|
print(f" - doc[{i}] parts={len(ec.parts)} pooledBytes={total}")
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Drive the three service smoke tests: extraction, generation, AI calls."""
    # Extraction leg is synchronous; the remaining legs are coroutines.
    print("=== serviceExtraction: compress to 1000 bytes ===")
    run_extraction_1000_bytes()

    async_legs = (
        ("\n=== serviceGeneration: create ActionResult and write output to testdata ===", run_generation_write_file),
        ("\n=== serviceAi: planning call + image + pdf extraction ===", run_ai_tests),
    )
    for banner, leg in async_legs:
        print(banner)
        await leg()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Entry point: run the full async smoke-test suite.
    # NOTE(review): in this view the guard appears *before* the definitions of
    # run_generation_write_file/run_ai_tests; if the real module executes in
    # this order, main() would raise NameError when it reaches them — confirm
    # the guard actually sits at the end of the file.
    asyncio.run(main())
|
||||||
|
|
||||||
|
async def run_generation_write_file() -> None:
    """Smoke-test DocumentGenerationService end to end.

    Builds an ActionResult carrying one small text document, hands it to the
    generation service through minimal in-memory stubs of the component /
    service-center interfaces, and lets the service persist the generated file
    into TESTDATA_DIR (via the stub's createFileData).
    """

    # Minimal stubs for interfaces expected by DocumentGenerationService
    class _FileItem:
        # Lightweight record mirroring the file attributes the service reads.
        def __init__(self, file_id: str, file_name: str, mime_type: str, content: bytes):
            self.id = file_id
            self.fileName = file_name
            self.mimeType = mime_type
            self.fileSize = len(content)

    class _ComponentInterface:
        # In-memory file store standing in for the DB component interface.
        def __init__(self):
            self._files = {}

        def createFile(self, name: str, mimeType: str, content: bytes):
            # Sequential ids keep the stub deterministic within a single run.
            fid = f"test_{len(self._files)+1}"
            item = _FileItem(fid, name, mimeType, content)
            self._files[fid] = item
            return item

        def createFileData(self, fileId: str, content: bytes):
            # Persist into testdata directory as requested
            item = self._files[fileId]
            out_path = os.path.join(TESTDATA_DIR, f"output_{fileId}_{item.fileName}")
            with open(out_path, "wb") as f:
                f.write(content)

        def getFile(self, fileId: str):
            return self._files.get(fileId)

    class _ServiceCenter:
        # Bare service-center shim: only the attributes the generator touches.
        def __init__(self, comp):
            self.interfaceDbComponent = comp
            self.interfaceDbChat = None
            # Anonymous workflow object carrying the counters/status fields the
            # generation service is expected to read.
            self.workflow = type("_Wf", (), {"id": "wf_test", "currentRound": 1, "currentTask": 1, "currentAction": 1, "status": "running", "totalTasks": 1, "totalActions": 1})()

    component = _ComponentInterface()
    center = _ServiceCenter(component)
    gen = DocumentGenerationService(center)

    # Build a fake action and ActionResult with a small text document
    class _Action:
        def __init__(self):
            self.id = "action_test"
            self.execMethod = "document"
            self.execAction = "generate"
            self.execParameters = {}
            self.execResultLabel = "round1_task1_action1_results"
    action = _Action()

    content = "This is a generated test file from serviceGeneration test."
    action_doc = ActionDocument(documentName="test_generated.txt", documentData=content, mimeType="text/plain")
    action_result = ActionResult(success=True, documents=[action_doc])

    # NOTE(review): createDocumentsFromActionResult is invoked synchronously even
    # though this coroutine awaits nothing — confirm the service method is not
    # itself a coroutine.
    docs = gen.createDocumentsFromActionResult(action_result, action, center.workflow, message_id="msg_test")
    print("[generation] created documents:", len(docs))
|
||||||
|
|
||||||
|
|
||||||
|
async def run_ai_tests() -> None:
    """Smoke-test AiService with three legs: a planning text call, an image
    analysis call on a fixture JPEG, and a document-context call on a fixture
    PDF.

    The image and PDF legs are skipped (with a message) when the fixture files
    are missing, and any per-leg exception is printed rather than propagated so
    the remaining legs still run.
    """
    # Create AiService instance (uses internal default model registry; no external creds required for this test)
    ai = await AiService.create()

    # Planning AI call (like in handlingTasks.generateTaskPlan)
    plan_options = AiCallOptions(
        operationType=OperationType.GENERATE_PLAN,
        priority=Priority.QUALITY,
        compressPrompt=False,
        compressContext=False,
        processingMode=ProcessingMode.DETAILED,
        maxCost=0.05,
        maxProcessingTime=10,
    )
    plan_prompt = """
You are a planning assistant. Return a compact JSON with fields: tasks:[{id, objective, success_criteria:[]}], languageUserDetected:"en".
Create exactly one simple task id:"task_1" objective:"Test planning" success_criteria:["done"].
""".strip()
    plan_resp = await ai.callAi(prompt=plan_prompt, placeholders=None, options=plan_options)
    print("[ai] planning response length:", len(plan_resp) if plan_resp else 0)

    # Image content extraction prompt using test JPEG
    img_path = os.path.join(TESTDATA_DIR, "00Untitled.jpg")
    img_resp = None
    if os.path.exists(img_path):
        try:
            with open(img_path, "rb") as f:
                img_bytes = f.read()
            img_options = AiCallOptions(
                operationType=OperationType.ANALYSE_CONTENT,
                priority=Priority.BALANCED,
                compressPrompt=True,
                compressContext=False,
                processingMode=ProcessingMode.ADVANCED,
                maxCost=0.02,
                maxProcessingTime=10,
            )
            img_resp = await ai.callAiImage(
                prompt="Describe the content of this image succinctly.",
                imageData=img_bytes,
                mimeType="image/jpeg",
                options=img_options,
            )
            print("[ai] image analysis response length:", len(img_resp) if img_resp else 0)
        except Exception as e:
            print("[ai] image analysis error:", str(e))
    else:
        print("[ai] image test file not found; skipping")

    # PDF extraction prompt: emulate text call with document context built via ExtractionService
    pdf_path = os.path.join(TESTDATA_DIR, "diagramm_komponenten.pdf")
    if os.path.exists(pdf_path):
        try:
            # Build a minimal ChatDocument-like shim that AiService._callAiText expects via extraction
            class _Doc:
                def __init__(self, file_path: str, mime: str):
                    self.id = "doc_pdf"
                    self.fileName = os.path.basename(file_path)
                    self.mimeType = mime
                    # Load the bytes eagerly; presumably the AI service reads
                    # .fileData directly — verify against AiService internals.
                    with open(file_path, "rb") as f:
                        self.fileData = f.read()
            pdf_doc = _Doc(pdf_path, "application/pdf")

            pdf_options = AiCallOptions(
                operationType=OperationType.ANALYSE_CONTENT,
                priority=Priority.BALANCED,
                compressPrompt=True,
                compressContext=True,
                processingMode=ProcessingMode.ADVANCED,
                # Tiny context cap with chunking on, to exercise compression.
                maxContextBytes=1000,
                chunkAllowed=True,
                maxCost=0.02,
                maxProcessingTime=10,
            )
            pdf_prompt = "Extract key information from the attached PDF."
            pdf_resp = await ai.callAi(prompt=pdf_prompt, documents=[pdf_doc], options=pdf_options)
            print("[ai] pdf extraction response length:", len(pdf_resp) if pdf_resp else 0)
        except Exception as e:
            print("[ai] pdf extraction error:", str(e))
    else:
        print("[ai] pdf test file not found; skipping")
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -76,66 +76,70 @@ class MethodAi(MethodBase):
|
||||||
chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
|
chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
|
||||||
if chatDocuments:
|
if chatDocuments:
|
||||||
context_parts = []
|
context_parts = []
|
||||||
|
# Build batch payload for extraction
|
||||||
|
batch_docs = []
|
||||||
for doc in chatDocuments:
|
for doc in chatDocuments:
|
||||||
file_info = self.services.workflow.getFileInfo(doc.fileId)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Use the document content extraction service with the specific AI prompt context
|
fileBytes = self.services.workflow.getFileData(doc.fileId) if hasattr(doc, 'fileId') else None
|
||||||
# This tells the extraction engine exactly what and how to extract
|
except Exception:
|
||||||
extraction_prompt = f"""
|
fileBytes = None
|
||||||
Extract content from this document for AI processing context.
|
batch_docs.append({
|
||||||
|
"id": getattr(doc, 'id', None),
|
||||||
AI Task: {aiPrompt}
|
"bytes": fileBytes or b"",
|
||||||
Processing Mode: {processingMode}
|
"fileName": getattr(doc, 'fileName', 'unknown'),
|
||||||
Expected Output: {output_extension.upper()} format
|
"mimeType": getattr(doc, 'mimeType', None) or "application/octet-stream"
|
||||||
|
})
|
||||||
Requirements:
|
|
||||||
1. Extract the most relevant text content that would be useful for the AI task
|
|
||||||
2. Focus on content that directly relates to: {aiPrompt}
|
|
||||||
3. Include key information, data, and insights that the AI needs
|
|
||||||
4. Provide clean, readable text without formatting artifacts
|
|
||||||
|
|
||||||
Document: {doc.fileName}
|
|
||||||
"""
|
|
||||||
|
|
||||||
logger.debug(f"Extracting content from {doc.fileName} with task-specific prompt: {extraction_prompt[:100]}...")
|
|
||||||
|
|
||||||
extracted_content = await self.services.documentExtraction.extractContentFromDocument(
|
|
||||||
prompt=extraction_prompt.strip(),
|
|
||||||
document=doc
|
|
||||||
)
|
|
||||||
|
|
||||||
if extracted_content and extracted_content.contents:
|
|
||||||
# Get the first content item's data
|
|
||||||
content = ""
|
|
||||||
for content_item in extracted_content.contents:
|
|
||||||
if hasattr(content_item, 'data') and content_item.data:
|
|
||||||
content += content_item.data + " "
|
|
||||||
|
|
||||||
|
extraction_prompt = (
|
||||||
if content.strip():
|
f"Extract content for AI task context. Task: {aiPrompt}. Mode: {processingMode}."
|
||||||
metadata_info = ""
|
)
|
||||||
if file_info and includeMetadata:
|
try:
|
||||||
metadata_info = f" (Size: {file_info.get('fileSize', 'unknown')}, Type: {file_info.get('mimeType', 'unknown')})"
|
extracted_list = await self.services.extraction.extractContentFromDocuments(
|
||||||
|
prompt=extraction_prompt,
|
||||||
# Adjust context length based on processing mode and AI task relevance
|
documents=batch_docs,
|
||||||
base_length = 5000 if processingMode == "detailed" else 3000 if processingMode == "advanced" else 2000
|
options={"ai": {"enabled": False}, "mergeStrategy": {}}
|
||||||
|
)
|
||||||
# For detailed mode, include more context
|
except Exception:
|
||||||
if processingMode == "detailed":
|
extracted_list = []
|
||||||
context_parts.append(f"Document: {doc.fileName}{metadata_info}\nRelevance to AI Task: This document contains content directly related to '{aiPrompt[:100]}...'\nContent:\n{content[:base_length]}...")
|
|
||||||
else:
|
# Helper to aggregate readable text from parts
|
||||||
context_parts.append(f"Document: {doc.fileName}{metadata_info}\nContent:\n{content[:base_length]}...")
|
def _partsToText(parts) -> str:
|
||||||
else:
|
lines: List[str] = []
|
||||||
context_parts.append(f"Document: {doc.fileName} [No readable text content - binary file]")
|
for p in (parts or []):
|
||||||
|
try:
|
||||||
|
if getattr(p, 'typeGroup', '') in ("text", "table", "structure") and getattr(p, 'data', None):
|
||||||
|
lines.append(p.data)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return "\n\n".join(lines)
|
||||||
|
|
||||||
|
for i, doc in enumerate(chatDocuments):
|
||||||
|
file_info = self.services.workflow.getFileInfo(doc.fileId)
|
||||||
|
content = ""
|
||||||
|
try:
|
||||||
|
ec = extracted_list[i] if i < len(extracted_list) else None
|
||||||
|
if ec:
|
||||||
|
content = _partsToText(getattr(ec, 'parts', []))
|
||||||
|
except Exception:
|
||||||
|
content = ""
|
||||||
|
|
||||||
|
if content.strip():
|
||||||
|
metadata_info = ""
|
||||||
|
if file_info and includeMetadata:
|
||||||
|
metadata_info = f" (Size: {file_info.get('fileSize', 'unknown')}, Type: {file_info.get('mimeType', 'unknown')})"
|
||||||
|
base_length = 5000 if processingMode == "detailed" else 3000 if processingMode == "advanced" else 2000
|
||||||
|
if processingMode == "detailed":
|
||||||
|
context_parts.append(
|
||||||
|
f"Document: {doc.fileName}{metadata_info}\nRelevance to AI Task: This document contains content directly related to '{aiPrompt[:100]}...'\nContent:\n{content[:base_length]}..."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
context_parts.append(f"Document: {doc.fileName} [No readable text content - binary file]")
|
context_parts.append(
|
||||||
|
f"Document: {doc.fileName}{metadata_info}\nContent:\n{content[:base_length]}..."
|
||||||
except Exception as extract_error:
|
)
|
||||||
context_parts.append(f"Document: {doc.fileName} [Could not extract content - binary file]")
|
else:
|
||||||
|
context_parts.append(f"Document: {doc.fileName} [No readable text content - binary file]")
|
||||||
|
|
||||||
if context_parts:
|
if context_parts:
|
||||||
# Add a summary header to help the AI understand the context
|
|
||||||
context_header = f"""
|
context_header = f"""
|
||||||
=== DOCUMENT CONTEXT FOR AI PROCESSING ===
|
=== DOCUMENT CONTEXT FOR AI PROCESSING ===
|
||||||
AI Task: {aiPrompt[:100]}...
|
AI Task: {aiPrompt[:100]}...
|
||||||
|
|
@ -147,7 +151,6 @@ class MethodAi(MethodBase):
|
||||||
Use this information to provide the most accurate and helpful response.
|
Use this information to provide the most accurate and helpful response.
|
||||||
================================================
|
================================================
|
||||||
"""
|
"""
|
||||||
|
|
||||||
context = context_header + "\n\n" + "\n\n".join(context_parts)
|
context = context_header + "\n\n" + "\n\n".join(context_parts)
|
||||||
logger.info(f"Included {len(chatDocuments)} documents in AI context with task-specific extraction")
|
logger.info(f"Included {len(chatDocuments)} documents in AI context with task-specific extraction")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -62,32 +62,36 @@ class MethodDocument(MethodBase):
|
||||||
error="No documents found for the provided reference"
|
error="No documents found for the provided reference"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Extract content from all documents using AI
|
# Batch extract content from all documents at once
|
||||||
all_extracted_content = []
|
all_extracted_content = []
|
||||||
file_infos = []
|
file_infos = []
|
||||||
|
batch_docs = []
|
||||||
for chatDocument in chatDocuments:
|
for chatDocument in chatDocuments:
|
||||||
file_info = self.services.workflow.getFileInfo(chatDocument.fileId)
|
file_info = self.services.workflow.getFileInfo(chatDocument.fileId)
|
||||||
|
if includeMetadata:
|
||||||
|
file_infos.append(file_info)
|
||||||
try:
|
try:
|
||||||
# Use the document content extraction service with the specific AI prompt
|
data = self.services.workflow.getFileData(chatDocument.fileId) if hasattr(chatDocument, 'fileId') else None
|
||||||
# This handles all document types (text, binary, image, etc.) intelligently
|
except Exception:
|
||||||
extracted_content = await self.services.documentExtraction.extractContentFromDocument(
|
data = None
|
||||||
prompt=aiPrompt,
|
batch_docs.append({
|
||||||
document=chatDocument
|
"id": getattr(chatDocument, 'id', None),
|
||||||
)
|
"bytes": data or b"",
|
||||||
|
"fileName": getattr(chatDocument, 'fileName', 'unknown'),
|
||||||
if extracted_content and extracted_content.contents:
|
"mimeType": getattr(chatDocument, 'mimeType', None) or "application/octet-stream"
|
||||||
all_extracted_content.append(extracted_content)
|
})
|
||||||
if includeMetadata:
|
|
||||||
file_infos.append(file_info)
|
try:
|
||||||
logger.info(f"Successfully extracted content from {chatDocument.fileName}")
|
extracted_list = await self.services.extraction.extractContentFromDocuments(
|
||||||
else:
|
prompt=aiPrompt,
|
||||||
logger.warning(f"No content extracted from {chatDocument.fileName}")
|
documents=batch_docs,
|
||||||
|
options={"ai": {"enabled": False}}
|
||||||
except Exception as e:
|
)
|
||||||
logger.error(f"Error extracting content from {chatDocument.fileName}: {str(e)}")
|
except Exception as e:
|
||||||
continue
|
logger.error(f"Batch extraction failed: {str(e)}")
|
||||||
|
extracted_list = []
|
||||||
|
|
||||||
|
all_extracted_content = extracted_list or []
|
||||||
|
|
||||||
if not all_extracted_content:
|
if not all_extracted_content:
|
||||||
return ActionResult.isFailure(
|
return ActionResult.isFailure(
|
||||||
|
|
@ -97,20 +101,24 @@ class MethodDocument(MethodBase):
|
||||||
# Process each document individually with its own format conversion
|
# Process each document individually with its own format conversion
|
||||||
output_documents = []
|
output_documents = []
|
||||||
|
|
||||||
for i, (chatDocument, extracted_content) in enumerate(zip(chatDocuments, all_extracted_content)):
|
for i, chatDocument in enumerate(chatDocuments):
|
||||||
# Extract text content from this document
|
# Extract text content from this document
|
||||||
text_content = ""
|
text_content = ""
|
||||||
if hasattr(extracted_content, 'contents') and extracted_content.contents:
|
try:
|
||||||
# Extract text from ContentItem objects
|
ec = all_extracted_content[i] if i < len(all_extracted_content) else None
|
||||||
text_parts = []
|
if ec and hasattr(ec, 'parts'):
|
||||||
for content_item in extracted_content.contents:
|
text_parts = []
|
||||||
if hasattr(content_item, 'data') and content_item.data:
|
for part in getattr(ec, 'parts', []):
|
||||||
text_parts.append(content_item.data)
|
try:
|
||||||
text_content = "\n".join(text_parts)
|
if getattr(part, 'typeGroup', '') in ("text", "table", "structure") and getattr(part, 'data', None):
|
||||||
elif isinstance(extracted_content, str):
|
text_parts.append(part.data)
|
||||||
text_content = extracted_content
|
except Exception:
|
||||||
else:
|
continue
|
||||||
text_content = str(extracted_content)
|
text_content = "\n".join(text_parts)
|
||||||
|
else:
|
||||||
|
text_content = ""
|
||||||
|
except Exception:
|
||||||
|
text_content = ""
|
||||||
|
|
||||||
# Get the expected format for this document (or use default)
|
# Get the expected format for this document (or use default)
|
||||||
target_format = None
|
target_format = None
|
||||||
|
|
@ -692,27 +700,38 @@ class MethodDocument(MethodBase):
|
||||||
content = ""
|
content = ""
|
||||||
logger.info(f"Processing document: type={type(doc)}")
|
logger.info(f"Processing document: type={type(doc)}")
|
||||||
|
|
||||||
# Get actual file content using the document content extraction service
|
# Batch extraction approach: prepare one doc payload and call extractor
|
||||||
|
try:
|
||||||
try:
|
try:
|
||||||
extracted_content = await self.services.documentExtraction.extractContentFromDocument(
|
data = self.services.workflow.getFileData(doc.fileId) if hasattr(doc, 'fileId') else None
|
||||||
prompt="Extract readable text content for HTML report generation",
|
except Exception:
|
||||||
document=doc
|
data = None
|
||||||
)
|
extracted_list = await self.services.extraction.extractContentFromDocuments(
|
||||||
|
prompt="Extract readable text content for HTML report generation",
|
||||||
if extracted_content and extracted_content.contents:
|
documents=[{
|
||||||
# Get the first content item's data
|
"id": getattr(doc, 'id', None),
|
||||||
for content_item in extracted_content.contents:
|
"bytes": data or b"",
|
||||||
if hasattr(content_item, 'data') and content_item.data:
|
"fileName": getattr(doc, 'fileName', 'unknown'),
|
||||||
content += content_item.data + " "
|
"mimeType": getattr(doc, 'mimeType', None) or "application/octet-stream"
|
||||||
|
}],
|
||||||
if content.strip():
|
options={"ai": {"enabled": False}}
|
||||||
logger.info(f" Retrieved content from file: {len(content)} characters")
|
)
|
||||||
else:
|
ec = extracted_list[0] if extracted_list else None
|
||||||
logger.info(f" No readable text content found (binary file)")
|
if ec and hasattr(ec, 'parts'):
|
||||||
|
for part in getattr(ec, 'parts', []):
|
||||||
|
try:
|
||||||
|
if getattr(part, 'typeGroup', '') in ("text", "table", "structure") and getattr(part, 'data', None):
|
||||||
|
content += part.data + " "
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
if content.strip():
|
||||||
|
logger.info(f" Retrieved content from file: {len(content)} characters")
|
||||||
else:
|
else:
|
||||||
logger.info(f" No content extracted (binary file)")
|
logger.info(f" No readable text content found (binary file)")
|
||||||
except Exception as e:
|
else:
|
||||||
logger.info(f" Could not extract content (binary file): {str(e)}")
|
logger.info(f" No content extracted (binary file)")
|
||||||
|
except Exception as e:
|
||||||
|
logger.info(f" Could not extract content (binary file): {str(e)}")
|
||||||
|
|
||||||
# Skip empty documents
|
# Skip empty documents
|
||||||
if content and content.strip():
|
if content and content.strip():
|
||||||
|
|
|
||||||
|
|
@ -1392,45 +1392,53 @@ class MethodOutlook(MethodBase):
|
||||||
composition_documents = []
|
composition_documents = []
|
||||||
|
|
||||||
if documentList:
|
if documentList:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Get document content from service center
|
|
||||||
docs = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
|
docs = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
|
||||||
if docs:
|
if docs:
|
||||||
|
composition_documents.extend(docs)
|
||||||
|
# Batch extract summaries for AI context
|
||||||
|
batch_docs = []
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
composition_documents.append(doc)
|
|
||||||
|
|
||||||
# Extract content for AI context using proper document service
|
|
||||||
try:
|
try:
|
||||||
if hasattr(doc, 'fileId') and doc.fileId:
|
data = self.services.workflow.getFileData(doc.fileId) if hasattr(doc, 'fileId') else None
|
||||||
# Use the document content extraction service instead of raw file reading
|
except Exception:
|
||||||
try:
|
data = None
|
||||||
extracted_content = await self.services.documentExtraction.extractContentFromDocument(
|
batch_docs.append({
|
||||||
prompt="Extract readable text content for email composition",
|
"id": getattr(doc, 'id', None),
|
||||||
document=doc
|
"bytes": data or b"",
|
||||||
)
|
"fileName": getattr(doc, 'fileName', 'unknown'),
|
||||||
|
"mimeType": getattr(doc, 'mimeType', None) or "application/octet-stream"
|
||||||
if extracted_content and extracted_content.contents:
|
})
|
||||||
# Get the first content item's data
|
|
||||||
content_text = ""
|
|
||||||
for content_item in extracted_content.contents:
|
|
||||||
if hasattr(content_item, 'data') and content_item.data:
|
|
||||||
content_text += content_item.data + " "
|
|
||||||
|
|
||||||
|
try:
|
||||||
if content_text.strip():
|
extracted_list = await self.services.extraction.extractContentFromDocuments(
|
||||||
# Truncate content for AI context (avoid token limits)
|
prompt="Extract readable text content for email composition",
|
||||||
content_preview = content_text[:1000] + "..." if len(content_text) > 1000 else content_text
|
documents=batch_docs,
|
||||||
document_content_summary += f"\nDocument: {doc.fileName}\nContent Preview: {content_preview}\n"
|
options={"ai": {"enabled": False}}
|
||||||
# No content to extract
|
)
|
||||||
|
except Exception:
|
||||||
except Exception as extract_error:
|
extracted_list = []
|
||||||
# Content extraction failed (normal for binary files)
|
|
||||||
pass
|
# Aggregate previews
|
||||||
else:
|
def _partsToText(parts) -> str:
|
||||||
logger.warning(f"Document {doc.fileName} has no fileId")
|
lines: List[str] = []
|
||||||
except Exception as e:
|
for p in (parts or []):
|
||||||
logger.warning(f"Error processing document {doc.fileName}: {str(e)}")
|
try:
|
||||||
|
if getattr(p, 'typeGroup', '') in ("text", "table", "structure") and getattr(p, 'data', None):
|
||||||
|
lines.append(p.data)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return "\n\n".join(lines)
|
||||||
|
|
||||||
|
for i, doc in enumerate(docs):
|
||||||
|
try:
|
||||||
|
ec = extracted_list[i] if i < len(extracted_list) else None
|
||||||
|
content_text = _partsToText(getattr(ec, 'parts', [])) if ec else ""
|
||||||
|
if content_text.strip():
|
||||||
|
content_preview = content_text[:1000] + "..." if len(content_text) > 1000 else content_text
|
||||||
|
document_content_summary += f"\nDocument: {doc.fileName}\nContent Preview: {content_preview}\n"
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
else:
|
else:
|
||||||
logger.warning("No documents found from documentList")
|
logger.warning("No documents found from documentList")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ class MethodSharepoint(MethodBase):
|
||||||
def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
|
def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]:
|
||||||
"""Get Microsoft connection from connection reference and configure SharePoint service"""
|
"""Get Microsoft connection from connection reference and configure SharePoint service"""
|
||||||
try:
|
try:
|
||||||
userConnection = self.service.getUserConnectionFromConnectionReference(connectionReference)
|
userConnection = self.services.workflow.getUserConnectionFromConnectionReference(connectionReference)
|
||||||
if not userConnection:
|
if not userConnection:
|
||||||
logger.warning(f"No user connection found for reference: {connectionReference}")
|
logger.warning(f"No user connection found for reference: {connectionReference}")
|
||||||
return None
|
return None
|
||||||
|
|
@ -48,7 +48,7 @@ class MethodSharepoint(MethodBase):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Configure SharePoint service with the UserConnection
|
# Configure SharePoint service with the UserConnection
|
||||||
if not self.service.sharepoint.setAccessTokenFromConnection(userConnection):
|
if not self.services.sharepoint.setAccessTokenFromConnection(userConnection):
|
||||||
logger.warning(f"Failed to configure SharePoint service with connection {userConnection.id}")
|
logger.warning(f"Failed to configure SharePoint service with connection {userConnection.id}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -363,11 +363,11 @@ class MethodSharepoint(MethodBase):
|
||||||
async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
|
async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
|
||||||
"""Make a Microsoft Graph API call with timeout and detailed logging"""
|
"""Make a Microsoft Graph API call with timeout and detailed logging"""
|
||||||
try:
|
try:
|
||||||
if not hasattr(self.service, 'sharepoint') or not self.service.sharepoint._target.access_token:
|
if not hasattr(self.services, 'sharepoint') or not self.services.sharepoint._target.access_token:
|
||||||
return {"error": "SharePoint service not configured with access token"}
|
return {"error": "SharePoint service not configured with access token"}
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"Authorization": f"Bearer {self.service.sharepoint._target.access_token}",
|
"Authorization": f"Bearer {self.services.sharepoint._target.access_token}",
|
||||||
"Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
|
"Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1014,7 +1014,7 @@ class MethodSharepoint(MethodBase):
|
||||||
# For content download, we need to handle binary data
|
# For content download, we need to handle binary data
|
||||||
try:
|
try:
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
headers = {"Authorization": f"Bearer {self.service.sharepoint._target.access_token}"}
|
headers = {"Authorization": f"Bearer {self.services.sharepoint._target.access_token}"}
|
||||||
async with session.get(f"https://graph.microsoft.com/v1.0/{content_endpoint}", headers=headers) as response:
|
async with session.get(f"https://graph.microsoft.com/v1.0/{content_endpoint}", headers=headers) as response:
|
||||||
if response.status == 200:
|
if response.status == 200:
|
||||||
content = await response.text()
|
content = await response.text()
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,8 @@ from modules.datamodels.datamodelWorkflow import (
|
||||||
TaskResult,
|
TaskResult,
|
||||||
ReviewContext,
|
ReviewContext,
|
||||||
TaskStatus,
|
TaskStatus,
|
||||||
ActionResult
|
ActionResult,
|
||||||
|
TaskAction
|
||||||
)
|
)
|
||||||
from modules.datamodels.datamodelChat import (
|
from modules.datamodels.datamodelChat import (
|
||||||
WorkflowResult,
|
WorkflowResult,
|
||||||
|
|
@ -47,7 +48,6 @@ from modules.workflows.processing.promptFactoryPlaceholders import (
|
||||||
extractUserLanguage,
|
extractUserLanguage,
|
||||||
extractReviewContent
|
extractReviewContent
|
||||||
)
|
)
|
||||||
from modules.services.serviceDocument.mainServiceDocumentGeneration import DocumentGenerationService
|
|
||||||
from modules.workflows.processing.promptFactory import methods
|
from modules.workflows.processing.promptFactory import methods
|
||||||
from modules.workflows.processing.executionState import should_continue
|
from modules.workflows.processing.executionState import should_continue
|
||||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
|
||||||
|
|
@ -62,7 +62,6 @@ class HandlingTasks:
|
||||||
def __init__(self, services, workflow=None):
|
def __init__(self, services, workflow=None):
|
||||||
self.services = services
|
self.services = services
|
||||||
self.workflow = workflow
|
self.workflow = workflow
|
||||||
self.documentGenerator = DocumentGenerationService(self.services.center)
|
|
||||||
|
|
||||||
def _checkWorkflowStopped(self):
|
def _checkWorkflowStopped(self):
|
||||||
"""
|
"""
|
||||||
|
|
@ -71,7 +70,7 @@ class HandlingTasks:
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Get the current workflow status from the database to avoid stale data
|
# Get the current workflow status from the database to avoid stale data
|
||||||
current_workflow = services.chatInterface.getWorkflow(self.service.workflow.id)
|
current_workflow = self.services.chatInterface.getWorkflow(self.workflow.id)
|
||||||
if current_workflow and current_workflow.status == "stopped":
|
if current_workflow and current_workflow.status == "stopped":
|
||||||
logger.info("Workflow stopped by user, aborting execution")
|
logger.info("Workflow stopped by user, aborting execution")
|
||||||
raise WorkflowStoppedException("Workflow was stopped by user")
|
raise WorkflowStoppedException("Workflow was stopped by user")
|
||||||
|
|
@ -81,7 +80,7 @@ class HandlingTasks:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# If we can't get the current status due to other database issues, fall back to the in-memory object
|
# If we can't get the current status due to other database issues, fall back to the in-memory object
|
||||||
logger.warning(f"Could not check current workflow status from database: {str(e)}")
|
logger.warning(f"Could not check current workflow status from database: {str(e)}")
|
||||||
if self.service.workflow.status == "stopped":
|
if self.workflow and self.workflow.status == "stopped":
|
||||||
logger.info("Workflow stopped by user (from in-memory object), aborting execution")
|
logger.info("Workflow stopped by user (from in-memory object), aborting execution")
|
||||||
raise WorkflowStoppedException("Workflow was stopped by user")
|
raise WorkflowStoppedException("Workflow was stopped by user")
|
||||||
|
|
||||||
|
|
@ -137,7 +136,7 @@ class HandlingTasks:
|
||||||
# Extract content for placeholders
|
# Extract content for placeholders
|
||||||
user_prompt = extractUserPrompt(task_planning_context)
|
user_prompt = extractUserPrompt(task_planning_context)
|
||||||
available_documents = extractAvailableDocuments(task_planning_context)
|
available_documents = extractAvailableDocuments(task_planning_context)
|
||||||
workflow_history = extractWorkflowHistory(self.service, task_planning_context)
|
workflow_history = extractWorkflowHistory(self.services, task_planning_context)
|
||||||
|
|
||||||
# Create placeholders dictionary
|
# Create placeholders dictionary
|
||||||
placeholders = {
|
placeholders = {
|
||||||
|
|
@ -206,7 +205,7 @@ class HandlingTasks:
|
||||||
# LANGUAGE DETECTION: Determine user language once for the entire workflow
|
# LANGUAGE DETECTION: Determine user language once for the entire workflow
|
||||||
# Priority: 1. languageUserDetected from AI response, 2. service.user.language, 3. "en"
|
# Priority: 1. languageUserDetected from AI response, 2. service.user.language, 3. "en"
|
||||||
detected_language = task_plan_dict.get('languageUserDetected', '').strip()
|
detected_language = task_plan_dict.get('languageUserDetected', '').strip()
|
||||||
service_user_language = getattr(self.service.user, 'language', '') if self.service and self.service.user else ''
|
service_user_language = getattr(self.services.user, 'language', '') if self.services and self.services.user else ''
|
||||||
|
|
||||||
if detected_language and len(detected_language) == 2: # Valid language code like "en", "de", "fr"
|
if detected_language and len(detected_language) == 2: # Valid language code like "en", "de", "fr"
|
||||||
user_language = detected_language
|
user_language = detected_language
|
||||||
|
|
@ -219,8 +218,8 @@ class HandlingTasks:
|
||||||
logger.info(f"Using default language: {user_language}")
|
logger.info(f"Using default language: {user_language}")
|
||||||
|
|
||||||
# Set the detected language in the service for use throughout the workflow
|
# Set the detected language in the service for use throughout the workflow
|
||||||
if self.service and self.service.user:
|
if self.services and self.services.user:
|
||||||
self.service.user.language = user_language
|
self.services.user.language = user_language
|
||||||
logger.info(f"Set workflow user language to: {user_language}")
|
logger.info(f"Set workflow user language to: {user_language}")
|
||||||
|
|
||||||
tasks = []
|
tasks = []
|
||||||
|
|
@ -414,9 +413,9 @@ class HandlingTasks:
|
||||||
# Extract content for placeholders
|
# Extract content for placeholders
|
||||||
user_prompt = extractUserPrompt(action_context)
|
user_prompt = extractUserPrompt(action_context)
|
||||||
available_documents = extractAvailableDocuments(action_context)
|
available_documents = extractAvailableDocuments(action_context)
|
||||||
workflow_history = extractWorkflowHistory(self.service, action_context)
|
workflow_history = extractWorkflowHistory(self.services, action_context)
|
||||||
available_methods = extractAvailableMethods(self.service)
|
available_methods = extractAvailableMethods(self.services)
|
||||||
user_language = extractUserLanguage(self.service)
|
user_language = extractUserLanguage(self.services)
|
||||||
|
|
||||||
# Create placeholders dictionary
|
# Create placeholders dictionary
|
||||||
placeholders = {
|
placeholders = {
|
||||||
|
|
@ -527,7 +526,7 @@ class HandlingTasks:
|
||||||
# Extract content for placeholders
|
# Extract content for placeholders
|
||||||
user_prompt = extractUserPrompt(context)
|
user_prompt = extractUserPrompt(context)
|
||||||
available_documents = extractAvailableDocuments(context)
|
available_documents = extractAvailableDocuments(context)
|
||||||
user_language = extractUserLanguage(self.service)
|
user_language = extractUserLanguage(self.services)
|
||||||
available_methods = extractAvailableMethods(self.service)
|
available_methods = extractAvailableMethods(self.service)
|
||||||
|
|
||||||
# Create placeholders dictionary
|
# Create placeholders dictionary
|
||||||
|
|
@ -581,7 +580,7 @@ class HandlingTasks:
|
||||||
method = action.get('method', '')
|
method = action.get('method', '')
|
||||||
name = action.get('name', '')
|
name = action.get('name', '')
|
||||||
action_signature = ""
|
action_signature = ""
|
||||||
if self.service and method in methods:
|
if self.services and method in methods:
|
||||||
method_instance = methods[method]['instance']
|
method_instance = methods[method]['instance']
|
||||||
action_signature = method_instance.getActionSignature(name)
|
action_signature = method_instance.getActionSignature(name)
|
||||||
|
|
||||||
|
|
@ -624,8 +623,8 @@ class HandlingTasks:
|
||||||
parameters = param_obj.get('parameters', {}) if isinstance(param_obj, dict) else {}
|
parameters = param_obj.get('parameters', {}) if isinstance(param_obj, dict) else {}
|
||||||
|
|
||||||
# Apply minimal defaults in-code (language)
|
# Apply minimal defaults in-code (language)
|
||||||
if 'language' not in parameters and hasattr(self.service, 'user') and getattr(self.service.user, 'language', None):
|
if 'language' not in parameters and hasattr(self.services, 'user') and getattr(self.services.user, 'language', None):
|
||||||
parameters['language'] = self.service.user.language
|
parameters['language'] = self.services.user.language
|
||||||
|
|
||||||
# Build a synthetic TaskAction for execution routing and labels
|
# Build a synthetic TaskAction for execution routing and labels
|
||||||
current_round = getattr(self.workflow, 'currentRound', 0)
|
current_round = getattr(self.workflow, 'currentRound', 0)
|
||||||
|
|
@ -718,7 +717,7 @@ class HandlingTasks:
|
||||||
|
|
||||||
# Update workflow context for this task
|
# Update workflow context for this task
|
||||||
if task_index is not None:
|
if task_index is not None:
|
||||||
self.service.setWorkflowContext(task_number=task_index)
|
self.services.setWorkflowContext(task_number=task_index)
|
||||||
# Remove the increment call that causes double-increment bug
|
# Remove the increment call that causes double-increment bug
|
||||||
|
|
||||||
# Create database log entry for task start in format expected by frontend
|
# Create database log entry for task start in format expected by frontend
|
||||||
|
|
@ -765,7 +764,7 @@ class HandlingTasks:
|
||||||
self._checkWorkflowStopped()
|
self._checkWorkflowStopped()
|
||||||
# Update workflow[currentAction] for UI
|
# Update workflow[currentAction] for UI
|
||||||
self.updateWorkflowBeforeExecutingAction(step)
|
self.updateWorkflowBeforeExecutingAction(step)
|
||||||
self.service.setWorkflowContext(action_number=step)
|
self.services.setWorkflowContext(action_number=step)
|
||||||
try:
|
try:
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
selection = await self.plan_select(context)
|
selection = await self.plan_select(context)
|
||||||
|
|
@ -864,7 +863,7 @@ class HandlingTasks:
|
||||||
self.updateWorkflowBeforeExecutingAction(action_number)
|
self.updateWorkflowBeforeExecutingAction(action_number)
|
||||||
|
|
||||||
# Update workflow context for this action
|
# Update workflow context for this action
|
||||||
self.service.setWorkflowContext(action_number=action_number)
|
self.services.setWorkflowContext(action_number=action_number)
|
||||||
# Remove the increment call that causes double-increment bug
|
# Remove the increment call that causes double-increment bug
|
||||||
|
|
||||||
# Log action start in format expected by frontend
|
# Log action start in format expected by frontend
|
||||||
|
|
@ -1483,7 +1482,7 @@ class HandlingTasks:
|
||||||
message = await self.createActionMessage(action, result, workflow, message_result_label, [], task_step, task_index)
|
message = await self.createActionMessage(action, result, workflow, message_result_label, [], task_step, task_index)
|
||||||
if message:
|
if message:
|
||||||
# Now create documents with the messageId
|
# Now create documents with the messageId
|
||||||
created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow, message.id)
|
created_documents = self.services.generation.createDocumentsFromActionResult(result, action, workflow, message.id)
|
||||||
# Update the message with the created documents
|
# Update the message with the created documents
|
||||||
if created_documents:
|
if created_documents:
|
||||||
message.documents = created_documents
|
message.documents = created_documents
|
||||||
|
|
@ -1562,8 +1561,8 @@ class HandlingTasks:
|
||||||
logger.info(f"Result label: {result_label} - No documents")
|
logger.info(f"Result label: {result_label} - No documents")
|
||||||
|
|
||||||
# Get current workflow context and stats
|
# Get current workflow context and stats
|
||||||
workflow_context = self.service.getWorkflowContext()
|
workflow_context = self.services.getWorkflowContext()
|
||||||
workflow_stats = self.service.getWorkflowStats()
|
workflow_stats = self.services.getWorkflowStats()
|
||||||
|
|
||||||
# Create a more meaningful message that includes task context
|
# Create a more meaningful message that includes task context
|
||||||
task_objective = task_step.objective if task_step else 'Unknown task'
|
task_objective = task_step.objective if task_step else 'Unknown task'
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ import inspect
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
from modules.datamodels.datamodelWorkflow import TaskContext, ReviewContext, DocumentExchange
|
from modules.datamodels.datamodelWorkflow import TaskContext, ReviewContext, DocumentExchange
|
||||||
from modules.datamodels.datamodelChat import ChatDocument
|
from modules.datamodels.datamodelChat import ChatDocument
|
||||||
from modules.services.serviceDocument.subDocumentUtility import getFileExtension
|
from modules.services.serviceGeneration.subDocumentUtility import getFileExtension
|
||||||
from modules.workflows.methods.methodBase import MethodBase
|
from modules.workflows.methods.methodBase import MethodBase
|
||||||
|
|
||||||
# Set up logger
|
# Set up logger
|
||||||
|
|
|
||||||
|
|
@ -625,8 +625,8 @@ class WorkflowManager:
|
||||||
documents = []
|
documents = []
|
||||||
for fileId in fileIds:
|
for fileId in fileIds:
|
||||||
try:
|
try:
|
||||||
# Get file info from service
|
# Get file info from unified workflow service
|
||||||
fileInfo = self.handlingTasks.service.methodService.getFileInfo(fileId)
|
fileInfo = self.services.workflow.getFileInfo(fileId)
|
||||||
if fileInfo:
|
if fileInfo:
|
||||||
# Create document directly with all file attributes
|
# Create document directly with all file attributes
|
||||||
document = ChatDocument(
|
document = ChatDocument(
|
||||||
|
|
@ -647,4 +647,4 @@ class WorkflowManager:
|
||||||
|
|
||||||
def _setUserLanguage(self, language: str) -> None:
|
def _setUserLanguage(self, language: str) -> None:
|
||||||
"""Set user language for the service center"""
|
"""Set user language for the service center"""
|
||||||
self.handlingTasks.service.user.language = language
|
self.services.user.language = language
|
||||||
|
|
|
||||||
BIN
testdata/00Untitled.jpg
vendored
Normal file
BIN
testdata/00Untitled.jpg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 407 KiB |
BIN
testdata/Muster_Kundenliste_Test1.xlsx
vendored
Normal file
BIN
testdata/Muster_Kundenliste_Test1.xlsx
vendored
Normal file
Binary file not shown.
BIN
testdata/diagramm_komponenten.pdf
vendored
Normal file
BIN
testdata/diagramm_komponenten.pdf
vendored
Normal file
Binary file not shown.
Loading…
Reference in a new issue