Merge pull request #80 from valueonag/feat/cost-user-management

Changed topic: Enhanced AI service
This commit is contained in:
Patrick Motsch 2026-01-03 01:22:40 +01:00 committed by GitHub
commit 9f433743a6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
89 changed files with 7833 additions and 8812 deletions

View file

@ -70,7 +70,7 @@ class AiPerplexity(BaseConnectorAi):
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.ADVANCED,
operationTypes=createOperationTypeRatings(
(OperationTypeEnum.WEB_SEARCH, 9),
(OperationTypeEnum.WEB_SEARCH_DATA, 9),
(OperationTypeEnum.WEB_CRAWL, 7)
),
version="sonar",
@ -93,7 +93,7 @@ class AiPerplexity(BaseConnectorAi):
priority=PriorityEnum.QUALITY,
processingMode=ProcessingModeEnum.DETAILED,
operationTypes=createOperationTypeRatings(
(OperationTypeEnum.WEB_SEARCH, 9),
(OperationTypeEnum.WEB_SEARCH_DATA, 9),
(OperationTypeEnum.WEB_CRAWL, 8)
),
version="sonar-pro",
@ -211,7 +211,7 @@ class AiPerplexity(BaseConnectorAi):
"""
operationType = modelCall.options.operationType
if operationType == OperationTypeEnum.WEB_SEARCH:
if operationType == OperationTypeEnum.WEB_SEARCH_DATA:
return await self.webSearch(modelCall)
elif operationType == OperationTypeEnum.WEB_CRAWL:
return await self.webCrawl(modelCall)
@ -257,7 +257,7 @@ class AiPerplexity(BaseConnectorAi):
async def webSearch(self, modelCall: AiModelCall) -> AiModelResponse:
"""
WEB_SEARCH operation - returns list of URLs based on search query.
WEB_SEARCH_DATA operation - returns list of URLs based on search query.
Args:
modelCall: AiModelCall with AiCallPromptWebSearch as prompt
@ -340,7 +340,7 @@ Return ONLY a JSON array of URLs, no additional text:
content=content,
success=True,
modelId=model.name,
metadata={"response_id": apiResponse.get("id", ""), "operation": "WEB_SEARCH"}
metadata={"response_id": apiResponse.get("id", ""), "operation": "WEB_SEARCH_DATA"}
)
except Exception as e:

View file

@ -67,7 +67,7 @@ class AiTavily(BaseConnectorAi):
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.BASIC,
operationTypes=createOperationTypeRatings(
(OperationTypeEnum.WEB_SEARCH, 9),
(OperationTypeEnum.WEB_SEARCH_DATA, 9),
(OperationTypeEnum.WEB_CRAWL, 10)
),
version="tavily-search",
@ -445,7 +445,7 @@ class AiTavily(BaseConnectorAi):
"""
operationType = modelCall.options.operationType
if operationType == OperationTypeEnum.WEB_SEARCH:
if operationType == OperationTypeEnum.WEB_SEARCH_DATA:
return await self.webSearch(modelCall)
elif operationType == OperationTypeEnum.WEB_CRAWL:
return await self.webCrawl(modelCall)
@ -459,7 +459,7 @@ class AiTavily(BaseConnectorAi):
async def webSearch(self, modelCall: AiModelCall) -> "AiModelResponse":
"""
WEB_SEARCH operation - returns list of URLs using Tavily search.
WEB_SEARCH_DATA operation - returns list of URLs using Tavily search.
Args:
modelCall: AiModelCall with AiCallPromptWebSearch as prompt
@ -516,7 +516,7 @@ class AiTavily(BaseConnectorAi):
return AiModelResponse(
content=json.dumps(urls, indent=2),
success=True,
metadata={"total_urls": len(urls), "operation": "WEB_SEARCH"}
metadata={"total_urls": len(urls), "operation": "WEB_SEARCH_DATA"}
)
except Exception as e:

View file

@ -25,7 +25,7 @@ class OperationTypeEnum(str, Enum):
IMAGE_GENERATE = "imageGenerate"
# Web Operations
WEB_SEARCH = "webSearch" # Returns list of URLs only
WEB_SEARCH_DATA = "webSearch" # Returns list of URLs only
WEB_CRAWL = "webCrawl" # Web crawl for a given URL
@ -50,7 +50,7 @@ def createOperationTypeRatings(*ratings: Tuple[OperationTypeEnum, int]) -> List[
Usage:
operationTypes = createOperationTypeRatings(
(OperationTypeEnum.DATA_ANALYSE, 8),
(OperationTypeEnum.WEB_SEARCH, 10),
(OperationTypeEnum.WEB_SEARCH_DATA, 10),
(OperationTypeEnum.WEB_CRAWL, 9)
)
"""
@ -197,7 +197,7 @@ class AiModelResponse(BaseModel):
# Structured prompt models for specialized operations
class AiCallPromptWebSearch(BaseModel):
"""Structured prompt format for WEB_SEARCH operation - returns list of URLs."""
"""Structured prompt format for WEB_SEARCH_DATA operation - returns list of URLs."""
instruction: str = Field(description="Search instruction/query for finding relevant URLs")
country: Optional[str] = Field(default=None, description="Two-digit country code (lowercase, e.g., ch, us, de, fr)")

View file

@ -56,6 +56,7 @@ class WorkflowActionDefinition(BaseModel):
)
category: Optional[str] = Field(None, description="Action category for grouping")
tags: List[str] = Field(default_factory=list, description="Tags for search/filtering")
dynamicMode: bool = Field(False, description="Whether this action is available in dynamic workflow mode (only tagged actions are visible in action planning and refinement prompts)")
# Register model labels for UI
@ -68,6 +69,7 @@ registerModelLabels(
"parameters": {"en": "Parameters", "fr": "Paramètres"},
"category": {"en": "Category", "fr": "Catégorie"},
"tags": {"en": "Tags", "fr": "Étiquettes"},
"dynamicMode": {"en": "Dynamic Mode", "fr": "Mode dynamique"},
},
)

File diff suppressed because it is too large Load diff

View file

@ -14,10 +14,6 @@ from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata,
from modules.datamodels.datamodelDocument import RenderedDocument
from modules.interfaces.interfaceAiObjects import AiObjects
from modules.shared.jsonUtils import (
extractJsonString,
repairBrokenJson,
extractSectionsFromDocument,
buildContinuationContext,
parseJsonWithModel
)
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
@ -209,7 +205,7 @@ Respond with ONLY a JSON object in this exact format:
processingMode=ProcessingModeEnum.BASIC
)
async def _callAiWithLooping(
async def callAiWithLooping(
self,
prompt: str,
options: AiCallOptions,
@ -218,11 +214,12 @@ Respond with ONLY a JSON object in this exact format:
promptArgs: Optional[Dict[str, Any]] = None,
operationId: Optional[str] = None,
userPrompt: Optional[str] = None,
contentParts: Optional[List[ContentPart]] = None # ARCHITECTURE: Support ContentParts for large content
contentParts: Optional[List[ContentPart]] = None, # ARCHITECTURE: Support ContentParts for large content
useCaseId: Optional[str] = None # REQUIRED: Explicit use case ID for generic looping system
) -> str:
"""Delegate to AiCallLooper."""
"""Public method: Delegate to AiCallLooper for AI calls with looping support."""
return await self.aiCallLooper.callAiWithLooping(
prompt, options, debugPrefix, promptBuilder, promptArgs, operationId, userPrompt, contentParts
prompt, options, debugPrefix, promptBuilder, promptArgs, operationId, userPrompt, contentParts, useCaseId
)
async def _defineKpisFromPrompt(
@ -341,49 +338,21 @@ Respond with ONLY a JSON object in this exact format:
prompt: str,
options: AiCallOptions,
title: Optional[str],
aiOperationId: str
parentOperationId: Optional[str]
) -> AiResponse:
"""Handle IMAGE_GENERATE operation type."""
self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation")
"""Handle IMAGE_GENERATE operation type using image generation path."""
from modules.services.serviceGeneration.paths.imagePath import ImageGenerationPath
request = AiCallRequest(
prompt=prompt,
context="",
options=options
)
imagePath = ImageGenerationPath(self.services)
response = await self.callAi(request)
# Extract format from options
format = options.resultFormat or "png"
if not response.content:
errorMsg = f"No image data returned: {response.content}"
logger.error(f"Error in AI image generation: {errorMsg}")
self.services.chat.progressLogFinish(aiOperationId, False)
raise ValueError(errorMsg)
imageDoc = DocumentData(
documentName="generated_image.png",
documentData=response.content,
mimeType="image/png"
)
metadata = AiResponseMetadata(
title=title or "Generated Image",
operationType=options.operationType.value
)
self.services.chat.storeWorkflowStat(
self.services.workflow,
response,
"ai.generate.image"
)
self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Image generated")
self.services.chat.progressLogFinish(aiOperationId, True)
return AiResponse(
content=response.content,
metadata=metadata,
documents=[imageDoc]
return await imagePath.generateImages(
userPrompt=prompt,
format=format,
title=title,
parentOperationId=parentOperationId
)
async def _handleWebOperation(
@ -393,7 +362,7 @@ Respond with ONLY a JSON object in this exact format:
opType: OperationTypeEnum,
aiOperationId: str
) -> AiResponse:
"""Handle WEB_SEARCH and WEB_CRAWL operation types."""
"""Handle WEB_SEARCH_DATA and WEB_CRAWL operation types."""
self.services.chat.progressLogUpdate(aiOperationId, 0.4, f"Calling AI for {opType.name}")
request = AiCallRequest(
@ -441,57 +410,58 @@ Respond with ONLY a JSON object in this exact format:
return intent
return None
async def _clarifyDocumentIntents(
async def clarifyDocumentIntents(
self,
documents: List[ChatDocument],
userPrompt: str,
actionParameters: Dict[str, Any],
parentOperationId: str
) -> List[DocumentIntent]:
"""Delegate to DocumentIntentAnalyzer."""
"""Public method: Delegate to DocumentIntentAnalyzer."""
return await self.intentAnalyzer.clarifyDocumentIntents(
documents, userPrompt, actionParameters, parentOperationId
)
async def _extractAndPrepareContent(
async def extractAndPrepareContent(
self,
documents: List[ChatDocument],
documentIntents: List[DocumentIntent],
parentOperationId: str
) -> List[ContentPart]:
"""Delegate to ContentExtractor."""
"""Public method: Delegate to ContentExtractor."""
return await self.contentExtractor.extractAndPrepareContent(
documents, documentIntents, parentOperationId, self._getIntentForDocument
)
async def _generateStructure(
async def generateStructure(
self,
userPrompt: str,
contentParts: List[ContentPart],
outputFormat: str,
parentOperationId: str
outputFormat: Optional[str] = None,
parentOperationId: str = None
) -> Dict[str, Any]:
"""Delegate to StructureGenerator."""
"""Public method: Delegate to StructureGenerator."""
return await self.structureGenerator.generateStructure(
userPrompt, contentParts, outputFormat, parentOperationId
)
async def _fillStructure(
async def fillStructure(
self,
structure: Dict[str, Any],
contentParts: List[ContentPart],
userPrompt: str,
parentOperationId: str
) -> Dict[str, Any]:
"""Delegate to StructureFiller."""
"""Public method: Delegate to StructureFiller."""
return await self.structureFiller.fillStructure(
structure, contentParts, userPrompt, parentOperationId
)
async def _renderResult(
async def renderResult(
self,
filledStructure: Dict[str, Any],
outputFormat: str,
language: str,
title: str,
userPrompt: str,
parentOperationId: str
@ -500,9 +470,15 @@ Respond with ONLY a JSON object in this exact format:
Phase 5E: Rendert gefüllte Struktur zum Ziel-Format.
Jedes Dokument wird einzeln gerendert, jeder Renderer kann 1..n Dokumente zurückgeben.
Render filled structure to documents.
Per-document format and language are extracted from structure (validated in State 3).
The outputFormat and language parameters are only used as global fallbacks.
Multiple documents can have different formats and languages.
Args:
filledStructure: Gefüllte Struktur mit elements
outputFormat: Ziel-Format (pdf, docx, html, etc.) - wird für alle Dokumente verwendet
outputFormat: Ziel-Format (pdf, docx, html, etc.) - Global fallback
language: Language (global fallback) - Per-document language extracted from structure
title: Dokument-Titel
userPrompt: User-Anfrage
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
@ -511,6 +487,11 @@ Respond with ONLY a JSON object in this exact format:
List of RenderedDocument objects.
Jedes RenderedDocument repräsentiert ein gerendertes Dokument (Hauptdokument oder unterstützende Datei)
"""
# Language comes from structure (per-document), validated in State 3
# This parameter is only used as global fallback if structure validation fails
# Use validated currentUserLanguage as fallback (always valid)
if not language:
language = self._getUserLanguage() if hasattr(self, '_getUserLanguage') else (self.services.currentUserLanguage if hasattr(self.services, 'currentUserLanguage') else 'en')
# Erstelle Operation-ID für Rendering
renderOperationId = f"{parentOperationId}_rendering"
@ -533,6 +514,7 @@ Respond with ONLY a JSON object in this exact format:
renderedDocuments = await generationService.renderReport(
filledStructure,
outputFormat,
language, # Pass language (global fallback, per-document extracted in renderReport)
title,
userPrompt,
self,
@ -577,13 +559,14 @@ Respond with ONLY a JSON object in this exact format:
documentIntents: Optional[List[DocumentIntent]] = None,
outputFormat: Optional[str] = None,
title: Optional[str] = None,
parentOperationId: Optional[str] = None
parentOperationId: Optional[str] = None,
generationIntent: Optional[str] = None # NEW: Explicit intent from action (skips detection)
) -> AiResponse:
"""
Einheitliche AI-Content-Verarbeitung - Single Entry Point für alle AI-Actions.
Unified AI content generation with explicit intent requirement.
Alle AI-Actions (ai.process, ai.generateDocument, etc.) routen hier durch.
Sie unterscheiden sich nur in Parametern, nicht in Logik.
All AI-Actions (ai.process, ai.generateDocument, etc.) route through here.
They differ only in parameters, not in logic.
Args:
prompt: The main prompt for the AI call
@ -594,6 +577,8 @@ Respond with ONLY a JSON object in this exact format:
outputFormat: Optional output format for document generation (e.g., 'pdf', 'docx', 'xlsx')
title: Optional title for generated documents
parentOperationId: Optional parent operation ID for hierarchical logging
generationIntent: REQUIRED explicit intent ("document" | "code" | "image") from action.
NO auto-detection - actions must explicitly specify intent.
Returns:
AiResponse with content, metadata, and optional documents
@ -605,18 +590,18 @@ Respond with ONLY a JSON object in this exact format:
aiOperationId = f"ai_content_{workflowId}_{int(time.time())}"
# Starte Progress-Tracking mit Parent-Referenz
formatDisplay = outputFormat if outputFormat else "auto-determined"
self.services.chat.progressLogStart(
aiOperationId,
"AI content processing",
"Content Processing",
f"Format: {outputFormat or 'text'}",
f"Format: {formatDisplay}",
parentOperationId=parentOperationId
)
try:
# Initialisiere Defaults
if not outputFormat:
outputFormat = "txt"
# outputFormat is optional - if None, formats determined from prompt by AI
# No default fallback here - let AI service handle it
opType = getattr(options, "operationType", None)
if not opType:
@ -625,118 +610,257 @@ Respond with ONLY a JSON object in this exact format:
# Route zu Operation-spezifischen Handlern
if opType == OperationTypeEnum.IMAGE_GENERATE:
return await self._handleImageGeneration(prompt, options, title, aiOperationId)
# Image generation - route to image path
return await self._handleImageGeneration(prompt, options, title, parentOperationId)
if opType == OperationTypeEnum.WEB_SEARCH or opType == OperationTypeEnum.WEB_CRAWL:
if opType == OperationTypeEnum.WEB_SEARCH_DATA or opType == OperationTypeEnum.WEB_CRAWL:
return await self._handleWebOperation(prompt, options, opType, aiOperationId)
# Dokument-Generierungs-Pfad
options.compressPrompt = False
options.compressContext = False
# Data generation - REQUIRES explicit generationIntent
if opType == OperationTypeEnum.DATA_GENERATE:
if not generationIntent:
errorMsg = (
"generationIntent is required for DATA_GENERATE operation. "
"Actions must explicitly specify 'document' or 'code' intent. "
"No auto-detection - use qualified actions (ai.generateDocument, ai.generateCode)."
)
logger.error(errorMsg)
self.services.chat.progressLogFinish(aiOperationId, False)
raise ValueError(errorMsg)
# Route based on explicit intent (no auto-detection, no fallback)
if generationIntent == "code":
# Route to code generation path
return await self._handleCodeGeneration(
prompt=prompt,
options=options,
contentParts=contentParts,
outputFormat=outputFormat,
title=title,
parentOperationId=parentOperationId
)
else:
# Route to document generation path (existing behavior)
return await self._handleDocumentGeneration(
prompt=prompt,
options=options,
documentList=documentList,
documentIntents=documentIntents,
contentParts=contentParts,
outputFormat=outputFormat,
title=title,
parentOperationId=parentOperationId
)
# Schritt 5A: Kläre Dokument-Intents
# DATA_EXTRACT: Extract content from documents and process with AI (no structure generation)
if opType == OperationTypeEnum.DATA_EXTRACT:
return await self._handleDataExtraction(
prompt=prompt,
options=options,
documentList=documentList,
documentIntents=documentIntents,
contentParts=contentParts,
outputFormat=outputFormat,
title=title,
parentOperationId=parentOperationId
)
# Other operation types (DATA_ANALYSE, etc.) - not supported
errorMsg = f"Unsupported operation type: {opType}. Supported types: IMAGE_GENERATE, DATA_GENERATE, DATA_EXTRACT"
logger.error(errorMsg)
self.services.chat.progressLogFinish(aiOperationId, False)
raise ValueError(errorMsg)
except Exception as e:
logger.error(f"Error in callAiContent: {str(e)}")
self.services.chat.progressLogFinish(aiOperationId, False)
raise
async def _handleDataExtraction(
self,
prompt: str,
options: AiCallOptions,
documentList: Optional[Any],
documentIntents: Optional[List[DocumentIntent]],
contentParts: Optional[List[ContentPart]],
outputFormat: str,
title: str,
parentOperationId: Optional[str]
) -> AiResponse:
"""
Handle DATA_EXTRACT: Extract content from documents (no AI), then process with AI.
This is the original flow: extract all documents first, then process contentParts with AI.
"""
import time
# Create operation ID
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
extractOperationId = f"data_extract_{workflowId}_{int(time.time())}"
# Start progress tracking
self.services.chat.progressLogStart(
extractOperationId,
"Data Extraction",
"Extraction",
f"Format: {outputFormat}",
parentOperationId=parentOperationId
)
try:
# Step 1: Get documents from documentList
documents = []
if documentList:
documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
# Filter: Remove original documents if already covered by pre-extracted JSONs
# (to prevent duplicate ContentParts - pre-extracted JSONs contain already extracted ContentParts)
if documents:
# Step 1: Identify all original document IDs covered by pre-extracted JSONs
originalDocIdsCoveredByPreExtracted = set()
for doc in documents:
preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(doc)
if preExtracted:
originalDocId = preExtracted["originalDocument"]["id"]
originalDocIdsCoveredByPreExtracted.add(originalDocId)
logger.debug(f"Found pre-extracted JSON {doc.id} covering original document {originalDocId}")
# Step 2: Filter documents - remove originals covered by pre-extracted JSONs
filteredDocuments = []
for doc in documents:
preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(doc)
if preExtracted:
filteredDocuments.append(doc) # Keep pre-extracted JSON
elif doc.id in originalDocIdsCoveredByPreExtracted:
logger.info(f"Skipping original document {doc.id} ({doc.fileName}) - already covered by pre-extracted JSON")
else:
filteredDocuments.append(doc) # Keep regular document
documents = filteredDocuments # Use filtered list
# Step 2: Clarify document intents (if not provided) - REQUIRED for all documents
if not documentIntents and documents:
documentIntents = await self._clarifyDocumentIntents(
documentIntents = await self.clarifyDocumentIntents(
documents,
prompt,
{"outputFormat": outputFormat},
aiOperationId
extractOperationId
)
# Schritt 5B: Extrahiere und bereite Content vor
# Step 3: Extract and prepare content (NO AI - pure extraction) - REQUIRED for all documents
if documents:
preparedContentParts = await self._extractAndPrepareContent(
preparedContentParts = await self.extractAndPrepareContent(
documents,
documentIntents or [],
aiOperationId
extractOperationId
)
# Merge mit bereitgestellten contentParts (falls vorhanden)
# Merge with provided contentParts (if any)
if contentParts:
# Prüfe auf pre-extracted Content
for part in contentParts:
if part.metadata.get("skipExtraction", False):
# Bereits extrahiert - verwende as-is, stelle sicher dass Metadaten vollständig
part.metadata.setdefault("contentFormat", "extracted")
part.metadata.setdefault("isPreExtracted", True)
preparedContentParts.extend(contentParts)
contentParts = preparedContentParts
# Schritt 5C: Generiere Struktur
structure = await self._generateStructure(
prompt,
contentParts or [],
outputFormat,
aiOperationId
# Step 4: Process extracted contentParts with AI (simple text processing, no structure generation)
if not contentParts:
raise ValueError("No content extracted from documents")
# Use simple AI call to process extracted content
# Prepare content for AI processing
contentText = "\n\n".join([
f"[Document: {part.metadata.get('documentName', 'Unknown')}]\n{part.data}"
for part in contentParts
if part.data
])
# Call AI with extracted content
aiRequest = AiCallRequest(
prompt=f"{prompt}\n\nExtracted Content:\n{contentText}",
context="",
options=options
)
# Schritt 5D: Fülle Struktur
# Language will be extracted from services (user intention analysis) in fillStructure
filledStructure = await self._fillStructure(
structure,
contentParts or [],
prompt,
aiOperationId
aiResponse = await self.callAi(aiRequest)
# Create response document
resultDocument = DocumentData(
documentName=f"{title or 'extracted_data'}.{outputFormat}",
documentData=aiResponse.content.encode('utf-8') if isinstance(aiResponse.content, str) else aiResponse.content,
mimeType=f"text/{outputFormat}" if outputFormat in ["txt", "json", "csv"] else "application/octet-stream"
)
# Schritt 5E: Rendere Resultat
# Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder)
renderedDocuments = await self._renderResult(
filledStructure,
outputFormat,
title or "Generated Document",
prompt,
aiOperationId
)
# Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData
documentDataList = []
for renderedDoc in renderedDocuments:
try:
# Erstelle DocumentData für jedes gerenderte Dokument
docDataObj = DocumentData(
documentName=renderedDoc.filename,
documentData=renderedDoc.documentData,
mimeType=renderedDoc.mimeType,
sourceJson=filledStructure if len(documentDataList) == 0 else None # Nur für erstes Dokument
)
documentDataList.append(docDataObj)
logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})")
except Exception as e:
logger.warning(f"Error creating document {renderedDoc.filename}: {str(e)}")
if not documentDataList:
raise ValueError("No documents were rendered")
metadata = AiResponseMetadata(
title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"),
operationType=opType.value
title=title or "Extracted Data",
operationType=OperationTypeEnum.DATA_EXTRACT.value
)
# Debug-Log (harmonisiert)
self.services.utils.writeDebugFile(
json.dumps(filledStructure, indent=2, ensure_ascii=False, default=str),
"document_generation_response"
)
self.services.chat.progressLogFinish(aiOperationId, True)
self.services.chat.progressLogFinish(extractOperationId, True)
return AiResponse(
content=json.dumps(filledStructure),
content=aiResponse.content if isinstance(aiResponse.content, str) else aiResponse.content.decode('utf-8', errors='replace'),
metadata=metadata,
documents=documentDataList
documents=[resultDocument]
)
except Exception as e:
logger.error(f"Error in callAiContent: {str(e)}")
self.services.chat.progressLogFinish(aiOperationId, False)
logger.error(f"Error in data extraction: {str(e)}")
self.services.chat.progressLogFinish(extractOperationId, False)
raise
async def _handleCodeGeneration(
    self,
    prompt: str,
    options: AiCallOptions,
    contentParts: Optional[List[ContentPart]],
    outputFormat: Optional[str],
    title: Optional[str],
    parentOperationId: Optional[str]
) -> AiResponse:
    """
    Handle code generation by delegating to CodeGenerationPath.

    Args:
        prompt: User prompt; forwarded to the generator as ``userPrompt``.
        options: AI call options. Not read by this method.
        contentParts: Optional content parts forwarded unchanged.
        outputFormat: Target format forwarded unchanged; may be None when
            the caller did not specify one.
        title: Optional title; a falsy value is replaced by "Generated Code".
        parentOperationId: Optional parent operation ID forwarded unchanged.

    Returns:
        The result of ``CodeGenerationPath.generateCode`` (annotated as AiResponse).
    """
    # Imported at call time instead of module level.
    # NOTE(review): presumably avoids a circular import between the AI service
    # and the generation service - confirm.
    from modules.services.serviceGeneration.paths.codePath import CodeGenerationPath

    codePath = CodeGenerationPath(self.services)
    return await codePath.generateCode(
        userPrompt=prompt,
        outputFormat=outputFormat,
        contentParts=contentParts,
        # Empty or missing title falls back to a generic label.
        title=title or "Generated Code",
        parentOperationId=parentOperationId
    )
async def _handleDocumentGeneration(
    self,
    prompt: str,
    options: AiCallOptions,
    documentList: Optional[Any],
    documentIntents: Optional[List[DocumentIntent]],
    contentParts: Optional[List[ContentPart]],
    outputFormat: Optional[str],
    title: Optional[str],
    parentOperationId: Optional[str]
) -> AiResponse:
    """
    Handle document generation by delegating to DocumentGenerationPath.

    Args:
        prompt: User prompt; forwarded to the generator as ``userPrompt``.
        options: AI call options. ``compressPrompt`` and ``compressContext``
            are set to False on this object (the caller's instance is mutated).
        documentList: Optional document list forwarded unchanged.
        documentIntents: Optional document intents forwarded unchanged.
        contentParts: Optional content parts forwarded unchanged.
        outputFormat: Target format forwarded unchanged; may be None when
            the caller did not specify one.
        title: Optional title; a falsy value is replaced by "Generated Document".
        parentOperationId: Optional parent operation ID forwarded unchanged.

    Returns:
        The result of ``DocumentGenerationPath.generateDocument`` (annotated as AiResponse).
    """
    # Imported at call time instead of module level.
    # NOTE(review): presumably avoids a circular import between the AI service
    # and the generation service - confirm.
    from modules.services.serviceGeneration.paths.documentPath import DocumentGenerationPath

    # Disable prompt/context compression for document generation.
    # NOTE(review): `options` is not passed to generateDocument below, so these
    # flags only matter if something else reads the same object - confirm.
    options.compressPrompt = False
    options.compressContext = False

    documentPath = DocumentGenerationPath(self.services)
    return await documentPath.generateDocument(
        userPrompt=prompt,
        documentList=documentList,
        documentIntents=documentIntents,
        contentParts=contentParts,
        outputFormat=outputFormat,
        # Empty or missing title falls back to a generic label.
        title=title or "Generated Document",
        parentOperationId=parentOperationId
    )
def _determineDocumentName(
self,
filledStructure: Dict[str, Any],

View file

@ -16,6 +16,8 @@ from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, Operati
from modules.datamodels.datamodelExtraction import ContentPart
from modules.shared.jsonUtils import buildContinuationContext, extractJsonString, tryParseJson
from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
from modules.services.serviceAi.subLoopingUseCases import LoopingUseCaseRegistry
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
@ -28,6 +30,7 @@ class AiCallLooper:
self.services = services
self.aiService = aiService
self.responseParser = responseParser
self.useCaseRegistry = LoopingUseCaseRegistry() # Initialize use case registry
async def callAiWithLooping(
self,
@ -38,7 +41,8 @@ class AiCallLooper:
promptArgs: Optional[Dict[str, Any]] = None,
operationId: Optional[str] = None,
userPrompt: Optional[str] = None,
contentParts: Optional[List[ContentPart]] = None # ARCHITECTURE: Support ContentParts for large content
contentParts: Optional[List[ContentPart]] = None, # ARCHITECTURE: Support ContentParts for large content
useCaseId: str = None # REQUIRED: Explicit use case ID - no auto-detection, no fallback
) -> str:
"""
Shared core function for AI calls with repair-based looping system.
@ -53,16 +57,38 @@ class AiCallLooper:
operationId: Optional operation ID for progress tracking
userPrompt: Optional user prompt for KPI definition
contentParts: Optional content parts for first iteration
useCaseId: REQUIRED: Explicit use case ID - no auto-detection, no fallback
Returns:
Complete AI response after all iterations
"""
# REQUIRED: useCaseId must be provided - no auto-detection, no fallback
if not useCaseId:
errorMsg = (
"useCaseId is REQUIRED for callAiWithLooping. "
"No auto-detection - must explicitly specify use case ID. "
f"Available use cases: {list(self.useCaseRegistry.useCases.keys())}"
)
logger.error(errorMsg)
raise ValueError(errorMsg)
# Validate use case exists
useCase = self.useCaseRegistry.get(useCaseId)
if not useCase:
errorMsg = (
f"Use case '{useCaseId}' not found in registry. "
f"Available use cases: {list(self.useCaseRegistry.useCases.keys())}"
)
logger.error(errorMsg)
raise ValueError(errorMsg)
maxIterations = 50 # Prevent infinite loops
iteration = 0
allSections = [] # Accumulate all sections across iterations
lastRawResponse = None # Store last raw JSON response for continuation
documentMetadata = None # Store document metadata (title, filename) from first iteration
accumulationState = None # Track accumulation state for string accumulation
accumulatedDirectJson = [] # Accumulate JSON strings for direct return use cases (chapter_structure, code_structure)
# Get parent operation ID for iteration operations (parentId should be operationId, not log entry ID)
parentOperationId = operationId # Use the parent's operationId directly
@ -91,24 +117,31 @@ class AiCallLooper:
if not lastRawResponse:
logger.warning(f"Iteration {iteration}: No previous response available for continuation!")
# Filter promptArgs to only include parameters that buildGenerationPrompt accepts
# buildGenerationPrompt accepts: outputFormat, userPrompt, title, extracted_content, continuationContext, services
filteredPromptArgs = {
k: v for k, v in promptArgs.items()
if k in ['outputFormat', 'userPrompt', 'title', 'extracted_content', 'services']
}
# Always include services if available
if not filteredPromptArgs.get('services') and hasattr(self, 'services'):
filteredPromptArgs['services'] = self.services
# Rebuild prompt with continuation context using the provided prompt builder
iterationPrompt = await promptBuilder(**filteredPromptArgs, continuationContext=continuationContext)
# For section_content, pass all promptArgs (it uses buildSectionPromptWithContinuation which needs all args)
# For other use cases (chapter_structure, code_structure), filter to only accepted parameters
if useCaseId == "section_content":
# Pass all promptArgs plus continuationContext for section_content
iterationPrompt = await promptBuilder(**promptArgs, continuationContext=continuationContext)
else:
# Filter promptArgs to only include parameters that buildGenerationPrompt accepts
# buildGenerationPrompt accepts: outputFormat, userPrompt, title, extracted_content, continuationContext, services
filteredPromptArgs = {
k: v for k, v in promptArgs.items()
if k in ['outputFormat', 'userPrompt', 'title', 'extracted_content', 'services']
}
# Always include services if available
if not filteredPromptArgs.get('services') and hasattr(self, 'services'):
filteredPromptArgs['services'] = self.services
# Rebuild prompt with continuation context using the provided prompt builder
iterationPrompt = await promptBuilder(**filteredPromptArgs, continuationContext=continuationContext)
else:
# First iteration - use original prompt
iterationPrompt = prompt
# Make AI call
try:
checkWorkflowStopped(self.services)
if iterationOperationId:
self.services.chat.progressLogUpdate(iterationOperationId, 0.3, "Calling AI model")
# ARCHITECTURE: Pass ContentParts directly to AiCallRequest
@ -199,36 +232,88 @@ class AiCallLooper:
# Store raw response for continuation (even if broken)
lastRawResponse = result
# Check if this is section content generation (has "elements" not "sections")
# Section content generation returns JSON with "elements" array, not document structure with "sections"
isSectionContentGeneration = False
parsedJsonForSection = None
extractedJsonForSection = None
# Parse JSON for use case handling
parsedJsonForUseCase = None
extractedJsonForUseCase = None
try:
extractedJsonForSection = extractJsonString(result)
parsedJson, parseError, _ = tryParseJson(extractedJsonForSection)
extractedJsonForUseCase = extractJsonString(result)
parsedJson, parseError, _ = tryParseJson(extractedJsonForUseCase)
if parseError is None and parsedJson:
parsedJsonForSection = parsedJson
# Check if JSON has "elements" (section content) or "sections" (document structure)
if isinstance(parsedJson, dict):
if "elements" in parsedJson:
isSectionContentGeneration = True
elif isinstance(parsedJson, list) and len(parsedJson) > 0:
# Check if it's a list of elements (section content format)
if isinstance(parsedJson[0], dict) and "type" in parsedJson[0]:
isSectionContentGeneration = True
parsedJsonForUseCase = parsedJson
except Exception:
pass
if isSectionContentGeneration:
# This is section content generation - return the JSON directly
# No need to extract sections, just return the complete JSON string
logger.info(f"Iteration {iteration}: Section content generation detected (elements found), returning JSON directly")
# Handle use cases that return JSON directly (no section extraction needed)
directReturnUseCases = ["section_content", "chapter_structure", "code_structure", "code_content", "image_batch"]
if useCaseId in directReturnUseCases:
# For chapter_structure, code_structure, and section_content, check completeness and support looping
loopingUseCases = ["chapter_structure", "code_structure", "section_content"]
if useCaseId in loopingUseCases:
# If parsing failed (e.g., invalid JSON with comments or truncated JSON), continue looping to get valid JSON
if not parsedJsonForUseCase:
logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - JSON parsing failed (likely incomplete/truncated), continuing iteration to complete")
# Accumulate response for merging in next iteration
accumulatedDirectJson.append(result)
# Continue to next iteration - continuation prompt builder will handle the rest
if iterationOperationId:
self.services.chat.progressLogUpdate(iterationOperationId, 0.7, "JSON incomplete, requesting continuation")
self.services.chat.progressLogFinish(iterationOperationId, True)
continue
# Check completeness if we have parsed JSON
isComplete = JsonResponseHandler.isJsonComplete(parsedJsonForUseCase)
if not isComplete:
logger.warning(f"Iteration {iteration}: Use case '{useCaseId}' - JSON is incomplete, continuing for continuation")
# Accumulate response for merging in next iteration
accumulatedDirectJson.append(result)
# Continue to next iteration - continuation prompt builder will handle the rest
if iterationOperationId:
self.services.chat.progressLogUpdate(iterationOperationId, 0.7, "JSON incomplete, requesting continuation")
self.services.chat.progressLogFinish(iterationOperationId, True)
continue
else:
# JSON is complete - merge accumulated responses if any
if accumulatedDirectJson:
logger.info(f"Iteration {iteration}: Merging {len(accumulatedDirectJson) + 1} accumulated responses")
# Merge accumulated JSON strings with current response
mergedJsonString = accumulatedDirectJson[0] if accumulatedDirectJson else result
for prevJson in accumulatedDirectJson[1:]:
mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, prevJson)
# Finally merge with current response
mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, result)
# Re-parse merged JSON
try:
extractedMerged = extractJsonString(mergedJsonString)
parsedMerged, parseError, _ = tryParseJson(extractedMerged)
if parseError is None and parsedMerged:
parsedJsonForUseCase = parsedMerged
result = mergedJsonString
logger.info(f"Successfully merged and parsed {len(accumulatedDirectJson) + 1} JSON fragments")
except Exception as e:
logger.warning(f"Failed to parse merged JSON, using last response: {e}")
logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - JSON is complete")
logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - returning JSON directly")
if iterationOperationId:
self.services.chat.progressLogFinish(iterationOperationId, True)
# Note: Debug files (_prompt and _response) are already written above for iteration 1
# No need to write _final_result as it's redundant with _response
final_json = json.dumps(parsedJsonForSection, indent=2, ensure_ascii=False) if parsedJsonForSection else (extractedJsonForSection or result)
# For section_content, return raw result to allow merging of multiple JSON blocks
# The merging logic in subStructureFilling.py will handle extraction and merging
if useCaseId == "section_content":
final_json = result # Return raw response to preserve all JSON blocks
else:
final_json = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) if parsedJsonForUseCase else (extractedJsonForUseCase or result)
# Write final result for chapter structure and code structure (section_content skips it)
if useCaseId in ["chapter_structure", "code_structure"]:
self.services.utils.writeDebugFile(final_json, f"{debugPrefix}_final_result")
return final_json
# Extract sections from response (handles both valid and broken JSON)
@ -558,6 +643,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
# Write KPI definition prompt to debug file
self.services.utils.writeDebugFile(kpiDefinitionPrompt, f"{debugPrefix}_kpi_definition_prompt")
checkWorkflowStopped(self.services)
response = await self.aiService.callAi(request)
# Write KPI definition response to debug file

View file

@ -16,6 +16,7 @@ from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
@ -70,6 +71,7 @@ class ContentExtractor:
allContentParts = []
for document in documents:
checkWorkflowStopped(self.services)
# Check if document is already a ContentExtracted document (pre-extracted JSON)
logger.debug(f"Checking document {document.id} ({document.fileName}, mimeType={document.mimeType}) for pre-extracted content")
preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(document)
@ -92,12 +94,28 @@ class ContentExtractor:
logger.warning(f" ⚠️ No intent found for pre-extracted document {document.id}! Available intent documentIds: {[i.documentId for i in documentIntents]}")
if contentExtracted.parts:
# CRITICAL: Process pre-extracted parts - analyze structure parts for nested content
processedParts = []
for part in contentExtracted.parts:
# Überspringe leere Parts (Container ohne Daten)
if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0):
if part.typeGroup == "container":
continue # Überspringe leere Container
# CRITICAL: Check if structure part contains nested parts (e.g., JSON with documentData.parts)
if part.typeGroup == "structure" and part.mimeType == "application/json" and part.data:
nestedParts = self._extractNestedPartsFromStructure(part, document, preExtracted, intent)
if nestedParts:
# Replace structure part with extracted nested parts
processedParts.extend(nestedParts)
logger.info(f"✅ Extracted {len(nestedParts)} nested parts from structure part {part.id}")
continue # Skip original structure part
# Keep original part if no nested parts found
processedParts.append(part)
# Use processed parts (with nested parts extracted)
for part in processedParts:
if not part.metadata:
part.metadata = {}
@ -180,177 +198,41 @@ class ContentExtractor:
elif hasRenderIntent and not hasPartData:
logger.warning(f"⚠️ Part {part.id} has render intent but no data, skipping render part")
# 3. Extract Intent: Erstelle Extracted ContentPart (möglicherweise mit zusätzlicher Verarbeitung)
# 3. Extract Intent: Erstelle Extracted ContentPart (NO AI processing here - happens during section generation)
if hasExtractIntent:
# Spezielle Behandlung für Images: Vision AI für Text-Extraktion
# For images: Keep as image part with extract intent - Vision AI extraction happens during section generation
if part.typeGroup == "image" and hasPartData:
logger.info(f"🔄 Processing image {part.id} with Vision AI (extract intent)")
try:
extractionPrompt = intent.extractionPrompt if intent and intent.extractionPrompt else "Extract all text content from this image. Return only the extracted text, no additional formatting."
extractedText = await self.extractTextFromImage(part, extractionPrompt)
if extractedText:
# Prüfe ob es ein Error-Message ist
isError = extractedText.startswith("[ERROR:")
# Erstelle neuen Text-Part mit extrahiertem Text oder Error-Message
textPart = ContentPart(
id=f"extracted_{document.id}_{part.id}",
label=f"Extracted text from {part.label or 'Image'}" if not isError else f"Error extracting from {part.label or 'Image'}",
typeGroup="text",
mimeType="text/plain",
data=extractedText,
metadata={
"contentFormat": "extracted",
"documentId": document.id,
"intent": "extract",
"originalFileName": preExtracted["originalDocument"]["fileName"],
"relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None,
"extractionPrompt": extractionPrompt,
"extractionMethod": "vision",
"isError": isError
}
)
allContentParts.append(textPart)
if isError:
logger.error(f"❌ Vision AI extraction failed for image {part.id}: {extractedText}")
else:
logger.info(f"✅ Extracted text from image {part.id} using Vision AI: {len(extractedText)} chars")
else:
# Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück)
errorMsg = f"Vision AI extraction failed: Unexpected empty response for image {part.id}"
logger.error(errorMsg)
errorPart = ContentPart(
id=f"extracted_{document.id}_{part.id}",
label=f"Error extracting from {part.label or 'Image'}",
typeGroup="text",
mimeType="text/plain",
data=f"[ERROR: {errorMsg}]",
metadata={
"contentFormat": "extracted",
"documentId": document.id,
"intent": "extract",
"originalFileName": preExtracted["originalDocument"]["fileName"],
"extractionPrompt": extractionPrompt,
"extractionMethod": "vision",
"isError": True
}
)
allContentParts.append(errorPart)
except Exception as e:
logger.error(f"❌ Failed to extract text from image {part.id}: {str(e)}")
import traceback
logger.debug(f"Traceback: {traceback.format_exc()}")
# Kein Fallback: Wenn render Intent vorhanden, haben wir bereits object Part
# Wenn nur extract Intent: Original Part ist kein Text, daher nicht als extracted hinzufügen
if not hasRenderIntent:
logger.debug(f"Image {part.id} has only extract intent, Vision AI failed - no extracted text available")
logger.info(f"📷 Image {part.id} with extract intent - will be processed with Vision AI during section generation")
# Keep image part as-is, mark with extract intent
part.metadata.update({
"contentFormat": "extracted", # Marked for extraction, but not yet extracted
"intent": "extract",
"originalFileName": preExtracted["originalDocument"]["fileName"],
"relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None,
"extractionPrompt": intent.extractionPrompt if intent and intent.extractionPrompt else "Extract all text content from this image.",
"needsVisionExtraction": True # Flag to indicate Vision AI extraction needed
})
allContentParts.append(part)
originalPartAdded = True
else:
# Für alle anderen Content-Typen: Prüfe ob AI-Verarbeitung benötigt wird
# WICHTIG: Pre-extracted ContentParts von context.extractContent enthalten RAW extrahierten Content
# (z.B. Text aus PDF-Text-Layer, Tabellen, etc.). Wenn "extract" Intent vorhanden ist,
# muss dieser Content mit AI verarbeitet werden basierend auf extractionPrompt.
# Prüfe ob Part Text-Content hat (kann mit AI verarbeitet werden)
isTextContent = (
part.typeGroup == "text" or
part.typeGroup == "table" or
(part.data and isinstance(part.data, str) and len(part.data.strip()) > 0)
)
if isTextContent and intent and intent.extractionPrompt:
# Text-Content mit extractionPrompt: Verarbeite mit AI
logger.info(f"🔄 Processing text content {part.id} with AI (extract intent with prompt)")
try:
extractionPrompt = intent.extractionPrompt
processedText = await self.processTextContentWithAi(part, extractionPrompt)
if processedText:
# Prüfe ob es ein Error-Message ist
isError = processedText.startswith("[ERROR:")
# Erstelle neuen Text-Part mit AI-verarbeitetem Text oder Error-Message
processedPart = ContentPart(
id=f"extracted_{document.id}_{part.id}",
label=f"AI-processed: {part.label or 'Content'}" if not isError else f"Error processing {part.label or 'Content'}",
typeGroup="text",
mimeType="text/plain",
data=processedText,
metadata={
"contentFormat": "extracted",
"documentId": document.id,
"intent": "extract",
"originalFileName": preExtracted["originalDocument"]["fileName"],
"relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None,
"extractionPrompt": extractionPrompt,
"extractionMethod": "ai",
"sourcePartId": part.id,
"fromExtractContent": True,
"isError": isError
}
)
allContentParts.append(processedPart)
originalPartAdded = True
if isError:
logger.error(f"❌ AI text processing failed for part {part.id}: {processedText}")
else:
logger.info(f"✅ Processed text content {part.id} with AI: {len(processedText)} chars")
else:
# Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück)
errorMsg = f"AI text processing failed: Unexpected empty response for part {part.id}"
logger.error(errorMsg)
errorPart = ContentPart(
id=f"extracted_{document.id}_{part.id}",
label=f"Error processing {part.label or 'Content'}",
typeGroup="text",
mimeType="text/plain",
data=f"[ERROR: {errorMsg}]",
metadata={
"contentFormat": "extracted",
"documentId": document.id,
"intent": "extract",
"originalFileName": preExtracted["originalDocument"]["fileName"],
"extractionPrompt": extractionPrompt,
"extractionMethod": "ai",
"sourcePartId": part.id,
"isError": True
}
)
allContentParts.append(errorPart)
originalPartAdded = True
except Exception as e:
logger.error(f"❌ Failed to process text content {part.id} with AI: {str(e)}")
import traceback
logger.debug(f"Traceback: {traceback.format_exc()}")
# Fallback: Verwende Original-Part
if not originalPartAdded:
part.metadata.update({
"contentFormat": "extracted",
"intent": "extract",
"fromExtractContent": True,
"skipExtraction": True,
"originalFileName": preExtracted["originalDocument"]["fileName"],
"relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None
})
allContentParts.append(part)
originalPartAdded = True
else:
# Kein extractionPrompt oder kein Text-Content: Verwende Part direkt als extracted
# (Content ist bereits extrahiert von context.extractContent, keine weitere AI-Verarbeitung nötig)
# WICHTIG: Nur hinzufügen wenn noch nicht hinzugefügt (z.B. durch render Intent)
if not originalPartAdded:
part.metadata.update({
"contentFormat": "extracted",
"intent": "extract",
"fromExtractContent": True,
"skipExtraction": True, # Bereits extrahiert
"originalFileName": preExtracted["originalDocument"]["fileName"],
"relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None
})
# Stelle sicher dass contentFormat gesetzt ist
if "contentFormat" not in part.metadata:
part.metadata["contentFormat"] = "extracted"
allContentParts.append(part)
originalPartAdded = True
logger.debug(f"✅ Using pre-extracted ContentPart {part.id} as extracted (no AI processing needed)")
# For text/table content: Use directly as extracted (no AI processing here)
# AI processing with extractionPrompt happens during section generation
if not originalPartAdded:
part.metadata.update({
"contentFormat": "extracted",
"intent": "extract",
"fromExtractContent": True,
"skipExtraction": True, # Already extracted (raw extraction)
"originalFileName": preExtracted["originalDocument"]["fileName"],
"relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None,
"extractionPrompt": intent.extractionPrompt if intent and intent.extractionPrompt else None
})
# Stelle sicher dass contentFormat gesetzt ist
if "contentFormat" not in part.metadata:
part.metadata["contentFormat"] = "extracted"
allContentParts.append(part)
originalPartAdded = True
logger.debug(f"✅ Using pre-extracted ContentPart {part.id} as extracted (no AI processing needed)")
# 4. Fallback: Wenn kein Intent vorhanden oder Part wurde noch nicht hinzugefügt
# (sollte normalerweise nicht vorkommen, da default "extract" ist)
@ -488,6 +370,7 @@ class ContentExtractor:
)
# extractContent ist nicht async - keine await nötig
checkWorkflowStopped(self.services)
extractedResults = self.services.extraction.extractContent(
[document],
extractionOptions,
@ -508,6 +391,12 @@ class ContentExtractor:
# Verknüpfung zu object Part (falls vorhanden)
"relatedObjectPartId": f"obj_{document.id}" if "render" in intent.intents else None
})
# For images: Mark that Vision AI extraction is needed during section generation
if part.typeGroup == "image":
part.metadata["needsVisionExtraction"] = True
logger.info(f"📷 Image part {part.id} marked for Vision AI extraction during section generation")
# Stelle sicher, dass ID eindeutig ist (falls object Part existiert)
if "render" in intent.intents:
part.id = f"ext_{document.id}_{part.id}"
@ -519,10 +408,28 @@ class ContentExtractor:
"content_extraction_result"
)
# State 2 Validation: Validate and auto-fix ContentParts
validatedParts = []
for part in allContentParts:
# Validation 2.1: Skip ContentParts without documentId
if not part.metadata.get("documentId"):
logger.warning(f"Skipping ContentPart {part.id} - missing documentId in metadata")
continue
# Validation 2.2: Skip ContentParts with invalid contentFormat
contentFormat = part.metadata.get("contentFormat")
if contentFormat not in ["extracted", "object", "reference"]:
logger.warning(
f"Skipping ContentPart {part.id} - invalid contentFormat: {contentFormat}"
)
continue
validatedParts.append(part)
# ChatLog abschließen
self.services.chat.progressLogFinish(extractionOperationId, True)
return allContentParts
return validatedParts
except Exception as e:
self.services.chat.progressLogFinish(extractionOperationId, False)
@ -561,6 +468,7 @@ class ContentExtractor:
)
# Verwende AI-Service für Vision AI-Verarbeitung
checkWorkflowStopped(self.services)
response = await self.aiService.callAi(request)
# Debug-Log für Response (harmonisiert)
@ -634,6 +542,7 @@ class ContentExtractor:
)
# Verwende AI-Service für Text-Verarbeitung
checkWorkflowStopped(self.services)
response = await self.aiService.callAi(request)
# Debug-Log für Response (harmonisiert)
@ -667,4 +576,84 @@ class ContentExtractor:
"application/x-zip-compressed"
]
return mimeType in binaryTypes or mimeType.startswith("image/") or mimeType.startswith("video/") or mimeType.startswith("audio/")
def _extractNestedPartsFromStructure(
    self,
    structurePart: ContentPart,
    document: ChatDocument,
    preExtracted: Dict[str, Any],
    intent: Optional[Any]
) -> List[ContentPart]:
    """
    Extract nested parts embedded in a structure ContentPart.

    The structure part's data is parsed as JSON and inspected for the
    ContentExtracted layout ``{"documentData": {"parts": [...]}}``. Every
    dict entry of that ``parts`` array becomes its own ContentPart whose id
    is prefixed with the structure part's id and whose metadata is the
    nested metadata plus ``documentId``, ``fromNestedStructure``,
    ``parentStructurePartId`` and ``originalFileName``.

    The extraction is all-or-nothing: if anything fails while building the
    nested parts, an empty list is returned instead of a partial one, so a
    caller that replaces the structure part with the returned parts never
    swaps it for a truncated set.

    Args:
        structurePart: ContentPart whose ``data`` holds the JSON document.
        document: The document this part belongs to (its ``id`` is stamped
            into each nested part's metadata).
        preExtracted: Pre-extracted document metadata; must provide
            ``preExtracted["originalDocument"]["fileName"]``.
        intent: Document intent for nested parts (currently not read here).

    Returns:
        List of extracted ContentParts; empty if no nested parts were found
        or the structure could not be processed.
    """
    nestedParts: List[ContentPart] = []

    try:
        jsonData = json.loads(structurePart.data)

        # Only the standard ContentExtracted layout is supported: documentData.parts
        documentData = jsonData.get("documentData") if isinstance(jsonData, dict) else None
        parts = documentData.get("parts", []) if isinstance(documentData, dict) else []
        if not isinstance(parts, list):
            parts = []

        for nestedPartData in parts:
            if not isinstance(nestedPartData, dict):
                continue

            nestedPartId = nestedPartData.get("id") or f"nested_{len(nestedParts)}"
            nestedTypeGroup = nestedPartData.get("typeGroup", "text")
            nestedMimeType = nestedPartData.get("mimeType", "text/plain")
            nestedLabel = nestedPartData.get("label", structurePart.label)
            nestedData = nestedPartData.get("data", "")

            # "metadata" may be present but null (or not an object at all);
            # unpacking such a value with ** would raise TypeError.
            rawMetadata = nestedPartData.get("metadata")
            nestedMetadata = rawMetadata if isinstance(rawMetadata, dict) else {}

            nestedPart = ContentPart(
                id=f"{structurePart.id}_{nestedPartId}",
                parentId=structurePart.id,
                label=nestedLabel,
                typeGroup=nestedTypeGroup,
                mimeType=nestedMimeType,
                data=nestedData,
                metadata={
                    **nestedMetadata,
                    "documentId": document.id,
                    "fromNestedStructure": True,
                    "parentStructurePartId": structurePart.id,
                    "originalFileName": preExtracted["originalDocument"]["fileName"]
                }
            )
            nestedParts.append(nestedPart)
            logger.debug(f"✅ Extracted nested part: {nestedPart.id} (typeGroup={nestedTypeGroup}, mimeType={nestedMimeType})")

        if not nestedParts:
            logger.debug(f"No nested parts found in structure part {structurePart.id}")

    except json.JSONDecodeError as e:
        logger.warning(f"Could not parse structure part {structurePart.id} as JSON: {str(e)}")
        return []
    except Exception as e:
        # Deliberate best-effort: log and fall back to "nothing extracted".
        # Returning the partial list here would make the caller drop the
        # original structure part and lose the parts that were not reached.
        logger.error(f"Error extracting nested parts from structure part {structurePart.id}: {str(e)}")
        return []

    return nestedParts

View file

@ -14,6 +14,7 @@ from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelExtraction import DocumentIntent
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
@ -86,6 +87,7 @@ class DocumentIntentAnalyzer:
# AI-Call (verwende callAiPlanning für einfache JSON-Responses)
# Debug-Logs werden bereits von callAiPlanning geschrieben
checkWorkflowStopped(self.services)
aiResponse = await self.aiService.callAiPlanning(
prompt=intentPrompt,
debugType="document_intent_analysis"
@ -107,6 +109,21 @@ class DocumentIntentAnalyzer:
"document_intent_analysis_result"
)
# State 1 Validation: Validate and auto-fix document intents
documentIds = {d.id for d in documents}
validatedIntents = []
for intent in documentIntents:
# Validation 1.2: Skip intents for unknown documents
if intent.documentId not in documentIds:
logger.warning(f"Skipping intent for unknown document: {intent.documentId}")
continue
validatedIntents.append(intent)
# Validation 1.1: Documents without intents are OK (not needed)
# Intents for non-existing documents are already filtered above
documentIntents = validatedIntents
# ChatLog abschließen
self.services.chat.progressLogFinish(intentOperationId, True)
@ -243,8 +260,13 @@ class DocumentIntentAnalyzer:
outputFormat = actionParameters.get("outputFormat", "txt")
prompt = f"""USER REQUEST:
# FENCE user input to prevent prompt injection
fencedUserPrompt = f"""```user_request
{userPrompt}
```"""
prompt = f"""USER REQUEST:
{fencedUserPrompt}
DOCUMENTS TO ANALYZE:
{docListText}
@ -254,20 +276,25 @@ TASK: For each document, determine its intents (can be multiple):
- "render": Image/binary should be rendered as-is (visual element)
- "reference": Document reference/attachment (no extraction, just reference)
OUTPUT FORMAT: {outputFormat}
TASK: For each document, determine:
1. Intents (can be multiple): "extract", "render", "reference"
Note: Output format and language are NOT determined here - they will be
determined during structure generation (Phase 3) in the chapter structure JSON
OUTPUT FORMAT: {outputFormat} (global fallback - for reference only)
RETURN JSON:
{{
"intents": [
{{
"documentId": "doc_1",
"intents": ["extract"], # Array - can contain multiple!
"intents": ["extract"],
"extractionPrompt": "Extract all text content, preserving structure",
"reasoning": "User needs text content for document generation"
}},
{{
"documentId": "doc_2",
"intents": ["extract", "render"], # Both! Image needs text extraction AND visual rendering
"intents": ["extract", "render"],
"extractionPrompt": "Extract text content from image using vision AI",
"reasoning": "Image contains text that needs extraction, but also should be rendered visually"
}},

View file

@ -0,0 +1,231 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Generic Looping Use Case System
Provides parametrized looping infrastructure supporting different JSON formats and use cases.
"""
import logging
from dataclasses import dataclass, field
from typing import Dict, Any, List, Optional, Callable
logger = logging.getLogger(__name__)
@dataclass
class LoopingUseCase:
    """
    Declarative description of one looping use case.

    The first four fields are required; every hook is optional and defaults
    to None, and the two metadata flags default to "supports accumulation,
    needs no extraction".
    """

    # --- Identification ---------------------------------------------------
    # One of: "section_content", "chapter_structure", "document_structure",
    # "code_structure", "code_content", "image_batch"
    useCaseId: str

    # --- JSON format detection --------------------------------------------
    # Template of the JSON structure this use case is expected to produce
    jsonTemplate: Dict[str, Any]
    # Keys checked for format detection (e.g. ["elements"], ["chapters"], ["files"])
    detectionKeys: List[str]
    # Path checked for format detection (e.g. "documents[0].chapters", "files[0].content")
    detectionPath: str

    # --- Prompt building --------------------------------------------------
    # Callable that builds the initial prompt
    initialPromptBuilder: Optional[Callable] = None
    # Callable that builds the continuation prompt
    continuationPromptBuilder: Optional[Callable] = None

    # --- Accumulation & merging -------------------------------------------
    # Callable that accumulates response fragments
    accumulator: Optional[Callable] = None
    # Callable that merges the accumulated data
    merger: Optional[Callable] = None

    # --- Continuation context ---------------------------------------------
    # Callable that builds the continuation context for this format
    continuationContextBuilder: Optional[Callable] = None

    # --- Result building --------------------------------------------------
    # Callable that builds the final result from the accumulated data
    resultBuilder: Optional[Callable] = None

    # --- Metadata -----------------------------------------------------------
    # Whether this use case supports accumulation
    supportsAccumulation: bool = True
    # Whether this use case requires extraction (like sections)
    requiresExtraction: bool = False
class LoopingUseCaseRegistry:
    """
    Registry of all looping use cases.

    Use cases are stored by their ``useCaseId``. The six default use cases
    are registered on construction; further ones can be added with
    ``register``. ``detectUseCase`` returns the first registered use case
    (in registration order) whose detection keys or detection path match a
    parsed JSON object.
    """

    def __init__(self):
        self.useCases: Dict[str, LoopingUseCase] = {}
        self._registerDefaultUseCases()

    def register(self, useCase: "LoopingUseCase"):
        """Register a use case, replacing any existing one with the same ID."""
        self.useCases[useCase.useCaseId] = useCase
        logger.debug(f"Registered looping use case: {useCase.useCaseId}")

    def get(self, useCaseId: str) -> Optional["LoopingUseCase"]:
        """Return the use case registered under ``useCaseId``, or None."""
        return self.useCases.get(useCaseId)

    def detectUseCase(self, parsedJson: Dict[str, Any]) -> Optional[str]:
        """
        Detect which use case matches the JSON structure.

        Args:
            parsedJson: Parsed JSON value; anything that is not a dict
                matches no use case.

        Returns:
            ID of the first matching use case in registration order, or
            None if none matches.
        """
        for useCaseId, useCase in self.useCases.items():
            if self._matchesFormat(parsedJson, useCase):
                return useCaseId
        return None

    def _matchesFormat(self, json: Dict[str, Any], useCase: "LoopingUseCase") -> bool:
        """
        Check if JSON matches use case format.

        A match is either one of ``useCase.detectionKeys`` being a top-level
        key, or ``useCase.detectionPath`` (e.g. "documents[0].chapters")
        being fully traversable.

        Args:
            json: Parsed JSON value to inspect (parameter name kept for
                backward compatibility).
            useCase: Use case providing detectionKeys / detectionPath.

        Returns:
            True if the value matches the use case format, else False.
        """
        # Detection is defined on JSON objects only. Without this guard a
        # top-level list like ["elements"] would pass the `in` test below
        # (element membership, not key lookup) and None would raise TypeError.
        if not isinstance(json, dict):
            return False

        # Check top-level keys
        if any(key in json for key in useCase.detectionKeys):
            return True

        # Check nested path using simple dictionary traversal (no jsonpath_ng needed)
        if not useCase.detectionPath:
            return False

        try:
            current: Any = json
            for part in useCase.detectionPath.split("."):
                if "[" in part and "]" in part:
                    # Indexed segment such as "documents[0]"
                    key = part.split("[")[0]
                    index = int(part.split("[")[1].split("]")[0])
                    if not (isinstance(current, dict) and key in current):
                        return False
                    items = current[key]
                    if not (isinstance(items, list) and 0 <= index < len(items)):
                        return False
                    current = items[index]
                elif isinstance(current, dict) and part in current:
                    # Regular key access
                    current = current[part]
                else:
                    return False
            # The whole path could be traversed, so the format matches
            return True
        except Exception as e:
            # Best-effort detection: a malformed path (e.g. a non-numeric
            # index) counts as "no match" instead of failing the caller.
            logger.debug(f"Path matching failed for {useCase.useCaseId}: {e}")
            return False

    def _registerDefaultUseCases(self):
        """Register the six default use cases."""

        # Use Case 1: Section Content Generation
        # Returns JSON with "elements" array directly
        self.register(LoopingUseCase(
            useCaseId="section_content",
            jsonTemplate={"elements": []},
            detectionKeys=["elements"],
            detectionPath="",
            initialPromptBuilder=None,  # Will use default prompt builder
            continuationPromptBuilder=None,  # Will use default continuation builder
            accumulator=None,  # Direct return, no accumulation
            merger=None,
            continuationContextBuilder=None,  # Will use default continuation context
            resultBuilder=None,  # Return JSON directly
            supportsAccumulation=False,
            requiresExtraction=False
        ))

        # Use Case 2: Chapter Structure Generation
        # Returns JSON with "documents[0].chapters" structure
        self.register(LoopingUseCase(
            useCaseId="chapter_structure",
            jsonTemplate={"documents": [{"chapters": []}]},
            detectionKeys=["chapters"],
            detectionPath="documents[0].chapters",
            initialPromptBuilder=None,
            continuationPromptBuilder=None,
            accumulator=None,  # Direct return, no accumulation
            merger=None,
            continuationContextBuilder=None,
            resultBuilder=None,  # Return JSON directly
            supportsAccumulation=False,
            requiresExtraction=False
        ))

        # Use Case 3: Document Structure Generation
        # Returns JSON with "documents[0].sections" structure, requires extraction and accumulation
        self.register(LoopingUseCase(
            useCaseId="document_structure",
            jsonTemplate={"documents": [{"sections": []}]},
            detectionKeys=["sections"],
            detectionPath="documents[0].sections",
            initialPromptBuilder=None,
            continuationPromptBuilder=None,
            accumulator=None,  # Will use default accumulator
            merger=None,  # Will use default merger
            continuationContextBuilder=None,
            resultBuilder=None,  # Will use default result builder
            supportsAccumulation=True,
            requiresExtraction=True
        ))

        # Use Case 4: Code Structure Generation (NEW)
        self.register(LoopingUseCase(
            useCaseId="code_structure",
            jsonTemplate={
                "metadata": {
                    "language": "",
                    "projectType": "single_file|multi_file",
                    "projectName": ""
                },
                "files": [
                    {
                        "id": "",
                        "filename": "",
                        "fileType": "",
                        "dependencies": [],
                        "imports": [],
                        "functions": [],
                        "classes": []
                    }
                ]
            },
            detectionKeys=["files"],
            detectionPath="files",
            initialPromptBuilder=None,
            continuationPromptBuilder=None,
            accumulator=None,  # Direct return
            merger=None,
            continuationContextBuilder=None,
            resultBuilder=None,
            supportsAccumulation=False,
            requiresExtraction=False
        ))

        # Use Case 5: Code Content Generation (NEW)
        self.register(LoopingUseCase(
            useCaseId="code_content",
            jsonTemplate={"files": [{"content": "", "functions": []}]},
            detectionKeys=["content", "functions"],
            detectionPath="files[0].content",
            initialPromptBuilder=None,
            continuationPromptBuilder=None,
            accumulator=None,  # Will use default accumulator
            merger=None,  # Will use default merger
            continuationContextBuilder=None,
            resultBuilder=None,  # Will use default result builder
            supportsAccumulation=True,
            requiresExtraction=False
        ))

        # Use Case 6: Image Batch Generation (NEW)
        self.register(LoopingUseCase(
            useCaseId="image_batch",
            jsonTemplate={"images": []},
            detectionKeys=["images"],
            detectionPath="images",
            initialPromptBuilder=None,
            continuationPromptBuilder=None,
            accumulator=None,  # Direct return
            merger=None,
            continuationContextBuilder=None,
            resultBuilder=None,
            supportsAccumulation=False,
            requiresExtraction=False
        ))

        logger.info(f"Registered {len(self.useCases)} default looping use cases")

File diff suppressed because it is too large Load diff

View file

@ -9,9 +9,11 @@ Handles document structure generation, including:
"""
import json
import logging
from typing import Dict, Any, List
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelExtraction import ContentPart
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
@ -42,35 +44,47 @@ class StructureGenerator:
self,
userPrompt: str,
contentParts: List[ContentPart],
outputFormat: str,
parentOperationId: str
outputFormat: Optional[str] = None,
parentOperationId: str = None
) -> Dict[str, Any]:
"""
Phase 5C: Generiert Chapter-Struktur (Table of Contents).
Definiert für jedes Chapter:
- Level, Title
- contentPartIds
- contentPartInstructions
- contentParts (unified object with instruction and/or caption per part)
- generationHint
Generate document structure with per-document format determination.
Multiple documents can be produced with different formats (e.g., one PDF, one HTML).
AI determines formats per-document from user prompt. The outputFormat parameter is
only a validation fallback - used if AI doesn't return format per document.
Args:
userPrompt: User-Anfrage
contentParts: Alle vorbereiteten ContentParts mit Metadaten
outputFormat: Ziel-Format (html, docx, pdf, etc.)
outputFormat: Optional global format fallback. If omitted, formats are determined
from user prompt by AI. Used as validation fallback if AI doesn't
return format per document. Defaults to "txt" if not provided.
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
Returns:
Struktur-Dict mit documents und chapters (nicht sections!)
"""
# If outputFormat not provided, use "txt" as fallback for validation
# AI will determine formats per document from user prompt
if not outputFormat:
outputFormat = "txt"
logger.debug("outputFormat not provided - using 'txt' as validation fallback, formats determined from prompt")
# Erstelle Operation-ID für Struktur-Generierung
structureOperationId = f"{parentOperationId}_structure_generation"
# Starte ChatLog mit Parent-Referenz
formatDisplay = outputFormat if outputFormat else "auto-determined"
self.services.chat.progressLogStart(
structureOperationId,
"Chapter Structure Generation",
"Structure",
f"Generating chapter structure for {outputFormat}",
f"Generating chapter structure (format: {formatDisplay})",
parentOperationId=parentOperationId
)
@ -82,28 +96,93 @@ class StructureGenerator:
outputFormat=outputFormat
)
# AI-Call für Chapter-Struktur-Generierung
# Note: Debug logging is handled by callAiPlanning
aiResponse = await self.aiService.callAiPlanning(
prompt=structurePrompt,
debugType="chapter_structure_generation"
# AI-Call für Chapter-Struktur-Generierung mit Looping-Unterstützung
# Use _callAiWithLooping instead of callAiPlanning to support continuation if response is cut
options = AiCallOptions(
operationType=OperationTypeEnum.DATA_GENERATE,
priority=PriorityEnum.QUALITY,
processingMode=ProcessingModeEnum.DETAILED,
compressPrompt=False,
compressContext=False,
resultFormat="json"
)
# Parse Struktur
# Use tryParseJson which handles malformed JSON and unterminated strings
extractedJson = self.services.utils.jsonExtractString(aiResponse)
# Create prompt builder for continuation support
async def buildChapterStructurePromptWithContinuation(
    continuationContext: Optional[Dict[str, Any]] = None,
    **kwargs
) -> str:
    """Return the chapter-structure prompt, extended for continuation rounds.

    The base prompt is always rebuilt from the enclosing call's userPrompt,
    contentParts and outputFormat. When continuationContext is truthy, a
    continuation section is appended that quotes what was already delivered
    so the model only produces the remaining chapters.

    Args:
        continuationContext: Optional dict; the keys read here are
            "delivered_summary", "element_before_cutoff" and "cut_off_element".
        **kwargs: Accepted and ignored.

    Returns:
        The prompt text to send for this round.
    """
    chapterPrompt = self._buildChapterStructurePrompt(
        userPrompt=userPrompt,
        contentParts=contentParts,
        outputFormat=outputFormat
    )

    # No continuation context: the plain structure prompt is all that is needed.
    if not continuationContext:
        return chapterPrompt

    summaryText = continuationContext.get("delivered_summary", "")
    lastCompleteElement = continuationContext.get("element_before_cutoff", "")
    truncatedElement = continuationContext.get("cut_off_element", "")

    # Collect the continuation section piece by piece and join once at the end.
    pieces = [
        f"{summaryText}\n\n",
        "⚠️ CONTINUATION: Response was cut off. Generate ONLY the remaining content that comes AFTER the reference elements below.\n\n",
    ]
    if lastCompleteElement:
        pieces.append("# REFERENCE: Last complete element (already delivered - DO NOT repeat):\n")
        pieces.append(f"{lastCompleteElement}\n\n")
    if truncatedElement:
        pieces.append("# REFERENCE: Incomplete element (cut off here - DO NOT repeat):\n")
        pieces.append(f"{truncatedElement}\n\n")
    pieces.extend([
        "⚠️ CRITICAL: The elements above are REFERENCE ONLY. They are already delivered.\n",
        "Generate ONLY what comes AFTER these elements. DO NOT regenerate the entire JSON structure.\n",
        "Start directly with the next chapter that should follow.\n\n",
    ])
    continuationText = "".join(pieces)

    return f"""{chapterPrompt}
{continuationText}
Continue generating the remaining chapters now.
"""
# Call AI with looping support
# NOTE: Do NOT pass contentParts here - we only need metadata for structure generation
# The contentParts metadata is already included in the prompt (contentPartsIndex)
# Actual content extraction happens later during section generation
checkWorkflowStopped(self.services)
aiResponseJson = await self.aiService.callAiWithLooping(
prompt=structurePrompt,
options=options,
debugPrefix="chapter_structure_generation",
promptBuilder=buildChapterStructurePromptWithContinuation,
promptArgs={
"userPrompt": userPrompt,
"outputFormat": outputFormat,
"services": self.services
},
useCaseId="chapter_structure", # REQUIRED: Explicit use case ID
operationId=structureOperationId,
userPrompt=userPrompt,
contentParts=None # Do not pass ContentParts - only metadata needed, not content extraction
)
# Parse the complete JSON response (looping system already handles completion)
extractedJson = self.services.utils.jsonExtractString(aiResponseJson)
parsedJson, parseError, cleanedJson = self.services.utils.jsonTryParse(extractedJson)
if parseError is not None:
# Try to repair broken JSON (handles unterminated strings, incomplete structures, etc.)
logger.warning(f"Initial JSON parsing failed: {str(parseError)}. Attempting repair...")
# Even with looping, try repair as fallback
logger.warning(f"JSON parsing failed after looping: {str(parseError)}. Attempting repair...")
from modules.shared import jsonUtils
repairedJson = jsonUtils.repairBrokenJson(extractedJson)
if repairedJson:
# Try parsing repaired JSON
parsedJson, parseError, _ = self.services.utils.jsonTryParse(json.dumps(repairedJson))
if parseError is None:
logger.info("Successfully repaired and parsed JSON structure")
logger.info("Successfully repaired and parsed JSON structure after looping")
structure = parsedJson
else:
logger.error(f"Failed to parse repaired JSON: {str(parseError)}")
@ -115,6 +194,72 @@ class StructureGenerator:
else:
structure = parsedJson
# State 3 Validation: Validate and auto-fix structure
# Validation 3.1: Structure missing 'documents' field
if "documents" not in structure:
raise ValueError("Structure missing 'documents' field - cannot auto-fix")
documents = structure["documents"]
# Validation 3.2: Structure has no documents
if not isinstance(documents, list) or len(documents) == 0:
raise ValueError("Structure has no documents - cannot generate without documents")
# Import renderer registry for format validation (existing infrastructure)
from modules.services.serviceGeneration.renderers.registry import getRenderer
# Validate and fix each document
for doc in documents:
# Validation 3.3 & 3.4: Document outputFormat
# outputFormat parameter is optional - if omitted, formats determined from prompt by AI
# Use as fallback only if AI doesn't return format per document
# Multiple documents can have different formats (e.g., one PDF, one HTML)
globalFormatFallback = outputFormat or "txt" # Fallback for validation
if "outputFormat" not in doc or not doc["outputFormat"]:
# AI didn't return format or returned empty - use global fallback
doc["outputFormat"] = globalFormatFallback
logger.warning(f"Document {doc.get('id')} missing outputFormat - using fallback: {doc['outputFormat']}")
else:
# AI returned format - validate using existing renderer registry
formatName = str(doc["outputFormat"]).lower().strip()
renderer = getRenderer(formatName) # Uses existing infrastructure
if not renderer:
# Format doesn't match any renderer - use txt (simple approach)
logger.warning(f"Document {doc.get('id')} has format without renderer: {formatName}, using 'txt'")
doc["outputFormat"] = "txt"
else:
# Valid format with renderer - normalize and keep AI result
doc["outputFormat"] = formatName
logger.debug(f"Document {doc.get('id')} using AI-determined format: {formatName}")
# Validation 3.5 & 3.6: Document language
# Use validated currentUserLanguage (always valid, validated during user intention analysis)
# Access via _getUserLanguage() which uses self.services.currentUserLanguage
userPromptLanguage = self._getUserLanguage() # Uses validated currentUserLanguage infrastructure
if "language" not in doc or not isinstance(doc["language"], str) or len(doc["language"]) != 2:
# AI didn't return language or invalid format - use validated currentUserLanguage
doc["language"] = userPromptLanguage
if "language" not in doc:
logger.warning(f"Document {doc.get('id')} missing language - using currentUserLanguage: {userPromptLanguage}")
else:
logger.warning(f"Document {doc.get('id')} has invalid language format from AI: {doc['language']}, using currentUserLanguage")
else:
# AI returned valid language format - normalize
doc["language"] = doc["language"].lower().strip()[:2]
logger.debug(f"Document {doc.get('id')} using AI-determined language: {doc['language']}")
# Validation 3.7: Document missing 'chapters' field
if "chapters" not in doc:
raise ValueError(f"Document {doc.get('id')} missing 'chapters' field - cannot auto-fix")
# Validation 3.8: Chapter missing 'contentParts' field
for chapter in doc["chapters"]:
if "contentParts" not in chapter:
raise ValueError(f"Chapter {chapter.get('id')} missing 'contentParts' field - cannot auto-fix")
# ChatLog abschließen
self.services.chat.progressLogFinish(structureOperationId, True)
@ -186,59 +331,79 @@ class StructureGenerator:
language = self._getUserLanguage()
logger.debug(f"Using language from services (user intention analysis) for structure generation: {language}")
prompt = f"""USER REQUEST (for context):
prompt = f"""# TASK: Generate Chapter Structure
This is a PLANNING task. Return EXACTLY ONE complete JSON object. Do not generate multiple JSON objects, alternatives, or variations. Do not use separators like "---" between JSON objects.
## USER REQUEST (for context)
```
{userPrompt}
```
LANGUAGE: Generate all content in {language.upper()} language. All text, titles, headings, paragraphs, and content must be written in {language.upper()}.
AVAILABLE CONTENT PARTS:
## AVAILABLE CONTENT PARTS
{contentPartsIndex}
TASK: Generate Chapter Structure for the documents to be generated.
## CONTENT ASSIGNMENT RULE
If the user request mentions documents/images/data, then EVERY chapter that generates content related to those references MUST assign the relevant ContentParts explicitly.
IMPORTANT - CHAPTER INDEPENDENCE:
- Each chapter is independent and self-contained
- One chapter does NOT have information about another chapter
- Each chapter must provide its own context and be understandable alone
Assignment logic:
- If chapter DISPLAYS a document/image assign "object" format ContentPart with "caption"
- If chapter generates text content ABOUT a document/image/data assign ContentPart with "instruction":
- Prefer "extracted" format if available (contains analyzed/extracted content)
- If only "object" format is available, use "object" format with "instruction" (to write ABOUT the image/document)
- If chapter's generationHint or purpose relates to a document/image/data mentioned in user request → it MUST have ContentParts assigned
- Multiple chapters might assign the same ContentPart (e.g., one chapter displays image, another writes about it)
- Use ContentPart IDs exactly as listed in AVAILABLE CONTENT PARTS above
- Empty contentParts are only allowed if chapter generates content WITHOUT referencing any documents/images/data from the user request
CRITICAL - CONTENT ASSIGNMENT TO CHAPTERS:
- You MUST assign available ContentParts to chapters using contentPartIds
- Based on the user request, determine which content should be used in which chapter
- If the user request mentions specific content, assign the corresponding ContentPart to the appropriate chapter
- Chapters WITHOUT contentPartIds can only generate generic content, NOT document-specific analysis
- To include document content analysis, chapters MUST have contentPartIds assigned
- Review the user request carefully to match ContentParts to chapters based on context and purpose
CRITICAL RULE: If the user request mentions BOTH:
a) Documents/images/data (listed in AVAILABLE CONTENT PARTS above), AND
b) Generic content types (article text, main content, body text, etc.)
Then chapters that generate those generic content types MUST assign the relevant ContentParts, because the content should relate to or be based on the provided documents/images/data.
CRITICAL - CHAPTERS WITHOUT CONTENT PARTS:
- If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch
- Include: what to generate, what information to include, purpose, specific details
- Without content parts, AI relies ENTIRELY on generationHint and CANNOT analyze document content
## CHAPTER STRUCTURE REQUIREMENTS
- Generate chapters based on USER REQUEST - analyze what structure the user wants
- Each chapter needs: id, level (1, 2, 3, etc.), title
- contentParts: {{"partId": {{"instruction": "..."}} or {{"caption": "..."}} or both}} - Assign ContentParts as required by CONTENT ASSIGNMENT RULE above
- The "instruction" field for each ContentPart MUST contain ALL relevant details from the USER REQUEST that apply to content extraction for this specific chapter. Include all formatting rules, data requirements, constraints, and specifications mentioned in the user request that are relevant for processing this ContentPart in this chapter.
- generationHint: Description of what content to generate for this chapter
The generationHint MUST contain ALL relevant details from the USER REQUEST that apply to this specific chapter. Include all formatting rules, data requirements, constraints, column specifications, validation rules, and any other specifications mentioned in the user request that are relevant for generating content for this chapter. Do NOT use generic descriptions - include specific details from the user request.
- The number of chapters depends on the user request - create only what is requested
IMPORTANT - FORMATTING:
- Formatting (fonts, colors, layouts, styles) is handled AUTOMATICALLY by the renderer
- Do NOT specify formatting details in generationHint unless it's content-specific (e.g., "pie chart with 3 segments")
- Focus on CONTENT and STRUCTURE, not visual formatting
- The renderer will apply appropriate styling based on the output format ({outputFormat})
## DOCUMENT OUTPUT FORMAT
For each document, determine the output format by analyzing the USER REQUEST:
- Look for explicit format mentions
- Infer from document purpose
- Infer from content type
- If format cannot be determined from the prompt, use: "{outputFormat}"
- Include "outputFormat" field in each document in the JSON structure
- Multiple documents can have different formats
For each chapter:
- chapter id
- level (1, 2, 3, etc.)
- title
- contentPartIds: [List of ContentPart IDs] - ASSIGN content based on user request and chapter purpose
- contentPartInstructions: {{
"partId": {{
"instruction": "How content should be structured"
}}
}}
- generationHint: Description of the content (must be self-contained with all necessary context)
* If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch
* Focus on content and structure, NOT formatting details
## DOCUMENT LANGUAGE
For each document, determine the language by analyzing the USER REQUEST:
- Look for explicit language mentions
- Map language names to ISO 639-1 codes
- If language cannot be determined from the prompt, use: "{language}"
- Include "language" field in each document in the JSON structure
- Multiple documents can have different languages
OUTPUT FORMAT: {outputFormat}
## JSON STRUCTURE REQUIREMENTS
- metadata: {{"title": "...", "language": "..."}}
- documents: Array of document objects, each with:
- id: Unique document identifier (e.g., "doc_1")
- title: Document title
- filename: Output filename with extension (e.g., "document.docx")
- outputFormat: Format code (e.g., "docx", "pdf", "html", "xlsx", "pptx", "txt")
- language: ISO 639-1 language code (e.g., "de", "en", "fr", "it")
- chapters: Array of chapter objects, each with:
- id: Unique chapter identifier (e.g., "chapter_1")
- level: Heading level (1, 2, 3, etc.)
- title: Chapter title
- contentParts: Object mapping ContentPart IDs to usage instructions {{"partId": {{"instruction": "..."}} or {{"caption": "..."}}}}
- generationHint: Description of what content to generate
- sections: Empty array []
RETURN JSON:
EXAMPLE STRUCTURE (for reference only - adapt to user request):
{{
"metadata": {{
"title": "Document Title",
@ -248,34 +413,43 @@ RETURN JSON:
"id": "doc_1",
"title": "Document Title",
"filename": "document.{outputFormat}",
"outputFormat": "{outputFormat}",
"language": "{language}",
"chapters": [
{{
"id": "chapter_1",
"level": 1,
"title": "Introduction",
"contentPartIds": ["part_ext_1"],
"contentPartInstructions": {{
"part_ext_1": {{
"instruction": "Use full extracted text"
"title": "Chapter Title",
"contentParts": {{
"extracted_part_id": {{
"instruction": "Use extracted content with ALL relevant details from user request"
}}
}},
"generationHint": "Create introduction section",
"sections": []
}},
{{
"id": "chapter_2",
"level": 1,
"title": "Main Title",
"contentPartIds": [],
"contentPartInstructions": {{}},
"generationHint": "Create [specific content description] with [formatting details]. Include [required information]. Purpose: [explanation of what this chapter provides].",
"generationHint": "Detailed description including ALL relevant details from user request for this chapter",
"sections": []
}}
]
}}]
}}
Return ONLY valid JSON following the structure above.
CRITICAL INSTRUCTIONS:
- Generate chapters based on USER REQUEST, NOT based on the example above
- The example shows the JSON structure format, NOT the required chapters
- Create only the chapters that match the user's request
- Adapt chapter titles and structure to match the user's specific request
- Determine outputFormat and language for each document by analyzing the USER REQUEST above
- The example shows placeholders "{outputFormat}" and "{language}" - YOU MUST REPLACE THESE with actual values determined from the USER REQUEST
MANDATORY CONTENT ASSIGNMENT CHECK:
For each chapter, verify:
1. Does the user request mention documents/images/data? (e.g., "photo", "image", "document", "data", "based on", "about")
2. Does this chapter's generationHint, title, or purpose relate to those documents/images/data mentioned in step 1?
- Examples: "article about the photo", "text describing the image", "analysis of the document", "content based on the data"
- Even if chapter doesn't explicitly say "about the image", if user request mentions both the image AND this chapter's content type relate them
3. If YES to both chapter MUST have contentParts assigned (cannot be empty {{}})
4. If ContentPart is "object" format and chapter needs to write ABOUT it assign with "instruction" field, not just "caption"
OUTPUT FORMAT: Start with {{ and end with }}. Do NOT use markdown code fences (```json). Do NOT add explanatory text before or after the JSON. Return ONLY the JSON object itself.
"""
return prompt

View file

@ -15,6 +15,7 @@ from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallResponse, AiCallRequest, AiCallOptions, OperationTypeEnum, AiModelCall
from modules.aicore.aicoreModelRegistry import modelRegistry
from modules.aicore.aicoreModelSelector import modelSelector
from modules.shared.jsonUtils import stripCodeFences
logger = logging.getLogger(__name__)
@ -164,6 +165,29 @@ class ExtractionService:
if "sourceAction" not in p.metadata:
p.metadata["sourceAction"] = "extraction.extractContent"
# Write debug file for each text part extracted (without AI)
for j, part in enumerate(ec.parts):
if part.typeGroup == "text" and part.data and self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
try:
debug_content = {
"partIndex": j + 1,
"partId": part.id,
"typeGroup": part.typeGroup,
"mimeType": part.mimeType or "text/plain",
"label": part.label,
"dataLength": len(part.data),
"metadata": part.metadata.copy() if part.metadata else {},
"data": part.data # Full extracted text
}
debug_json = json.dumps(debug_content, indent=2, ensure_ascii=False)
# Use document name and part index for filename
doc_name_safe = documentData["fileName"].replace(" ", "_").replace("/", "_").replace("\\", "_")[:50]
debug_filename = f"extraction_text_part_{j+1}_{doc_name_safe}.txt"
self.services.utils.writeDebugFile(debug_json, debug_filename)
logger.info(f"Wrote debug file for extracted text part {j+1}/{len(ec.parts)}: {debug_filename}")
except Exception as e:
logger.warning(f"Failed to write debug file for text part {j+1}: {str(e)}")
# Log chunking information
chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)]
if chunkedParts:
@ -263,256 +287,6 @@ class ExtractionService:
return results
def mergeAiResults(
    self,
    extractedContent: List[ContentExtracted],
    aiResults: List[str],
    strategy: MergeStrategy
) -> ContentExtracted:
    """Combine per-chunk AI responses with the extracted parts into one result.

    Every AI response is wrapped in a text ContentPart (id "ai_result_<n>",
    no parent), then the merge routine selected by strategy.mergeType is
    applied to the original parts and the wrapped responses.

    Args:
        extractedContent: ContentExtracted objects whose parts were processed.
        aiResults: AI response strings, one per chunk.
        strategy: Merge configuration; mergeType selects "concatenate",
            "hierarchical" or "intelligent". Any other value falls back to
            concatenation.

    Returns:
        A new ContentExtracted (id "merged_<uuid4>") holding the merged parts.
    """
    logger.debug("=== MERGING AI RESULTS ===")
    logger.debug(f"Extracted content: {len(extractedContent)} documents")
    logger.debug(f"AI results: {len(aiResults)} responses")
    logger.debug(f"Merge strategy: {strategy.mergeType}")

    # Flatten the parts of every extracted document into one list.
    sourceParts: List[ContentPart] = [
        part
        for extracted in extractedContent
        for part in extracted.parts
    ]
    logger.debug(f"Total original parts: {len(sourceParts)}")

    # Wrap each raw AI response in a text part; "order" keeps the chunk position.
    responseParts: List[ContentPart] = [
        ContentPart(
            id=f"ai_result_{position}",
            parentId=None,  # Will be set based on strategy
            label="ai_result",
            typeGroup="text",
            mimeType="text/plain",
            data=responseText,
            metadata={
                "aiResult": True,
                "order": position,
                "size": len(responseText.encode('utf-8'))
            }
        )
        for position, responseText in enumerate(aiResults)
    ]
    logger.debug(f"Created {len(responseParts)} AI result parts")

    # Dispatch on the merge type; unknown types use concatenation.
    mergersByType = {
        "concatenate": self._mergeConcatenate,
        "hierarchical": self._mergeHierarchical,
        "intelligent": self._mergeIntelligent,
    }
    mergeRoutine = mergersByType.get(strategy.mergeType, self._mergeConcatenate)
    combinedParts = mergeRoutine(sourceParts, responseParts, strategy)

    result = ContentExtracted(
        id=f"merged_{uuid.uuid4()}",
        parts=combinedParts
    )

    logger.debug("=== MERGE COMPLETED ===")
    logger.debug(f"Final merged parts: {len(combinedParts)}")
    logger.debug(f"Merged content ID: {result.id}")
    return result
def _mergeConcatenate(
self,
originalParts: List[ContentPart],
aiResultParts: List[ContentPart],
strategy: MergeStrategy
) -> List[ContentPart]:
"""Merge parts by simple concatenation."""
mergedParts = []
# Add original parts (filtered if needed)
for part in originalParts:
if strategy.preserveChunks or not part.metadata.get("chunk", False):
mergedParts.append(part)
# Add AI results
if aiResultParts:
# Group AI results by parentId if available
aiResultsByParent = {}
for aiPart in aiResultParts:
parentId = aiPart.parentId or "root"
if parentId not in aiResultsByParent:
aiResultsByParent[parentId] = []
aiResultsByParent[parentId].append(aiPart)
# Merge AI results for each parent
for parentId, aiParts in aiResultsByParent.items():
if len(aiParts) == 1:
mergedParts.append(aiParts[0])
else:
# Concatenate multiple AI results for same parent
combinedData = strategy.chunkSeparator.join([p.data for p in aiParts])
combinedPart = ContentPart(
id=f"merged_ai_{parentId}",
parentId=parentId if parentId != "root" else None,
label="merged_ai_result",
typeGroup="text",
mimeType="text/plain",
data=combinedData,
metadata={
"aiResult": True,
"merged": True,
"sourceCount": len(aiParts),
"size": len(combinedData.encode('utf-8'))
}
)
mergedParts.append(combinedPart)
return mergedParts
def _mergeHierarchical(
self,
originalParts: List[ContentPart],
aiResultParts: List[ContentPart],
strategy: MergeStrategy
) -> List[ContentPart]:
"""Merge parts hierarchically based on parentId relationships."""
# Group parts by parentId
partsByParent = {}
for part in originalParts:
parentId = part.parentId or "root"
if parentId not in partsByParent:
partsByParent[parentId] = []
partsByParent[parentId].append(part)
# Group AI results by parentId
aiResultsByParent = {}
for aiPart in aiResultParts:
parentId = aiPart.parentId or "root"
if parentId not in aiResultsByParent:
aiResultsByParent[parentId] = []
aiResultsByParent[parentId].append(aiPart)
mergedParts = []
# Process each parent group
for parentId in set(list(partsByParent.keys()) + list(aiResultsByParent.keys())):
originalGroup = partsByParent.get(parentId, [])
aiGroup = aiResultsByParent.get(parentId, [])
# Add original parts
mergedParts.extend(originalGroup)
# Add AI results for this parent
if aiGroup:
if len(aiGroup) == 1:
mergedParts.append(aiGroup[0])
else:
# Merge multiple AI results
combinedData = strategy.chunkSeparator.join([p.data for p in aiGroup])
combinedPart = ContentPart(
id=f"hierarchical_ai_{parentId}",
parentId=parentId if parentId != "root" else None,
label="hierarchical_ai_result",
typeGroup="text",
mimeType="text/plain",
data=combinedData,
metadata={
"aiResult": True,
"hierarchical": True,
"sourceCount": len(aiGroup),
"size": len(combinedData.encode('utf-8'))
}
)
mergedParts.append(combinedPart)
return mergedParts
def _mergeIntelligent(
self,
originalParts: List[ContentPart],
aiResultParts: List[ContentPart],
strategy: MergeStrategy
) -> List[ContentPart]:
"""Merge parts using intelligent strategies based on content type."""
mergedParts = []
# Group by typeGroup for intelligent merging
partsByType = {}
for part in originalParts:
typeGroup = part.typeGroup
if typeGroup not in partsByType:
partsByType[typeGroup] = []
partsByType[typeGroup].append(part)
# Process each type group
for typeGroup, parts in partsByType.items():
if typeGroup == "text":
mergedParts.extend(self._mergeTextIntelligent(parts, aiResultParts, strategy))
elif typeGroup == "table":
mergedParts.extend(self._mergeTableIntelligent(parts, aiResultParts, strategy))
elif typeGroup == "structure":
mergedParts.extend(self._mergeStructureIntelligent(parts, aiResultParts, strategy))
else:
# Default handling for other types
mergedParts.extend(parts)
# Add any remaining AI results that weren't merged
for aiPart in aiResultParts:
if not any(p.id == aiPart.id for p in mergedParts):
mergedParts.append(aiPart)
return mergedParts
def _mergeTextIntelligent(
self,
textParts: List[ContentPart],
aiResultParts: List[ContentPart],
strategy: MergeStrategy
) -> List[ContentPart]:
"""Intelligent merging for text content."""
# For now, use concatenate strategy
# This could be enhanced with semantic analysis, summarization, etc.
return self._mergeConcatenate(textParts, aiResultParts, strategy)
def _mergeTableIntelligent(
self,
tableParts: List[ContentPart],
aiResultParts: List[ContentPart],
strategy: MergeStrategy
) -> List[ContentPart]:
"""Intelligent merging for table content."""
# For now, use concatenate strategy
# This could be enhanced with table merging logic
return self._mergeConcatenate(tableParts, aiResultParts, strategy)
def _mergeStructureIntelligent(
self,
structureParts: List[ContentPart],
aiResultParts: List[ContentPart],
strategy: MergeStrategy
) -> List[ContentPart]:
"""Intelligent merging for structured content."""
# For now, use concatenate strategy
# This could be enhanced with structure-aware merging
return self._mergeConcatenate(structureParts, aiResultParts, strategy)
async def processDocumentsPerChunk(
self,
documents: List[ChatDocument],
@ -756,11 +530,15 @@ class ExtractionService:
return processedResults
def _convertToContentParts(
self, partResults: Union[List[PartResult], List[AiCallResponse]]
self, partResults: Union[List[PartResult], List[AiCallResponse]], originalContentParts: Optional[List[ContentPart]] = None
) -> List[ContentPart]:
"""Convert part results to ContentParts (internal helper for consolidation).
Handles both PartResult (from extraction workflow) and AiCallResponse (from content parts processing).
Args:
partResults: List of PartResult or AiCallResponse objects
originalContentParts: Optional list of original ContentPart objects to preserve typeGroup and metadata
"""
content_parts = []
@ -794,14 +572,30 @@ class ExtractionService:
elif isinstance(partResults[0], AiCallResponse):
# Logic from interfaceAiObjects (from content parts processing)
# Phase 7: Add originalIndex for explicit ordering
# REQUIRED: originalContentParts must be provided for AiCallResponse path to preserve typeGroup
if not originalContentParts:
raise ValueError("originalContentParts is required when merging AiCallResponse objects. All callers must provide the original ContentPart objects to preserve typeGroup.")
for i, result in enumerate(partResults):
if result.content:
# Handle one-to-many relationships (e.g., chunking: 1 contentPart -> N chunkResults)
# If we have fewer originalContentParts than partResults, use the first one for all
if i < len(originalContentParts):
originalPart = originalContentParts[i]
else:
# One-to-many: use first originalContentPart for remaining results
originalPart = originalContentParts[0]
originalTypeGroup = originalPart.typeGroup or "text"
originalMimeType = originalPart.mimeType or "text/plain"
originalLabel = originalPart.label or f"ai_result_{i}"
content_part = ContentPart(
id=str(uuid.uuid4()),
parentId=None,
label=f"ai_result_{i}",
typeGroup="text", # Default to text for AI results
mimeType="text/plain",
label=originalLabel,
typeGroup=originalTypeGroup, # Preserve original typeGroup from originalContentParts
mimeType=originalMimeType,
data=result.content,
metadata={
"aiResult": True,
@ -821,17 +615,23 @@ class ExtractionService:
def mergePartResults(
self,
partResults: Union[List[PartResult], List[AiCallResponse]],
options: Optional[AiCallOptions] = None
options: Optional[AiCallOptions] = None,
originalContentParts: Optional[List[ContentPart]] = None
) -> str:
"""Unified merge for both PartResult and AiCallResponse.
Consolidated from both interfaceAiObjects.py and existing serviceExtraction method.
Args:
partResults: List of PartResult or AiCallResponse objects to merge
options: Optional AiCallOptions for merge strategy
originalContentParts: Optional list of original ContentPart objects to preserve typeGroup
"""
if not partResults:
return ""
# Convert to ContentParts using unified helper
content_parts = self._convertToContentParts(partResults)
# Convert to ContentParts using unified helper, preserving original typeGroup
content_parts = self._convertToContentParts(partResults, originalContentParts)
# Determine merge strategy based on input type
if isinstance(partResults[0], PartResult):
@ -852,7 +652,31 @@ class ExtractionService:
mergeType="concatenate"
)
# Apply merging
# Check if this is an elements response format (elements array structure)
# This is used for section content generation where multiple ContentParts are processed
isElementsResponse = self._isElementsResponse(content_parts)
if isElementsResponse:
# Merge JSON elements responses intelligently (merge tables, combine elements)
logger.info(f"Detected 'elements' JSON response format - merging {len(content_parts)} JSON responses")
merged_json = self._mergeElementsResponses(content_parts)
merged_json_str = json.dumps(merged_json, indent=2, ensure_ascii=False)
logger.info(f"Successfully merged 'elements' JSON responses into single unified JSON ({len(merged_json_str)} chars)")
return merged_json_str
# Check if this is a JSON extraction response format (extracted_content structure)
# If so, merge JSON structures intelligently before applying regular merging
isJsonExtractionResponse = self._isJsonExtractionResponse(content_parts)
if isJsonExtractionResponse:
# Merge JSON extraction responses intelligently
logger.info(f"Detected JSON extraction response format - merging {len(content_parts)} JSON responses")
merged_json = self._mergeJsonExtractionResponses(content_parts, originalContentParts)
merged_json_str = json.dumps(merged_json, indent=2, ensure_ascii=False)
logger.info(f"Successfully merged JSON extraction responses into single unified JSON ({len(merged_json_str)} chars)")
return merged_json_str
# Apply regular merging for non-JSON extraction responses
merged_parts = applyMerging(content_parts, merge_strategy)
# Phase 6: Enhanced format with metadata preservation
@ -897,6 +721,428 @@ class ExtractionService:
logger.info(f"Merged {len(partResults)} parts using unified merging system with metadata preservation (generationResponse={isGenerationResponse})")
return final_content.strip()
def _isJsonExtractionResponse(self, content_parts: List[ContentPart]) -> bool:
    """Check whether the content parts look like JSON extraction responses.

    Only the FIRST part is inspected; it is treated as representative of the
    whole list. The part qualifies when its data is a string that (after
    stripping surrounding whitespace and Markdown code fences) parses as a
    JSON object containing an "extracted_content" key.

    Args:
        content_parts: ContentParts produced from AI responses.

    Returns:
        True if the first part is a JSON object with an "extracted_content"
        key, False otherwise (including empty input, non-string data and
        unparseable JSON).
    """
    if not content_parts:
        return False

    # Falsy data (None, "") is normalised to "" so the string checks below apply
    firstPartData = content_parts[0].data if content_parts[0].data else ""
    if not isinstance(firstPartData, str):
        return False

    # Strip markdown code fences (```json ... ```) before checking
    strippedData = stripCodeFences(firstPartData.strip())

    # Cheap pre-check: anything not starting like a JSON object/array is not JSON
    if not strippedData.startswith(('{', '[')):
        return False

    try:
        parsed = json.loads(strippedData)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError; RecursionError covers absurdly
        # deep nesting. Anything else (e.g. KeyboardInterrupt) must propagate.
        return False

    # Extraction response structure: {"extracted_content": {...}}
    return isinstance(parsed, dict) and "extracted_content" in parsed
def _isElementsResponse(self, content_parts: List[ContentPart]) -> bool:
    """Check whether the content parts look like JSON 'elements' responses.

    Only the FIRST part is inspected; it is treated as representative of the
    whole list. The part qualifies when its data is a string that (after
    stripping surrounding whitespace and Markdown code fences) parses as a
    JSON object whose "elements" value is a list (e.g. section content).

    Args:
        content_parts: ContentParts produced from AI responses.

    Returns:
        True if the first part is a JSON object with an "elements" list,
        False otherwise (including empty input, non-string data and
        unparseable JSON).
    """
    if not content_parts:
        return False

    # Falsy data (None, "") is normalised to "" so the string checks below apply
    firstPartData = content_parts[0].data if content_parts[0].data else ""
    if not isinstance(firstPartData, str):
        return False

    # Strip markdown code fences before checking
    strippedData = stripCodeFences(firstPartData.strip())

    # Cheap pre-check: anything not starting like a JSON object/array is not JSON
    if not strippedData.startswith(('{', '[')):
        return False

    try:
        parsed = json.loads(strippedData)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError; RecursionError covers absurdly
        # deep nesting. Anything else (e.g. KeyboardInterrupt) must propagate.
        return False

    return isinstance(parsed, dict) and isinstance(parsed.get("elements"), list)
def _mergeElementsResponses(self, content_parts: List[ContentPart]) -> Dict[str, Any]:
    """Merge multiple JSON responses with an 'elements' array into one response.

    Merge rules:
    - Table elements whose "headers" are identical are combined into ONE table
      whose rows are the concatenation of all rows in encounter order.
    - Tables without rows never appear in the result; they only reserve the
      output position of their header group.
    - Tables that cannot be grouped (headers missing, not a list, or containing
      unhashable values; rows not a list) are kept unchanged when they carry
      rows, so no data is silently dropped.
    - All other elements are kept as-is, skipping exact duplicates.
    - Output order: non-table (and ungroupable) elements first, merged tables
      afterwards.

    Args:
        content_parts: ContentParts whose ``data`` holds one JSON object
            (optionally fenced) or several objects separated by ``---``.

    Returns:
        A dict of the form ``{"elements": [...]}``.
    """

    def _candidateBlocks(rawData: str) -> List[str]:
        """Return the JSON block strings contained in one part's data."""
        # Try the payload as a whole first: a valid JSON document may itself
        # contain '---' inside string values (e.g. Markdown rules, dashes in
        # table cells) and must not be cut apart. Any failure here simply
        # means "not a single document" and falls back to splitting.
        try:
            json.loads(stripCodeFences(rawData.strip()))
        except Exception:
            return rawData.split('---')
        return [rawData]

    def _headersKey(headers: Any) -> Optional[tuple]:
        """Return a hashable grouping key for headers, or None if ungroupable."""
        if not isinstance(headers, list) or not headers:
            return None
        try:
            key = tuple(headers)
            hash(key)  # nested lists/dicts inside headers are unhashable
        except TypeError:
            return None
        return key

    merged_elements: List[Any] = []
    # headers tuple -> table contents (with rows) sharing these headers
    table_headers_map: Dict[tuple, List[Dict[str, Any]]] = {}

    for part in content_parts:
        if not part.data:
            continue
        if not isinstance(part.data, str):
            logger.warning(f"Skipping part {part.id}: data is not a string and cannot be parsed as JSON")
            continue

        # Handle multiple JSON blocks in a single response (separated by ---)
        for blockData in _candidateBlocks(part.data):
            if not blockData.strip():
                continue

            try:
                strippedData = stripCodeFences(blockData.strip())
                if not strippedData:
                    continue
                parsed = json.loads(strippedData)
            except Exception as e:
                # Best effort: one unparseable block must not abort the merge
                logger.warning(f"Failed to parse JSON elements response from part {part.id}: {str(e)}")
                continue

            if not isinstance(parsed, dict) or not isinstance(parsed.get("elements"), list):
                continue

            for element in parsed["elements"]:
                table_content = None
                if isinstance(element, dict) and element.get("type") == "table":
                    table_content = element.get("content")

                if not isinstance(table_content, dict):
                    # Keep non-table elements as is, but avoid duplicates if possible
                    if element not in merged_elements:
                        merged_elements.append(element)
                    continue

                headers_key = _headersKey(table_content.get("headers"))
                rows = table_content.get("rows") or []

                if headers_key is None or not isinstance(rows, list):
                    # Cannot be grouped by headers: preserve the table unchanged
                    # if it carries data, drop it if it is empty
                    if rows and element not in merged_elements:
                        merged_elements.append(element)
                    continue

                # Register the header group even for an empty table so the
                # group's output position follows first appearance
                tablesForHeaders = table_headers_map.setdefault(headers_key, [])
                if rows:
                    tablesForHeaders.append(table_content)

    # Merge tables by headers - combine rows from tables with same headers
    for headers_key, tables in table_headers_map.items():
        all_rows = [row for table_content in tables for row in table_content.get("rows", [])]
        # Header groups that only ever saw empty tables produce no output
        if all_rows:
            merged_elements.append({
                "type": "table",
                "content": {
                    "headers": list(headers_key),
                    "rows": all_rows
                }
            })

    return {"elements": merged_elements}
def _mergeJsonExtractionResponses(self, content_parts: List[ContentPart], originalContentParts: Optional[List[ContentPart]] = None) -> Dict[str, Any]:
    """Merge multiple JSON extraction responses into one unified response.

    Each ContentPart's ``data`` is expected to hold one JSON object of the form
    ``{"extracted_content": {...}}`` (optionally wrapped in Markdown code
    fences) or several such objects separated by ``---``.

    Merges:
    - Text: Non-empty text blocks are joined with a blank line
    - Tables: Rows of tables with identical headers are concatenated in
      encounter order (duplicate rows preserved). Tables that cannot be grouped
      (headers not a list of hashable values, or rows not a list) are kept
      unchanged after the grouped tables.
    - Headings: All heading dicts containing "text" are collected
    - Lists: All list dicts containing "items" are collected (kept separate)
    - Images: All image dicts containing "description" are collected

    Side effects:
        When ``self.services.utils.writeDebugFile`` is available, writes the
        per-block extracted data ("content_extraction_per_part") and, if
        original parts were supplied, a per-original-part view
        ("content_extraction_original_parts"). Failures while writing debug
        files are logged and never abort the merge.

    Args:
        content_parts: ContentParts holding the AI extraction responses.
        originalContentParts: Optional original ContentParts. The part at the
            same position is used for debug output; if there are fewer
            originals than responses (one-to-many chunking) the first original
            is used for the remainder.

    Returns:
        A dict ``{"extracted_content": {"text", "tables", "headings", "lists",
        "images"}}`` with the merged content.
    """

    def _candidateBlocks(rawData: str) -> List[str]:
        """Return the JSON block strings contained in one part's data."""
        # Try the payload as a whole first: a valid JSON document may itself
        # contain '---' inside string values (e.g. Markdown rules, dashes in
        # table cells) and must not be cut apart. Any failure here simply
        # means "not a single document" and falls back to splitting.
        try:
            json.loads(stripCodeFences(rawData.strip()))
        except Exception:
            return rawData.split('---')
        return [rawData]

    def _headersKey(headers: Any) -> Optional[tuple]:
        """Return a hashable grouping key for headers, or None if ungroupable."""
        if not isinstance(headers, list):
            return None
        try:
            key = tuple(headers)
            hash(key)  # nested lists/dicts inside headers are unhashable
        except TypeError:
            return None
        return key

    merged = {
        "extracted_content": {
            "text": "",
            "tables": [],
            "headings": [],
            "lists": [],
            "images": []
        }
    }

    # Track table headers to merge tables with same structure
    table_headers_map: Dict[tuple, List[Dict[str, Any]]] = {}  # headers tuple -> [tables]
    # Tables with data that cannot be grouped by headers; preserved unchanged
    ungroupable_tables: List[Dict[str, Any]] = []
    all_text_parts = []
    all_headings = []
    all_lists = []
    all_images = []

    # Collect per-part extracted data for debug file
    per_part_extracted_data = []
    # Track original parts and their extracted data
    original_parts_extracted_data = []

    for part_idx, part in enumerate(content_parts, 1):
        logger.info(f"=== Processing ContentPart {part_idx}/{len(content_parts)}: id={part.id}, label={part.label}, typeGroup={part.typeGroup} ===")

        if not part.data:
            logger.warning(f"ContentPart {part.id} has no data, skipping")
            continue
        if not isinstance(part.data, str):
            logger.warning(f"ContentPart {part.id} data is not a string and cannot be parsed as JSON, skipping")
            continue

        # Find corresponding original part (if available); it is the same for
        # every block of this part
        original_part = None
        if originalContentParts:
            if part_idx <= len(originalContentParts):
                original_part = originalContentParts[part_idx - 1]
            else:
                # Handle one-to-many (chunking) - use first original part
                original_part = originalContentParts[0]

        # Handle multiple JSON blocks in a single response (separated by ---)
        partDataBlocks = _candidateBlocks(part.data)
        logger.debug(f"ContentPart {part.id}: Found {len(partDataBlocks)} JSON block(s) (split by ---)")

        for block_idx, blockData in enumerate(partDataBlocks, 1):
            if not blockData.strip():
                continue

            try:
                # Strip markdown code fences before parsing
                strippedData = stripCodeFences(blockData.strip())
                if not strippedData:
                    logger.debug(f"ContentPart {part.id}, Block {block_idx}: Empty after stripping code fences")
                    continue

                parsed = json.loads(strippedData)
                if not isinstance(parsed, dict) or "extracted_content" not in parsed:
                    logger.debug(f"ContentPart {part.id}, Block {block_idx}: Not a valid extraction response format")
                    continue

                extracted = parsed["extracted_content"]
                if not isinstance(extracted, dict):
                    # Every access below relies on dict semantics
                    logger.debug(f"ContentPart {part.id}, Block {block_idx}: Not a valid extraction response format")
                    continue

                text_value = extracted.get("text")
                tables_value = extracted.get("tables")
                tables_list = tables_value if isinstance(tables_value, list) else []

                # Store extracted data for this part/block for debug file
                per_part_extracted_data.append({
                    "contentPartId": part.id,
                    "contentPartLabel": part.label,
                    "contentPartTypeGroup": part.typeGroup,
                    "blockIndex": block_idx,
                    "extracted_content": extracted.copy()  # Store full extracted content
                })

                # Store original part extracted data
                if original_part:
                    # Extract text from extracted_content for display
                    extracted_text = text_value if isinstance(text_value, str) else ""
                    if not extracted_text and tables_list:
                        # If no text but has tables, create a text representation
                        table_texts = []
                        for table in tables_list:
                            if not isinstance(table, dict):
                                continue
                            headers = table.get("headers", [])
                            rows = table.get("rows", [])
                            if isinstance(headers, list) and isinstance(rows, list) and headers and rows:
                                # str() each header: this is display-only text and
                                # must never fail (and thereby drop the block)
                                # just because a header is a number
                                table_texts.append(f"Table: {', '.join(str(h) for h in headers)}\nRows: {len(rows)}")
                        extracted_text = "\n".join(table_texts) if table_texts else ""

                    original_meta = original_part.metadata or {}
                    original_parts_extracted_data.append({
                        "id": original_part.id,
                        "typeGroup": original_part.typeGroup,
                        "mimeType": original_part.mimeType or "text/plain",
                        "label": original_part.label,
                        "dataLength": len(extracted_text),
                        "metadata": {
                            "documentId": original_meta.get("documentId"),
                            "documentMimeType": original_meta.get("documentMimeType"),
                            "originalFileName": original_meta.get("originalFileName"),
                        },
                        "data": extracted_text,  # Full extracted text
                        "extracted_content": extracted.copy()  # Full extracted content structure
                    })

                # Log extracted content summary
                def _listLen(key: str) -> int:
                    value = extracted.get(key)
                    return len(value) if isinstance(value, list) else 0

                extracted_summary = {
                    "text": len(text_value) if isinstance(text_value, str) else 0,
                    "tables": len(tables_list),
                    "headings": _listLen("headings"),
                    "lists": _listLen("lists"),
                    "images": _listLen("images"),
                }
                logger.info(f"ContentPart {part.id}, Block {block_idx} extracted: text={extracted_summary['text']} chars, tables={extracted_summary['tables']}, headings={extracted_summary['headings']}, lists={extracted_summary['lists']}, images={extracted_summary['images']}")

                # Log table details
                for table_idx, table in enumerate(tables_list, 1):
                    if isinstance(table, dict):
                        headers = table.get("headers", [])
                        rows = table.get("rows", [])
                        logger.info(f"  Table {table_idx}: headers={headers}, rows={len(rows) if isinstance(rows, list) else 0}")

                # Log list details
                if extracted_summary['lists'] > 0:
                    for list_idx, list_item in enumerate(extracted.get("lists", []), 1):
                        if isinstance(list_item, dict):
                            list_type = list_item.get("type", "unknown")
                            items = list_item.get("items", [])
                            logger.info(f"  List {list_idx}: type={list_type}, items={len(items) if isinstance(items, list) else 0}")

                # Merge text
                if isinstance(text_value, str):
                    text_content = text_value.strip()
                    if text_content:
                        all_text_parts.append(text_content)

                # Merge tables - group by headers to merge compatible tables
                for table in tables_list:
                    if not isinstance(table, dict) or "headers" not in table or "rows" not in table:
                        continue

                    headers = table["headers"]
                    rows = table["rows"]
                    if not headers or not rows:
                        continue

                    # Use headers as key for grouping
                    headers_key = _headersKey(headers)
                    if headers_key is None or not isinstance(rows, list):
                        # Cannot be grouped - keep the table unchanged rather
                        # than losing its data
                        ungroupable_tables.append(table)
                        continue
                    table_headers_map.setdefault(headers_key, []).append(table)

                # Merge headings
                if isinstance(extracted.get("headings"), list):
                    all_headings.extend(
                        heading for heading in extracted["headings"]
                        if isinstance(heading, dict) and "text" in heading
                    )

                # Merge lists
                if isinstance(extracted.get("lists"), list):
                    all_lists.extend(
                        list_item for list_item in extracted["lists"]
                        if isinstance(list_item, dict) and "items" in list_item
                    )

                # Merge images
                if isinstance(extracted.get("images"), list):
                    all_images.extend(
                        image for image in extracted["images"]
                        if isinstance(image, dict) and "description" in image
                    )

            except Exception as e:
                # Best effort: one broken block must not abort the whole merge
                logger.warning(f"Failed to parse JSON extraction response block from part {part.id}: {str(e)}")
                continue

    # Combine text parts
    if all_text_parts:
        merged["extracted_content"]["text"] = "\n\n".join(all_text_parts)

    # Merge tables by headers - combine rows from tables with same headers
    for headers_key, tables in table_headers_map.items():
        all_rows = [row for table in tables for row in table.get("rows", [])]
        if all_rows:
            merged["extracted_content"]["tables"].append({
                "headers": list(headers_key),
                "rows": all_rows
            })
    # Tables that could not be grouped follow the merged ones unchanged
    merged["extracted_content"]["tables"].extend(ungroupable_tables)

    # Add headings
    if all_headings:
        merged["extracted_content"]["headings"] = all_headings

    # Add lists - keep them separate (like headings) to preserve document structure
    if all_lists:
        merged["extracted_content"]["lists"] = all_lists

    # Add images
    if all_images:
        merged["extracted_content"]["images"] = all_images

    logger.info("=== Merging Summary ===")
    logger.info(f"Total ContentParts processed: {len(content_parts)}")
    logger.info(f"Text parts collected: {len(all_text_parts)}")
    logger.info(f"Table groups (by headers): {len(table_headers_map)}")
    logger.info(f"Headings collected: {len(all_headings)}")
    logger.info(f"Lists collected: {len(all_lists)}")
    logger.info(f"Images collected: {len(all_images)}")

    # Log table merging details
    for headers_key, tables in table_headers_map.items():
        total_rows = sum(len(table.get("rows", [])) for table in tables)
        logger.info(f"  Table group with headers {list(headers_key)}: {len(tables)} table(s), {total_rows} total rows")

    logger.info(f"Merged JSON extraction responses: {len(table_headers_map)} table groups, {len(all_text_parts)} text parts, {len(all_headings)} headings, {len(all_lists)} lists, {len(all_images)} images")

    # Write per-part extracted data to debug file
    if per_part_extracted_data and self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
        try:
            debug_content = {
                "summary": {
                    "totalContentParts": len(content_parts),
                    "totalExtractedBlocks": len(per_part_extracted_data),
                    "mergedResult": {
                        "textParts": len(all_text_parts),
                        "tableGroups": len(table_headers_map),
                        "headings": len(all_headings),
                        "lists": len(all_lists),
                        "images": len(all_images)
                    }
                },
                "perPartExtractedData": per_part_extracted_data
            }
            debug_json = json.dumps(debug_content, indent=2, ensure_ascii=False)
            self.services.utils.writeDebugFile(debug_json, "content_extraction_per_part")
            logger.info(f"Wrote per-part extracted data to debug file: {len(per_part_extracted_data)} blocks from {len(content_parts)} content parts")
        except Exception as e:
            logger.warning(f"Failed to write per-part extracted data to debug file: {str(e)}")

    # Write original parts extracted data in extraction_result format
    if original_parts_extracted_data and self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
        try:
            # Get document info from first original part if available
            document_name = None
            document_mime_type = None
            if originalContentParts and len(originalContentParts) > 0:
                first_part = originalContentParts[0]
                if first_part.metadata:
                    document_name = first_part.metadata.get("originalFileName")
                    document_mime_type = first_part.metadata.get("documentMimeType")

            # Format similar to extraction_result file (the internal "id" is
            # intentionally not part of that format)
            exported_keys = ("typeGroup", "mimeType", "label", "dataLength", "metadata", "data", "extracted_content")
            extraction_result_format = {
                "documentName": document_name or "Unknown",
                "documentMimeType": document_mime_type or "application/octet-stream",
                "partsCount": len(original_parts_extracted_data),
                "parts": [
                    {key: part_data[key] for key in exported_keys}
                    for part_data in original_parts_extracted_data
                ]
            }

            result_json = json.dumps(extraction_result_format, indent=2, ensure_ascii=False)
            self.services.utils.writeDebugFile(result_json, "content_extraction_original_parts")
            logger.info(f"Wrote original parts extracted data to debug file: {len(original_parts_extracted_data)} original parts")
        except Exception as e:
            logger.warning(f"Failed to write original parts extracted data to debug file: {str(e)}")

    return merged
async def chunkContentPartForAi(self, contentPart, model, options, prompt: str = "") -> List[Dict[str, Any]]:
"""Chunk a content part based on model capabilities, accounting for prompt, system message overhead, and maxTokens output.
@ -1162,7 +1408,8 @@ class ExtractionService:
if not chunkResults:
raise ValueError(f"All chunks failed for content part")
mergedContent = self.mergePartResults(chunkResults, options)
# Pass original contentPart to preserve typeGroup for all chunks (one-to-many: 1 part -> N chunks)
mergedContent = self.mergePartResults(chunkResults, options, [contentPart])
return AiCallResponse(
content=mergedContent,
modelName=model.name,
@ -1208,7 +1455,8 @@ class ExtractionService:
raise
# Merge chunk results using unified mergePartResults
mergedContent = self.mergePartResults(chunkResults, options)
# Pass original contentPart to preserve typeGroup for all chunks (one-to-many: 1 part -> N chunks)
mergedContent = self.mergePartResults(chunkResults, options, [contentPart])
logger.info(f"✅ Content part chunked and processed with model: {model.name} ({len(chunks)} chunks)")
return AiCallResponse(
@ -1254,9 +1502,13 @@ class ExtractionService:
aiObjects, # Pass interface for AI calls
progressCallback=None
) -> AiCallResponse:
"""Process content parts with model-aware chunking and AI calls.
"""Process content parts with model-aware chunking and AI calls in parallel.
Moved from interfaceAiObjects.callWithContentParts() - entry point for content parts processing.
Uses parallel processing similar to section generation for better performance.
SPECIAL CASE: For DATA_EXTRACT operations, processes all contentParts together in ONE call
to enable proper merging (e.g., merging tables from multiple PDFs into one table).
"""
prompt = request.prompt
options = request.options
@ -1269,16 +1521,100 @@ class ExtractionService:
if not failoverModelList:
return self._createErrorResponse("No suitable models found", 0, 0)
# Process each content part
allResults = []
for contentPart in contentParts:
partResult = await self.processContentPartWithFallback(
contentPart, prompt, options, failoverModelList, aiObjects, progressCallback
)
allResults.append(partResult)
totalParts = len(contentParts)
if totalParts == 0:
return self._createErrorResponse("No content parts to process", 0, 0)
# Merge all results using unified mergePartResults
mergedContent = self.mergePartResults(allResults)
# NOTE: For DATA_EXTRACT operations, the extraction prompt explicitly asks the AI to merge
# all contentParts into ONE unified JSON response. Even though we process parts separately,
# each response should contain merged content. The mergePartResults will concatenate responses,
# but the new prompt format (flat extracted_content structure) is designed for easier merging.
# DEFAULT: Process parts in parallel
# Thread-safe counter for progress tracking
completedCount = [0] # Use list to allow modification in nested function
# Process parts in parallel with concurrency control
maxConcurrent = 5
if options and hasattr(options, 'maxConcurrentParts'):
maxConcurrent = options.maxConcurrentParts
semaphore = asyncio.Semaphore(maxConcurrent)
async def processSinglePart(contentPart, partIndex: int) -> AiCallResponse:
"""Process a single content part with progress logging."""
async with semaphore:
partLabel = contentPart.label or f"Part {partIndex+1}"
partType = contentPart.typeGroup or "unknown"
# Log start of processing
if progressCallback:
progressCallback(0.1 + (partIndex / totalParts) * 0.8, f"Processing {partLabel} ({partType}) - {partIndex+1}/{totalParts}")
try:
# Process the part
partResult = await self.processContentPartWithFallback(
contentPart, prompt, options, failoverModelList, aiObjects, None # Don't pass progressCallback to avoid double logging
)
# Write debug files for generation phase (section content generation)
# Check for DATA_GENERATE or DATA_ANALYSE (used for section generation)
isGenerationPhase = False
if options and hasattr(options, 'operationType'):
isGenerationPhase = (options.operationType == OperationTypeEnum.DATA_GENERATE or
options.operationType == OperationTypeEnum.DATA_ANALYSE)
if isGenerationPhase:
if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
try:
# Create debug filename with contentPart ID or label
partId = contentPart.id[:8] if contentPart.id else f"part_{partIndex+1}"
partLabelSafe = (contentPart.label or f"part_{partIndex+1}").replace(" ", "_").replace("/", "_").replace("\\", "_")[:30]
debugPrefix = f"generation_contentPart_{partId}_{partLabelSafe}"
# Write prompt
self.services.utils.writeDebugFile(prompt, f"{debugPrefix}_prompt")
# Write response
responseContent = partResult.content if partResult.content else ""
self.services.utils.writeDebugFile(responseContent, f"{debugPrefix}_response")
logger.debug(f"Wrote debug files for contentPart {partId} (generation): {debugPrefix}_prompt, {debugPrefix}_response")
except Exception as debugError:
logger.warning(f"Failed to write debug file for contentPart {contentPart.id}: {str(debugError)}")
# Update completed count and log progress
completedCount[0] += 1
if progressCallback:
progressCallback(0.1 + (completedCount[0] / totalParts) * 0.8, f"Completed {partLabel} ({partType}) - {completedCount[0]}/{totalParts}")
return partResult
except Exception as e:
# Update completed count even on error
completedCount[0] += 1
logger.error(f"Error processing part {partIndex+1} ({partLabel}): {str(e)}")
if progressCallback:
progressCallback(0.1 + (completedCount[0] / totalParts) * 0.8, f"Error processing {partLabel} ({partType}) - {completedCount[0]}/{totalParts}")
# Return error response
return self._createErrorResponse(f"Error processing part: {str(e)}", 0, 0)
# Create tasks for all parts
tasks = [processSinglePart(contentPart, i) for i, contentPart in enumerate(contentParts)]
# Execute all tasks in parallel with error handling
results = await asyncio.gather(*tasks, return_exceptions=True)
# Process results and handle exceptions
allResults = []
for i, result in enumerate(results):
if isinstance(result, Exception):
logger.error(f"Exception processing part {i+1}: {str(result)}")
allResults.append(self._createErrorResponse(f"Exception: {str(result)}", 0, 0))
elif result is not None:
allResults.append(result)
# Merge all results using unified mergePartResults, preserving original typeGroup
mergedContent = self.mergePartResults(allResults, options, contentParts)
return AiCallResponse(
content=mergedContent,

View file

@ -45,58 +45,51 @@ async def buildExtractionPrompt(
Complete extraction prompt string
"""
# Unified multi-file example (single doc = multi with n=1)
# Flat extraction format - returns extracted content as structured data, not documents/sections
# This format allows merging multiple contentParts into one response
json_example = {
"metadata": {
"title": "Multi-Document Example",
"split_strategy": "by_section",
"source_documents": ["doc_001"],
"extraction_method": "ai_extraction"
},
"documents": [
{
"id": "doc_section_1",
"title": "Section 1 Title",
"filename": "section_1.xlsx",
"sections": [
{
"id": "section_1",
"content_type": "heading",
"elements": [
{
"level": 1,
"text": "1. SECTION TITLE"
}
],
"order": 1
},
{
"id": "section_2",
"content_type": "paragraph",
"elements": [
{
"text": "This is the actual content that should be extracted from the document."
}
],
"order": 2
},
{
"id": "section_3",
"content_type": "table",
"elements": [
{
"headers": ["Column 1", "Column 2"],
"rows": [["Value 1", "Value 2"]]
}
],
"order": 3
}
]
}
]
"extracted_content": {
"text": "Extracted text content from the document...",
"tables": [
{
"headers": ["Column 1", "Column 2"],
"rows": [
["Value 1", "Value 2"],
["Value 3", "Value 4"]
]
}
],
"headings": [
{
"level": 1,
"text": "Main Heading"
},
{
"level": 2,
"text": "Subheading"
}
],
"lists": [
{
"type": "bullet",
"items": ["Item 1", "Item 2", "Item 3"]
}
],
"images": [
{
"description": "Description of image content, including all visible text, tables, and visual elements"
}
]
}
}
structure_instruction = "CRITICAL: You MUST return a JSON structure with a \"documents\" array. For single documents, create one document entry with all sections."
structure_instruction = """CRITICAL EXTRACTION REQUIREMENTS:
1. Extract content from the provided ContentPart(s) - process what is provided in this call
2. If this ContentPart contains tables, extract them with proper structure (headers and rows)
3. If this ContentPart contains text, extract it as structured text
4. Return ONE JSON object with extracted content from this ContentPart
5. Preserve all original data - do not summarize or interpret
6. The system will merge results from multiple ContentParts automatically - focus on extracting this ContentPart's content accurately"""
# Parse extraction intent if AI service is available
extraction_intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services) if aiService else userPrompt
@ -124,30 +117,25 @@ USER REQUEST / USER PROMPT:
END OF USER REQUEST / USER PROMPT
{'='*80}
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
You are a document processing assistant that extracts content from documents. Your task is to analyze the provided ContentPart(s) and extract their content into a structured JSON format.
TASK: Extract the actual content from the document and organize it into documents. For single documents, create one document entry. For multi-document requests, create multiple document entries.
TASK: Extract content from the provided ContentPart(s). Extract all tables, text, headings, lists, and other content types accurately. The system processes ContentParts individually and merges results automatically.
LANGUAGE REQUIREMENT: All extracted content must be in the language '{userLanguage}'. Extract and preserve content in this language.
{extraction_intent}
REQUIREMENTS:
1. Analyze the document content provided in the context below
2. Identify distinct sections in the document (by headings, topics, or logical breaks)
3. Create one or more JSON document entries based on the content structure
4. Extract the real content from each section (headings, paragraphs, lists, etc.)
5. Generate appropriate filenames for each document
{structure_instruction}
OUTPUT FORMAT: Return only valid JSON in this exact structure:
{json.dumps(json_example, indent=2)}
Requirements:
CRITICAL EXTRACTION RULES:
- Extract only content that is ACTUALLY PRESENT in the ContentPart - never create fake or placeholder data
- Return empty arrays [] or empty strings "" when content is missing - this is normal and expected
- Extract all tables, text, headings, lists accurately with proper structure
- Preserve all original data - do not summarize or interpret
- Use the exact JSON format shown above
- Maintain data integrity and structure
- Return ONE JSON object per ContentPart (the system merges multiple ContentParts automatically)
Content Types to Extract:
1. Tables: Extract all rows and columns with proper headers
@ -166,7 +154,7 @@ Image Analysis Requirements:
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
Extract only actual content from the ContentPart. Return empty arrays/strings when content is missing - never create fake data.
""".strip()
# Add renderer-specific guidelines if provided

View file

@ -346,16 +346,19 @@ class GenerationService:
'workflowId': 'unknown'
}
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]:
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]:
"""
Render extracted JSON content to the specified output format.
Processes EACH document separately and calls renderer for each.
Each renderer can return 1..n documents (e.g., HTML + images).
Per-document format and language are extracted from structure (validated in State 3).
Multiple documents can have different formats and languages.
Args:
extractedContent: Structured JSON document with documents array
outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
In future, each document can have its own format
outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx) - Global fallback
language: Language (global fallback) - Per-document language extracted from structure
title: Report title
userPrompt: User's original prompt for report generation
aiService: AI service instance for generation prompt creation
@ -392,9 +395,17 @@ class GenerationService:
continue
# Determine format for this document
# TODO: In future, each document can have its own format field
# For now, use the global outputFormat
docFormat = doc.get("format", outputFormat)
# Check outputFormat field first (per-document), then format field (legacy), then global fallback
docFormat = doc.get("outputFormat") or doc.get("format") or outputFormat
# Determine language for this document
# Extract per-document language from structure (validated in State 3), fallback to global
docLanguage = doc.get("language") or language
# Validate language format (should be 2-character ISO code, validated in State 3)
if not isinstance(docLanguage, str) or len(docLanguage) != 2:
logger.warning(f"Document {doc.get('id')} has invalid language format: {docLanguage}, using fallback")
docLanguage = language # Use global fallback
# Get renderer for this document's format
renderer = self._getFormatRenderer(docFormat)
@ -402,9 +413,19 @@ class GenerationService:
logger.warning(f"Unsupported format '{docFormat}' for document {doc.get('id', docIndex)}, skipping")
continue
# Check output style classification (code/document/image/etc.) from renderer
from modules.services.serviceGeneration.renderers.registry import getOutputStyle
outputStyle = getOutputStyle(docFormat)
if outputStyle:
logger.debug(f"Document {doc.get('id', docIndex)} format '{docFormat}' classified as '{outputStyle}' style")
# Store style in document metadata for potential use in processing paths
if "metadata" not in doc:
doc["metadata"] = {}
doc["metadata"]["outputStyle"] = outputStyle
# Create JSON structure with single document (preserving metadata)
singleDocContent = {
"metadata": metadata,
"metadata": {**metadata, "language": docLanguage}, # Add per-document language to metadata
"documents": [doc] # Only this document
}

View file

@ -0,0 +1,114 @@
# Document Generation Architecture Analysis
## Current Flow
### 1. Document Input → ContentParts (`extractAndPrepareContent`)
**Location**: `gateway/modules/services/serviceAi/subContentExtraction.py`
**Flow**:
- Regular documents → Calls `extractContent()` (NON-AI extraction) → Creates contentParts with raw extracted text
- **BUT THEN**:
- Images with "extract" intent → Calls Vision AI (line 190) → AI extraction
- Text with "extract" intent + extractionPrompt → Calls AI processing (line 265) → AI extraction
- Pre-extracted JSON → Uses contentParts directly (no AI)
**Result**: ContentParts may already be AI-processed before structure generation
### 2. Structure Generation
**Location**: `gateway/modules/services/serviceAi/subStructureGeneration.py`
**Flow**:
- Uses contentParts (may already be AI-processed)
- Generates document structure (chapters, sections)
### 3. Section Generation (`_processSingleSection`)
**Location**: `gateway/modules/services/serviceAi/subStructureFilling.py`
**Flow**:
- Uses contentParts (which may already be AI-processed)
- Aggregates "extracted" contentParts with AI (lines 554-682)
- Generates section content using `callAiWithLooping` with `useCaseId="section_content"`
## Issues Identified
### Issue 1: Duplicate AI Processing
- AI extraction happens in `extractAndPrepareContent` (for images/text)
- AI generation happens again in section generation
- This is redundant and inefficient
### Issue 2: Architecture Inconsistency
- Pre-extracted JSON files → contentParts directly (no AI)
- Regular documents → contentParts + AI extraction (inconsistent)
- User wants: Documents → contentParts (like pre-extracted JSON) → AI only in section generation
### Issue 3: Image Processing
- Images need Vision AI to extract text
- Currently happens in `extractAndPrepareContent`
- Question: Should this happen during section generation instead?
## Proposed Architecture
### Option A: Remove All AI from `extractAndPrepareContent`
- Documents → `extractContent()` → Raw contentParts (text, tables, etc.)
- Images → Keep as image contentParts (no Vision AI extraction)
- Section generation → Handle images with Vision AI when needed
**Pros**:
- Consistent with pre-extracted JSON flow
- Single point of AI processing (section generation)
- Clear separation of concerns
**Cons**:
- Images won't have extracted text until section generation
- May need to handle images differently in section generation
### Option B: Keep Vision AI for Images Only
- Documents → `extractContent()` → Raw contentParts
- Images → Vision AI extraction → Text contentParts
- Section generation → Uses text contentParts (no additional AI extraction)
**Pros**:
- Images get text extracted early
- Section generation can use text directly
**Cons**:
- Still has AI extraction before structure generation
- Inconsistent with user's request
## Recommendation
**Follow Option A** - Remove all AI extraction from `extractAndPrepareContent`:
1. **Documents → ContentParts** (like pre-extracted JSON):
- Call `extractContent()` (NON-AI)
- Create contentParts with raw extracted content
- Images remain as image contentParts (no Vision AI)
2. **Section Generation**:
- Handle images with Vision AI when needed
- Aggregate all contentParts with AI
- Single point of AI processing
**Benefits**:
- Clear architecture: Documents = raw contentParts
- Consistent with pre-extracted JSON flow
- AI processing only where needed (section generation)
- Easier to understand and maintain
## Questions to Resolve
1. **Image handling**: How should images be processed during section generation?
- Option 1: Vision AI extraction happens automatically when image contentParts are used
- Option 2: Images are passed to AI with Vision models during section generation
- Option 3: Images remain as binary and are rendered directly (no text extraction)
2. **Text with extractionPrompt**: Should text contentParts with extractionPrompt be processed differently?
- Currently: AI processing in `extractAndPrepareContent`
- Proposed: Raw text → AI processing during section generation
3. **Performance**: Will deferring image extraction to section generation cause performance issues?
- Need to test with multiple images

View file

@ -0,0 +1,77 @@
# Architecture Changes Summary
## Problem Identified
The architecture had AI extraction happening in TWO places:
1. **`extractAndPrepareContent`**: Vision AI for images, AI processing for text with extractionPrompt
2. **Section generation**: AI aggregation of contentParts
This was:
- Redundant (double AI processing)
- Inconsistent (pre-extracted JSON had no AI, regular documents had AI)
- Against the desired architecture (documents should become contentParts like pre-extracted JSON)
## Solution Implemented
### 1. Removed AI Extraction from `extractAndPrepareContent`
**File**: `gateway/modules/services/serviceAi/subContentExtraction.py`
**Changes**:
- **Removed**: Vision AI extraction for images (lines 186-246)
- **Removed**: AI text processing with extractionPrompt (lines 260-334)
- **Updated**: Images with extract intent are now marked with `needsVisionExtraction=True` flag
- **Updated**: Regular documents mark images with `needsVisionExtraction=True` when extract intent is present
**Result**: Documents → contentParts (raw extraction only, no AI)
### 2. Added Vision AI Extraction in Section Generation
**File**: `gateway/modules/services/serviceAi/subStructureFilling.py`
**Changes**:
- **Added**: Vision AI extraction logic before aggregation (lines 553-610)
- **Added**: Vision AI extraction logic for single-part processing (lines 1074-1115)
- **Logic**:
- Checks if `part.typeGroup == "image"` AND `needsVisionExtraction == True` AND `intent == "extract"`
- Extracts text using Vision AI (`IMAGE_ANALYSE` operation)
- Replaces image part with text part for further processing
- Images with `contentFormat == "object"` (render intent) are rendered directly (no extraction)
**Result**: AI extraction happens ONLY during section generation
## Architecture Flow (After Changes)
### Document Input → ContentParts
1. **Regular documents**: `extractContent()` (NON-AI) → Raw contentParts
- Images with extract intent: `contentFormat="extracted"`, `needsVisionExtraction=True`
- Images with render intent: `contentFormat="object"` (rendered directly)
- Text: `contentFormat="extracted"` (raw text, no AI processing)
2. **Pre-extracted JSON**: Direct contentParts (no changes)
### Section Generation → AI Processing
1. **Images with extract intent**: Vision AI extraction → Text part → AI aggregation
2. **Images with render intent**: Rendered directly (no extraction)
3. **Text contentParts**: AI aggregation with extractionPrompt (if provided)
## Key Benefits
1. **Consistent Architecture**: Documents = raw contentParts (like pre-extracted JSON)
2. **Single Point of AI Processing**: Only in section generation
3. **Clear Separation**: Extraction vs Generation
4. **Intent-Based Logic**:
- `intent == "extract"` → Vision AI extraction during section generation
- `intent == "render"` → Direct rendering (no extraction)
- `contentFormat == "object"` → Embedded/referenced images (no extraction)
## Testing Checklist
- [ ] Regular documents create contentParts without AI extraction
- [ ] Images with extract intent are marked with `needsVisionExtraction=True`
- [ ] Images with render intent are marked with `contentFormat="object"`
- [ ] Section generation extracts images with Vision AI when needed
- [ ] Section generation renders images with object format directly
- [ ] Text contentParts are processed with AI during section generation
- [ ] Pre-extracted JSON flow still works correctly

View file

@ -0,0 +1,584 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Code Generation Path
Handles code generation with multi-file project support, dependency handling,
and proper cross-file references.
"""
import json
import logging
import time
import re
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
from modules.datamodels.datamodelExtraction import ContentPart
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
logger = logging.getLogger(__name__)
class CodeGenerationPath:
    """Code generation path.

    Turns a user request into one or more source files: the AI service first
    proposes a project structure, every file is then generated in dependency
    order, and the results are returned as documents.
    """

    def __init__(self, services):
        """Keep a reference to the shared service container.

        Args:
            services: Container exposing the ``workflow``, ``chat`` and ``ai``
                services used by the generation steps.
        """
        self.services = services

    async def generateCode(
        self,
        userPrompt: str,
        outputFormat: Optional[str] = None,
        contentParts: Optional[List[ContentPart]] = None,
        title: str = "Generated Code",
        parentOperationId: Optional[str] = None
    ) -> AiResponse:
        """
        Generate code files with multi-file project support.

        Args:
            userPrompt: The user's code generation request.
            outputFormat: Optional file extension hint ("py", "js", ...); also the
                fallback file type when a generated file does not declare one.
            contentParts: Optional content parts forwarded to structure generation.
            title: Title stored in the response metadata.
            parentOperationId: Optional parent operation for progress tracking.

        Returns: AiResponse with code files as documents

        Raises:
            Exception: Any error from the generation phases is logged, the progress
                log is finished as failed, and the error is re-raised.
        """
        # Create operation ID
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        codeOperationId = f"code_gen_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            codeOperationId,
            "Code Generation",
            "Code Generation",
            f"Format: {outputFormat or 'txt'}",
            parentOperationId=parentOperationId
        )

        try:
            # Detect language from prompt or outputFormat.
            # The detected project type is not consumed by any later phase, so it is discarded here.
            language, _ = self._detectLanguageAndProjectType(userPrompt, outputFormat)

            # Phase 1: Code structure generation (with looping)
            self.services.chat.progressLogUpdate(codeOperationId, 0.2, "Generating code structure")
            codeStructure = await self._generateCodeStructure(
                userPrompt=userPrompt,
                language=language,
                outputFormat=outputFormat,
                contentParts=contentParts
            )

            # Phase 2: Code content generation (with dependency handling)
            self.services.chat.progressLogUpdate(codeOperationId, 0.5, "Generating code content")
            codeFiles = await self._generateCodeContent(codeStructure, codeOperationId)

            # Phase 3: Code formatting & validation
            self.services.chat.progressLogUpdate(codeOperationId, 0.9, "Formatting code files")
            formattedFiles = await self._formatAndValidateCode(codeFiles)

            # Convert to unified document format
            documents = []
            for file in formattedFiles:
                mimeType = self._getMimeType(file.get("fileType", outputFormat or "txt"))
                content = file.get("content", "")
                if isinstance(content, str):
                    contentBytes = content.encode('utf-8')
                else:
                    contentBytes = content
                documents.append(DocumentData(
                    documentName=file.get("filename", "generated.txt"),
                    documentData=contentBytes,
                    mimeType=mimeType,
                    sourceJson=file
                ))

            metadata = AiResponseMetadata(
                title=title,
                operationType=OperationTypeEnum.DATA_GENERATE.value
            )

            self.services.chat.progressLogFinish(codeOperationId, True)

            return AiResponse(
                documents=documents,
                content=None,
                metadata=metadata
            )
        except Exception as e:
            logger.error(f"Error in code generation: {str(e)}")
            self.services.chat.progressLogFinish(codeOperationId, False)
            raise

    def _detectLanguageAndProjectType(self, userPrompt: str, outputFormat: Optional[str]) -> tuple:
        """Detect programming language and project type from prompt or format.

        An explicit ``outputFormat`` of "py", "js", "ts" or "html" wins; otherwise
        the prompt is scanned for language names and file extensions, defaulting
        to "python".

        Returns:
            ``(language, projectType)`` where ``projectType`` is "single_file" or
            "multi_file".
        """
        promptLower = userPrompt.lower()

        def mentionsFile(extensionPattern: str) -> bool:
            # The extension must end at a word boundary so that ".json" is not
            # read as ".js" and ".tsv" is not read as ".ts".
            return re.search(extensionPattern, promptLower) is not None

        # Detect language
        language = None
        if outputFormat:
            if outputFormat == "py":
                language = "python"
            elif outputFormat in ["js", "ts"]:
                language = outputFormat
            elif outputFormat == "html":
                language = "html"

        if not language:
            if "python" in promptLower or mentionsFile(r"\.py[iwx]?\b"):
                language = "python"
            elif "javascript" in promptLower or mentionsFile(r"\.(?:js|jsx|mjs|cjs)\b"):
                language = "javascript"
            elif "typescript" in promptLower or mentionsFile(r"\.(?:ts|tsx)\b"):
                language = "typescript"
            elif "html" in promptLower:
                language = "html"
            else:
                language = "python"  # Default

        # Detect project type
        projectType = "single_file"
        if "multi" in promptLower or "multiple files" in promptLower or "project" in promptLower:
            projectType = "multi_file"

        return language, projectType

    async def _generateCodeStructure(
        self,
        userPrompt: str,
        language: str,
        outputFormat: Optional[str],
        contentParts: Optional[List[ContentPart]]
    ) -> Dict[str, Any]:
        """Generate code structure using looping system.

        Args:
            userPrompt: The user's request, embedded verbatim in the prompt.
            language: Language name embedded in the prompt and the example JSON.
            outputFormat: Accepted for interface symmetry; not used in the prompt.
            contentParts: Optional content parts forwarded to the AI call.

        Returns:
            The parsed JSON structure returned by the AI service.

        Raises:
            json.JSONDecodeError: If the AI response is not valid JSON.
        """
        # Build structure generation prompt
        structurePrompt = f"""Analyze the following code generation request and create a project structure.
Request: {userPrompt}
Language: {language}
Create a JSON structure with:
1. metadata: {{"language": "{language}", "projectType": "single_file|multi_file", "projectName": "..."}}
2. files: Array of file structures, each with:
- id: Unique identifier
- filename: File name (e.g., "main.py", "utils.py")
- fileType: File extension (e.g., "py", "js")
- dependencies: List of file IDs this file depends on (for multi-file projects)
- imports: List of import statements (for dependency extraction)
- functions: Array of function signatures {{"name": "...", "signature": "..."}}
- classes: Array of class definitions {{"name": "...", "signature": "..."}}
For single-file projects, return one file. For multi-file projects, break down into logical modules.
Return ONLY valid JSON in this format:
{{
"metadata": {{
"language": "{language}",
"projectType": "single_file",
"projectName": "generated-project"
}},
"files": [
{{
"id": "file_1",
"filename": "main.py",
"fileType": "py",
"dependencies": [],
"imports": [],
"functions": [],
"classes": []
}}
]
}}
"""
        # Use generic looping system with code_structure use case
        options = AiCallOptions(
            operationType=OperationTypeEnum.DATA_GENERATE,
            resultFormat="json"
        )

        structureJson = await self.services.ai.callAiWithLooping(
            prompt=structurePrompt,
            options=options,
            useCaseId="code_structure",
            debugPrefix="code_structure_generation",
            contentParts=contentParts
        )

        return json.loads(structureJson)

    async def _generateCodeContent(
        self,
        codeStructure: Dict[str, Any],
        parentOperationId: str
    ) -> List[Dict[str, Any]]:
        """Generate code content for each file with dependency handling.

        Args:
            codeStructure: Structure with ``files`` and ``metadata`` keys.
            parentOperationId: Operation whose progress log is updated per file.

        Returns:
            Dependency manifest files (if any) followed by the generated code files.

        Raises:
            ValueError: If the structure contains no files.
        """
        files = codeStructure.get("files", [])
        metadata = codeStructure.get("metadata", {})

        if not files:
            raise ValueError("No files found in code structure")

        # Step 1: Resolve dependency order
        orderedFiles = self._resolveDependencyOrder(files)

        # Step 2: Generate dependency files first (requirements.txt, package.json, etc.)
        dependencyFiles = await self._generateDependencyFiles(metadata, orderedFiles)

        # Step 3: Generate code files in dependency order (not fully parallel)
        codeFiles = []
        generatedFileContext = {}  # Track what's been generated for cross-file references

        for idx, fileStructure in enumerate(orderedFiles):
            # Update progress (spread the 0.5-0.9 band evenly over the files)
            progress = 0.5 + (0.4 * (idx / len(orderedFiles)))
            self.services.chat.progressLogUpdate(
                parentOperationId,
                progress,
                f"Generating {fileStructure.get('filename', 'file')}"
            )

            # Provide context about already-generated files for proper imports
            fileContext = self._buildFileContext(generatedFileContext, fileStructure)

            # Generate this file with context
            fileContent = await self._generateSingleFileContent(
                fileStructure,
                fileContext=fileContext,
                allFilesStructure=orderedFiles,
                metadata=metadata
            )
            codeFiles.append(fileContent)

            # Update context with generated file info (for next files)
            generatedFileContext[fileStructure["id"]] = {
                "filename": fileContent.get("filename", fileStructure.get("filename")),
                "functions": fileContent.get("functions", []),
                "classes": fileContent.get("classes", []),
                "exports": fileContent.get("exports", [])
            }

        # Combine dependency files and code files
        return dependencyFiles + codeFiles

    def _resolveDependencyOrder(self, files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Resolve file generation order based on dependencies using topological sort.

        Dependencies that reference unknown file IDs are ignored. A circular
        dependency is logged and broken at the point where it is detected.

        Args:
            files: File structures, each with an ``id`` and optional ``dependencies``.

        Returns:
            The same file dicts, ordered so that dependencies come first.
        """
        # Build dependency graph
        fileMap = {f["id"]: f for f in files}
        # "dependencies" comes from AI-generated JSON and may be missing or null;
        # both mean "no dependencies".
        dependencies = {file["id"]: (file.get("dependencies") or []) for file in files}

        # Topological sort
        ordered = []
        visited = set()
        tempMark = set()

        def visit(fileId: str):
            if fileId in tempMark:
                # Circular dependency detected - break it
                logger.warning(f"Circular dependency detected involving {fileId}")
                return
            if fileId in visited:
                return
            tempMark.add(fileId)
            for depId in dependencies.get(fileId, []):
                if depId in fileMap:
                    visit(depId)
            tempMark.remove(fileId)
            visited.add(fileId)
            ordered.append(fileMap[fileId])

        for file in files:
            if file["id"] not in visited:
                visit(file["id"])

        return ordered

    async def _generateDependencyFiles(
        self,
        metadata: Dict[str, Any],
        files: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Generate dependency files (requirements.txt, package.json, etc.).

        Args:
            metadata: Project metadata; ``language`` selects the manifest type.
            files: File structures whose ``imports`` are scanned.

        Returns:
            A list with at most one manifest file dict; empty when the language
            has no manifest or no third-party imports were found.
        """
        # A null language in the AI-generated metadata is treated like a missing one.
        language = (metadata.get("language") or "").lower()
        dependencyFiles = []

        # Generate requirements.txt for Python
        if language in ["python", "py"]:
            requirementsContent = await self._generateRequirementsTxt(files)
            if requirementsContent:
                dependencyFiles.append({
                    "filename": "requirements.txt",
                    "content": requirementsContent,
                    "fileType": "txt",
                    "id": "requirements_txt"
                })
        # Generate package.json for JavaScript/TypeScript
        elif language in ["javascript", "typescript", "js", "ts"]:
            packageJson = await self._generatePackageJson(files, metadata)
            if packageJson:
                dependencyFiles.append({
                    "filename": "package.json",
                    "content": json.dumps(packageJson, indent=2),
                    "fileType": "json",
                    "id": "package_json"
                })

        return dependencyFiles

    async def _generateRequirementsTxt(
        self,
        files: List[Dict[str, Any]]
    ) -> Optional[str]:
        """Generate requirements.txt content from Python imports.

        Handles ``import a``, ``import a.b as c, d`` and ``from a.b import c``.
        Relative imports, standard-library modules (when the interpreter exposes
        ``sys.stdlib_module_names``) and modules that are files of this project
        are left out. Import names are emitted as-is; they are not mapped to
        distribution names.

        Args:
            files: File structures with ``imports`` (list of import statements)
                and optionally ``filename``.

        Returns:
            Newline-separated, sorted root package names, or None if there are none.
        """
        import sys  # local import: only this method needs it

        stdlibModules = getattr(sys, "stdlib_module_names", frozenset())

        # Modules provided by the project itself must not become requirements.
        localModules = set()
        for file in files:
            filename = file.get("filename")
            if isinstance(filename, str) and filename.strip():
                firstSegment = re.split(r"[\\/]", filename.strip())[0]
                localModules.add(firstSegment.rsplit(".", 1)[0])

        fromPattern = re.compile(r"^from\s+([A-Za-z_][\w.]*)\s+import\b")
        importPattern = re.compile(r"^import\s+(.+)$", re.DOTALL)

        pythonPackages = set()
        for file in files:
            imports = file.get("imports", [])
            if not isinstance(imports, list):
                continue
            for imp in imports:
                if not isinstance(imp, str):
                    continue
                statement = imp.strip()
                candidates = []
                fromMatch = fromPattern.match(statement)
                if fromMatch:
                    # "from package.module import ..." (relative imports do not match)
                    candidates.append(fromMatch.group(1))
                else:
                    importMatch = importPattern.match(statement)
                    if importMatch:
                        # "import a.b as c, d" -> ["a.b", "d"]
                        for clause in importMatch.group(1).split(","):
                            words = clause.split()
                            if words:
                                candidates.append(words[0])
                for dottedName in candidates:
                    rootPackage = dottedName.split(".")[0]  # Get root package
                    if (
                        rootPackage.isidentifier()
                        and rootPackage not in stdlibModules
                        and rootPackage not in localModules
                    ):
                        pythonPackages.add(rootPackage)

        if pythonPackages:
            return "\n".join(sorted(pythonPackages))
        return None

    async def _generatePackageJson(
        self,
        files: List[Dict[str, Any]],
        metadata: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """Generate package.json content from JavaScript/TypeScript imports.

        Recognises ``import ... from 'pkg'``, ``import 'pkg'``, ``import('pkg')``
        and ``require('pkg')``. Relative/absolute paths and ``node:`` builtins are
        skipped; sub-path imports are reduced to the package name
        (``lodash/fp`` -> ``lodash``, ``@scope/pkg/sub`` -> ``@scope/pkg``).

        Args:
            files: File structures with ``imports`` (list of import statements).
            metadata: Project metadata; ``projectName`` becomes the package name.

        Returns:
            A package.json dict with every dependency pinned to "*", or None if
            no external package was found.
        """
        specifierPatterns = (
            re.compile(r"\bfrom\s*['\"]([^'\"]+)['\"]"),                # import x from 'pkg'
            re.compile(r"\brequire\(\s*['\"]([^'\"]+)['\"]\s*\)"),      # require('pkg')
            re.compile(r"^import\s*['\"]([^'\"]+)['\"]"),               # import 'pkg'
            re.compile(r"\bimport\(\s*['\"]([^'\"]+)['\"]\s*\)"),       # import('pkg')
        )

        npmPackages = {}
        for file in files:
            imports = file.get("imports", [])
            if not isinstance(imports, list):
                continue
            for imp in imports:
                if not isinstance(imp, str):
                    continue
                statement = imp.strip()
                for pattern in specifierPatterns:
                    for specifier in pattern.findall(statement):
                        if specifier.startswith((".", "/", "node:")):
                            continue
                        segments = specifier.split("/")
                        # Scoped packages keep their "@scope/name" prefix.
                        if specifier.startswith("@"):
                            packageName = "/".join(segments[:2])
                        else:
                            packageName = segments[0]
                        if packageName:
                            npmPackages[packageName] = "*"

        if npmPackages:
            return {
                "name": metadata.get("projectName", "generated-project"),
                "version": "1.0.0",
                "dependencies": npmPackages
            }
        return None

    def _buildFileContext(
        self,
        generatedFileContext: Dict[str, Dict[str, Any]],
        currentFile: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Build context about other files for proper imports/references.

        Args:
            generatedFileContext: Info about already generated files, keyed by file ID.
            currentFile: The file about to be generated (currently not consulted).

        Returns:
            Dict with ``availableFiles`` (list), ``availableFunctions`` and
            ``availableClasses`` (name -> defining file lookups).
        """
        context = {
            "availableFiles": [],
            "availableFunctions": {},
            "availableClasses": {}
        }

        # Add info about already-generated files
        for fileId, fileInfo in generatedFileContext.items():
            context["availableFiles"].append({
                "id": fileId,
                "filename": fileInfo["filename"],
                "functions": fileInfo.get("functions", []),
                "classes": fileInfo.get("classes", []),
                "exports": fileInfo.get("exports", [])
            })

            # Build function/class maps for easy lookup
            for func in fileInfo.get("functions", []):
                funcName = func.get("name", "")
                if funcName:
                    context["availableFunctions"][funcName] = {
                        "file": fileInfo["filename"],
                        "signature": func.get("signature", "")
                    }
            for cls in fileInfo.get("classes", []):
                className = cls.get("name", "")
                if className:
                    context["availableClasses"][className] = {
                        "file": fileInfo["filename"]
                    }

        return context

    async def _generateSingleFileContent(
        self,
        fileStructure: Dict[str, Any],
        fileContext: Dict[str, Any] = None,
        allFilesStructure: List[Dict[str, Any]] = None,
        metadata: Dict[str, Any] = None
    ) -> Dict[str, Any]:
        """Generate code content for a single file with context about other files.

        Args:
            fileStructure: Structure entry of the file to generate.
            fileContext: Context from ``_buildFileContext`` listing earlier files.
            allFilesStructure: All file structures (currently not used in the prompt).
            metadata: Project metadata; ``language`` is embedded in the prompt.

        Returns:
            Dict with ``filename``, ``content``, ``fileType``, ``functions``,
            ``classes`` and ``id`` for the generated file.

        Raises:
            json.JSONDecodeError: If the AI response is not valid JSON.
        """
        # Build prompt with context about other files for proper imports
        filename = fileStructure.get("filename", "generated.py")
        fileType = fileStructure.get("fileType", "py")
        dependencies = fileStructure.get("dependencies", [])
        functions = fileStructure.get("functions", [])
        classes = fileStructure.get("classes", [])

        contextInfo = ""
        if fileContext and fileContext.get("availableFiles"):
            contextInfo = "\n\nAvailable files and their exports:\n"
            for fileInfo in fileContext["availableFiles"]:
                contextInfo += f"- {fileInfo['filename']}: "
                funcs = [f.get("name", "") for f in fileInfo.get("functions", [])]
                cls = [c.get("name", "") for c in fileInfo.get("classes", [])]
                exports = []
                if funcs:
                    exports.extend(funcs)
                if cls:
                    exports.extend(cls)
                if exports:
                    contextInfo += ", ".join(exports)
                contextInfo += "\n"

        contentPrompt = f"""Generate complete, executable code for the file: {filename}
File Type: {fileType}
Language: {metadata.get('language', 'python') if metadata else 'python'}
Required functions:
{json.dumps(functions, indent=2) if functions else 'None specified'}
Required classes:
{json.dumps(classes, indent=2) if classes else 'None specified'}
Dependencies on other files: {', '.join(dependencies) if dependencies else 'None'}
{contextInfo}
Generate complete, production-ready code with:
1. Proper imports (including imports from other files in the project if dependencies exist)
2. All required functions and classes
3. Error handling
4. Documentation/docstrings
5. Type hints where appropriate
Return ONLY valid JSON in this format:
{{
"files": [
{{
"filename": "{filename}",
"content": "// Complete code here",
"functions": {json.dumps(functions, indent=2) if functions else '[]'},
"classes": {json.dumps(classes, indent=2) if classes else '[]'}
}}
]
}}
"""
        # Use generic looping system with code_content use case
        options = AiCallOptions(
            operationType=OperationTypeEnum.DATA_GENERATE,
            resultFormat="json"
        )

        contentJson = await self.services.ai.callAiWithLooping(
            prompt=contentPrompt,
            options=options,
            useCaseId="code_content",
            debugPrefix=f"code_content_{fileStructure.get('id', 'file')}",
        )

        parsed = json.loads(contentJson)

        # Extract file content and metadata
        files = parsed.get("files", [])
        if files:
            fileData = files[0]
            return {
                "filename": fileData.get("filename", filename),
                "content": fileData.get("content", ""),
                "fileType": fileType,
                "functions": fileData.get("functions", functions),
                "classes": fileData.get("classes", classes),
                "id": fileStructure.get("id")
            }

        # Fallback if structure is different
        return {
            "filename": filename,
            "content": parsed.get("content", ""),
            "fileType": fileType,
            "functions": functions,
            "classes": classes,
            "id": fileStructure.get("id")
        }

    async def _formatAndValidateCode(self, codeFiles: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Format and validate generated code files.

        Currently only removes a markdown code fence that wraps the whole content
        and trims surrounding whitespace. Fences inside the content (for example
        in a docstring or a Markdown file) are preserved. The file dicts are
        updated in place and returned in the same order.
        """
        # TODO: Add code formatting (black, prettier, etc.) and validation
        formatted = []
        for file in codeFiles:
            content = file.get("content", "")
            # Basic cleanup: remove a wrapping markdown code fence if present.
            # Anchored at the very start/end of the content (no MULTILINE) so that
            # interior fence lines are left untouched.
            if isinstance(content, str):
                content = re.sub(r'\A\s*```[^\n]*\n', '', content)
                content = re.sub(r'\n```\s*\Z', '', content)
                file["content"] = content.strip()
            formatted.append(file)
        return formatted

    def _getMimeType(self, fileType: str) -> str:
        """Get MIME type for file type.

        The lookup ignores case and a leading dot; unknown or empty types map to
        "text/plain".
        """
        mimeTypes = {
            "py": "text/x-python",
            "js": "application/javascript",
            "ts": "application/typescript",
            "html": "text/html",
            "css": "text/css",
            "json": "application/json",
            "txt": "text/plain",
            "md": "text/markdown",
            "java": "text/x-java-source",
            "cpp": "text/x-c++src",
            "c": "text/x-csrc"
        }
        # File types originate from AI-generated JSON: tolerate null and ".py"-style values.
        normalized = (fileType or "").lower().lstrip(".")
        return mimeTypes.get(normalized, "text/plain")

View file

@ -0,0 +1,207 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Document Generation Path
Handles document generation using existing chapter/section model.
"""
import json
import logging
import time
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
from modules.datamodels.datamodelDocument import RenderedDocument
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
class DocumentGenerationPath:
    """Document generation path (existing functionality, refactored).

    Runs the document pipeline end to end: resolve input documents, clarify
    intents, prepare content parts, generate and fill the structure, render it,
    and wrap the rendered files in an AiResponse.
    """

    def __init__(self, services):
        """Keep a reference to the shared service container.

        Args:
            services: Container exposing the ``workflow``, ``chat``, ``ai`` and
                ``utils`` services used by ``generateDocument``.
        """
        self.services = services

    async def generateDocument(
        self,
        userPrompt: str,
        documentList: Optional[Any] = None,  # DocumentReferenceList
        documentIntents: Optional[List[DocumentIntent]] = None,
        contentParts: Optional[List[ContentPart]] = None,
        outputFormat: str = "txt",
        title: Optional[str] = None,
        parentOperationId: Optional[str] = None
    ) -> AiResponse:
        """
        Generate document using existing chapter/section model.

        Args:
            userPrompt: The user's request; passed to intent clarification,
                structure generation, structure filling and rendering.
            documentList: Optional document reference list resolved through the
                chat service.
            documentIntents: Optional intents; clarified through the AI service
                when none are given and documents exist.
            contentParts: Optional caller-provided content parts; appended after
                the parts prepared from the documents.
            outputFormat: Target format passed to structure generation and rendering.
            title: Optional title; rendering falls back to "Generated Document".
            parentOperationId: Optional parent operation for progress tracking.

        Returns: AiResponse with documents list

        Raises:
            ValueError: If no rendered document could be converted to DocumentData.
            Exception: Any other error is logged, the progress log is finished as
                failed, and the error is re-raised.
        """
        # Create operation ID
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        docOperationId = f"doc_gen_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            docOperationId,
            "Document Generation",
            "Document Generation",
            f"Format: {outputFormat}",
            parentOperationId=parentOperationId
        )

        try:
            # Step 5A: Clarify document intents
            documents = []
            if documentList:
                documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)

            # Filter: drop original documents when pre-extracted JSONs already exist
            # (avoids duplicates - the pre-extracted JSONs already contain the content parts)
            # Step 1: collect the IDs of all original documents covered by a pre-extracted JSON
            originalDocIdsCoveredByPreExtracted = set()
            for doc in documents:
                preExtracted = self.services.ai.intentAnalyzer.resolvePreExtractedDocument(doc)
                if preExtracted:
                    originalDocId = preExtracted["originalDocument"]["id"]
                    originalDocIdsCoveredByPreExtracted.add(originalDocId)
                    logger.debug(f"Found pre-extracted JSON {doc.id} covering original document {originalDocId}")

            # Step 2: filter documents - remove originals already covered by a pre-extracted JSON
            filteredDocuments = []
            for doc in documents:
                preExtracted = self.services.ai.intentAnalyzer.resolvePreExtractedDocument(doc)
                if preExtracted:
                    # Keep the pre-extracted JSON
                    filteredDocuments.append(doc)
                elif doc.id in originalDocIdsCoveredByPreExtracted:
                    # Original document already covered by a pre-extracted JSON - drop it
                    logger.info(f"Skipping original document {doc.id} ({doc.fileName}) - already covered by pre-extracted JSON")
                else:
                    # Regular document without pre-extracted JSON - keep it
                    filteredDocuments.append(doc)
            documents = filteredDocuments

            # NOTE(review): presumably aborts the run when the workflow was stopped - confirm in stateTools
            checkWorkflowStopped(self.services)

            if not documentIntents and documents:
                documentIntents = await self.services.ai.clarifyDocumentIntents(
                    documents,
                    userPrompt,
                    {"outputFormat": outputFormat},
                    docOperationId
                )

            checkWorkflowStopped(self.services)

            # Step 5B: Extract and prepare content
            if documents:
                preparedContentParts = await self.services.ai.extractAndPrepareContent(
                    documents,
                    documentIntents or [],
                    docOperationId
                )

                # Merge with caller-provided contentParts (if any)
                if contentParts:
                    # Check for pre-extracted content
                    for part in contentParts:
                        if part.metadata.get("skipExtraction", False):
                            # Already extracted - use as-is, make sure the metadata is complete
                            part.metadata.setdefault("contentFormat", "extracted")
                            part.metadata.setdefault("isPreExtracted", True)
                    preparedContentParts.extend(contentParts)

                contentParts = preparedContentParts

            # Step 5B.5: Documents are converted to contentParts (like pre-processed JSON files)
            # No AI extraction here - AI extraction happens during section generation
            if contentParts:
                logger.info(f"Using {len(contentParts)} content parts for generation (no AI extraction at this stage)")

            checkWorkflowStopped(self.services)

            # Step 5C: Generate structure
            structure = await self.services.ai.generateStructure(
                userPrompt,
                contentParts or [],
                outputFormat,
                docOperationId
            )

            checkWorkflowStopped(self.services)

            # Step 5D: Fill structure
            # Language will be extracted from services (user intention analysis) in fillStructure
            filledStructure = await self.services.ai.fillStructure(
                structure,
                contentParts or [],
                userPrompt,
                docOperationId
            )

            checkWorkflowStopped(self.services)

            # Step 5E: Render result
            # Each document is rendered individually and may yield 1..n files (e.g. HTML + images)
            # Language is already validated in structure (State 3) and preserved in filled structure (State 4)
            # Per-document language will be extracted in renderReport() from filledStructure
            # Use validated currentUserLanguage as global fallback (always valid infrastructure)
            language = self.services.currentUserLanguage if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage else "en"

            renderedDocuments = await self.services.ai.renderResult(
                filledStructure,
                outputFormat,
                language,  # Global fallback (per-document language extracted from structure in renderReport)
                title or "Generated Document",
                userPrompt,
                docOperationId
            )

            # Build response: convert every rendered document to DocumentData
            documentDataList = []
            for renderedDoc in renderedDocuments:
                try:
                    # Create DocumentData for each rendered document
                    docDataObj = DocumentData(
                        documentName=renderedDoc.filename,
                        documentData=renderedDoc.documentData,
                        mimeType=renderedDoc.mimeType,
                        sourceJson=filledStructure if len(documentDataList) == 0 else None  # Only for the first document
                    )
                    documentDataList.append(docDataObj)
                    logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})")
                except Exception as e:
                    # Best effort: a document that cannot be converted is skipped, not fatal
                    logger.warning(f"Error creating document {renderedDoc.filename}: {str(e)}")

            if not documentDataList:
                raise ValueError("No documents were rendered")

            metadata = AiResponseMetadata(
                title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"),
                operationType=OperationTypeEnum.DATA_GENERATE.value
            )

            # Debug log (harmonized)
            self.services.utils.writeDebugFile(
                json.dumps(filledStructure, indent=2, ensure_ascii=False, default=str),
                "document_generation_response"
            )

            self.services.chat.progressLogFinish(docOperationId, True)

            return AiResponse(
                content=json.dumps(filledStructure),
                metadata=metadata,
                documents=documentDataList
            )
        except Exception as e:
            logger.error(f"Error in document generation: {str(e)}")
            self.services.chat.progressLogFinish(docOperationId, False)
            raise

View file

@ -0,0 +1,132 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Image Generation Path
Handles image generation with support for single and batch generation.
"""
import logging
import time
from typing import List, Optional
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallRequest
logger = logging.getLogger(__name__)
class ImageGenerationPath:
"""Image generation path."""
def __init__(self, services):
    """Keep a reference to the shared service container.

    Args:
        services: Container whose ``workflow``, ``chat`` and ``ai`` members
            are used by ``generateImages``.
    """
    self.services = services
async def generateImages(
self,
userPrompt: str,
count: int = 1,
style: Optional[str] = None,
format: str = "png",
title: Optional[str] = None,
parentOperationId: Optional[str] = None
) -> AiResponse:
"""
Generate image files.
Returns: AiResponse with image files as documents
"""
# Create operation ID
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
imageOperationId = f"image_gen_{workflowId}_{int(time.time())}"
# Start progress tracking
self.services.chat.progressLogStart(
imageOperationId,
"Image Generation",
"Image Generation",
f"Format: {format}",
parentOperationId=parentOperationId
)
try:
self.services.chat.progressLogUpdate(imageOperationId, 0.4, "Calling AI for image generation")
# Build prompt with style if provided
imagePrompt = userPrompt
if style:
imagePrompt = f"{userPrompt}\n\nStyle: {style}"
# Use IMAGE_GENERATE operation
options = AiCallOptions(
operationType=OperationTypeEnum.IMAGE_GENERATE,
resultFormat=format
)
request = AiCallRequest(
prompt=imagePrompt,
context="",
options=options
)
response = await self.services.ai.callAi(request)
if not response.content:
errorMsg = f"No image data returned: {response.content}"
logger.error(f"Error in AI image generation: {errorMsg}")
self.services.chat.progressLogFinish(imageOperationId, False)
raise ValueError(errorMsg)
# Handle response content (could be base64 string or bytes)
imageData = response.content
if isinstance(imageData, str):
# Assume base64 encoded string
import base64
try:
imageData = base64.b64decode(imageData)
except Exception:
# If not base64, try encoding as bytes
imageData = imageData.encode('utf-8')
elif not isinstance(imageData, bytes):
imageData = bytes(imageData)
# Create document
imageDoc = DocumentData(
documentName=f"generated_image.{format}",
documentData=imageData,
mimeType=f"image/{format}"
)
metadata = AiResponseMetadata(
title=title or "Generated Image",
operationType=OperationTypeEnum.IMAGE_GENERATE.value
)
self.services.chat.storeWorkflowStat(
self.services.workflow,
response,
"ai.generate.image"
)
self.services.chat.progressLogUpdate(imageOperationId, 0.9, "Image generated")
self.services.chat.progressLogFinish(imageOperationId, True)
# Create content string describing the image generation
import json
contentJson = json.dumps({
"type": "image",
"format": format,
"prompt": userPrompt,
"filename": imageDoc.documentName
}, ensure_ascii=False)
return AiResponse(
content=contentJson, # JSON string describing the image generation
metadata=metadata,
documents=[imageDoc]
)
except Exception as e:
logger.error(f"Error in image generation: {str(e)}")
self.services.chat.progressLogFinish(imageOperationId, False)
raise

View file

@ -139,6 +139,32 @@ class RendererRegistry:
}
return info
def getOutputStyle(self, outputFormat: str) -> Optional[str]:
    """
    Get the output style classification for a given format.

    Args:
        outputFormat: Format name or alias; case and surrounding whitespace
            are ignored.

    Returns:
        The style reported by the matching renderer ('code', 'document',
        'image', or other, e.g. 'video' for future use), or None when no
        renderer is registered for the format or the renderer fails.
    """
    if not self._discovered:
        self.discoverRenderers()

    # A non-string format can never match a registered renderer
    if not isinstance(outputFormat, str):
        logger.warning(f"No renderer found for format: {outputFormat}, cannot determine output style")
        return None

    # Normalize format name
    formatName = outputFormat.lower().strip()

    # Check for aliases first
    if formatName in self._format_mappings:
        formatName = self._format_mappings[formatName]

    # Explicit lookup check, so errors raised inside a renderer are not
    # misreported as "no renderer found"
    rendererClass = self._renderers.get(formatName)
    if rendererClass is None:
        logger.warning(f"No renderer found for format: {outputFormat}, cannot determine output style")
        return None

    # All renderers share the same getOutputStyle signature
    try:
        return rendererClass.getOutputStyle(formatName)
    except Exception as e:
        logger.warning(f"Error getting output style for {outputFormat}: {str(e)}")
        return None
# Global registry instance
_registry = RendererRegistry()
@ -154,3 +180,7 @@ def getSupportedFormats() -> List[str]:
def getRendererInfo() -> Dict[str, Dict[str, str]]:
"""Get information about all registered renderers."""
return _registry.getRendererInfo()
def getOutputStyle(outputFormat: str) -> Optional[str]:
    """Look up the output style classification of a format via the global registry."""
    outputStyle = _registry.getOutputStyle(outputFormat)
    return outputStyle

View file

@ -5,7 +5,7 @@ Base renderer class for all format renderers.
"""
from abc import ABC, abstractmethod
from typing import Dict, Any, List, Tuple
from typing import Dict, Any, List, Tuple, Optional
from modules.datamodels.datamodelJson import supportedSectionTypes
from modules.datamodels.datamodelDocument import RenderedDocument
import json
@ -50,6 +50,19 @@ class BaseRenderer(ABC):
"""
return 0
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
    """
    Classify the kind of output this renderer produces.

    Subclasses override this to report their own style.

    Args:
        formatName: Optional format name (e.g., 'txt', 'js', 'csv'), for
            renderers that serve several formats with different styles
            (e.g., RendererText). Not used by this default implementation.

    Returns:
        'code', 'document', 'image', or other (e.g., 'video' for future use).
        The default is 'document'.
    """
    defaultStyle = 'document'
    return defaultStyle
@abstractmethod
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""

View file

@ -6,7 +6,7 @@ CSV renderer for report generation.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
from typing import Dict, Any, List, Optional
class RendererCsv(BaseRenderer):
"""Renders content to CSV format with format-specific extraction."""
@ -26,6 +26,11 @@ class RendererCsv(BaseRenderer):
"""Return priority for CSV renderer."""
return 70
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
    """
    Classify CSV output as 'code'.

    CSV requires a specific structure (header, then data rows).
    formatName is not used by this renderer.
    """
    outputStyle = 'code'
    return outputStyle
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to CSV format."""
try:
@ -71,8 +76,9 @@ class RendererCsv(BaseRenderer):
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
# Use title from JSON metadata if available, otherwise use provided title
documentTitle = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
documentTitle = title if title else metadata.get("title", "Generated Document")
# Generate CSV content
csvRows = []

View file

@ -6,7 +6,7 @@ DOCX renderer for report generation using python-docx.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
from typing import Dict, Any, List, Optional
import io
import base64
import re
@ -39,6 +39,11 @@ class RendererDocx(BaseRenderer):
"""Return priority for DOCX renderer."""
return 115
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
    """
    Classify DOCX output as 'document'.

    Word documents are formatted documents. formatName is not used by this renderer.
    """
    outputStyle = 'document'
    return outputStyle
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
@ -121,8 +126,9 @@ class RendererDocx(BaseRenderer):
sections = self._extractSections(json_content)
metadata = self._extractMetadata(json_content)
# Use title from JSON metadata if available, otherwise use provided title
document_title = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
document_title = title if title else metadata.get("title", "Generated Document")
# Add document title using Title style
if document_title:
@ -655,10 +661,12 @@ class RendererDocx(BaseRenderer):
content = image_data.get("content", {})
base64_data = ""
alt_text = "Image"
caption = ""
if isinstance(content, dict):
base64_data = content.get("base64Data", "")
alt_text = content.get("altText", "Image")
caption = content.get("caption", "")
elif isinstance(content, str):
# Content might be base64 string directly (shouldn't happen, but handle it)
self.logger.warning("Image content is a string, not a dict. This should not happen.")
@ -669,6 +677,8 @@ class RendererDocx(BaseRenderer):
base64_data = image_data.get("base64Data", "")
if not alt_text or alt_text == "Image":
alt_text = image_data.get("altText", "Image")
if not caption:
caption = image_data.get("caption", "")
# CRITICAL: Ensure we don't render base64 data as text
# If base64_data looks like it might be rendered elsewhere, skip it
@ -712,8 +722,26 @@ class RendererDocx(BaseRenderer):
image_stream.seek(0)
doc.add_picture(image_stream, width=Inches(6.0))
if alt_text and alt_text != "Image":
caption_para = doc.add_paragraph(f"Figure: {alt_text}")
# Use caption from section if available, otherwise use alt_text
if caption:
caption_text = caption
elif alt_text and alt_text != "Image":
# Only use alt_text if it doesn't look like a usageHint
if "Render as visual element:" in alt_text:
# Extract filename from usageHint if possible
parts = alt_text.split("Render as visual element:")
if len(parts) > 1:
filename = parts[1].strip()
caption_text = f"Figure: {filename}"
else:
caption_text = alt_text
else:
caption_text = f"Figure: {alt_text}"
else:
caption_text = None
if caption_text:
caption_para = doc.add_paragraph(caption_text)
caption_para.runs[0].italic = True
except Exception as embedError:
# Image decoding or embedding failed

View file

@ -6,7 +6,7 @@ HTML renderer for report generation.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
from typing import Dict, Any, List, Optional
class RendererHtml(BaseRenderer):
"""Renders content to HTML format with format-specific extraction."""
@ -26,6 +26,11 @@ class RendererHtml(BaseRenderer):
"""Return priority for HTML renderer."""
return 100
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
    """
    Classify HTML output as 'document'.

    HTML web pages are rendered documents. formatName is not used by this renderer.
    """
    outputStyle = 'document'
    return outputStyle
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""
Render HTML document with images as separate files.
@ -107,8 +112,9 @@ class RendererHtml(BaseRenderer):
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
# Use title from JSON metadata if available, otherwise use provided title
documentTitle = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
documentTitle = title if title else metadata.get("title", "Generated Document")
# Build HTML document
htmlParts = []

View file

@ -6,7 +6,7 @@ Image renderer for report generation using AI image generation.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
from typing import Dict, Any, List, Optional
import logging
import base64
@ -30,6 +30,11 @@ class RendererImage(BaseRenderer):
"""Return priority for image renderer."""
return 90
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
    """
    Classify image output as 'image'.

    Images are visual media. formatName is not used by this renderer.
    """
    outputStyle = 'image'
    return outputStyle
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to image format using AI image generation."""
try:
@ -86,8 +91,9 @@ class RendererImage(BaseRenderer):
# Extract metadata from standardized schema
metadata = self._extractMetadata(extractedContent)
# Use title from JSON metadata if available, otherwise use provided title
documentTitle = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
documentTitle = title if title else metadata.get("title", "Generated Document")
# Create AI prompt for image generation
imagePrompt = await self._createImageGeneratePrompt(extractedContent, documentTitle, userPrompt, aiService)

View file

@ -6,7 +6,7 @@ JSON renderer for report generation.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
from typing import Dict, Any, List, Optional
import json
class RendererJson(BaseRenderer):
@ -27,6 +27,11 @@ class RendererJson(BaseRenderer):
"""Return priority for JSON renderer."""
return 80
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
    """
    Classify JSON output as 'code'.

    JSON is a structured data format. formatName is not used by this renderer.
    """
    outputStyle = 'code'
    return outputStyle
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to JSON format."""
try:

View file

@ -6,7 +6,7 @@ Markdown renderer for report generation.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
from typing import Dict, Any, List, Optional
class RendererMarkdown(BaseRenderer):
"""Renders content to Markdown format with format-specific extraction."""
@ -26,6 +26,11 @@ class RendererMarkdown(BaseRenderer):
"""Return priority for markdown renderer."""
return 95
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
    """
    Classify Markdown output as 'document'.

    Markdown documents are formatted documents. formatName is not used by this renderer.
    """
    outputStyle = 'document'
    return outputStyle
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to Markdown format."""
try:
@ -82,8 +87,9 @@ class RendererMarkdown(BaseRenderer):
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
# Use title from JSON metadata if available, otherwise use provided title
documentTitle = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
documentTitle = title if title else metadata.get("title", "Generated Document")
# Build markdown content
markdownParts = []

View file

@ -6,7 +6,7 @@ PDF renderer for report generation using reportlab.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
from typing import Dict, Any, List, Optional
import io
import base64
@ -39,6 +39,11 @@ class RendererPdf(BaseRenderer):
"""Return priority for PDF renderer."""
return 120
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
    """
    Classify PDF output as 'document'.

    PDF documents are formatted documents. formatName is not used by this renderer.
    """
    outputStyle = 'document'
    return outputStyle
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
try:
@ -110,8 +115,9 @@ class RendererPdf(BaseRenderer):
sections = self._extractSections(json_content)
metadata = self._extractMetadata(json_content)
# Use title from JSON metadata if available, otherwise use provided title
document_title = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
document_title = title if title else metadata.get("title", "Generated Document")
# Make title shorter to prevent wrapping/overlapping
if len(document_title) > 40:
@ -895,11 +901,21 @@ class RendererPdf(BaseRenderer):
captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666"))
elements.append(Paragraph(f"<i>{caption}</i>", captionStyle))
elif alt_text and alt_text != "Image":
# Use alt text as caption if no caption provided
# Use alt text as caption if no caption provided, but avoid usageHint format
if "Render as visual element:" in alt_text:
# Extract filename from usageHint if possible
parts = alt_text.split("Render as visual element:")
if len(parts) > 1:
filename = parts[1].strip()
caption_text = f"Figure: {filename}"
else:
caption_text = alt_text
else:
caption_text = f"Figure: {alt_text}"
captionStyle = self._createNormalStyle(styles)
captionStyle.fontSize = 10
captionStyle.textColor = self._hexToColor(styles.get("paragraph", {}).get("color", "#666666"))
elements.append(Paragraph(f"<i>Figure: {alt_text}</i>", captionStyle))
elements.append(Paragraph(f"<i>{caption_text}</i>", captionStyle))
return elements

View file

@ -26,6 +26,21 @@ class RendererPptx(BaseRenderer):
"""Get list of supported output formats."""
return ["pptx", "ppt"]
@classmethod
def getFormatAliases(cls) -> List[str]:
    """Return the format aliases of the PowerPoint renderer (none are defined)."""
    aliases: List[str] = []
    return aliases
@classmethod
def getPriority(cls) -> int:
    """Return the priority value of the PowerPoint renderer (105)."""
    priority = 105
    return priority
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
    """
    Classify PowerPoint output as 'document'.

    PowerPoint presentations are formatted documents. formatName is not used by this renderer.
    """
    # NOTE(review): this signature needs `Optional` from typing; no import change
    # for this module is visible in this hunk - confirm the module imports it.
    outputStyle = 'document'
    return outputStyle
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""
Render content as PowerPoint presentation from JSON data.
@ -601,8 +616,9 @@ JSON ONLY. NO OTHER TEXT."""
sections = self._extractSections(json_content)
metadata = self._extractMetadata(json_content)
# Use title from JSON metadata if available, otherwise use provided title
document_title = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
document_title = title if title else metadata.get("title", "Generated Document")
# Create title slide
slides.append({

View file

@ -6,7 +6,7 @@ Text renderer for report generation.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
from typing import Dict, Any, List, Optional
class RendererText(BaseRenderer):
"""Renders content to plain text format with format-specific extraction."""
@ -48,6 +48,21 @@ class RendererText(BaseRenderer):
"""Return priority for text renderer."""
return 90
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
    """
    Return output style classification based on format.

    Args:
        formatName: Format name being rendered; case and surrounding
            whitespace are ignored.

    Returns:
        'document' for txt/text/plain (unstructured text); 'code' for every
        other format, and also when no format name is given.
    """
    # Plain text formats are document style
    if formatName and formatName.strip().lower() in ('txt', 'text', 'plain'):
        return 'document'
    # All other formats handled by RendererText are code style
    return 'code'
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to plain text format."""
try:
@ -104,8 +119,9 @@ class RendererText(BaseRenderer):
sections = self._extractSections(jsonContent)
metadata = self._extractMetadata(jsonContent)
# Use title from JSON metadata if available, otherwise use provided title
documentTitle = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
documentTitle = title if title else metadata.get("title", "Generated Document")
# Build text content
textParts = []

View file

@ -6,10 +6,15 @@ Excel renderer for report generation using openpyxl.
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
from typing import Dict, Any, List, Optional
import io
import base64
from datetime import datetime, UTC
from datetime import datetime, UTC, date
try:
from dateutil import parser as date_parser
DATEUTIL_AVAILABLE = True
except ImportError:
DATEUTIL_AVAILABLE = False
try:
from openpyxl import Workbook
@ -38,6 +43,11 @@ class RendererXlsx(BaseRenderer):
"""Return priority for Excel renderer."""
return 110
@classmethod
def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
    """
    Classify Excel output as 'document'.

    Excel spreadsheets are formatted documents. formatName is not used by this renderer.
    """
    outputStyle = 'document'
    return outputStyle
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
try:
@ -285,8 +295,9 @@ class RendererXlsx(BaseRenderer):
# Extract metadata from standardized schema
metadata = self._extractMetadata(jsonContent)
# Use title from JSON metadata if available, otherwise use provided title
document_title = metadata.get("title", title)
# Use provided title (which comes from documents[].title) as primary source
# Fallback to metadata.title only if title parameter is empty
document_title = title if title else metadata.get("title", "Generated Document")
# Create workbook
wb = Workbook()
@ -684,7 +695,12 @@ class RendererXlsx(BaseRenderer):
# If no level 1 headings found, use document title
if not sheetNames:
documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
# Use documents[].title as primary source, fallback to metadata.title
documents = jsonContent.get("documents", [])
if documents and isinstance(documents[0], dict) and documents[0].get("title"):
documentTitle = documents[0].get("title")
else:
documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
sheetNames.append(self._sanitizeSheetName(documentTitle))
return sheetNames
@ -787,6 +803,7 @@ class RendererXlsx(BaseRenderer):
# Add rows - handle both array format and cells object format
cell_style = styles.get("table_cell", {})
header_count = len(headers)
for row_idx, row_data in enumerate(rows, 4):
# Handle different row formats
if isinstance(row_data, list):
@ -799,6 +816,14 @@ class RendererXlsx(BaseRenderer):
# Unknown format, skip
continue
# Validate row column count matches headers - pad or truncate if needed
if len(cell_values) < header_count:
# Pad with empty strings if row has fewer columns
cell_values.extend([""] * (header_count - len(cell_values)))
elif len(cell_values) > header_count:
# Truncate if row has more columns than headers
cell_values = cell_values[:header_count]
for col_idx, cell_value in enumerate(cell_values, 1):
# Extract value if it's a dict with "value" key
if isinstance(cell_value, dict):
@ -820,8 +845,12 @@ class RendererXlsx(BaseRenderer):
def _populateMainSheet(self, sheet, jsonContent: Dict[str, Any], styles: Dict[str, Any]):
"""Populate the main sheet with document overview and all content."""
try:
# Document title
documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report")
# Document title - use documents[].title as primary source, fallback to metadata.title
documents = jsonContent.get("documents", [])
if documents and isinstance(documents[0], dict) and documents[0].get("title"):
documentTitle = documents[0].get("title")
else:
documentTitle = jsonContent.get("metadata", {}).get("title", "Generated Report")
sheet['A1'] = documentTitle
# Safety check for title style
@ -976,8 +1005,49 @@ class RendererXlsx(BaseRenderer):
self.logger.warning(f"Could not add section to sheet: {str(e)}")
return startRow + 1
def _parseDateString(self, text: str) -> Any:
"""Try to parse a string as a date/datetime. Returns datetime object if successful, None otherwise."""
if not text or not isinstance(text, str):
return None
text = text.strip()
if not text:
return None
# Common date formats to try (in order of likelihood)
date_formats = [
"%Y-%m-%d", # 2025-01-01
"%d.%m.%Y", # 01.01.2025
"%d/%m/%Y", # 01/01/2025
"%m/%d/%Y", # 01/01/2025 (US format)
"%Y-%m-%d %H:%M:%S", # 2025-01-01 12:00:00
"%d.%m.%Y %H:%M:%S", # 01.01.2025 12:00:00
"%d/%m/%Y %H:%M:%S", # 01/01/2025 12:00:00
"%Y-%m-%d %H:%M", # 2025-01-01 12:00
"%d.%m.%Y %H:%M", # 01.01.2025 12:00
"%d/%m/%Y %H:%M", # 01/01/2025 12:00
]
# Try parsing with common formats first
for date_format in date_formats:
try:
parsed_date = datetime.strptime(text, date_format)
return parsed_date
except ValueError:
continue
# If dateutil is available, use it for more flexible parsing
if DATEUTIL_AVAILABLE:
try:
parsed_date = date_parser.parse(text, dayfirst=True, yearfirst=False)
return parsed_date
except (ValueError, TypeError):
pass
return None
def _sanitizeCellValue(self, value: Any) -> Any:
"""Sanitize cell value: remove markdown, convert to string, handle None, limit length."""
"""Sanitize cell value: remove markdown, convert to string, handle None, limit length. Preserve numbers as numbers."""
if value is None:
return ""
if isinstance(value, dict):
@ -994,6 +1064,45 @@ class RendererXlsx(BaseRenderer):
# Remove other markdown
text = text.replace("__", "").replace("_", "")
text = text.strip()
# Try to convert numeric strings to actual numbers
# This ensures Excel treats them as numbers, not strings
if text:
# Clean text for number conversion: remove common formatting characters
# but preserve the original for fallback
cleaned_for_number = text.replace("'", "").replace(",", "").replace(" ", "").strip()
# Only attempt conversion if cleaned text looks like a number
# (starts with digit, +, -, or . followed by digit)
if cleaned_for_number and (cleaned_for_number[0].isdigit() or cleaned_for_number[0] in '+-.'):
# Try integer first (more restrictive)
try:
# Check if it's a valid integer (no decimal point, no scientific notation)
if '.' not in cleaned_for_number and 'e' not in cleaned_for_number.lower() and 'E' not in cleaned_for_number:
int_value = int(cleaned_for_number)
return int_value
except (ValueError, OverflowError):
pass
# Try float if integer conversion failed
try:
float_value = float(cleaned_for_number)
# Only return as float if it's actually a number representation
# Avoid converting things like "NaN", "inf" which are valid floats but not useful
if cleaned_for_number.lower() not in ['nan', 'inf', '-inf', 'infinity', '-infinity']:
# Check for reasonable float values (not too large/small)
if abs(float_value) < 1e308: # Avoid overflow
return float_value
except (ValueError, OverflowError):
pass
# Try to convert date strings to datetime objects
# This ensures Excel treats them as dates, not strings
# Use original text (not cleaned) for date parsing
date_value = self._parseDateString(text)
if date_value is not None:
return date_value
# Excel cell value limit is 32,767 characters - truncate if necessary
if len(text) > 32767:
text = text[:32764] + "..."
@ -1083,6 +1192,7 @@ class RendererXlsx(BaseRenderer):
# Add rows with formatting
cell_style = styles.get("table_cell", {})
header_count = len(headers)
for row_data in rows:
# Handle different row formats
if isinstance(row_data, list):
@ -1092,6 +1202,14 @@ class RendererXlsx(BaseRenderer):
else:
continue
# Validate row column count matches headers - pad or truncate if needed
if len(cell_values) < header_count:
# Pad with empty strings if row has fewer columns
cell_values.extend([""] * (header_count - len(cell_values)))
elif len(cell_values) > header_count:
# Truncate if row has more columns than headers
cell_values = cell_values[:header_count]
for col, cell_value in enumerate(cell_values, 1):
sanitized_value = self._sanitizeCellValue(cell_value)
cell = sheet.cell(row=startRow, column=col, value=sanitized_value)

View file

@ -13,6 +13,7 @@ import re
import traceback
from typing import Dict, Any, Optional, List, Callable
from modules.services.serviceGeneration.subContentIntegrator import ContentIntegrator
from modules.workflows.processing.shared.stateTools import checkWorkflowStopped
logger = logging.getLogger(__name__)
@ -167,6 +168,7 @@ class ContentGenerator:
contentPartsMap[partId] = part
for idx, section in enumerate(sections):
checkWorkflowStopped(self.services)
try:
contentType = section.get("content_type", "content")
sectionId = section.get("id", f"section_{idx}")
@ -229,7 +231,8 @@ class ContentGenerator:
sections: List[Dict[str, Any]],
cachedContent: Optional[Dict[str, Any]],
userPrompt: str,
documentMetadata: Dict[str, Any],
contentParts: Optional[List[Any]] = None,
documentMetadata: Dict[str, Any] = {},
progressCallback: Optional[Callable] = None,
batchSize: int = 10
) -> List[Dict[str, Any]]:
@ -240,6 +243,7 @@ class ContentGenerator:
sections: List of sections to generate
cachedContent: Extracted content cache
userPrompt: Original user prompt
contentParts: List of all available ContentParts (for mapping by contentPartIds)
documentMetadata: Document metadata
progressCallback: Progress callback function
batchSize: Number of sections to process in parallel per batch
@ -253,6 +257,14 @@ class ContentGenerator:
if totalSections == 0:
return []
# Create ContentParts lookup map by ID
contentPartsMap = {}
if contentParts:
for part in contentParts:
partId = part.id if hasattr(part, 'id') else part.get('id', '')
if partId:
contentPartsMap[partId] = part
# Adjust batch size based on section types (images take longer)
imageCount = sum(1 for s in sections if s.get("content_type") == "image")
if imageCount > 0:
@ -277,6 +289,7 @@ class ContentGenerator:
)
async def generateWithProgress(section: Dict[str, Any], globalIndex: int, localIndex: int, batchPreviousSections: List[Dict[str, Any]]):
checkWorkflowStopped(self.services)
try:
contentType = section.get("content_type", "content")
sectionId = section.get("id", f"section_{globalIndex}")
@ -422,6 +435,7 @@ class ContentGenerator:
resultFormat="json"
)
checkWorkflowStopped(self.services)
aiResponse = await self.services.ai.callAiContent(
prompt=sectionPrompt,
options=options,
@ -603,6 +617,59 @@ class ContentGenerator:
) -> Dict[str, Any]:
"""Generate image for image section or include existing image"""
try:
# First, check if section has image ContentParts to integrate directly
sectionContentParts = context.get("sectionContentParts", [])
if sectionContentParts:
# Look for image ContentParts
for part in sectionContentParts:
partTypeGroup = part.typeGroup if hasattr(part, 'typeGroup') else part.get('typeGroup', '')
partMimeType = part.mimeType if hasattr(part, 'mimeType') else part.get('mimeType', '')
isImage = partTypeGroup == "image" or (partMimeType and partMimeType.startswith("image/"))
if isImage:
# Extract image data from ContentPart
partData = part.data if hasattr(part, 'data') else part.get('data', '')
partId = part.id if hasattr(part, 'id') else part.get('id', '')
# Get base64 data
base64Data = None
if isinstance(partData, str):
# Check if it's already base64 or needs extraction
if partData.startswith("data:image"):
# Extract base64 from data URL
base64Data = partData.split(",", 1)[1] if "," in partData else partData
elif len(partData) > 100: # Likely base64 string
base64Data = partData
elif isinstance(partData, bytes):
import base64
base64Data = base64.b64encode(partData).decode('utf-8')
if base64Data:
# Get caption from section (priority: section.caption > metadata.caption)
caption = section.get("caption") or section.get("metadata", {}).get("caption")
# Get alt text from ContentPart metadata or section
altText = part.metadata.get("altText") if hasattr(part, 'metadata') else part.get('metadata', {}).get('altText')
if not altText:
altText = section.get("generation_hint", "Image")
# Get mime type
mimeType = partMimeType or "image/png"
# Create image element with caption
section["elements"] = [{
"type": "image",
"content": {
"base64Data": base64Data,
"altText": altText,
"caption": caption # Include caption from section
},
"caption": caption # Also at element level for compatibility
}]
logger.info(f"Successfully integrated image from ContentPart {partId} for section {section.get('id')} with caption: {caption}")
return section
# Check if this is an existing image to include or render
imageSource = section.get("image_source", "generate")
@ -623,12 +690,17 @@ class ContentGenerator:
# Create image element from existing/render image
altText = imageDoc.get("altText", section.get("generation_hint", "Image"))
mimeType = imageDoc.get("mimeType", "image/png")
caption = section.get("caption") or section.get("metadata", {}).get("caption")
# Use nested content structure for consistency with renderers
section["elements"] = [{
"base64Data": imageDoc.get("base64Data"),
"altText": altText,
"mimeType": mimeType,
"caption": section.get("metadata", {}).get("caption")
"type": "image",
"content": {
"base64Data": imageDoc.get("base64Data"),
"altText": altText,
"caption": caption # Include caption in content structure
},
"caption": caption # Also at element level for compatibility
}]
logger.info(f"Successfully integrated image {imageRefId} for section {section.get('id')} (source={imageSource})")
@ -666,6 +738,7 @@ class ContentGenerator:
logger.info(f"Starting image generation for section {section.get('id')}: {imagePrompt[:100]}...")
# Call AI for image generation
checkWorkflowStopped(self.services)
aiResponse = await self.services.ai.callAiContent(
prompt=promptJson,
options=options,
@ -702,13 +775,17 @@ class ContentGenerator:
# Use image_prompt as alt text if generation_hint is generic
altText = section.get("image_prompt", "Image")[:100] # Limit length
caption = section.get("metadata", {}).get("caption")
caption = section.get("caption") or section.get("metadata", {}).get("caption")
# Use nested content structure for consistency with renderers
section["elements"] = [{
"url": f"data:image/png;base64,{base64Data}",
"base64Data": base64Data,
"altText": altText,
"caption": caption
"type": "image",
"content": {
"base64Data": base64Data,
"altText": altText,
"caption": caption # Include caption in content structure
},
"caption": caption # Also at element level for compatibility
}]
logger.info(f"Successfully generated image for section {section.get('id')}")

View file

@ -2,7 +2,7 @@
# All rights reserved.
"""
Web crawl service for handling web research operations.
Manages the two-step process: WEB_SEARCH then WEB_CRAWL.
Manages the two-step process: WEB_SEARCH_DATA then WEB_CRAWL.
"""
import json
@ -35,7 +35,7 @@ class WebService:
"""
Perform web research in two steps:
1. Use AI to analyze prompt and extract parameters + URLs
2. Call WEB_SEARCH to get URLs (if needed)
2. Call WEB_SEARCH_DATA to get URLs (if needed)
3. Combine URLs and filter to maxNumberPages
4. Call WEB_CRAWL for each URL
5. Return consolidated result
@ -337,9 +337,9 @@ Return ONLY valid JSON, no additional text:
# Debug: persist search prompt
self.services.utils.writeDebugFile(searchPrompt, "websearch_prompt")
# Call AI with WEB_SEARCH operation
# Call AI with WEB_SEARCH_DATA operation
searchOptions = AiCallOptions(
operationType=OperationTypeEnum.WEB_SEARCH,
operationType=OperationTypeEnum.WEB_SEARCH_DATA,
resultFormat="json"
)

View file

@ -42,6 +42,7 @@ class FrontendType(str, Enum):
JSON = "json"
MULTILINGUAL = "multilingual"
FILE = "file"
HIDDEN = "hidden"
# Custom Types for Actions
USER_CONNECTION = "userConnection"

View file

@ -1,742 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
AI processing method module.
Handles direct AI calls for any type of task.
"""
import time
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime, UTC
from modules.workflows.methods.methodBase import MethodBase, action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
from modules.datamodels.datamodelWorkflow import ExtractContentParameters
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentPart
logger = logging.getLogger(__name__)
class MethodAi(MethodBase):
"""AI processing methods."""
def __init__(self, services):
    """Set up the AI method group.

    Args:
        services: Service container, forwarded unchanged to the base class
            initialiser.
    """
    super().__init__(services)
    # Name and description attributes of this method group.
    self.name, self.description = "ai", "AI processing methods"
def _format_timestamp_for_filename(self) -> str:
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
@action
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
    - Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
    - Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
    - Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.
    Parameters:
    - aiPrompt (str, required): Instruction for the AI describing what processing to perform.
    - documentList (list, optional): Document reference(s) in any format to use as input/context.
    - resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
    """
    # NOTE(review): the docstring above is presumably consumed by the @action
    # framework, so its wording is left untouched - confirm before editing it.

    # Bound before the try block so the failure handler can tell whether an
    # operation id was ever created (it used to raise NameError otherwise).
    operationId = None
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"ai_process_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Generate",
            "AI Processing",
            f"Format: {parameters.get('resultType', 'txt')}",
            parentOperationId=parentOperationId
        )

        aiPrompt = parameters.get("aiPrompt")
        logger.info(f"aiPrompt extracted: '{aiPrompt}' (type: {type(aiPrompt)})")

        # Update progress - preparing parameters
        self.services.chat.progressLogUpdate(operationId, 0.2, "Preparing parameters")

        from modules.datamodels.datamodelDocref import DocumentReferenceList
        documentListParam = parameters.get("documentList")

        # Convert to DocumentReferenceList if needed
        if documentListParam is None:
            documentList = DocumentReferenceList(references=[])
        elif isinstance(documentListParam, DocumentReferenceList):
            documentList = documentListParam
        elif isinstance(documentListParam, str):
            documentList = DocumentReferenceList.from_string_list([documentListParam])
        elif isinstance(documentListParam, list):
            documentList = DocumentReferenceList.from_string_list(documentListParam)
        else:
            logger.error(f"Invalid documentList type: {type(documentListParam)}")
            documentList = DocumentReferenceList(references=[])

        # `or "txt"` also covers an explicit resultType=None, which would
        # otherwise be stringified to the bogus extension ".none".
        resultType = parameters.get("resultType") or "txt"

        if not aiPrompt:
            logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
            # Close the progress entry opened above instead of leaving it dangling.
            self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(
                error="AI prompt is required"
            )

        # Determine output extension and default MIME type without duplicating service logic
        normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
        output_extension = f".{normalized_result_type}"
        output_mime_type = "application/octet-stream"  # Prefer service-provided mimeType when available
        logger.info(f"Using result type: {resultType} -> {output_extension}")

        # Phase 7.3: Extract content first if documents provided, then use contentParts
        # Check if contentParts are already provided (preferred path)
        contentParts: Optional[List[ContentPart]] = None
        if "contentParts" in parameters:
            contentParts = parameters.get("contentParts")
            if contentParts and not isinstance(contentParts, list):
                # Try to extract from ContentExtracted if it's an ActionDocument
                if hasattr(contentParts, 'parts'):
                    contentParts = contentParts.parts
                else:
                    logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
                    contentParts = None

        # If contentParts not provided but documentList is, extract content first
        if not contentParts and documentList.references:
            self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")

            # Get ChatDocuments
            chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
            if not chatDocuments:
                logger.warning("No documents found in documentList")
            else:
                logger.info(f"Extracting content from {len(chatDocuments)} documents")

                # Prepare extraction options (use defaults if not provided)
                extractionOptions = parameters.get("extractionOptions")
                if not extractionOptions:
                    extractionOptions = ExtractionOptions(
                        prompt="Extract all content from the document",
                        mergeStrategy=MergeStrategy(
                            mergeType="concatenate",
                            groupBy="typeGroup",
                            orderBy="id"
                        ),
                        processDocumentsIndividually=True
                    )

                # Extract content using extraction service with hierarchical progress logging
                # Pass operationId for per-document progress tracking
                extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)

                # Combine all ContentParts from all extracted results
                contentParts = []
                for extracted in extractedResults:
                    if extracted.parts:
                        contentParts.extend(extracted.parts)
                logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")

        # Update progress - preparing AI call
        self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")

        # Build options with only resultFormat - let service layer handle all other parameters
        output_format = output_extension.replace('.', '') or 'txt'
        options = AiCallOptions(
            resultFormat=output_format
            # Removed all model parameters - service layer will analyze prompt and determine optimal parameters
        )

        # Update progress - calling AI
        self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")

        # Use unified callAiContent method with contentParts (extraction is now separate)
        aiResponse = await self.services.ai.callAiContent(
            prompt=aiPrompt,
            options=options,
            contentParts=contentParts,  # Already extracted (or None if no documents)
            outputFormat=output_format,
            parentOperationId=operationId
        )

        # Update progress - processing result
        self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")

        # Extract documents from AiResponse
        if aiResponse.documents and len(aiResponse.documents) > 0:
            action_documents = []
            for doc in aiResponse.documents:
                # A fresh dict per document so documents never share one
                # mutable metadata object.
                validationMetadata = {
                    "actionType": "ai.process",
                    "resultType": normalized_result_type,
                    "outputFormat": output_format,
                    "hasDocuments": True,
                    "documentCount": len(aiResponse.documents)
                }
                action_documents.append(ActionDocument(
                    documentName=doc.documentName,
                    documentData=doc.documentData,
                    mimeType=doc.mimeType or output_mime_type,
                    sourceJson=getattr(doc, 'sourceJson', None),  # Preserve source JSON for structure validation
                    validationMetadata=validationMetadata
                ))
            final_documents = action_documents
        else:
            # Text response - create document from content
            extension = output_extension.lstrip('.')
            meaningful_name = self._generateMeaningfulFileName(
                base_name="ai",
                extension=extension,
                action_name="result"
            )
            validationMetadata = {
                "actionType": "ai.process",
                "resultType": normalized_result_type,
                "outputFormat": output_format,
                "hasDocuments": False,
                "contentType": "text"
            }
            action_document = ActionDocument(
                documentName=meaningful_name,
                documentData=aiResponse.content,
                mimeType=output_mime_type,
                validationMetadata=validationMetadata
            )
            final_documents = [action_document]

        # Complete progress tracking
        self.services.chat.progressLogFinish(operationId, True)
        return ActionResult.isSuccess(documents=final_documents)
    except Exception as e:
        logger.error(f"Error in AI processing: {str(e)}")
        # Complete progress tracking with failure, but only if an operation
        # id exists. `except Exception` (not a bare except) lets task
        # cancellation and interpreter-exit signals propagate.
        if operationId is not None:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as progressError:
                # Don't fail on progress logging errors
                logger.debug(f"Could not finish progress log {operationId}: {progressError}")
        return ActionResult.isFailure(
            error=str(e)
        )
@action
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Web research with two-step process: search for URLs, then crawl content.
    - Input requirements: prompt (required); optional list(url), country, language, researchDepth.
    - Output format: JSON with research results including URLs and content.
    Parameters:
    - prompt (str, required): Natural language research instruction.
    - urlList (list, optional): Specific URLs to crawl, if needed.
    - country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
    - language (str, optional): Language code (lowercase, e.g., de, en, fr).
    - researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
    """
    # NOTE(review): the docstring above is presumably consumed by the @action
    # framework, so its wording is left untouched - confirm before editing it.

    # Both are bound before the try block so the failure handler never hits
    # an unbound name and never reports failure for an already finished run.
    operationId = None
    progressFinished = False
    try:
        prompt = parameters.get("prompt")
        if not prompt:
            return ActionResult.isFailure(error="Research prompt is required")

        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"web_research_{workflowId}_{int(time.time())}"

        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Web Research",
            "Searching and Crawling",
            "Extracting URLs and Content",
            parentOperationId=parentOperationId
        )

        # Call webcrawl service - service handles all AI intention analysis and processing
        result = await self.services.web.performWebResearch(
            prompt=prompt,
            urls=parameters.get("urlList", []),
            country=parameters.get("country"),
            language=parameters.get("language"),
            researchDepth=parameters.get("researchDepth", "general"),
            operationId=operationId
        )

        # Complete progress tracking
        self.services.chat.progressLogFinish(operationId, True)
        progressFinished = True

        # Get meaningful filename from research result (generated by intent analyzer)
        meaningfulName = None
        suggestedFilename = result.get("suggested_filename")
        # Only string suggestions can be cleaned; anything else uses the fallback.
        if suggestedFilename and isinstance(suggestedFilename, str):
            # Clean and validate filename
            import re
            cleaned = suggestedFilename.strip().strip('"\'')
            cleaned = cleaned.replace('\n', ' ').replace('\r', ' ').strip()
            # Ensure it doesn't already have extension
            if cleaned.lower().endswith('.json'):
                cleaned = cleaned[:-5]
            # Validate: should be reasonable length and contain only safe characters
            if cleaned and len(cleaned) <= 60 and re.match(r'^[a-zA-Z0-9_\-]+$', cleaned):
                meaningfulName = f"{cleaned}.json"

        if meaningfulName is None:
            # Fallback to generic meaningful filename
            meaningfulName = self._generateMeaningfulFileName(
                base_name="web_research",
                extension="json",
                action_name="research"
            )

        validationMetadata = {
            "actionType": "ai.webResearch",
            "prompt": prompt,
            "urlList": parameters.get("urlList", []),
            "country": parameters.get("country"),
            "language": parameters.get("language"),
            "researchDepth": parameters.get("researchDepth", "general"),
            "resultFormat": "json"
        }
        actionDocument = ActionDocument(
            documentName=meaningfulName,
            documentData=result,
            mimeType="application/json",
            validationMetadata=validationMetadata
        )
        return ActionResult.isSuccess(documents=[actionDocument])
    except Exception as e:
        logger.error(f"Error in web research: {str(e)}")
        # Report failure only for an operation that exists and is still open.
        # `except Exception` (not a bare except) lets task cancellation propagate.
        if operationId is not None and not progressFinished:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as progressError:
                logger.debug(f"Could not finish progress log {operationId}: {progressError}")
        return ActionResult.isFailure(error=str(e))
# ============================================================================
# Document Transformation Wrappers
# ============================================================================
@action
async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Summarize one or more documents, extracting key points and main ideas.
    - Input requirements: documentList (required); optional summaryLength, focus.
    - Output format: Text document with summary (default: txt, can be overridden with resultType).
    Parameters:
    - documentList (list, required): Document reference(s) to summarize.
    - summaryLength (str, optional): Desired summary length - brief, medium, or detailed. Default: medium.
    - focus (str, optional): Specific aspect to focus on in the summary (e.g., "financial data", "key decisions").
    - resultType (str, optional): Output file extension (txt, md, docx, etc.). Default: txt.
    """
    # NOTE(review): the docstring above is presumably consumed by the @action
    # framework, so its wording is left untouched - confirm before editing it.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    # `or` (instead of a .get default) also covers keys that are present but
    # None; previously summaryLength=None crashed on .lower().
    summaryLength = parameters.get("summaryLength") or "medium"
    focus = parameters.get("focus")
    resultType = parameters.get("resultType") or "txt"

    lengthInstructions = {
        "brief": "Create a brief summary (2-3 paragraphs)",
        "medium": "Create a medium-length summary (comprehensive but concise)",
        "detailed": "Create a detailed summary covering all major points"
    }
    # Unknown or non-string lengths fall back to the "medium" instruction.
    lengthKey = str(summaryLength).strip().lower()
    lengthInstruction = lengthInstructions.get(lengthKey, lengthInstructions["medium"])

    aiPrompt = f"Summarize the provided document(s). {lengthInstruction}."
    if focus:
        aiPrompt += f" Focus specifically on: {focus}."
    aiPrompt += " Extract and present the key points, main ideas, and important information in a clear, well-structured format."

    # Delegate the actual AI call to the generic process action.
    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": resultType
    })
@action
async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Translate documents to a target language while preserving formatting and structure.
    - Input requirements: documentList (required); targetLanguage (required).
    - Output format: Translated document in same format as input (default) or specified resultType.
    Parameters:
    - documentList (list, required): Document reference(s) to translate.
    - targetLanguage (str, required): Target language code or name (e.g., "de", "German", "French", "es").
    - sourceLanguage (str, optional): Source language if known (e.g., "en", "English"). If not provided, AI will detect.
    - preserveFormatting (bool, optional): Whether to preserve original formatting. Default: True.
    - resultType (str, optional): Output file extension. If not specified, uses same format as input.
    """
    # Guard clauses for the two required parameters.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")
    targetLanguage = parameters.get("targetLanguage")
    if not targetLanguage:
        return ActionResult.isFailure(error="targetLanguage is required")

    sourceLanguage = parameters.get("sourceLanguage")
    preserveFormatting = parameters.get("preserveFormatting", True)
    resultType = parameters.get("resultType")

    # Collect the prompt fragments in order, then join them once.
    promptParts = [f"Translate the provided document(s) to {targetLanguage}."]
    if sourceLanguage:
        promptParts.append(f" The source language is {sourceLanguage}.")
    promptParts.append(
        " Preserve all formatting, structure, tables, and layout exactly as they appear in the original document."
        if preserveFormatting
        else " Focus on accurate translation of content."
    )
    promptParts.append(" Maintain the same document structure, headings, and organization.")

    # resultType is only forwarded when the caller supplied one.
    processParams = {"aiPrompt": "".join(promptParts), "documentList": documentList}
    if resultType:
        processParams["resultType"] = resultType
    return await self.process(processParams)
@action
async def convert(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Convert documents/data between different formats with specific formatting options (e.g., JSONCSV with custom columns, delimiters).
    - Input requirements: documentList (required); inputFormat and outputFormat (required).
    - Output format: Document in target format with specified formatting options.
    - CRITICAL: If input is already in standardized JSON format, uses automatic rendering system (no AI call needed).
    Parameters:
    - documentList (list, required): Document reference(s) to convert.
    - inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.).
    - outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.).
    - columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect.
    - delimiter (str, optional): For CSV output, delimiter character. Default: comma (,).
    - includeHeader (bool, optional): For CSV output, whether to include header row. Default: True.
    - language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'.
    """
    # NOTE(review): the docstring above is presumably consumed by the @action
    # framework, so its wording is left untouched - confirm before editing it.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    inputFormat = parameters.get("inputFormat")
    outputFormat = parameters.get("outputFormat")
    if not inputFormat or not outputFormat:
        return ActionResult.isFailure(error="inputFormat and outputFormat are required")

    # Normalize formats (remove leading dot if present); str() tolerates
    # non-string values handed in by callers.
    normalizedInputFormat = str(inputFormat).strip().lstrip('.').lower()
    normalizedOutputFormat = str(outputFormat).strip().lstrip('.').lower()
    if not normalizedInputFormat or not normalizedOutputFormat:
        # e.g. "." or whitespace only - nothing usable is left after normalizing
        return ActionResult.isFailure(error="inputFormat and outputFormat are required")

    # Get documents
    from modules.datamodels.datamodelDocref import DocumentReferenceList
    if isinstance(documentList, DocumentReferenceList):
        docRefList = documentList
    elif isinstance(documentList, list):
        docRefList = DocumentReferenceList.from_string_list(documentList)
    else:
        docRefList = DocumentReferenceList.from_string_list([documentList])

    chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList)
    if not chatDocuments:
        return ActionResult.isFailure(error="No documents found in documentList")

    # Check if input is standardized JSON format - if so, use direct rendering
    if normalizedInputFormat == "json" and len(chatDocuments) == 1:
        try:
            import json
            doc = chatDocuments[0]

            # ChatDocument doesn't have documentData - need to load file content using fileId
            docBytes = self.services.chat.getFileData(doc.fileId)
            if not docBytes:
                raise ValueError(f"No file data found for fileId={doc.fileId}")

            # Decode only real byte payloads so the str/dict branches below
            # stay reachable for already-decoded data.
            if isinstance(docBytes, (bytes, bytearray)):
                docData = docBytes.decode('utf-8')
            else:
                docData = docBytes

            # Try to parse as JSON
            if isinstance(docData, str):
                jsonData = json.loads(docData)
            elif isinstance(docData, dict):
                jsonData = docData
            else:
                jsonData = None

            # Check if it's standardized JSON format (has "documents" or "sections")
            if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData):
                # Use direct rendering - no AI call needed!
                from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                generationService = GenerationService(self.services)

                # Read the title BEFORE re-wrapping: the wrap below moves the
                # top-level metadata into documents[0], which previously made
                # the title lookup always miss for sections-only input.
                sourceName = doc.documentName or "Converted Document"
                topMetadata = jsonData.get("metadata")
                if not isinstance(topMetadata, dict):
                    topMetadata = {}
                title = topMetadata.get("title") or sourceName

                # Ensure format is "documents" array
                if "documents" not in jsonData:
                    jsonData = {"documents": [{"sections": jsonData.get("sections", []), "metadata": jsonData.get("metadata", {})}]}

                # Render with options
                renderOptions = {}
                if normalizedOutputFormat == "csv":
                    renderOptions["delimiter"] = parameters.get("delimiter", ",")
                    renderOptions["columnsPerRow"] = parameters.get("columnsPerRow")
                    renderOptions["includeHeader"] = parameters.get("includeHeader", True)

                rendered_content, mime_type = await generationService.renderReport(
                    jsonData, normalizedOutputFormat, title, None, None
                )

                # Apply CSV options if needed (renderer will handle them)
                if normalizedOutputFormat == "csv" and renderOptions:
                    rendered_content = self._applyCsvOptions(rendered_content, renderOptions)

                validationMetadata = {
                    "actionType": "ai.convert",
                    "inputFormat": normalizedInputFormat,
                    "outputFormat": normalizedOutputFormat,
                    "hasSourceJson": True,
                    "conversionType": "direct_rendering"
                }
                # rsplit leaves names without a dot unchanged; a missing
                # documentName no longer aborts the direct rendering path.
                baseName = (doc.documentName or "converted").rsplit('.', 1)[0]
                actionDoc = ActionDocument(
                    documentName=f"{baseName}.{normalizedOutputFormat}",
                    documentData=rendered_content,
                    mimeType=mime_type,
                    sourceJson=jsonData,  # Preserve source JSON for structure validation
                    validationMetadata=validationMetadata
                )
                return ActionResult.isSuccess(documents=[actionDoc])
        except Exception as e:
            logger.warning(f"Direct rendering failed, falling back to AI conversion: {str(e)}")
            # Fall through to AI-based conversion

    # Fallback: Use AI for conversion (for non-JSON inputs or complex conversions)
    columnsPerRow = parameters.get("columnsPerRow")
    delimiter = parameters.get("delimiter", ",")
    includeHeader = parameters.get("includeHeader", True)
    language = parameters.get("language", "en")

    aiPrompt = f"Convert the provided document(s) from {normalizedInputFormat.upper()} format to {normalizedOutputFormat.upper()} format."
    if normalizedOutputFormat == "csv":
        aiPrompt += f" Use '{delimiter}' as the delimiter character."
        if columnsPerRow:
            aiPrompt += f" Format the output with {columnsPerRow} columns per row."
        if not includeHeader:
            aiPrompt += " Do not include a header row."
        else:
            aiPrompt += " Include a header row with column names."
    if language and language != "en":
        aiPrompt += f" Use language: {language}."
    aiPrompt += " Preserve all data and ensure accurate conversion. Maintain data integrity and structure."

    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": normalizedOutputFormat
    })
def _applyCsvOptions(self, csvContent: str, options: Dict[str, Any]) -> str:
    """Apply CSV formatting options to rendered CSV content.

    Args:
        csvContent: Comma-delimited CSV text.
        options: Mapping with the optional keys ``delimiter`` (default ``","``),
            ``columnsPerRow`` (positive integer; other values are ignored) and
            ``includeHeader`` (default ``True``; ``False`` drops the first row).

    Returns:
        ``csvContent`` unchanged when no option requires work, otherwise the
        rows re-written by ``csv.writer`` with the requested delimiter.
    """
    delimiter = options.get("delimiter", ",")
    includeHeader = options.get("includeHeader", True)

    # Coerce columnsPerRow to a positive int. A negative value used to make
    # range() yield nothing, silently dropping every row, and a string such
    # as "3" raised TypeError. Unusable values are treated as "not set".
    columnsPerRow = options.get("columnsPerRow")
    if columnsPerRow is not None:
        try:
            columnsPerRow = None if isinstance(columnsPerRow, bool) else int(columnsPerRow)
        except (TypeError, ValueError):
            columnsPerRow = None
        if columnsPerRow is not None and columnsPerRow <= 0:
            columnsPerRow = None

    # Check if any options need to be applied
    needsProcessing = (delimiter != ",") or (columnsPerRow is not None) or (not includeHeader)
    if not needsProcessing:
        return csvContent

    import csv
    import io

    # Re-read CSV with comma, write with new delimiter
    rows = list(csv.reader(io.StringIO(csvContent)))

    # Handle header
    if not includeHeader and rows:
        rows = rows[1:]  # Skip header

    # Handle columnsPerRow: split each row into fixed-width chunks and pad
    # the last chunk with empty cells.
    if columnsPerRow:
        rewrapped = []
        for row in rows:
            for start in range(0, len(row), columnsPerRow):
                chunk = row[start:start + columnsPerRow]
                chunk += [""] * (columnsPerRow - len(chunk))
                rewrapped.append(chunk)
        rows = rewrapped

    output = io.StringIO()
    csv.writer(output, delimiter=delimiter).writerows(rows)
    return output.getvalue()
@action
async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Convert documents between different formats (PDFWord, ExcelCSV, etc.).
    - Input requirements: documentList (required); targetFormat (required).
    - Output format: Document in target format.
    Parameters:
    - documentList (list, required): Document reference(s) to convert.
    - targetFormat (str, required): Target format extension (docx, pdf, xlsx, csv, txt, html, json, md, etc.).
    - preserveStructure (bool, optional): Whether to preserve document structure (headings, tables, etc.). Default: True.
    """
    # Guard clauses for the two required parameters.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")
    targetFormat = parameters.get("targetFormat")
    if not targetFormat:
        return ActionResult.isFailure(error="targetFormat is required")

    preserveStructure = parameters.get("preserveStructure", True)

    # Normalize format (remove leading dot if present)
    normalizedFormat = targetFormat.strip().lstrip('.').lower()

    # Assemble the prompt from its fragments in a single join.
    promptParts = [f"Convert the provided document(s) to {normalizedFormat.upper()} format."]
    if preserveStructure:
        promptParts.append(" Preserve all document structure including headings, tables, formatting, lists, and layout.")
    promptParts.append(" Ensure the converted document maintains the same content and information as the original.")

    processParams = {
        "aiPrompt": "".join(promptParts),
        "documentList": documentList,
        "resultType": normalizedFormat
    }
    return await self.process(processParams)
@action
async def extractData(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Extract structured data from documents (key-value pairs, entities, facts, etc.).
    - Input requirements: documentList (required); optional dataStructure, fields.
    - Output format: JSON by default, or specified resultType.
    Parameters:
    - documentList (list, required): Document reference(s) to extract data from.
    - dataStructure (str, optional): Desired data structure - flat, nested, or list. Default: nested.
    - fields (list, optional): Specific fields/properties to extract (e.g., ["name", "date", "amount"]).
    - resultType (str, optional): Output format (json, csv, xlsx, etc.). Default: json.
    """
    # NOTE(review): the docstring above is presumably consumed by the @action
    # framework, so its wording is left untouched - confirm before editing it.
    documentList = parameters.get("documentList", [])
    if not documentList:
        return ActionResult.isFailure(error="documentList is required")

    # `or` (instead of a .get default) also covers keys present with None;
    # previously dataStructure=None crashed on .lower().
    dataStructure = parameters.get("dataStructure") or "nested"
    resultType = parameters.get("resultType") or "json"

    # Accept a single field name as well as a list. A bare string used to be
    # joined character by character ("name" -> "n, a, m, e"), and non-string
    # items raised TypeError in str.join.
    fields = parameters.get("fields") or []
    if isinstance(fields, str):
        fields = [fields]
    fieldNames = [str(field) for field in fields if field is not None and str(field).strip()]

    aiPrompt = "Extract structured data from the provided document(s)."
    if fieldNames:
        fieldsStr = ", ".join(fieldNames)
        aiPrompt += f" Extract the following specific fields: {fieldsStr}."
    else:
        aiPrompt += " Extract all relevant data including names, dates, amounts, entities, and key information."

    structureInstructions = {
        "flat": "Use a flat key-value structure with simple properties.",
        "nested": "Use a nested JSON structure with logical grouping of related data.",
        "list": "Structure the data as a list/array of objects, one per entity or record."
    }
    # Unknown structures fall back to the "nested" instruction.
    structureKey = str(dataStructure).strip().lower()
    aiPrompt += f" {structureInstructions.get(structureKey, structureInstructions['nested'])}"
    aiPrompt += " Ensure all extracted data is accurate and complete."

    return await self.process({
        "aiPrompt": aiPrompt,
        "documentList": documentList,
        "resultType": resultType
    })
# ============================================================================
# Content Generation Wrapper
# ============================================================================
@action
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Generate documents from scratch or based on templates/inputs.
    - Input requirements: prompt or description (required); optional documentList (for templates/references).
    - Output format: Document in specified format (default: docx).
    Parameters:
    - prompt (str, required): Description of the document to generate.
    - documentList (list, optional): Template documents or reference documents to use as a guide.
    - documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
    - resultType (str, optional): Output format (docx, pdf, txt, md, etc.). Default: docx.
    """
    prompt = parameters.get("prompt")
    if not prompt:
        return ActionResult.isFailure(error="prompt is required")

    documentList = parameters.get("documentList", [])
    documentType = parameters.get("documentType")
    resultType = parameters.get("resultType", "docx")

    # Collect the prompt fragments in order, then join them once.
    promptParts = [f"Generate a document based on the following requirements: {prompt}"]
    if documentType:
        promptParts.append(f" Document type: {documentType}.")
    if documentList:
        promptParts.append(" Use the provided template/reference documents as a guide for structure, format, and style.")
    promptParts.append(" Create a professional, well-structured document with appropriate formatting and organization.")

    # documentList is only forwarded when reference documents were supplied.
    processParams = {"aiPrompt": "".join(promptParts), "resultType": resultType}
    if documentList:
        processParams["documentList"] = documentList
    return await self.process(processParams)

View file

@ -10,6 +10,7 @@ from .summarizeDocument import summarizeDocument
from .translateDocument import translateDocument
from .convertDocument import convertDocument
from .generateDocument import generateDocument
from .generateCode import generateCode
__all__ = [
'process',
@ -18,5 +19,6 @@ __all__ = [
'translateDocument',
'convertDocument',
'generateDocument',
'generateCode',
]

View file

@ -1,31 +1,13 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Convert Document action for AI operations.
Converts documents between different formats (PDFWord, ExcelCSV, etc.).
"""
import logging
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult
logger = logging.getLogger(__name__)
@action
async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Convert documents between different formats (PDF→Word, Excel→CSV, etc.).
- Input requirements: documentList (required); targetFormat (required).
- Output format: Document in target format.
Parameters:
- documentList (list, required): Document reference(s) to convert.
- targetFormat (str, required): Target format extension (docx, pdf, xlsx, csv, txt, html, json, md, etc.).
- preserveStructure (bool, optional): Whether to preserve document structure (headings, tables, etc.). Default: True.
"""
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")

View file

@ -0,0 +1,123 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import logging
import time
from typing import Dict, Any, Optional, List
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelExtraction import ContentPart
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.datamodels.datamodelWorkflow import AiResponse, DocumentData
logger = logging.getLogger(__name__)
# Known code formats and the MIME type reported for each; anything else is text/plain.
_CODE_MIME_TYPES = {
    "html": "text/html",
    "js": "application/javascript",
    "py": "text/x-python",
    "ts": "application/typescript",
    "java": "text/x-java-source",
    "cpp": "text/x-c++src",
}


async def generateCode(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Generate code files by calling the AI service with generationIntent="code".

    Parameters:
    - prompt (str, required): Description of the code to generate.
    - documentList (optional): Reference documents; a DocumentReferenceList, a single
      reference string, or a list of reference strings.
    - resultType (str, optional): Output format (py, js, ts, html, ...). When omitted,
      None is passed to the AI service as outputFormat.
    - parentOperationId (optional): Forwarded unchanged to the AI service.

    Returns:
    - ActionResult.isSuccess with the generated documents, or ActionResult.isFailure
      when the prompt is missing or any exception is raised during generation.
    """
    prompt = parameters.get("prompt")
    if not prompt:
        return ActionResult.isFailure(error="prompt is required")

    documentList = parameters.get("documentList", [])
    # Optional: if omitted, formats determined from prompt by AI
    resultType = parameters.get("resultType")
    if not resultType:
        logger.debug("resultType not provided - formats will be determined from prompt by AI")
    parentOperationId = parameters.get('parentOperationId')

    try:
        # Convert documentList to DocumentReferenceList if needed
        docRefList = None
        if documentList:
            from modules.datamodels.datamodelDocref import DocumentReferenceList
            if isinstance(documentList, DocumentReferenceList):
                docRefList = documentList
            elif isinstance(documentList, str):
                docRefList = DocumentReferenceList.from_string_list([documentList])
            elif isinstance(documentList, list):
                docRefList = DocumentReferenceList.from_string_list(documentList)
            else:
                # Unsupported type: continue with an empty reference list
                docRefList = DocumentReferenceList(references=[])

        # Call AI service with explicit code intent
        options = AiCallOptions(
            operationType=OperationTypeEnum.DATA_GENERATE,
            priority=PriorityEnum.BALANCED,
            processingMode=ProcessingModeEnum.DETAILED
        )
        aiResponse: AiResponse = await self.services.ai.callAiContent(
            prompt=prompt,
            options=options,
            documentList=docRefList,
            outputFormat=resultType,  # Can be None - AI determines from prompt
            title="Generated Code",
            parentOperationId=parentOperationId,
            generationIntent="code"  # Explicit intent, skips detection
        )

        # Convert DocumentData to ActionDocument
        documents = []
        if aiResponse.documents:
            for docData in aiResponse.documents:
                documents.append(ActionDocument(
                    documentName=docData.documentName,
                    documentData=docData.documentData,
                    mimeType=docData.mimeType,
                    sourceJson=getattr(docData, 'sourceJson', None)
                ))

        # If no documents but content exists, create a document from content
        if not documents and aiResponse.content:
            resultTypeFallback = resultType or "txt"  # Fallback for file naming
            docName = f"code.{resultTypeFallback}"
            if aiResponse.metadata and aiResponse.metadata.filename:
                docName = aiResponse.metadata.filename
            elif aiResponse.metadata and aiResponse.metadata.title:
                import re
                sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", aiResponse.metadata.title)
                sanitized = re.sub(r"_+", "_", sanitized).strip("_")
                if sanitized:
                    if not sanitized.lower().endswith(f".{resultTypeFallback}"):
                        docName = f"{sanitized}.{resultTypeFallback}"
                    else:
                        docName = sanitized

            # An explicit resultType decides the MIME type. Without one, fall back to
            # the extension of the final document name so a metadata filename such as
            # "app.js" is not reported as text/plain.
            if resultType:
                mimeKey = resultType
            else:
                _, dot, suffix = docName.rpartition(".")
                mimeKey = suffix.lower() if dot else ""
            mimeType = _CODE_MIME_TYPES.get(mimeKey, "text/plain")

            content = aiResponse.content
            documents.append(ActionDocument(
                documentName=docName,
                documentData=content.encode('utf-8') if isinstance(content, str) else content,
                mimeType=mimeType
            ))

        return ActionResult.isSuccess(documents=documents)
    except Exception as e:
        # logger.exception keeps the traceback; the caller still gets a failure result
        logger.exception("Error in code generation: %s", e)
        return ActionResult.isFailure(error=str(e))

View file

@ -1,15 +1,9 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Generate Document action for AI operations.
Wrapper around AI service callAiContent method.
"""
import logging
import time
from typing import Dict, Any, Optional, List
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelExtraction import ContentPart
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
@ -17,46 +11,18 @@ from modules.datamodels.datamodelWorkflow import AiResponse, DocumentData
logger = logging.getLogger(__name__)
@action
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Generate documents from scratch or based on templates/inputs using hierarchical approach.
- Input requirements: prompt or description (required); optional documentList (for templates/references).
- Output format: Document in specified format. Any format supported by dynamically registered renderers is acceptable (default: txt).
Parameters:
- prompt (str, required): Description of the document to generate.
- documentList (list, optional): Template documents or reference documents to use as a guide.
- documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
- resultType (str, optional): Output format. Any format supported by dynamically registered renderers is acceptable (formats are discovered automatically from renderer registry). Common formats: txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg. Default: txt.
- maxSectionLength (int, optional): Maximum words for simple sections. Default: 500.
- parallelGeneration (bool, optional): Enable parallel section generation. Default: True.
- progressLogging (bool, optional): Send ChatLog progress updates. Default: True.
"""
prompt = parameters.get("prompt")
if not prompt:
return ActionResult.isFailure(error="prompt is required")
documentList = parameters.get("documentList", [])
documentType = parameters.get("documentType")
resultType = parameters.get("resultType", "txt")
# Optional: if omitted, formats determined from prompt by AI
resultType = parameters.get("resultType")
# Auto-detect format from prompt if not explicitly provided
if resultType == "txt" and prompt:
promptLower = prompt.lower()
if "html" in promptLower or "html5" in promptLower:
resultType = "html"
logger.info(f"Auto-detected HTML format from prompt")
elif "pdf" in promptLower:
resultType = "pdf"
logger.info(f"Auto-detected PDF format from prompt")
elif "markdown" in promptLower or " md " in promptLower or promptLower.endswith(" md"):
resultType = "md"
logger.info(f"Auto-detected Markdown format from prompt")
elif ("text" in promptLower or "txt" in promptLower) and "html" not in promptLower:
resultType = "txt"
logger.info(f"Auto-detected Text format from prompt")
if not resultType:
logger.debug("resultType not provided - formats will be determined from prompt by AI")
# Create operation ID for progress tracking
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
@ -91,13 +57,15 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
compressContext=False
)
# outputFormat: Optional - if None, formats determined from prompt by AI
aiResponse: AiResponse = await self.services.ai.callAiContent(
prompt=prompt,
options=options,
documentList=docRefList, # Übergebe documentList direkt - callAiContent macht Phasen 5A-5E
outputFormat=resultType,
outputFormat=resultType, # Can be None - AI determines from prompt
title=title,
parentOperationId=parentOperationId
parentOperationId=parentOperationId,
generationIntent="document" # NEW: Explicit intent, skips detection
)
# Convert AiResponse to ActionResult
@ -116,7 +84,8 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
# If no documents but content exists, create a document from content
if not documents and aiResponse.content:
# Determine document name from metadata
docName = f"document.{resultType}"
resultTypeFallback = resultType or "txt" # Fallback for file naming
docName = f"document.{resultTypeFallback}"
if aiResponse.metadata and aiResponse.metadata.filename:
docName = aiResponse.metadata.filename
elif aiResponse.metadata and aiResponse.metadata.title:
@ -124,8 +93,8 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", aiResponse.metadata.title)
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
if sanitized:
if not sanitized.lower().endswith(f".{resultType}"):
docName = f"{sanitized}.{resultType}"
if not sanitized.lower().endswith(f".{resultTypeFallback}"):
docName = f"{sanitized}.{resultTypeFallback}"
else:
docName = sanitized

View file

@ -1,36 +1,17 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Process action for AI operations.
Universal AI document processing action.
"""
import logging
import time
import json
from typing import Dict, Any, List, Optional
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelAi import AiCallOptions
from modules.datamodels.datamodelExtraction import ContentPart
logger = logging.getLogger(__name__)
@action
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
- Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
- Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
- Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.
Parameters:
- aiPrompt (str, required): Instruction for the AI describing what processing to perform.
- documentList (list, optional): Document reference(s) in any format to use as input/context.
- resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
"""
try:
# Init progress logger
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
@ -73,8 +54,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
logger.error(f"Invalid documentList type: {type(documentListParam)}")
documentList = DocumentReferenceList(references=[])
resultType = parameters.get("resultType", "txt")
# Optional: if omitted, formats determined from prompt. Default "txt" is validation fallback only.
resultType = parameters.get("resultType")
if not aiPrompt:
logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
@ -82,60 +63,71 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
error="AI prompt is required"
)
# Determine output extension and default MIME type without duplicating service logic
normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
output_extension = f".{normalized_result_type}"
output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
logger.info(f"Using result type: {resultType} -> {output_extension}")
# Handle optional resultType: if None, formats determined from prompt by AI
if resultType:
normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
output_extension = f".{normalized_result_type}"
output_format = output_extension.replace('.', '') or 'txt'
logger.info(f"Using result type: {resultType} -> {output_extension}")
else:
# No format specified - AI will determine formats from prompt
normalized_result_type = None
output_extension = None
output_format = None
logger.debug("resultType not provided - formats will be determined from prompt by AI")
# Check if contentParts are already provided (from context.extractContent or other sources)
output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
# Phase 7.3: Pass both documentList and contentParts to AI service
# (Extraction logic removed - handled by AI service)
contentParts: Optional[List[ContentPart]] = None
if "contentParts" in parameters:
contentParts = parameters.get("contentParts")
if contentParts and not isinstance(contentParts, list):
# Try to extract from ContentExtracted if it's an ActionDocument
if hasattr(contentParts, 'parts'):
contentParts = contentParts.parts
contentPartsParam = parameters.get("contentParts")
if contentPartsParam:
if isinstance(contentPartsParam, list):
contentParts = contentPartsParam
elif hasattr(contentPartsParam, 'parts'):
# Extract from ContentExtracted if it's an ActionDocument
contentParts = contentPartsParam.parts
else:
logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
logger.warning(f"Invalid contentParts type: {type(contentPartsParam)}, treating as empty")
contentParts = None
# Update progress - preparing AI call
self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
# Build options
output_format = output_extension.replace('.', '') or 'txt'
# Detect image generation from resultType (if provided)
imageFormats = ["png", "jpg", "jpeg", "gif", "webp"]
isImageGeneration = normalized_result_type in imageFormats if normalized_result_type else False
# Build options with correct operationType
from modules.datamodels.datamodelAi import OperationTypeEnum
# resultFormat in options can be None - formats will be determined by AI if not provided
options = AiCallOptions(
resultFormat=output_format
resultFormat=output_format, # Can be None - formats determined by AI
operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE
)
# Get generationIntent from parameters (required for DATA_GENERATE)
# Default to "document" if not provided (most common use case)
# For code generation, use ai.generateCode action or explicitly pass generationIntent="code"
generationIntent = parameters.get("generationIntent", "document")
# Update progress - calling AI
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
# Use unified callAiContent method
# If contentParts provided (pre-extracted), use them directly
# Otherwise, pass documentList and let callAiContent handle Phases 5A-5E internally
# Note: ContentExtracted documents (from context.extractContent) are now handled
# automatically in _extractAndPrepareContent() (Phase 5B)
if contentParts:
# Pre-extracted ContentParts - use them directly
aiResponse = await self.services.ai.callAiContent(
prompt=aiPrompt,
options=options,
contentParts=contentParts, # Pre-extracted ContentParts
outputFormat=output_format,
parentOperationId=operationId
)
else:
# Pass documentList - callAiContent handles Phases 5A-5E internally
# This includes automatic detection of ContentExtracted documents
aiResponse = await self.services.ai.callAiContent(
prompt=aiPrompt,
options=options,
documentList=documentList, # callAiContent macht Phasen 5A-5E
outputFormat=output_format,
parentOperationId=operationId
)
# Use unified callAiContent method with BOTH documentList and contentParts
# Extraction is handled by AI service - no extraction here
# outputFormat: Optional - if None, formats determined from prompt by AI
aiResponse = await self.services.ai.callAiContent(
prompt=aiPrompt,
options=options,
documentList=documentList, # Pass documentList - AI service handles extraction
contentParts=contentParts, # Pass contentParts if provided (or None)
outputFormat=output_format, # Can be None - AI determines from prompt
parentOperationId=operationId,
generationIntent=generationIntent # REQUIRED for DATA_GENERATE
)
# Update progress - processing result
self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")
@ -162,7 +154,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
final_documents = action_documents
else:
# Text response - create document from content
extension = output_extension.lstrip('.')
# If no extension provided, use "txt" (required for filename)
extension = output_extension.lstrip('.') if output_extension else "txt"
meaningful_name = self._generateMeaningfulFileName(
base_name="ai",
extension=extension,
@ -170,8 +163,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
)
validationMetadata = {
"actionType": "ai.process",
"resultType": normalized_result_type,
"outputFormat": output_format,
"resultType": normalized_result_type if normalized_result_type else None,
"outputFormat": output_format if output_format else None,
"hasDocuments": False,
"contentType": "text"
}

View file

@ -1,32 +1,13 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Summarize Document action for AI operations.
Summarizes one or more documents, extracting key points and main ideas.
"""
import logging
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult
logger = logging.getLogger(__name__)
@action
async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Summarize one or more documents, extracting key points and main ideas.
- Input requirements: documentList (required); optional summaryLength, focus.
- Output format: Text document with summary (default: txt, can be overridden with resultType).
Parameters:
- documentList (list, required): Document reference(s) to summarize.
- summaryLength (str, optional): Desired summary length - brief, medium, or detailed. Default: medium.
- focus (str, optional): Specific aspect to focus on in the summary (e.g., "financial data", "key decisions").
- resultType (str, optional): Output file extension (txt, md, docx, etc.). Default: txt.
"""
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
@ -50,6 +31,7 @@ async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
return await self.process({
"aiPrompt": aiPrompt,
"documentList": documentList,
"resultType": resultType
"resultType": resultType,
"generationIntent": "document" # NEW: Explicit intent
})

View file

@ -1,33 +1,13 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Translate Document action for AI operations.
Translates documents to a target language while preserving formatting and structure.
"""
import logging
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult
logger = logging.getLogger(__name__)
@action
async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Translate documents to a target language while preserving formatting and structure.
- Input requirements: documentList (required); targetLanguage (required).
- Output format: Translated document in same format as input (default) or specified resultType.
Parameters:
- documentList (list, required): Document reference(s) to translate.
- targetLanguage (str, required): Target language code or name (e.g., "de", "German", "French", "es").
- sourceLanguage (str, optional): Source language if known (e.g., "en", "English"). If not provided, AI will detect.
- preserveFormatting (bool, optional): Whether to preserve original formatting. Default: True.
- resultType (str, optional): Output file extension. If not specified, uses same format as input.
"""
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
@ -51,7 +31,8 @@ async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
processParams = {
"aiPrompt": aiPrompt,
"documentList": documentList
"documentList": documentList,
"generationIntent": "document" # NEW: Explicit intent
}
if resultType:
processParams["resultType"] = resultType

View file

@ -1,35 +1,15 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Web Research action for AI operations.
Web research with two-step process: search for URLs, then crawl content.
"""
import logging
import time
import re
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Web research with two-step process: search for URLs, then crawl content.
- Input requirements: prompt (required); optional list(url), country, language, researchDepth.
- Output format: JSON with research results including URLs and content.
Parameters:
- prompt (str, required): Natural language research instruction.
- urlList (list, optional): Specific URLs to crawl, if needed.
- country (str, optional): Two-digit country code (lowercase, e.g., ch, us, de).
- language (str, optional): Language code (lowercase, e.g., de, en, fr).
- researchDepth (str, optional): Research depth - fast, general, or deep. Default: general.
"""
try:
prompt = parameters.get("prompt")
if not prompt:

View file

@ -17,6 +17,7 @@ from .actions.summarizeDocument import summarizeDocument
from .actions.translateDocument import translateDocument
from .actions.convertDocument import convertDocument
from .actions.generateDocument import generateDocument
from .actions.generateCode import generateCode
logger = logging.getLogger(__name__)
@ -35,7 +36,8 @@ class MethodAi(MethodBase):
self._actions = {
"process": WorkflowActionDefinition(
actionId="ai.process",
description="Universal AI document processing action - accepts multiple input documents in any format and processes them together with a prompt",
description="Universal AI document processing action - accepts multiple input documents in any format and processes them together with a prompt. If the prompt specifies document formats to deliver, include them in the prompt",
dynamicMode=True,
parameters={
"aiPrompt": WorkflowActionParameter(
name="aiPrompt",
@ -58,14 +60,31 @@ class MethodAi(MethodBase):
frontendOptions=["txt", "json", "md", "csv", "xml", "html", "pdf", "docx", "xlsx", "pptx", "png", "jpg"],
required=False,
default="txt",
description="Output file extension. All output documents will use this format"
)
description="Output file extension. Optional: if omitted, formats are determined from prompt by AI. Default \"txt\" is validation fallback only. With per-document format determination, AI can determine different formats for different documents based on prompt."
),
"generationIntent": WorkflowActionParameter(
name="generationIntent",
type="str",
frontendType=FrontendType.SELECT,
frontendOptions=["document", "code", "image"],
required=False,
default="document",
description="Explicit generation intent (\"document\" | \"code\" | \"image\"). Required for DATA_GENERATE operations. Defaults to \"document\" if not provided. For code generation, use ai.generateCode action or explicitly pass generationIntent=\"code\". For IMAGE_GENERATE operations, this parameter is ignored."
),
"contentParts": WorkflowActionParameter(
name="contentParts",
type="List[ContentPart]",
frontendType=FrontendType.HIDDEN,
required=False,
description="Pre-extracted content parts (internal parameter, typically passed between actions). If provided, these will be used instead of extracting from documentList. Can be a list of ContentPart objects or an object with a 'parts' attribute."
),
},
execute=process.__get__(self, self.__class__)
),
"webResearch": WorkflowActionDefinition(
actionId="ai.webResearch",
description="Web research with two-step process: search for URLs, then crawl content",
dynamicMode=True,
parameters={
"prompt": WorkflowActionParameter(
name="prompt",
@ -110,7 +129,8 @@ class MethodAi(MethodBase):
),
"summarizeDocument": WorkflowActionDefinition(
actionId="ai.summarizeDocument",
description="Summarize one or more documents, extracting key points and main ideas",
description="Summarize one or more documents, extracting key points and main ideas. If the prompt specifies document formats to deliver, include them in the prompt",
dynamicMode=True,
parameters={
"documentList": WorkflowActionParameter(
name="documentList",
@ -150,6 +170,7 @@ class MethodAi(MethodBase):
"translateDocument": WorkflowActionDefinition(
actionId="ai.translateDocument",
description="Translate documents to a target language while preserving formatting and structure",
dynamicMode=True,
parameters={
"documentList": WorkflowActionParameter(
name="documentList",
@ -193,6 +214,7 @@ class MethodAi(MethodBase):
"convertDocument": WorkflowActionDefinition(
actionId="ai.convertDocument",
description="Convert documents between different formats (PDF→Word, Excel→CSV, etc.)",
dynamicMode=True,
parameters={
"documentList": WorkflowActionParameter(
name="documentList",
@ -222,7 +244,8 @@ class MethodAi(MethodBase):
),
"generateDocument": WorkflowActionDefinition(
actionId="ai.generateDocument",
description="Generate documents from scratch or based on templates/inputs",
description="Generate documents from scratch or based on templates/inputs. If the prompt specifies document formats to deliver, include them in the prompt",
dynamicMode=True,
parameters={
"prompt": WorkflowActionParameter(
name="prompt",
@ -252,10 +275,40 @@ class MethodAi(MethodBase):
frontendType=FrontendType.TEXT,
required=False,
default="txt",
description="Output format (e.g., txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg). Any format supported by renderers is acceptable. Default: txt"
description="Output format (e.g., txt, html, pdf, docx, md, json, csv, xlsx, pptx, png, jpg). Optional: if omitted, formats are determined from prompt by AI. Default \"txt\" is validation fallback only. With per-document format determination, AI can determine different formats for different documents based on prompt."
)
},
execute=generateDocument.__get__(self, self.__class__)
),
"generateCode": WorkflowActionDefinition(
actionId="ai.generateCode",
description="Generate code files - explicitly sets intent to 'code'. If the prompt specifies file formats to deliver, include them in the prompt",
dynamicMode=True,
parameters={
"prompt": WorkflowActionParameter(
name="prompt",
type="str",
frontendType=FrontendType.TEXTAREA,
required=True,
description="Description of code to generate"
),
"documentList": WorkflowActionParameter(
name="documentList",
type="List[str]",
frontendType=FrontendType.DOCUMENT_REFERENCE,
required=False,
description="Reference documents"
),
"resultType": WorkflowActionParameter(
name="resultType",
type="str",
frontendType=FrontendType.SELECT,
frontendOptions=["py", "js", "ts", "html", "java", "cpp", "txt"],
required=False,
description="Output format (html, js, py, etc.). Optional: if omitted, formats are determined from prompt by AI. With per-document format determination, AI can determine different formats for different documents based on prompt."
)
},
execute=generateCode.__get__(self, self.__class__)
)
}
@ -269,6 +322,7 @@ class MethodAi(MethodBase):
self.translateDocument = translateDocument.__get__(self, self.__class__)
self.convertDocument = convertDocument.__get__(self, self.__class__)
self.generateDocument = generateDocument.__get__(self, self.__class__)
self.generateCode = generateCode.__get__(self, self.__class__)
def _format_timestamp_for_filename(self) -> str:
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""

View file

@ -1,460 +0,0 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Context and workflow information method module.
Handles workflow context queries and document indexing.
"""
import time
import json
import logging
import aiohttp
from typing import Dict, Any, List
from datetime import datetime, UTC
from modules.workflows.methods.methodBase import MethodBase, action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
class MethodContext(MethodBase):
"""Context and workflow information methods."""
def __init__(self, services):
    """Run the base-class initialiser with *services*, then set this method group's name and description."""
    super().__init__(services)
    # Identifier and human-readable label of this method group
    self.name = "context"
    self.description = "Context and workflow information methods"
@action
async def getDocumentIndex(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Generate a comprehensive index of all documents available in the current workflow, including documents from all rounds and tasks.
    - Input requirements: No input documents required. Optional resultType parameter.
    - Output format: Structured document index in JSON format (default) or text format, listing all documents with their references, metadata, and organization by rounds/tasks.
    Parameters:
    - resultType (str, optional): Output format (json, txt, md). Default: json. Unsupported values fall back to json.
    """
    try:
        workflow = self.services.workflow
        if not workflow:
            return ActionResult.isFailure(
                error="No workflow available"
            )

        # "or" also covers an explicit None, which would otherwise fail on .lower()
        resultType = str(parameters.get("resultType") or "json").lower().strip().lstrip('.')
        if resultType not in ("json", "txt", "md"):
            # Use one format for content, filename and MIME type instead of mixing them
            logger.warning(f"Unsupported resultType '{resultType}' for document index, falling back to json")
            resultType = "json"

        # Get available documents index from chat service
        documentsIndex = self.services.chat.getAvailableDocuments(workflow)
        noDocumentsMarkers = (
            "No documents available",
            "NO DOCUMENTS AVAILABLE - This workflow has no documents to process."
        )

        if not documentsIndex or documentsIndex in noDocumentsMarkers:
            # Always build the empty structure: validationMetadata below reads it
            # for every result type, not only for json.
            indexData = {
                "workflowId": getattr(workflow, 'id', 'unknown'),
                "totalDocuments": 0,
                "rounds": [],
                "documentReferences": []
            }
            if resultType == "json":
                indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
            else:
                indexContent = "Document Index\n==============\n\nNo documents available in this workflow.\n"
        else:
            # Parse the document index string to extract structured information
            indexData = self._parseDocumentIndex(documentsIndex, workflow)
            if resultType == "json":
                indexContent = json.dumps(indexData, indent=2, ensure_ascii=False)
            elif resultType == "md":
                indexContent = self._formatAsMarkdown(indexData)
            else:  # txt
                indexContent = self._formatAsText(indexData, documentsIndex)

        # Generate meaningful filename
        workflowContext = self.services.chat.getWorkflowContext()
        filename = self._generateMeaningfulFileName(
            "document_index",
            resultType,
            workflowContext,
            "getDocumentIndex"
        )

        validationMetadata = {
            "actionType": "context.getDocumentIndex",
            "resultType": resultType,
            "workflowId": getattr(workflow, 'id', 'unknown'),
            "totalDocuments": indexData.get("totalDocuments", 0) if isinstance(indexData, dict) else 0
        }

        # Create ActionDocument
        document = ActionDocument(
            documentName=filename,
            documentData=indexContent,
            mimeType="application/json" if resultType == "json" else "text/plain",
            validationMetadata=validationMetadata
        )
        return ActionResult.isSuccess(documents=[document])
    except Exception as e:
        # Keep the traceback in the log; the caller receives a failure result
        logger.exception(f"Error generating document index: {str(e)}")
        return ActionResult.isFailure(
            error=f"Failed to generate document index: {str(e)}"
        )
def _parseDocumentIndex(self, documentsIndex: str, workflow: Any) -> Dict[str, Any]:
"""Parse the document index string into structured data."""
try:
indexData = {
"workflowId": getattr(workflow, 'id', 'unknown'),
"generatedAt": datetime.now(UTC).isoformat(),
"totalDocuments": 0,
"rounds": [],
"documentReferences": []
}
# Extract document references from the index string
lines = documentsIndex.split('\n')
currentRound = None
currentDocList = None
for line in lines:
line = line.strip()
if not line:
continue
# Check for round headers
if "Current round documents:" in line:
currentRound = "current"
continue
elif "Past rounds documents:" in line:
currentRound = "past"
continue
# Check for document list references (docList:...)
if line.startswith("- docList:"):
docListRef = line.replace("- docList:", "").strip()
currentDocList = {
"reference": docListRef,
"round": currentRound,
"documents": []
}
indexData["rounds"].append(currentDocList)
continue
# Check for individual document references (docItem:...)
if line.startswith(" - docItem:") or line.startswith("- docItem:"):
docItemRef = line.replace(" - docItem:", "").replace("- docItem:", "").strip()
indexData["documentReferences"].append({
"reference": docItemRef,
"round": currentRound,
"docList": currentDocList["reference"] if currentDocList else None
})
indexData["totalDocuments"] += 1
if currentDocList:
currentDocList["documents"].append(docItemRef)
return indexData
except Exception as e:
logger.error(f"Error parsing document index: {str(e)}")
return {
"workflowId": getattr(workflow, 'id', 'unknown'),
"error": f"Failed to parse document index: {str(e)}",
"rawIndex": documentsIndex
}
def _formatAsMarkdown(self, indexData: Dict[str, Any]) -> str:
    """Render the structured document index as a Markdown document.

    Args:
        indexData: Dict read through the keys ``workflowId``, ``generatedAt``,
            ``totalDocuments``, ``rounds`` (list of dicts with ``round``,
            ``reference``, ``documents``) and ``documentReferences`` (list of
            dicts with ``reference``). Missing keys fall back to
            "unknown" / 0 / no section.

    Returns:
        The Markdown text. If rendering raises, a short Markdown document
        containing the error message is returned instead of propagating.
    """
    try:
        parts = [
            "# Document Index\n\n",
            f"**Workflow ID:** {indexData.get('workflowId', 'unknown')}\n\n",
            f"**Generated At:** {indexData.get('generatedAt', 'unknown')}\n\n",
            f"**Total Documents:** {indexData.get('totalDocuments', 0)}\n\n",
        ]
        rounds = indexData.get('rounds')
        if rounds:
            parts.append("## Documents by Round\n\n")
            for roundInfo in rounds:
                # The 'round' key can be present with value None (a list seen
                # before any round header); .get()'s default does not cover
                # that case, so fall back explicitly.
                roundLabel = (roundInfo.get('round') or 'unknown').title()
                parts.append(f"### {roundLabel} Round\n\n")
                parts.append(f"**Document List:** `{roundInfo.get('reference', 'unknown')}`\n\n")
                documents = roundInfo.get('documents')
                if documents:
                    parts.append("**Documents:**\n\n")
                    parts.extend(f"- `{docRef}`\n" for docRef in documents)
                    parts.append("\n")
        references = indexData.get('documentReferences')
        if references:
            parts.append("## All Document References\n\n")
            parts.extend(f"- `{docRef.get('reference', 'unknown')}`\n" for docRef in references)
        return "".join(parts)
    except Exception as e:
        logger.error(f"Error formatting as Markdown: {str(e)}")
        return f"# Document Index\n\nError formatting index: {str(e)}\n"
def _formatAsText(self, indexData: Dict[str, Any], rawIndex: str) -> str:
    """Render the document index as a plain-text report.

    Args:
        indexData: Dict read through the keys ``workflowId``, ``generatedAt``
            and ``totalDocuments``; missing keys fall back to "unknown" / 0.
        rawIndex: Original index text, appended unchanged after the summary.

    Returns:
        A title, a 50-character "=" rule, the three summary lines and the raw
        index. If rendering raises, a fallback text containing the error
        message and the raw index is returned instead of propagating.
    """
    try:
        banner = "Document Index\n" + "=" * 50 + "\n\n"
        summary = (
            f"Workflow ID: {indexData.get('workflowId', 'unknown')}\n"
            f"Generated At: {indexData.get('generatedAt', 'unknown')}\n"
            f"Total Documents: {indexData.get('totalDocuments', 0)}\n\n"
        )
        # The raw index is already laid out for reading, so it is appended as is.
        return banner + summary + rawIndex
    except Exception as formatError:
        logger.error(f"Error formatting as text: {str(formatError)}")
        return f"Document Index\n\nError formatting index: {str(formatError)}\n\nRaw index:\n{rawIndex}\n"
@action
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Extract content from documents (separate from AI calls).
    This action performs pure content extraction without AI processing.
    The extracted ContentParts can then be used by subsequent AI processing actions.
    Parameters:
    - documentList (list, required): Document reference(s) to extract content from.
      A DocumentReferenceList, a single reference string, or a list of reference strings is accepted.
    - extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).
      A dict or an ExtractionOptions instance is accepted; any other type falls back to the defaults.
    - parentOperationId (optional): Forwarded to the progress log when the operation is started.
    Returns:
    - ActionResult with ActionDocument containing ContentExtracted objects
    - ContentExtracted.parts contains List[ContentPart] (already chunked if needed)
    - A failure ActionResult when documentList is missing or of an unsupported type,
      when no documents are found, or when any step raises (the exception text is the error).
    """
    # Bound before the try block so the failure handler can tell whether an
    # operation id was ever created.
    operationId = None
    try:
        # Init progress logger
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"context_extract_{workflowId}_{int(time.time())}"
        # Extract documentList from parameters dict
        from modules.datamodels.datamodelDocref import DocumentReferenceList
        documentListParam = parameters.get("documentList")
        if not documentListParam:
            return ActionResult.isFailure(error="documentList is required")
        # Convert to DocumentReferenceList if needed
        if isinstance(documentListParam, DocumentReferenceList):
            documentList = documentListParam
        elif isinstance(documentListParam, str):
            documentList = DocumentReferenceList.from_string_list([documentListParam])
        elif isinstance(documentListParam, list):
            documentList = DocumentReferenceList.from_string_list(documentListParam)
        else:
            return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
        # Start progress tracking
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Extracting content from documents",
            "Content Extraction",
            f"Documents: {len(documentList.references)}",
            parentOperationId=parentOperationId
        )
        # Get ChatDocuments from documentList
        self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents")
        chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
        if not chatDocuments:
            self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error="No documents found in documentList")
        logger.info(f"Extracting content from {len(chatDocuments)} documents")
        # Prepare extraction options
        self.services.chat.progressLogUpdate(operationId, 0.3, "Preparing extraction options")
        extractionOptionsParam = parameters.get("extractionOptions")
        # Convert dict to ExtractionOptions object if needed; anything that is
        # neither a dict nor an ExtractionOptions is ignored in favour of defaults.
        if isinstance(extractionOptionsParam, dict) and extractionOptionsParam:
            extractionOptions = ExtractionOptions(**extractionOptionsParam)
        elif isinstance(extractionOptionsParam, ExtractionOptions) and extractionOptionsParam:
            extractionOptions = extractionOptionsParam
        else:
            extractionOptions = None
        if not extractionOptions:
            # Default extraction options for pure content extraction (no AI processing)
            extractionOptions = ExtractionOptions(
                prompt="Extract all content from the document",
                mergeStrategy=MergeStrategy(
                    mergeType="concatenate",
                    groupBy="typeGroup",
                    orderBy="id"
                ),
                processDocumentsIndividually=True
            )
        # Call extraction service with hierarchical progress logging
        self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
        self.services.chat.progressLogUpdate(operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents")
        # Pass operationId for hierarchical per-document progress logging
        extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId)
        # Build ActionDocuments from ContentExtracted results
        self.services.chat.progressLogUpdate(operationId, 0.8, "Building result documents")
        actionDocuments = []
        # Results are matched to the input documents by position.
        # NOTE(review): this assumes the extraction service returns results in input order - confirm.
        for i, extracted in enumerate(extractedResults):
            originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
            fileName = getattr(originalDoc, 'fileName', None) if originalDoc else None
            if fileName:
                # Original name without its last extension, plus an "_extracted_<id>" suffix
                baseName = fileName.rsplit('.', 1)[0]
                documentName = f"{baseName}_extracted_{extracted.id}.json"
            else:
                # Fallback to generic name with 1-based, zero-padded index
                documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"
            validationMetadata = {
                "actionType": "context.extractContent",
                "documentIndex": i,
                "extractedId": extracted.id,
                "partCount": len(extracted.parts) if extracted.parts else 0,
                "originalFileName": fileName
            }
            actionDocuments.append(ActionDocument(
                documentName=documentName,
                documentData=extracted,  # ContentExtracted object
                mimeType="application/json",
                validationMetadata=validationMetadata
            ))
        self.services.chat.progressLogFinish(operationId, True)
        return ActionResult.isSuccess(documents=actionDocuments)
    except Exception as e:
        logger.error(f"Error in content extraction: {str(e)}")
        # Complete progress tracking with failure (best effort)
        if operationId is not None:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception as progressError:
                # Don't fail on progress logging errors, but do not swallow
                # KeyboardInterrupt/SystemExit/cancellation either.
                logger.debug(f"Could not finish progress log for {operationId}: {str(progressError)}")
        return ActionResult.isFailure(error=str(e))
@action
async def triggerPreprocessingServer(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    POST a configuration payload to a preprocessing server endpoint.
    The request carries the payload as JSON and authenticates with the "X-PP-API-Key" header,
    whose value is looked up in APP_CONFIG under the key named by authSecretConfigKey.
    HTTP status 200 and 201 count as success; every other status is reported as a failure.
    Parameters:
    - endpoint (str, required): The full URL endpoint for the preprocessing server API.
    - configJson (dict or str, required): Configuration to send. A str is parsed as JSON first.
    - authSecretConfigKey (str, required): The APP_CONFIG key name holding the authorization secret.
    Returns:
    - ActionResult with one text/plain ActionDocument containing "ok" on success,
      or a failure ActionResult with an error message (missing parameter, invalid JSON,
      missing secret, non-success HTTP status, or any raised exception).
    """
    try:
        # Required parameters are checked in this order; the first missing one is reported.
        for requiredName in ("endpoint", "configJson", "authSecretConfigKey"):
            if not parameters.get(requiredName):
                return ActionResult.isFailure(error=f"{requiredName} parameter is required")
        endpoint = parameters.get("endpoint")
        rawConfig = parameters.get("configJson")
        secretKeyName = parameters.get("authSecretConfigKey")

        # Normalize the payload: JSON text is parsed, a dict is used as is.
        if isinstance(rawConfig, str):
            try:
                payload = json.loads(rawConfig)
            except json.JSONDecodeError as e:
                return ActionResult.isFailure(error=f"configJson is not valid JSON: {str(e)}")
        elif isinstance(rawConfig, dict):
            payload = rawConfig
        else:
            return ActionResult.isFailure(error=f"configJson must be a dict or JSON string, got {type(rawConfig)}")

        # Resolve the authorization secret through the caller-supplied config key.
        authSecret = APP_CONFIG.get(secretKeyName)
        if not authSecret:
            missingSecretMsg = f"{secretKeyName} not found in APP_CONFIG"
            logger.error(missingSecretMsg)
            return ActionResult.isFailure(error=missingSecretMsg)

        requestHeaders = {
            "X-PP-API-Key": authSecret,
            "Content-Type": "application/json"
        }
        # The whole request is bounded by a 60 second total timeout.
        sessionTimeout = aiohttp.ClientTimeout(total=60)
        async with aiohttp.ClientSession(timeout=sessionTimeout) as session:
            async with session.post(endpoint, headers=requestHeaders, json=payload) as response:
                bodyText = await response.text()
                if response.status not in (200, 201):
                    failureMsg = f"Preprocessing server trigger failed: {response.status} - {bodyText}"
                    logger.error(failureMsg)
                    return ActionResult.isFailure(error=failureMsg)

                logger.info(f"Preprocessing server trigger successful: {response.status}")
                logger.debug(f"Response: {bodyText}")
                # The workflow context is only available when a chat service is attached.
                chatAvailable = hasattr(self.services, 'chat')
                workflowContext = self.services.chat.getWorkflowContext() if chatAvailable else None
                filename = self._generateMeaningfulFileName(
                    "preprocessing_result",
                    "txt",
                    workflowContext,
                    "triggerPreprocessingServer"
                )
                validationMetadata = self._createValidationMetadata(
                    "triggerPreprocessingServer",
                    endpoint=endpoint,
                    statusCode=response.status,
                    responseText=bodyText
                )
                # The document body is the fixed marker "ok"; the server response
                # itself is kept in the validation metadata.
                okDocument = ActionDocument(
                    documentName=filename,
                    documentData="ok",
                    mimeType="text/plain",
                    validationMetadata=validationMetadata
                )
                return ActionResult.isSuccess(documents=[okDocument])
    except Exception as e:
        errorMsg = f"Error triggering preprocessing server: {str(e)}"
        logger.error(errorMsg)
        return ActionResult.isFailure(error=errorMsg)

View file

@ -1,49 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Extract Content action for Context operations.
Extracts content from documents (separate from AI calls).
"""
import logging
import time
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentExtracted, ContentPart
logger = logging.getLogger(__name__)
@action
async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Extract raw content parts from documents without AI processing.
This action performs pure content extraction WITHOUT AI/OCR processing.
It returns ContentParts with different typeGroups:
- "text": Extracted text from text-based formats (PDF text layers, Word docs, etc.)
- "image": Images as base64-encoded data (NOT converted to text, no OCR)
- "table": Tables as structured data
- "structure": Structured content (JSON, etc.)
- "container": Container elements (PDF pages, etc.)
IMPORTANT:
- Images are returned as base64 data, NOT as extracted text
- No OCR is performed - images are preserved as visual elements
- Text extraction only works for text-based formats (not images)
- The extracted ContentParts can then be used by subsequent AI processing actions
Parameters:
- documentList (list, required): Document reference(s) to extract content from.
- extractionOptions (dict, optional): Extraction options (if not provided, defaults are used).
Returns:
- ActionResult with ActionDocument containing ContentExtracted objects
- ContentExtracted.parts contains List[ContentPart] with various typeGroups
- Each ContentPart has a typeGroup indicating its type (text, image, table, etc.)
"""
try:
# Init progress logger
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"

View file

@ -1,30 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Get Document Index action for Context operations.
Generates a comprehensive index of all documents available in the current workflow.
"""
import logging
import json
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def getDocumentIndex(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Generate a comprehensive index of all documents available in the current workflow, including documents from all rounds and tasks.
- Input requirements: No input documents required. Optional resultType parameter.
- Output format: Structured document index in JSON format (default) or text format, listing all documents with their references, metadata, and organization by rounds/tasks.
Parameters:
- resultType (str, optional): Output format (json, txt, md). Default: json.
"""
try:
workflow = self.services.workflow
if not workflow:

View file

@ -1,35 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Neutralize Data action for Context operations.
Neutralizes extracted content data from ContentExtracted documents.
"""
import logging
import time
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
logger = logging.getLogger(__name__)
@action
async def neutralizeData(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Neutralize data from ContentExtracted documents.
This action takes documents containing ContentExtracted objects (from extractContent)
and neutralizes the text data in ContentPart.data fields.
Parameters:
- documentList (list, required): Document reference(s) containing ContentExtracted objects.
Returns:
- ActionResult with ActionDocument containing neutralized ContentExtracted objects
"""
try:
# Init progress logger
workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"

View file

@ -1,37 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Trigger Preprocessing Server action for Context operations.
Triggers preprocessing server at customer tenant to update database with configuration.
"""
import logging
import json
import aiohttp
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
@action
async def triggerPreprocessingServer(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Trigger preprocessing server at customer tenant to update database with configuration.
This action makes a POST request to the preprocessing server endpoint with the provided
configuration JSON. The authorization secret is retrieved from APP_CONFIG using the provided config key.
Parameters:
- endpoint (str, required): The full URL endpoint for the preprocessing server API.
- configJson (dict or str, required): Configuration JSON object to send to the preprocessing server. Can be provided as a dict or as a JSON string that will be parsed.
- authSecretConfigKey (str, required): The APP_CONFIG key name to retrieve the authorization secret from.
Returns:
- ActionResult with ActionDocument containing "ok" on success, or error message on failure.
"""
try:
endpoint = parameters.get("endpoint")
if not endpoint:

View file

@ -35,6 +35,7 @@ class MethodContext(MethodBase):
"getDocumentIndex": WorkflowActionDefinition(
actionId="context.getDocumentIndex",
description="Generate a comprehensive index of all documents available in the current workflow",
dynamicMode=True,
parameters={
"resultType": WorkflowActionParameter(
name="resultType",
@ -51,6 +52,7 @@ class MethodContext(MethodBase):
"extractContent": WorkflowActionDefinition(
actionId="context.extractContent",
description="Extract raw content parts from documents without AI processing. Returns ContentParts with different typeGroups (text, image, table, structure, container). Images are returned as base64 data, not as extracted text. Text content is extracted from text-based formats (PDF text layers, Word docs, etc.) but NOT from images (no OCR). Use this action to prepare documents for subsequent AI processing actions.",
dynamicMode=True,
parameters={
"documentList": WorkflowActionParameter(
name="documentList",

File diff suppressed because it is too large Load diff

View file

@ -1,37 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Connect JIRA action for JIRA operations.
Connects to JIRA instance and creates ticket interface.
"""
import logging
import json
import uuid
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
@action
async def connectJira(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Connect to JIRA instance and create ticket interface.
Parameters:
- apiUsername (str, required): JIRA API username/email
- apiTokenConfigKey (str, required): APP_CONFIG key name for JIRA API token
- apiUrl (str, required): JIRA instance URL (e.g., https://example.atlassian.net)
- projectCode (str, required): JIRA project code (e.g., "DCS")
- issueType (str, required): JIRA issue type (e.g., "Task")
- taskSyncDefinition (str or dict, required): Field mapping definition as JSON string or dict
Returns:
- ActionResult with ActionDocument containing connection ID
"""
try:
apiUsername = parameters.get("apiUsername")
if not apiUsername:

View file

@ -1,11 +1,6 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Create CSV Content action for JIRA operations.
Creates CSV content with custom headers.
"""
import logging
import json
import base64
@ -14,25 +9,11 @@ import csv as csv_module
from io import StringIO
from datetime import datetime, UTC
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def createCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Create CSV content with custom headers.
Parameters:
- data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
- headers (str, optional): Document reference containing headers JSON (from parseCsvContent/parseExcelContent)
- columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
- taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
Returns:
- ActionResult with ActionDocument containing CSV content as bytes
"""
try:
dataParam = parameters.get("data")
if not dataParam:

View file

@ -1,11 +1,6 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Create Excel Content action for JIRA operations.
Creates Excel content with custom headers.
"""
import logging
import json
import base64
@ -14,25 +9,11 @@ import csv as csv_module
from io import BytesIO
from datetime import datetime, UTC
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def createExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Create Excel content with custom headers.
Parameters:
- data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
- headers (str, optional): Document reference containing headers JSON (from parseExcelContent)
- columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
- taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)
Returns:
- ActionResult with ActionDocument containing Excel content as bytes
"""
try:
dataParam = parameters.get("data")
if not dataParam:

View file

@ -1,31 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Export Tickets As JSON action for JIRA operations.
Exports tickets from JIRA as JSON list.
"""
import logging
import json
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def exportTicketsAsJson(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Export tickets from JIRA as JSON list.
Parameters:
- connectionId (str, required): Connection ID from connectJira action result
- taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
Returns:
- ActionResult with ActionDocument containing list of tickets as JSON
"""
try:
connectionIdParam = parameters.get("connectionId")
if not connectionIdParam:

View file

@ -1,32 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Import Tickets From JSON action for JIRA operations.
Imports ticket data from JSON back to JIRA.
"""
import logging
import json
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def importTicketsFromJson(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Import ticket data from JSON back to JIRA.
Parameters:
- connectionId (str, required): Connection ID from connectJira action result
- ticketData (str, required): Document reference containing ticket data as JSON
- taskSyncDefinition (str or dict, optional): Field mapping definition (if not provided, uses stored definition)
Returns:
- ActionResult with ActionDocument containing import result with counts
"""
try:
connectionIdParam = parameters.get("connectionId")
if not connectionIdParam:

View file

@ -1,33 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Merge Ticket Data action for JIRA operations.
Merges JIRA export data with existing SharePoint data.
"""
import logging
import json
from typing import Dict, Any, List
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def mergeTicketData(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Merge JIRA export data with existing SharePoint data.
Parameters:
- jiraData (str, required): Document reference containing JIRA ticket data as JSON array
- existingData (str, required): Document reference containing existing SharePoint data as JSON array
- taskSyncDefinition (str or dict, required): Field mapping definition
- idField (str, optional): Field name to use as ID for merging (default: "ID")
Returns:
- ActionResult with ActionDocument containing merged data and merge details
"""
try:
jiraDataParam = parameters.get("jiraData")
if not jiraDataParam:

View file

@ -1,34 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Parse CSV Content action for JIRA operations.
Parses CSV content with custom headers.
"""
import logging
import json
import io
import pandas as pd
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Parse CSV content with custom headers.
Parameters:
- csvContent (str, required): Document reference containing CSV file content as bytes
- skipRows (int, optional): Number of header rows to skip (default: 2)
- hasCustomHeaders (bool, optional): Whether CSV has custom header rows (default: true)
Returns:
- ActionResult with ActionDocument containing parsed data and headers as JSON
"""
try:
csvContentParam = parameters.get("csvContent")
if not csvContentParam:

View file

@ -1,34 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Parse Excel Content action for JIRA operations.
Parses Excel content with custom headers.
"""
import logging
import json
import pandas as pd
from io import BytesIO
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def parseExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Parse Excel content with custom headers.
Parameters:
- excelContent (str, required): Document reference containing Excel file content as bytes
- skipRows (int, optional): Number of header rows to skip (default: 3)
- hasCustomHeaders (bool, optional): Whether Excel has custom header rows (default: true)
Returns:
- ActionResult with ActionDocument containing parsed data and headers as JSON
"""
try:
excelContentParam = parameters.get("excelContent")
if not excelContentParam:

File diff suppressed because it is too large Load diff

View file

@ -1,39 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Compose And Draft Email With Context action for Outlook operations.
Composes email content using AI from context and optional documents, then creates a draft.
"""
import logging
import json
import base64
import requests
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def composeAndDraftEmailWithContext(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Compose email content using AI from context and optional documents, then create a draft.
- Input requirements: connectionReference (required); to (required); context (required); optional documentList, cc, bcc, emailStyle, maxLength.
- Output format: JSON confirmation with AI-generated draft metadata.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- to (list, required): Recipient email addresses.
- context (str, required): Detailed context for composing the email.
- documentList (list, optional): Document references for context/attachments.
- cc (list, optional): CC recipients.
- bcc (list, optional): BCC recipients.
- emailStyle (str, optional): formal | casual | business. Default: business.
- maxLength (int, optional): Maximum length for generated content. Default: 1000.
"""
try:
connectionReference = parameters.get("connectionReference")
to = parameters.get("to")

View file

@ -1,36 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Read Emails action for Outlook operations.
Reads emails and metadata from a mailbox folder.
"""
import logging
import time
import json
import requests
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def readEmails(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Read emails and metadata from a mailbox folder.
- Input requirements: connectionReference (required); optional folder, limit, filter, outputMimeType.
- Output format: JSON with emails and metadata.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- folder (str, optional): Folder to read from. Default: Inbox.
- limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
- filter (str, optional): Sender, query operators, or subject text.
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
"""
operationId = None
try:
# Init progress logger

View file

@ -1,35 +1,15 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Search Emails action for Outlook operations.
Searches emails by query and returns matching items with metadata.
"""
import logging
import json
import requests
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def searchEmails(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Search emails by query and return matching items with metadata.
- Input requirements: connectionReference (required); query (required); optional folder, limit, outputMimeType.
- Output format: JSON with search results and metadata.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- query (str, required): Search expression.
- folder (str, optional): Folder scope or All. Default: All.
- limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
"""
try:
connectionReference = parameters.get("connectionReference")
query = parameters.get("query")

View file

@ -1,33 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Send Draft Email action for Outlook operations.
Sends draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext.
"""
import logging
import time
import json
import requests
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def sendDraftEmail(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Send draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext.
- Input requirements: connectionReference (required); documentList with draft email JSON documents (required).
- Output format: JSON confirmation with sent mail metadata for all emails.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- documentList (list, required): Document reference(s) to draft emails in JSON format (outputs from outlook.composeAndDraftEmailWithContext function).
"""
operationId = None
try:
# Init progress logger

View file

@ -39,6 +39,7 @@ class MethodOutlook(MethodBase):
"readEmails": WorkflowActionDefinition(
actionId="outlook.readEmails",
description="Read emails and metadata from a mailbox folder",
dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@ -87,6 +88,7 @@ class MethodOutlook(MethodBase):
"searchEmails": WorkflowActionDefinition(
actionId="outlook.searchEmails",
description="Search emails by query and return matching items with metadata",
dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@ -135,6 +137,7 @@ class MethodOutlook(MethodBase):
"composeAndDraftEmailWithContext": WorkflowActionDefinition(
actionId="outlook.composeAndDraftEmailWithContext",
description="Compose email content using AI from context and optional documents, then create a draft",
dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@ -202,6 +205,7 @@ class MethodOutlook(MethodBase):
"sendDraftEmail": WorkflowActionDefinition(
actionId="outlook.sendDraftEmail",
description="Send draft email(s) using draft email JSON document(s) from action outlook.composeAndDraftEmailWithContext",
dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",

File diff suppressed because it is too large Load diff

View file

@ -1,36 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Analyze Folder Usage action for SharePoint operations.
Analyzes usage intensity of folders and files in SharePoint.
"""
import logging
import time
import json
from datetime import datetime, timezone, timedelta
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def analyzeFolderUsage(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Analyze usage intensity of folders and files in SharePoint.
- Input requirements: connectionReference (required); documentList (required); optional startDateTime, endDateTime, interval.
- Output format: JSON with usage analytics grouped by time intervals.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- documentList (list, required): Document list reference(s) containing findDocumentPath result.
- startDateTime (str, optional): Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). Default: 30 days ago.
- endDateTime (str, optional): End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). Default: current time.
- interval (str, optional): Time interval for grouping activities. Options: "day", "week", "month". Default: "day".
"""
operationId = None
try:
# Init progress logger

View file

@ -1,35 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Copy File action for SharePoint operations.
Copies file within SharePoint.
"""
import logging
import json
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def copyFile(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Copy file within SharePoint.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
- sourceFolder (str, required): Source folder path relative to site root
- sourceFile (str, required): Source file name
- destFolder (str, required): Destination folder path relative to site root
- destFile (str, required): Destination file name
Returns:
- ActionResult with ActionDocument containing copy result
"""
try:
connectionReference = parameters.get("connectionReference")
if not connectionReference:

View file

@ -1,34 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Download File By Path action for SharePoint operations.
Downloads file from SharePoint by exact file path.
"""
import logging
import json
import base64
import os
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def downloadFileByPath(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Download file from SharePoint by exact file path.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
- filePath (str, required): Full file path relative to site root (e.g., "/General/50 Docs hosted by SELISE/file.xlsx")
Returns:
- ActionResult with ActionDocument containing file content as base64-encoded bytes
"""
try:
connectionReference = parameters.get("connectionReference")
if not connectionReference:

View file

@ -1,35 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Find Document Path action for SharePoint operations.
Finds documents and folders by name/path across SharePoint sites.
"""
import logging
import time
import json
import urllib.parse
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Find documents and folders by name/path across sites.
- Input requirements: connectionReference (required); searchQuery (required); optional site, maxResults.
- Output format: JSON with found items and paths.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- site (str, optional): Site hint.
- searchQuery (str, required): Search terms or path.
- maxResults (int, optional): Maximum items to return. Default: 1000.
"""
operationId = None
try:
# Init progress logger

View file

@ -1,32 +1,14 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Find Site By URL action for SharePoint operations.
Finds SharePoint site by hostname and site path.
"""
import logging
import json
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def findSiteByUrl(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Find SharePoint site by hostname and site path.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- hostname (str, required): SharePoint hostname (e.g., "example.sharepoint.com")
- sitePath (str, required): Site path (e.g., "SteeringBPM" or "/sites/SteeringBPM")
Returns:
- ActionResult with ActionDocument containing site information (id, displayName, name, webUrl)
"""
try:
connectionReference = parameters.get("connectionReference")
if not connectionReference:

View file

@ -1,34 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
List Documents action for SharePoint operations.
Lists documents and folders in SharePoint paths across sites.
"""
import logging
import time
import json
import urllib.parse
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: List documents and folders in SharePoint paths across sites.
- Input requirements: connectionReference (required); documentList (required); includeSubfolders (optional).
- Output format: JSON with folder items and metadata.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- documentList (list, required): Document list reference(s) containing findDocumentPath result.
- includeSubfolders (bool, optional): Include one level of subfolders. Default: False.
"""
operationId = None
try:
# Init progress logger

View file

@ -1,44 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Read Documents action for SharePoint operations.
Reads documents from SharePoint and extracts content/metadata.
"""
import logging
import time
import json
import base64
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Read documents from SharePoint and extract content/metadata.
- Input requirements: connectionReference (required); documentList or pathQuery (required); includeMetadata (optional).
- Output format: Standardized ActionDocument format (documentName, documentData, mimeType).
- Binary files (PDFs, etc.) are Base64-encoded in documentData.
- Text files are stored as plain text in documentData.
- Returns ActionResult with documents list for template processing.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- documentList (list, optional): Document list reference(s) containing findDocumentPath result.
- pathQuery (str, optional): Direct path query if no documentList (e.g., /sites/SiteName/FolderPath).
- includeMetadata (bool, optional): Include metadata. Default: True.
Returns:
- ActionResult with documents: List[ActionDocument] where each ActionDocument contains:
- documentName: File name
- documentData: Base64-encoded content (binary files) or plain text (text files)
- mimeType: MIME type (e.g., application/pdf, text/plain)
"""
operationId = None
try:
# Init progress logger

View file

@ -1,34 +1,16 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Upload Document action for SharePoint operations.
Uploads documents to SharePoint.
"""
import logging
import time
import json
import urllib.parse
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Upload documents to SharePoint. Only to choose this action with a connectionReference
- Input requirements: connectionReference (required); documentList (required); pathQuery (optional).
- Output format: JSON with upload status and file info.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- documentList (list, required): Document reference(s) to upload. File names are taken from the documents.
- pathQuery (str, optional): Direct upload target path if documentList doesn't contain findDocumentPath result (e.g., /sites/SiteName/FolderPath).
"""
operationId = None
try:
# Init progress logger

View file

@ -1,35 +1,15 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Upload File action for SharePoint operations.
Uploads raw file content (bytes) to SharePoint.
"""
import logging
import json
import base64
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def uploadFile(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Upload raw file content (bytes) to SharePoint.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- siteId (str, required): SharePoint site ID (from findSiteByUrl result) or document reference containing site info
- folderPath (str, required): Folder path relative to site root
- fileName (str, required): File name
- content (str, required): Document reference containing file content as base64-encoded bytes
Returns:
- ActionResult with ActionDocument containing upload result
"""
try:
connectionReference = parameters.get("connectionReference")
if not connectionReference:

View file

@ -51,6 +51,7 @@ class MethodSharepoint(MethodBase):
"findDocumentPath": WorkflowActionDefinition(
actionId="sharepoint.findDocumentPath",
description="Find documents and folders by name/path across sites",
dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@ -88,6 +89,7 @@ class MethodSharepoint(MethodBase):
"readDocuments": WorkflowActionDefinition(
actionId="sharepoint.readDocuments",
description="Read documents from SharePoint and extract content/metadata",
dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@ -124,6 +126,7 @@ class MethodSharepoint(MethodBase):
"uploadDocument": WorkflowActionDefinition(
actionId="sharepoint.uploadDocument",
description="Upload documents to SharePoint",
dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@ -152,6 +155,7 @@ class MethodSharepoint(MethodBase):
"listDocuments": WorkflowActionDefinition(
actionId="sharepoint.listDocuments",
description="List documents and folders in SharePoint paths across sites",
dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@ -181,6 +185,7 @@ class MethodSharepoint(MethodBase):
"analyzeFolderUsage": WorkflowActionDefinition(
actionId="sharepoint.analyzeFolderUsage",
description="Analyze usage intensity of folders and files in SharePoint",
dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@ -225,6 +230,7 @@ class MethodSharepoint(MethodBase):
"findSiteByUrl": WorkflowActionDefinition(
actionId="sharepoint.findSiteByUrl",
description="Find SharePoint site by hostname and site path",
dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@ -253,6 +259,7 @@ class MethodSharepoint(MethodBase):
"downloadFileByPath": WorkflowActionDefinition(
actionId="sharepoint.downloadFileByPath",
description="Download file from SharePoint by exact file path",
dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@ -281,6 +288,7 @@ class MethodSharepoint(MethodBase):
"copyFile": WorkflowActionDefinition(
actionId="sharepoint.copyFile",
description="Copy file within SharePoint",
dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",
@ -330,6 +338,7 @@ class MethodSharepoint(MethodBase):
"uploadFile": WorkflowActionDefinition(
actionId="sharepoint.uploadFile",
description="Upload raw file content (bytes) to SharePoint",
dynamicMode=True,
parameters={
"connectionReference": WorkflowActionParameter(
name="connectionReference",

View file

@ -24,7 +24,7 @@ class ContentValidator:
self.services = services
self.learningEngine = learningEngine
async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None, context: Optional[Any] = None) -> Dict[str, Any]:
"""Validates delivered content against user intent using AI (single attempt; parse-or-fail)
Args:
@ -34,8 +34,9 @@ class ContentValidator:
actionName: Optional action name (e.g., "ai.process", "ai.webResearch") that created the documents
actionParameters: Optional action parameters used during execution (e.g., {"columnsPerRow": 10, "researchDepth": "deep"})
actionHistory: Optional list of previously executed actions in the workflow (for multi-step workflow context)
context: Optional context object to access all documents delivered in the current round
"""
return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters, actionHistory)
return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters, actionHistory, context)
def _summarizeJsonStructure(self, jsonData: Any) -> Dict[str, Any]:
"""Summarize JSON document structure for validation - extracts main objects, statistics, captions, and IDs."""
@ -533,13 +534,13 @@ class ContentValidator:
return False
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None, context: Optional[Any] = None) -> Dict[str, Any]:
"""AI-based comprehensive validation - generic approach"""
try:
if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
return self._createFailedValidationResult("AI service not available")
# Use taskStep.objective if available, otherwise fall back to intent.primaryGoal
# Use taskStep.objective if available, otherwise fall back to workflow intent
taskObjective = None
if taskStep and hasattr(taskStep, 'objective'):
taskObjective = taskStep.objective
@ -566,7 +567,9 @@ class ContentValidator:
expectedFormats = intent.get('expectedFormats', [])
# Determine objective text and label
objectiveText = taskObjective if taskObjective else intent.get('primaryGoal', 'Unknown')
workflowIntent = getattr(self.services.workflow, '_workflowIntent', {}) if hasattr(self.services, 'workflow') and self.services.workflow else {}
intentText = workflowIntent.get('intent', 'Unknown')
objectiveText = taskObjective if taskObjective else intentText
objectiveLabel = "TASK OBJECTIVE" if taskObjective else "USER REQUEST"
# Build prompt base WITHOUT document summaries first
@ -636,9 +639,46 @@ class ContentValidator:
actionHistoryContext = f"\n\n=== ACTION HISTORY ===\n" + "\n".join(f"- {entry}" for entry in historyEntries)
actionHistoryContext += "\n\nIMPORTANT: This shows the complete workflow that produced the documents. For process-oriented criteria (e.g., 'internet search performed'), check ACTION HISTORY first. Document metadata may only reflect the LAST action, not the entire workflow."
# Build document index context (all documents delivered in current round)
documentIndexContext = ""
if context and self.services and hasattr(self.services, 'chat') and hasattr(self.services, 'workflow') and self.services.workflow:
try:
documentIndex = self.services.chat.getAvailableDocuments(self.services.workflow)
if documentIndex and documentIndex.strip() and documentIndex != "No documents available":
# Extract only "Current round documents" section if present
lines = documentIndex.split('\n')
currentRoundSection = []
inCurrentRound = False
for line in lines:
if "Current round documents:" in line:
inCurrentRound = True
currentRoundSection.append(line)
elif inCurrentRound:
if line.strip().startswith("- docList:") or line.strip().startswith(" - docItem:") or line.strip().startswith("- docItem:"):
currentRoundSection.append(line)
elif line.strip() == "":
# Empty line is okay, continue
continue
elif "Past rounds documents:" in line or "AVAILABLE_CONNECTIONS_INDEX:" in line:
# End of current round section
break
else:
# Still in current round section
currentRoundSection.append(line)
if currentRoundSection:
documentIndexContext = "\n\n=== ALL DOCUMENTS DELIVERED IN CURRENT ROUND ===\n" + "\n".join(currentRoundSection)
documentIndexContext += "\n\nIMPORTANT: This shows ALL documents that have been delivered in the current round, not just the ones being validated in this step. Use this to check if all required formats/documents are present across the entire round."
except Exception as e:
logger.warning(f"Error extracting document index for validation: {str(e)}")
# Continue without document index - not critical
# Transform criteria that require data access into metadata-only checks
transformedCriteria = self._transformCriteriaForMetadataOnly(successCriteria)
# Format success criteria for display with index numbers
if successCriteria:
criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)])
if transformedCriteria:
criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(transformedCriteria)])
else:
criteriaDisplay = "[]"
@ -647,7 +687,7 @@ class ContentValidator:
=== TASK INFORMATION ===
{objectiveLabel}: '{objectiveText}'
EXPECTED DATA TYPE: {dataType}
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}{actionHistoryContext}
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}{actionHistoryContext}{documentIndexContext}
=== VALIDATION INSTRUCTIONS ===
@ -661,6 +701,7 @@ VALIDATION RULES:
5. PROCESS VALIDATION: Use ACTION HISTORY for process-oriented criteria (e.g., "search performed", "extraction done").
6. ONE CRITERION PER EVALUATION: Evaluate each criterion independently. Do not mention other criteria.
7. NO ASSUMPTIONS: Do NOT assume content was AI-generated vs extracted. If a section exists with content_type, the content was delivered. Only validate what is present in the metadata.
8. DATA-LEVEL CRITERIA TRANSFORMATION: Criteria mentioning accuracy percentages (e.g., "95% accuracy"), completeness percentages (e.g., "98% completeness"), or "all X extracted" have been transformed to metadata-only checks. For accuracy/completeness: Check if contentPartIds reference all source documents and if structure metadata shows expected data types (tables, lists, etc.) exist. For "all X extracted": Check if contentPartIds reference all source documents mentioned in ACTION HISTORY or document index. NEVER attempt to verify accuracy/completeness by comparing actual data values - only use metadata indicators.
VALIDATION STEPS:
- Check ACTION HISTORY for process-oriented criteria
@ -812,6 +853,52 @@ DELIVERED DOCUMENTS ({len(documents)} items):
logger.error(f"AI validation failed: {str(e)}")
raise
def _transformCriteriaForMetadataOnly(self, criteria: List[str]) -> List[str]:
"""
Transform criteria that require data access into metadata-only checks.
Preserves original criterion intent while converting data-level checks to metadata checks.
Examples:
- "95% accuracy" "[METADATA ONLY] Data structure indicates extraction completed (check contentPartIds reference all source documents)"
- "98% completeness" "[METADATA ONLY] All source documents referenced in contentPartIds (verify source count matches)"
- "all transactions extracted" "[METADATA ONLY] All source documents referenced in contentPartIds (verify source count matches)"
"""
if not criteria:
return []
transformed = []
for criterion in criteria:
original = criterion.strip()
transformed_criterion = original
# Pattern: accuracy percentage (e.g., "95% accuracy", "accuracy meets or exceeds 95% threshold")
if re.search(r'\d+%?\s*accuracy|accuracy.*\d+%', original, re.IGNORECASE):
# Extract the main subject (e.g., "transactions", "data", etc.)
subject_match = re.search(r'(transactions?|data|items?|records?|entries?)', original, re.IGNORECASE)
subject = subject_match.group(1).lower() if subject_match else "data"
transformed_criterion = f"[METADATA ONLY] {original}: Check that contentPartIds reference all source documents and jsonStructure shows expected {subject} structure exists (tables/lists with rowCount/itemCount > 0). Cannot verify actual {subject} accuracy values from metadata."
# Pattern: completeness percentage or "all X extracted" (e.g., "98% completeness", "all transactions extracted")
elif re.search(r'\d+%?\s*completeness|completeness.*\d+%|all\s+.*extracted|extract.*all', original, re.IGNORECASE):
# Extract the main subject
subject_match = re.search(r'(transactions?|data|items?|records?|entries?|statements?|documents?)', original, re.IGNORECASE)
subject = subject_match.group(1).lower() if subject_match else "items"
transformed_criterion = f"[METADATA ONLY] {original}: Verify that contentPartIds reference all source documents mentioned in ACTION HISTORY/document index, and jsonStructure shows {subject} structure exists (check rowCount/itemCount in tables/lists). Cannot verify actual {subject} count from metadata."
# Pattern: "no missing data" or "no incorrect data"
elif re.search(r'no\s+missing|no\s+incorrect|no\s+errors?', original, re.IGNORECASE):
transformed_criterion = f"[METADATA ONLY] {original}: Check that jsonStructure.content_type shows expected data types present (tables, lists, etc.) and contentPreview.looksLikeRenderedContent=true. Cannot verify actual data values from metadata."
# Pattern: data accuracy without percentage (e.g., "data is accurate", "accurate data")
elif re.search(r'data.*accurate|accurate.*data', original, re.IGNORECASE) and '%' not in original:
transformed_criterion = f"[METADATA ONLY] {original}: Check that contentPartIds reference source documents and jsonStructure shows expected data structure exists. Cannot verify actual data accuracy values from metadata."
transformed.append(transformed_criterion)
return transformed
def _createFailedValidationResult(self, errorMessage: str) -> Dict[str, Any]:
"""Create a standardized failed validation result"""
return {

View file

@ -28,7 +28,8 @@ class ProgressTracker:
improvementSuggestions = validation.get('improvementSuggestions', [])
# Get task objective from taskIntent (task-level, not workflow-level)
taskObjective = taskIntent.get('taskObjective', taskIntent.get('primaryGoal', 'Unknown'))
# Fallback to 'Unknown' if task objective not available
taskObjective = taskIntent.get('taskObjective', 'Unknown')
# If validation is not schema compliant, treat as indeterminate (do not count as failure)
if not schemaCompliant or overallSuccess is None or qualityScore is None:

View file

@ -64,7 +64,7 @@ class TaskPlanner:
# Use workflowIntent from workflow object (set in workflowManager from userintention analysis)
workflowIntent = getattr(workflow, '_workflowIntent', None)
if workflowIntent and isinstance(workflowIntent, dict):
cleanedObjective = workflowIntent.get('primaryGoal', actualUserPrompt)
cleanedObjective = workflowIntent.get('intent', actualUserPrompt)
else:
# Fallback: use user prompt directly if workflowIntent not available
cleanedObjective = actualUserPrompt

View file

@ -149,21 +149,63 @@ class DynamicMode(BaseMode):
})
# Content validation (against original cleaned user prompt / workflow intent)
if getattr(self, 'workflowIntent', None) and result.documents:
# Pass ALL documents to validator - validator decides what to validate (generic approach)
# Pass taskStep so validator can use task.objective and format fields
# Pass action name so validator knows which action created the documents
# Pass action parameters so validator can verify parameter-specific requirements
# Pass action history so validator can validate process-oriented criteria in multi-step workflows
actionName = selection.get('action', 'unknown')
actionParameters = selection.get('parameters', {})
actionHistory = getattr(context, 'executedActions', None) if hasattr(context, 'executedActions') else None
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName, actionParameters, actionHistory)
observation.contentValidation = validationResult
quality_score = validationResult.get('qualityScore', 0.0)
if quality_score is None:
quality_score = 0.0
logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {quality_score:.2f})")
if getattr(self, 'workflowIntent', None):
# Collect ALL documents from current round, not just from last action
# Start with documents from current action (ActionDocument objects with metadata)
allRoundDocuments = list(result.documents) if result and result.documents else []
# Also collect ChatDocument references from all messages in current round
# These provide document existence info even if we don't have full metadata
if workflow and hasattr(workflow, 'messages') and workflow.messages:
currentRound = getattr(workflow, 'currentRound', 0)
currentTask = getattr(workflow, 'currentTask', 0)
# Collect documents from all messages in current round
for message in workflow.messages:
if hasattr(message, 'documents') and message.documents:
for chatDoc in message.documents:
# Include documents from current round and current task
docRound = getattr(chatDoc, 'roundNumber', None)
docTask = getattr(chatDoc, 'taskNumber', None)
if docRound == currentRound and (docTask is None or docTask == currentTask):
# Avoid duplicates - check if document already in list by fileId
chatDocFileId = getattr(chatDoc, 'fileId', None)
if chatDocFileId:
# Check if we already have this document (by fileId for ChatDocument, by documentName for ActionDocument)
isDuplicate = False
for existingDoc in allRoundDocuments:
existingFileId = getattr(existingDoc, 'fileId', None)
existingDocName = getattr(existingDoc, 'documentName', None)
# Match by fileId or by documentName matching fileName
if (existingFileId == chatDocFileId) or \
(existingDocName and hasattr(chatDoc, 'fileName') and existingDocName == chatDoc.fileName):
isDuplicate = True
break
if not isDuplicate:
allRoundDocuments.append(chatDoc)
# Only validate if we have documents to validate
if allRoundDocuments:
# Pass ALL documents from current round to validator
# Pass taskStep so validator can use task.objective and format fields
# Pass action name so validator knows which action created the documents
# Pass action parameters so validator can verify parameter-specific requirements
# Pass action history so validator can validate process-oriented criteria in multi-step workflows
actionName = selection.get('action', 'unknown')
actionParameters = selection.get('parameters', {})
actionHistory = getattr(context, 'executedActions', None) if hasattr(context, 'executedActions') else None
validationResult = await self.contentValidator.validateContent(allRoundDocuments, self.workflowIntent, taskStep, actionName, actionParameters, actionHistory, context)
else:
# No documents to validate
validationResult = None
if validationResult:
observation.contentValidation = validationResult
quality_score = validationResult.get('qualityScore', 0.0)
if quality_score is None:
quality_score = 0.0
logger.info(f"Content validation: {validationResult.get('overallSuccess', False)} (quality: {quality_score:.2f})")
else:
logger.info("Content validation skipped: no documents to validate")
# NEW: Record validation result for adaptive learning
actionValue = selection.get('action', 'unknown')
@ -194,6 +236,31 @@ class DynamicMode(BaseMode):
if decision: # Only append if decision is not None
context.previousReviewResult.append(decision)
# Send ChatLog message if userMessage is present in refinement response
if decision and decision.userMessage:
try:
currentRound = getattr(workflow, 'currentRound', 0)
currentTask = getattr(workflow, 'currentTask', 0)
messageData = {
"workflowId": workflow.id,
"role": "assistant",
"message": decision.userMessage,
"status": "refinement",
"sequenceNr": len(workflow.messages) + 1,
"publishedAt": self.services.utils.timestampGetUtc(),
"documentsLabel": None,
"documents": [],
"roundNumber": currentRound,
"taskNumber": currentTask,
"actionNumber": step
}
self.services.chat.storeMessageWithDocuments(workflow, messageData, [])
logger.info(f"Sent refinement userMessage to UI: {decision.userMessage[:100]}...")
except Exception as e:
logger.warning(f"Failed to send refinement userMessage to UI: {str(e)}")
# Store next action guidance from decision for use in next iteration
if decision and decision.status == "continue" and decision.nextAction:
# Set nextActionGuidance directly (now defined in TaskContext model)

View file

@ -68,6 +68,52 @@ def extractUserPrompt(context: Any) -> str:
return context.taskStep.objective
return 'No request specified'
def extractNormalizedRequest(services: Any) -> str:
    """Extract normalized user request from services. Maps to {{KEY:NORMALIZED_REQUEST}}.

    Reads ``services.currentUserPromptNormalized`` and returns it unchanged when it is
    set and truthy. As a sanity check the value is compared with the ``intent`` entry of
    ``services.workflow._workflowIntent``; an exact match is logged as a warning and an
    explanatory ``ERROR: ...`` string is returned instead.

    Args:
        services: Service container; may be None. Only ``currentUserPromptNormalized``
            and (optionally) ``workflow._workflowIntent`` are read.

    Returns:
        The stored normalized request, the ``ERROR: ...`` string when it equals the
        intent, or ``'No normalized request specified'`` when nothing is stored or an
        unexpected error occurs.
    """
    fallback = 'No normalized request specified'
    try:
        normalized = getattr(services, 'currentUserPromptNormalized', None) if services else None
        if not normalized:
            return fallback

        # The workflow intent is optional: a missing workflow, a missing attribute, or a
        # value that is None / not a dict must not discard a valid normalized request.
        workflow = getattr(services, 'workflow', None)
        workflowIntent = getattr(workflow, '_workflowIntent', None) if workflow else None
        intent = workflowIntent.get('intent', '') if isinstance(workflowIntent, dict) else ''

        # If normalized matches intent exactly, the wrong value was stored - report it.
        if intent and normalized == intent:
            logger.warning(f"extractNormalizedRequest: normalized request matches intent - this is incorrect! normalized={normalized[:100]}...")
            return f"ERROR: Normalized request not properly stored. Expected detailed request, got intent: {intent}"
        return normalized
    except Exception as e:
        logger.error(f"Error extracting normalized request: {str(e)}")
        return fallback
def extractUserIntent(services: Any) -> str:
    """Extract user intent from services. Maps to {{KEY:USER_INTENT}}.

    Looks at ``services.currentUserPrompt`` first and, when that is missing or empty,
    at ``services.currentUserPromptNormalized``. The first truthy value wins; when
    neither is available (or anything goes wrong) a fixed placeholder is returned.
    """
    notAvailable = 'No intent specified'
    try:
        if not services:
            return notAvailable
        # Preference order: concise intent first, normalized request as fallback.
        for attributeName in ('currentUserPrompt', 'currentUserPromptNormalized'):
            if getattr(services, attributeName, None):
                return getattr(services, attributeName)
        return notAvailable
    except Exception:
        return notAvailable
def extractWorkflowHistory(service: Any) -> str:
"""Extract workflow history. Maps to {{KEY:WORKFLOW_HISTORY}}
Reverse-chronological, enriched with message summaries and document labels.
@ -79,8 +125,13 @@ def extractWorkflowHistory(service: Any) -> str:
logger.error(f"Error getting workflow history: {str(e)}")
return "No previous workflow rounds available"
def extractAvailableMethods(service: Any) -> str:
"""Extract available methods for action planning. Maps to {{KEY:AVAILABLE_METHODS}}"""
def extractAvailableMethods(service: Any, filterDynamicMode: bool = True) -> str:
"""Extract available methods for action planning. Maps to {{KEY:AVAILABLE_METHODS}}
Args:
service: Service object
filterDynamicMode: If True, only include actions with dynamicMode=True flag (default: True for dynamic workflow prompts)
"""
try:
# Get the methods dictionary directly from the global methods variable
if not methods:
@ -105,7 +156,21 @@ def extractAvailableMethods(service: Any) -> str:
processed_methods.add(shortName)
# Get method instance to access _actions dictionary with WorkflowActionDefinition objects
methodInstance = methodInfo.get('instance')
if not methodInstance:
continue
for actionName, actionInfo in methodInfo['actions'].items():
# Check dynamicMode flag if filtering is enabled
if filterDynamicMode:
# Access original WorkflowActionDefinition from _actions dictionary
if hasattr(methodInstance, '_actions') and actionName in methodInstance._actions:
actionDef = methodInstance._actions[actionName]
# Only include actions with dynamicMode=True
if not getattr(actionDef, 'dynamicMode', False):
continue
# Create compound action name: method.action
compoundActionName = f"{shortName}.{actionName}"
# Get the action description

View file

@ -12,6 +12,8 @@ from modules.workflows.processing.shared.placeholderFactory import (
extractUserPrompt,
extractAvailableDocumentsSummary,
extractWorkflowHistory,
extractUserIntent,
extractNormalizedRequest,
)
logger = logging.getLogger(__name__)
@ -41,13 +43,13 @@ def generateTaskPlanningPrompt(services, context: Any) -> PromptBundle:
- Data Type: {workflowIntent.get('dataType', 'unknown')}
- Expected Formats: {workflowIntent.get('expectedFormats', [])}
- Quality Requirements: {workflowIntent.get('qualityRequirements', {})}
- Primary Goal: {workflowIntent.get('primaryGoal', '')}
Note: Tasks can override these if task-specific needs differ (e.g., workflow wants PDF, but task needs CSV for intermediate step).
"""
placeholders: List[PromptPlaceholder] = [
PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
PromptPlaceholder(label="NORMALIZED_REQUEST", content=extractNormalizedRequest(services), summaryAllowed=False),
PromptPlaceholder(label="USER_INTENT", content=extractUserIntent(services), summaryAllowed=False),
PromptPlaceholder(label="AVAILABLE_DOCUMENTS_SUMMARY", content=extractAvailableDocumentsSummary(services, context), summaryAllowed=True),
PromptPlaceholder(label="WORKFLOW_HISTORY", content=extractWorkflowHistory(services), summaryAllowed=True),
PromptPlaceholder(label="USER_LANGUAGE", content=userLanguage, summaryAllowed=False),
@ -62,9 +64,17 @@ Break down user requests into logical, executable task steps.
## 📋 Context
### User Request
The following is the user's normalized request:
{{KEY:USER_PROMPT}}
### Normalized User Request
The following is the user's full normalized request (preserves all constraints and details):
```
{{KEY:NORMALIZED_REQUEST}}
```
### User Intent
The following is the user's intent (concise objective):
```
{{KEY:USER_INTENT}}
```
### Workflow Intent
{{KEY:WORKFLOW_INTENT}}

View file

@ -155,6 +155,15 @@ class WorkflowManager:
async def _workflowProcess(self, userInput: UserInputRequest) -> None:
"""Process a workflow with user input"""
try:
# Send ChatLog message immediately when workflow starts
workflow = self.services.workflow
self.services.chat.storeLog(workflow, {
"message": "Workflow started...",
"type": "info",
"status": "running",
"progress": 0.0
})
# Store the current user prompt in services for easy access throughout the workflow
self.services.rawUserPrompt = userInput.prompt
self.services.currentUserPrompt = userInput.prompt
@ -203,7 +212,7 @@ class WorkflowManager:
# Extract intent analysis fields and store as workflowIntent
workflowIntent = {
'primaryGoal': analysisResult.get('primaryGoal'),
'intent': intentText, # Use intent instead of primaryGoal
'dataType': analysisResult.get('dataType', 'unknown'),
'expectedFormats': analysisResult.get('expectedFormats', []),
'qualityRequirements': analysisResult.get('qualityRequirements', {}),
@ -220,8 +229,16 @@ class WorkflowManager:
self.services.workflow._workflowIntent = workflowIntent
# Store normalized request and intent
# CRITICAL: normalizedRequest MUST be used if available, do NOT fall back to intent
self.services.currentUserPrompt = intentText or userInput.prompt
self.services.currentUserPromptNormalized = normalizedRequest or intentText or userInput.prompt
if normalizedRequest and normalizedRequest.strip():
# Use normalizedRequest if available and not empty
self.services.currentUserPromptNormalized = normalizedRequest
logger.info(f"Stored normalized request (length: {len(normalizedRequest)}, preview: {normalizedRequest[:100]}...)")
else:
# Fallback only if normalizedRequest is None or empty
logger.warning(f"normalizedRequest is None or empty, falling back to intentText. normalizedRequest={normalizedRequest}, intentText={intentText[:100] if intentText else None}...")
self.services.currentUserPromptNormalized = intentText or userInput.prompt
if contextItems is not None:
self.services.currentUserContextItems = contextItems
@ -280,7 +297,6 @@ class WorkflowManager:
- complexity: "simple" | "moderate" | "complex"
- needsWorkflowHistory: bool
- fastTrack: bool
- primaryGoal: Hauptziel
- dataType: Datentyp
- expectedFormats: Erwartete Formate
- qualityRequirements: Qualitätsanforderungen
@ -304,11 +320,10 @@ class WorkflowManager:
- "complex": Multi-task workflow, many documents, research needed, content generation required, multi-step planning (60-120s)
6. needsWorkflowHistory: Boolean indicating if this request needs previous workflow rounds/history (e.g., 'continue', 'retry', 'fix', 'improve', 'update', 'modify', 'based on previous', 'build on', references to earlier work)
7. fastTrack: Boolean indicating if Fast Track is possible (simple requests without documents and without workflow history)
8. primaryGoal: The main objective the user wants to achieve
9. dataType: What type of data/content they want (numbers|text|documents|analysis|code|unknown)
10. expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., ["xlsx", "pdf"]). If format is unclear or not specified, use empty list []
11. qualityRequirements: Quality requirements they have (accuracy, completeness) as {{accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}}
12. successCriteria: Specific success criteria that define completion (array of strings)
8. dataType: What type of data/content they want (numbers|text|documents|analysis|code|unknown)
9. expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., ["xlsx", "pdf"]). If format is unclear or not specified, use empty list []
10. qualityRequirements: Quality requirements they have (accuracy, completeness) as {{accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}}
11. successCriteria: Specific success criteria that define completion (array of strings)
Rules:
- If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained
@ -336,7 +351,6 @@ Return ONLY JSON (no markdown) with this exact structure:
"complexity": "simple" | "moderate" | "complex",
"needsWorkflowHistory": true|false,
"fastTrack": true|false,
"primaryGoal": "The main objective the user wants to achieve",
"dataType": "numbers|text|documents|analysis|code|unknown",
"expectedFormats": ["pdf", "docx", "xlsx", "txt", "json", "csv", "html", "md"],
"qualityRequirements": {{
@ -386,7 +400,6 @@ The following is the user's original input message. Analyze intent, normalize th
"complexity": "moderate",
"needsWorkflowHistory": False,
"fastTrack": False,
"primaryGoal": None,
"dataType": "unknown",
"expectedFormats": [],
"qualityRequirements": {
@ -514,10 +527,14 @@ The following is the user's original input message. Analyze intent, normalize th
roundNum = workflow.currentRound
contextLabel = f"round{roundNum}_usercontext"
# Use normalized request if available (from combined analysis), otherwise use original prompt
# This ensures the first message uses the normalized request for security
normalizedRequest = getattr(self.services, 'currentUserPromptNormalized', None) or userInput.prompt
messageData = {
"workflowId": workflow.id,
"role": "user",
"message": userInput.prompt,
"message": normalizedRequest, # Use normalized request instead of original prompt
"status": "first",
"sequenceNr": 1,
"publishedAt": self.services.utils.timestampGetUtc(),
@ -593,12 +610,11 @@ The following is the user's original input message. Analyze intent, normalize th
"2) normalizedRequest: full, explicit restatement of the user's request in the detected language; do NOT summarize; preserve ALL constraints and details.\n"
"3) intent: concise single-paragraph core request in the detected language for high-level routing.\n"
"4) contextItems: supportive data blocks to attach as separate documents if significantly larger than the intent (large literal content, long lists/tables, code/JSON blocks, transcripts, CSV fragments, detailed specs). Keep URLs in the intent unless they embed large pasted content.\n"
"5) primaryGoal: The main objective the user wants to achieve.\n"
"6) dataType: What type of data/content they want (numbers|text|documents|analysis|code|unknown).\n"
"7) expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., [\"xlsx\", \"pdf\"]). If format is unclear or not specified, use empty list [].\n"
"8) qualityRequirements: Quality requirements they have (accuracy, completeness) as {accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}.\n"
"9) successCriteria: Specific success criteria that define completion (array of strings).\n"
"10) needsWorkflowHistory: Boolean indicating if this request needs previous workflow rounds/history to be understood or completed (e.g., 'continue', 'retry', 'fix', 'improve', 'update', 'modify', 'based on previous', 'build on', references to earlier work). Return true if the request is a continuation, retry, modification, or builds upon previous work.\n\n"
"5) dataType: What type of data/content they want (numbers|text|documents|analysis|code|unknown).\n"
"6) expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., [\"xlsx\", \"pdf\"]). If format is unclear or not specified, use empty list [].\n"
"7) qualityRequirements: Quality requirements they have (accuracy, completeness) as {accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}.\n"
"8) successCriteria: Specific success criteria that define completion (array of strings).\n"
"9) needsWorkflowHistory: Boolean indicating if this request needs previous workflow rounds/history to be understood or completed (e.g., 'continue', 'retry', 'fix', 'improve', 'update', 'modify', 'based on previous', 'build on', references to earlier work). Return true if the request is a continuation, retry, modification, or builds upon previous work.\n\n"
"Rules:\n"
"- If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained.\n"
"- If content exceeds that threshold, move bulky parts into contextItems; keep intent short and clear.\n"
@ -616,7 +632,6 @@ The following is the user's original input message. Analyze intent, normalize th
" \"content\": \"Full extracted content block here\"\n"
" }\n"
" ],\n"
" \"primaryGoal\": \"The main objective the user wants to achieve\",\n"
" \"dataType\": \"numbers|text|documents|analysis|code|unknown\",\n"
" \"expectedFormats\": [\"pdf\", \"docx\", \"xlsx\", \"txt\", \"json\", \"csv\", \"html\", \"md\"],\n"
" \"qualityRequirements\": {\n"
@ -659,8 +674,9 @@ The following is the user's original input message. Analyze intent, normalize th
contextItems = parsed.get('contextItems') or []
# Extract intent analysis fields and store as workflowIntent
intentText = parsed.get('intent') or userInput.prompt
workflowIntent = {
'primaryGoal': parsed.get('primaryGoal'),
'intent': intentText, # Use intent instead of primaryGoal
'dataType': parsed.get('dataType', 'unknown'),
'expectedFormats': parsed.get('expectedFormats', []),
'qualityRequirements': parsed.get('qualityRequirements', {}),
@ -684,18 +700,56 @@ The following is the user's original input message. Analyze intent, normalize th
setattr(self.services, '_needsWorkflowHistory', False)
# Update services state
# CRITICAL: Validate language from AI response
# If AI didn't return language or invalid → use user language
# If user language not set → use "en"
validatedLanguage = None
# Validate AI-detected language
if detectedLanguage and isinstance(detectedLanguage, str):
self._setUserLanguage(detectedLanguage)
try:
setattr(self.services, 'currentUserLanguage', detectedLanguage)
except Exception:
pass
detectedLanguage = detectedLanguage.strip().lower()
# Check if it's a valid 2-character ISO code
if len(detectedLanguage) == 2 and detectedLanguage.isalpha():
validatedLanguage = detectedLanguage
# If AI didn't return valid language, use user language
if not validatedLanguage:
userLanguage = getattr(self.services.user, 'language', None) if hasattr(self.services, 'user') and self.services.user else None
if userLanguage and isinstance(userLanguage, str):
userLanguage = userLanguage.strip().lower()
if len(userLanguage) == 2 and userLanguage.isalpha():
validatedLanguage = userLanguage
# Final fallback to "en"
if not validatedLanguage:
validatedLanguage = "en"
logger.warning("Language not detected from AI and user language not set - using default 'en'")
# Set validated language
self._setUserLanguage(validatedLanguage)
try:
setattr(self.services, 'currentUserLanguage', validatedLanguage)
logger.debug(f"Set currentUserLanguage to validated value: {validatedLanguage}")
except Exception:
pass
self.services.currentUserPrompt = intentText or userInput.prompt
# Always set currentUserPromptNormalized - use normalizedRequest if available, otherwise fallback to currentUserPrompt
normalizedValue = normalizedRequest or intentText or userInput.prompt
self.services.currentUserPromptNormalized = normalizedValue
# CRITICAL: normalizedRequest MUST be used if available, do NOT fall back to intent
if normalizedRequest and normalizedRequest.strip():
# Use normalizedRequest if available and not empty
self.services.currentUserPromptNormalized = normalizedRequest
logger.debug(f"Stored normalized request from analysis (length: {len(normalizedRequest)})")
else:
# Fallback only if normalizedRequest is None or empty
logger.warning(f"normalizedRequest is None or empty in analysis, falling back to intentText. normalizedRequest={normalizedRequest}, intentText={intentText}")
self.services.currentUserPromptNormalized = intentText or userInput.prompt
if contextItems is not None:
self.services.currentUserContextItems = contextItems
# Update message with normalized request if analysis produced one
if normalizedRequest and normalizedRequest != userInput.prompt:
messageData["message"] = normalizedRequest
logger.debug(f"Updated first message with normalized request (length: {len(normalizedRequest)})")
# Create documents for context items
if contextItems and isinstance(contextItems, list):
@ -749,6 +803,34 @@ The following is the user's original input message. Analyze intent, normalize th
# Finally, persist and bind the first message with combined documents (context + user)
self.services.chat.storeMessageWithDocuments(workflow, messageData, createdDocs)
# Create ChatMessage with success criteria (KPI) AFTER the first user message
# This ensures the KPI message appears after the user message in the UI
workflowIntent = getattr(workflow, '_workflowIntent', None)
if workflowIntent and isinstance(workflowIntent, dict):
successCriteria = workflowIntent.get('successCriteria', [])
if successCriteria and isinstance(successCriteria, list) and len(successCriteria) > 0:
try:
# Format success criteria as message with "KPI" title
criteriaText = "**KPI**\n\n" + "\n".join([f"{criterion}" for criterion in successCriteria])
kpiMessageData = {
"workflowId": workflow.id,
"role": "system",
"message": criteriaText,
"summary": f"KPI: {len(successCriteria)} success criteria",
"status": "step",
"sequenceNr": len(workflow.messages) + 1, # After user message
"publishedAt": self.services.utils.timestampGetUtc(),
"roundNumber": workflow.currentRound,
"taskNumber": 0,
"actionNumber": 0
}
self.services.chat.storeMessageWithDocuments(workflow, kpiMessageData, [])
logger.info(f"Created KPI message with {len(successCriteria)} success criteria after first user message")
except Exception as e:
logger.error(f"Error creating KPI message: {str(e)}")
except Exception as e:
logger.error(f"Error sending first message: {str(e)}")
raise

View file

@ -252,7 +252,7 @@ class ModelSelectionTester:
print(f"{'='*80}")
options = AiCallOptions(
operationType=OperationTypeEnum.WEB_SEARCH,
operationType=OperationTypeEnum.WEB_SEARCH_DATA,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.ADVANCED,
maxCost=0.05,
@ -269,7 +269,7 @@ class ModelSelectionTester:
print(f"{'='*80}")
options = AiCallOptions(
operationType=OperationTypeEnum.WEB_SEARCH,
operationType=OperationTypeEnum.WEB_SEARCH_DATA,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.BASIC,
maxCost=0.01,
@ -327,7 +327,7 @@ class ModelSelectionTester:
# This method uses webQuery internally, so it uses the same model selection as web research
options = AiCallOptions(
operationType=OperationTypeEnum.WEB_SEARCH,
operationType=OperationTypeEnum.WEB_SEARCH_DATA,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.ADVANCED,
maxCost=0.03,
@ -436,7 +436,7 @@ class ModelSelectionTester:
print("\n Testing: aiObjects.webQuery() - Web Research")
try:
options = AiCallOptions(
operationType=OperationTypeEnum.WEB_SEARCH,
operationType=OperationTypeEnum.WEB_SEARCH_DATA,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.ADVANCED,
maxCost=0.05,

View file

@ -11,7 +11,7 @@ This script tests all available models with all their supported operation types:
- DATA_EXTRACT: Data extraction
- IMAGE_ANALYSE: Image analysis
- IMAGE_GENERATE: Image generation
- WEB_SEARCH: Web search
- WEB_SEARCH_DATA: Web search
- WEB_CRAWL: Web crawling
For each model, it tests every operation type the model supports and validates
@ -119,7 +119,7 @@ class AIModelsTester:
OperationTypeEnum.DATA_EXTRACT: "Extract key information from this text about artificial intelligence trends.",
OperationTypeEnum.IMAGE_ANALYSE: "Describe what you see in this image.",
OperationTypeEnum.IMAGE_GENERATE: "A futuristic cityscape with flying cars and neon lights.",
OperationTypeEnum.WEB_SEARCH: "Who works in valueon ag in switzerland?", # Search query for valueon.ch
OperationTypeEnum.WEB_SEARCH_DATA: "Who works in valueon ag in switzerland?", # Search query for valueon.ch
OperationTypeEnum.WEB_CRAWL: "https://www.valueon.ch" # URL to crawl
}
return prompts.get(operationType, "Test prompt for this operation type.")
@ -195,7 +195,7 @@ class AIModelsTester:
)
# Update message content to JSON format
messages[0]["content"] = json.dumps(imagePrompt.model_dump())
elif operationType == OperationTypeEnum.WEB_SEARCH:
elif operationType == OperationTypeEnum.WEB_SEARCH_DATA:
# Create structured prompt for web search
webSearchPrompt = AiCallPromptWebSearch(
instruction=testPrompt,

View file

@ -74,7 +74,7 @@ class MethodAiOperationsTester:
"aiPrompt": "A beautiful sunset over the ocean with purple and orange hues",
"resultType": "png"
},
OperationTypeEnum.WEB_SEARCH: {
OperationTypeEnum.WEB_SEARCH_DATA: {
"aiPrompt": "Who works in valueon ag in switzerland?",
"resultType": "json"
},

View file

@ -413,12 +413,11 @@ class DocumentGenerationFormatsTester10:
async def testAllFormats(self) -> Dict[str, Any]:
"""Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats."""
print("\n" + "="*80)
print("TESTING DOCUMENT GENERATION IN HTML FORMAT")
print("TESTING DOCUMENT GENERATION IN ALL FORMATS")
print("="*80)
# Only test HTML format
formats = ["html"]
# formats = ["docx", "xlsx", "pptx", "pdf", "html"] # Commented out other formats
# Test all document formats
formats = ["docx", "xlsx", "pptx", "pdf", "html"]
results = {}
for format in formats:
@ -471,7 +470,7 @@ class DocumentGenerationFormatsTester10:
async def runTest(self):
"""Run the complete test."""
print("\n" + "="*80)
print("DOCUMENT GENERATION FORMATS TEST 10 - HTML ONLY")
print("DOCUMENT GENERATION FORMATS TEST 10 - ALL FORMATS")
print("="*80)
try:

View file

@ -0,0 +1,386 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Test script for JSON extraction response detection and merging.
Run: python gateway/tests/unit/services/test_json_extraction_merging.py
"""
import json
import sys
import os
# Add gateway to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../..'))
from modules.datamodels.datamodelExtraction import ContentPart
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
def test_detects_json_with_code_fences():
    """An extraction payload wrapped in a markdown json fence must be detected."""
    print("Test 1: Detecting JSON with code fences...")
    service = ExtractionService(None)
    fenced_payload = '```json\n{"extracted_content": {"text": "Sample text", "tables": []}}\n```'
    part_fields = {
        "id": "test1",
        "label": "test1",
        "typeGroup": "text",
        "mimeType": "text/plain",
        "data": fenced_payload,
    }
    detected = service._isJsonExtractionResponse([ContentPart(**part_fields)])
    assert detected == True, "Should detect JSON with code fences"
    print(" [PASS]")
def test_detects_json_without_code_fences():
    """A bare (unfenced) extraction payload must be detected as well."""
    print("Test 2: Detecting JSON without code fences...")
    service = ExtractionService(None)
    bare_payload = '{"extracted_content": {"text": "Sample text", "tables": []}}'
    part_fields = {
        "id": "test2",
        "label": "test2",
        "typeGroup": "text",
        "mimeType": "text/plain",
        "data": bare_payload,
    }
    detected = service._isJsonExtractionResponse([ContentPart(**part_fields)])
    assert detected == True, "Should detect JSON without code fences"
    print(" [PASS]")
def test_rejects_non_extraction_json():
    """Valid JSON that lacks the extracted_content key must not count as an extraction response."""
    print("Test 3: Rejecting non-extraction JSON...")
    service = ExtractionService(None)
    other_json = '{"documents": [{"sections": []}]}'
    part_fields = {
        "id": "test3",
        "label": "test3",
        "typeGroup": "text",
        "mimeType": "text/plain",
        "data": other_json,
    }
    detected = service._isJsonExtractionResponse([ContentPart(**part_fields)])
    assert detected == False, "Should reject non-extraction JSON"
    print(" [PASS]")
def test_rejects_non_json_content():
    """Plain prose must not be mistaken for a JSON extraction response."""
    print("Test 4: Rejecting non-JSON content...")
    service = ExtractionService(None)
    plain_text = "This is plain text, not JSON"
    part_fields = {
        "id": "test4",
        "label": "test4",
        "typeGroup": "text",
        "mimeType": "text/plain",
        "data": plain_text,
    }
    detected = service._isJsonExtractionResponse([ContentPart(**part_fields)])
    assert detected == False, "Should reject non-JSON content"
    print(" [PASS]")
def test_merges_tables_with_same_headers():
    """Tables sharing identical headers are merged into one table with unique rows."""
    print("Test 5: Merging tables with same headers...")
    service = ExtractionService(None)

    def table_part(part_id, payload):
        # The two inputs differ only in id/label and in their JSON payload.
        return ContentPart(
            id=part_id,
            label=part_id,
            typeGroup="table",
            mimeType="application/json",
            data=payload,
        )

    first = table_part(
        "test1",
        '```json\n{"extracted_content": {"tables": [{"headers": ["Name", "Amount"], "rows": [["Alice", "100"], ["Bob", "200"]]}]}}\n```',
    )
    second = table_part(
        "test2",
        '```json\n{"extracted_content": {"tables": [{"headers": ["Name", "Amount"], "rows": [["Charlie", "300"], ["Alice", "100"]]}]}}\n```',
    )
    merged = service._mergeJsonExtractionResponses([first, second])

    # Exactly one table group is expected after merging.
    tables = merged["extracted_content"]["tables"]
    assert len(tables) == 1, f"Should have one merged table, got {len(merged['extracted_content']['tables'])}"
    table = tables[0]
    assert table["headers"] == ["Name", "Amount"], f"Headers should match, got {table['headers']}"

    # Alice occurs in both inputs but must be kept only once -> 3 unique rows.
    rows = table["rows"]
    assert len(rows) == 3, f"Should have 3 unique rows, got {len(table['rows'])}"
    assert ["Alice", "100"] in rows, "Alice row should be present"
    assert ["Bob", "200"] in rows, "Bob row should be present"
    assert ["Charlie", "300"] in rows, "Charlie row should be present"
    print(" [PASS]")
def test_merges_multiple_json_blocks_separated_by_dash():
    """Several fenced JSON blocks joined by a --- line inside one part are merged."""
    print("Test 6: Merging multiple JSON blocks separated by ---...")
    service = ExtractionService(None)

    # A single content part carrying two JSON blocks with a --- separator between them.
    combined_payload = '```json\n{"extracted_content": {"tables": [{"headers": ["Name", "Amount"], "rows": [["Alice", "100"]]}]}}\n```\n---\n```json\n{"extracted_content": {"tables": [{"headers": ["Name", "Amount"], "rows": [["Bob", "200"]]}]}}\n```'
    multi_block_part = ContentPart(
        id="test1",
        label="test1",
        typeGroup="table",
        mimeType="application/json",
        data=combined_payload,
    )
    merged = service._mergeJsonExtractionResponses([multi_block_part])

    # Rows of both JSON blocks must end up in one shared table.
    tables = merged["extracted_content"]["tables"]
    assert len(tables) == 1, f"Should have one merged table, got {len(merged['extracted_content']['tables'])}"
    table = tables[0]
    assert table["headers"] == ["Name", "Amount"], f"Headers should match, got {table['headers']}"
    rows = table["rows"]
    assert len(rows) == 2, f"Should have 2 rows, got {len(table['rows'])}"
    assert ["Alice", "100"] in rows, "Alice row should be present"
    assert ["Bob", "200"] in rows, "Bob row should be present"
    print(" [PASS]")
def test_merges_text_content():
    """Text of several parts must all be present in the merged result."""
    print("Test 7: Merging text content...")
    service = ExtractionService(None)

    def text_part(part_id, payload):
        # Both inputs are plain-text parts that differ only in id/label and payload.
        return ContentPart(
            id=part_id,
            label=part_id,
            typeGroup="text",
            mimeType="text/plain",
            data=payload,
        )

    first = text_part("test1", '```json\n{"extracted_content": {"text": "First paragraph."}}\n```')
    second = text_part("test2", '```json\n{"extracted_content": {"text": "Second paragraph."}}\n```')
    merged = service._mergeJsonExtractionResponses([first, second])

    # Only presence of both paragraphs is checked, not the exact separator.
    merged_text = merged["extracted_content"]["text"]
    assert "First paragraph." in merged_text, "First paragraph should be present"
    assert "Second paragraph." in merged_text, "Second paragraph should be present"
    print(" [PASS]")
def test_merges_headings_and_lists():
    """Headings and lists of several parts are collected in input order."""
    print("Test 8: Merging headings and lists...")
    service = ExtractionService(None)

    def text_part(part_id, payload):
        # Both inputs are plain-text parts that differ only in id/label and payload.
        return ContentPart(
            id=part_id,
            label=part_id,
            typeGroup="text",
            mimeType="text/plain",
            data=payload,
        )

    first = text_part(
        "test1",
        '```json\n{"extracted_content": {"headings": [{"level": 1, "text": "Title 1"}], "lists": [{"type": "bullet", "items": ["Item 1"]}]}}\n```',
    )
    second = text_part(
        "test2",
        '```json\n{"extracted_content": {"headings": [{"level": 2, "text": "Subtitle 1"}], "lists": [{"type": "bullet", "items": ["Item 2"]}]}}\n```',
    )
    merged = service._mergeJsonExtractionResponses([first, second])

    # One heading per input part, in input order.
    headings = merged["extracted_content"]["headings"]
    assert len(headings) == 2, f"Should have 2 headings, got {len(merged['extracted_content']['headings'])}"
    assert headings[0]["text"] == "Title 1", "First heading should be Title 1"
    assert headings[1]["text"] == "Subtitle 1", "Second heading should be Subtitle 1"

    # One list per input part, in input order.
    lists = merged["extracted_content"]["lists"]
    assert len(lists) == 2, f"Should have 2 lists, got {len(merged['extracted_content']['lists'])}"
    assert lists[0]["items"] == ["Item 1"], "First list should have Item 1"
    assert lists[1]["items"] == ["Item 2"], "Second list should have Item 2"
    print(" [PASS]")
def test_handles_empty_content_parts():
    """Check that a part with empty data does not disturb the text of a populated part."""
    print("Test 9: Handling empty content parts...")
    extractor = ExtractionService(None)

    text_kwargs = {"typeGroup": "text", "mimeType": "text/plain"}
    populated_part = ContentPart(
        id="test1",
        label="test1",
        data='```json\n{"extracted_content": {"text": "Some text"}}\n```',
        **text_kwargs,
    )
    # Deliberately empty payload: the merge is expected to cope with it.
    empty_part = ContentPart(
        id="test2",
        label="test2",
        data="",
        **text_kwargs,
    )

    result = extractor._mergeJsonExtractionResponses([populated_part, empty_part])

    # The populated part's text must come through unchanged.
    extracted_text = result["extracted_content"]["text"]
    assert extracted_text == "Some text", "Should have text from part1"

    print(" [PASS]")
def test_merges_tables_with_different_headers():
    """Check that tables whose headers differ stay as separate tables after merging."""
    print("Test 10: Keeping tables with different headers separate...")
    extractor = ExtractionService(None)

    table_kwargs = {"typeGroup": "table", "mimeType": "application/json"}
    date_amount_part = ContentPart(
        id="test1",
        label="test1",
        data='```json\n{"extracted_content": {"tables": [{"headers": ["Date", "Amount"], "rows": [["2024-01-01", "100"]]}]}}\n```',
        **table_kwargs,
    )
    name_email_part = ContentPart(
        id="test2",
        label="test2",
        data='```json\n{"extracted_content": {"tables": [{"headers": ["Name", "Email"], "rows": [["Alice", "alice@example.com"]]}]}}\n```',
        **table_kwargs,
    )

    result = extractor._mergeJsonExtractionResponses([date_amount_part, name_email_part])

    # Different headers => no merging, so two tables must remain.
    tables = result["extracted_content"]["tables"]
    assert len(tables) == 2, f"Should have 2 separate tables, got {len(tables)}"

    # (position, expected headers, header failure message, row-count failure message)
    expectations = (
        (0, ["Date", "Amount"], "First table should have Date/Amount headers", "First table should have 1 row"),
        (1, ["Name", "Email"], "Second table should have Name/Email headers", "Second table should have 1 row"),
    )
    for position, expected_headers, headers_message, rows_message in expectations:
        table = tables[position]
        assert table["headers"] == expected_headers, headers_message
        assert len(table["rows"]) == 1, rows_message

    print(" [PASS]")
def test_real_world_scenario():
    """Check merging of three table responses with identical headers.

    The third part carries two fenced JSON blocks separated by ``---``, and one
    transaction row (TXN001) occurs in two different parts; the merged result
    is expected to be a single table holding each transaction exactly once.
    """
    print("Test 11: Real-world scenario (multiple documents, multiple JSON blocks)...")
    extractor = ExtractionService(None)

    # Three simulated documents, each yielding a table extraction response.
    table_kwargs = {"typeGroup": "table", "mimeType": "application/json"}
    doc1_part = ContentPart(
        id="doc1",
        label="doc1",
        data='```json\n{"extracted_content": {"tables": [{"headers": ["Transaction ID", "Date", "Amount"], "rows": [["TXN001", "2024-01-01", "100.00"], ["TXN002", "2024-01-02", "200.00"]]}]}}\n```',
        **table_kwargs,
    )
    doc2_part = ContentPart(
        id="doc2",
        label="doc2",
        data='```json\n{"extracted_content": {"tables": [{"headers": ["Transaction ID", "Date", "Amount"], "rows": [["TXN003", "2024-01-03", "300.00"], ["TXN001", "2024-01-01", "100.00"]]}]}}\n```',
        **table_kwargs,
    )
    # This part contains two JSON blocks separated by a --- line.
    doc3_part = ContentPart(
        id="doc3",
        label="doc3",
        data='```json\n{"extracted_content": {"tables": [{"headers": ["Transaction ID", "Date", "Amount"], "rows": [["TXN004", "2024-01-04", "400.00"]]}]}}\n```\n---\n```json\n{"extracted_content": {"tables": [{"headers": ["Transaction ID", "Date", "Amount"], "rows": [["TXN005", "2024-01-05", "500.00"]]}]}}\n```',
        **table_kwargs,
    )

    result = extractor._mergeJsonExtractionResponses([doc1_part, doc2_part, doc3_part])

    # Identical headers everywhere => everything collapses into one table.
    tables = result["extracted_content"]["tables"]
    assert len(tables) == 1, f"Should have one merged table, got {len(tables)}"
    table = tables[0]
    assert table["headers"] == ["Transaction ID", "Date", "Amount"], "Headers should match"

    # Six rows were supplied in total; the repeated TXN001 row leaves five unique ones.
    rows = table["rows"]
    assert len(rows) == 5, f"Should have 5 unique rows, got {len(rows)}"

    # Every transaction id must be present in the merged table.
    transaction_ids = [row[0] for row in rows]
    presence_messages = {
        "TXN001": "TXN001 should be present",
        "TXN002": "TXN002 should be present",
        "TXN003": "TXN003 should be present",
        "TXN004": "TXN004 should be present",
        "TXN005": "TXN005 should be present",
    }
    for txn_id, message in presence_messages.items():
        assert txn_id in transaction_ids, message

    # The duplicated transaction must have been collapsed to a single row.
    assert transaction_ids.count("TXN001") == 1, "TXN001 should appear only once (deduplicated)"

    print(" [PASS]")
def main():
"""Run every listed test function and print a pass/fail summary.

Returns:
0 when no test failed, 1 otherwise. The ``__main__`` guard of this
script passes the value to ``sys.exit``.
"""
# Banner announcing the suite.
print("=" * 60)
print("Testing JSON Extraction Response Detection and Merging")
print("=" * 60)
print()
# Test callables, executed in the order listed here.
tests = [
test_detects_json_with_code_fences,
test_detects_json_without_code_fences,
test_rejects_non_extraction_json,
test_rejects_non_json_content,
test_merges_tables_with_same_headers,
test_merges_multiple_json_blocks_separated_by_dash,
test_merges_text_content,
test_merges_headings_and_lists,
test_handles_empty_content_parts,
test_merges_tables_with_different_headers,
test_real_world_scenario,
]
# Running tallies reported in the summary and used for the return value.
passed = 0
failed = 0
for test in tests:
try:
test()
passed += 1
except AssertionError as e:
# A failed assertion is an ordinary test failure: report its message only.
print(f" [FAIL] {e}")
failed += 1
except Exception as e:
# Any other exception is an unexpected error: report it together with the
# full traceback, and still count the test as failed so the run continues.
print(f" [ERROR] {e}")
import traceback
traceback.print_exc()
failed += 1
# NOTE(review): indentation is not visible in this view — confirm whether this
# blank-line print runs once per test (inside the loop) or once before the summary.
print()
# Summary banner with the final counts.
print("=" * 60)
print(f"Results: {passed} passed, {failed} failed")
print("=" * 60)
# Zero means every test passed; one means at least one test failed or errored.
return 0 if failed == 0 else 1
# Script entry point: run the suite and hand its result to the interpreter as
# the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)