Enhanced the AI flow for multilingual handling (e.g., prompt in German, user language English, documents delivered in French) and added document delivery in different formats

This commit is contained in:
ValueOn AG 2026-01-02 23:03:29 +01:00
parent 3408e7b463
commit 1362470f00
10 changed files with 302 additions and 108 deletions

View file

@ -437,8 +437,8 @@ Respond with ONLY a JSON object in this exact format:
self,
userPrompt: str,
contentParts: List[ContentPart],
outputFormat: str,
parentOperationId: str
outputFormat: Optional[str] = None,
parentOperationId: str = None
) -> Dict[str, Any]:
"""Public method: Delegate to StructureGenerator."""
return await self.structureGenerator.generateStructure(
@ -461,6 +461,7 @@ Respond with ONLY a JSON object in this exact format:
self,
filledStructure: Dict[str, Any],
outputFormat: str,
language: str,
title: str,
userPrompt: str,
parentOperationId: str
@ -469,9 +470,15 @@ Respond with ONLY a JSON object in this exact format:
Phase 5E: Rendert gefüllte Struktur zum Ziel-Format.
Jedes Dokument wird einzeln gerendert, jeder Renderer kann 1..n Dokumente zurückgeben.
Render filled structure to documents.
Per-document format and language are extracted from structure (validated in State 3).
The outputFormat and language parameters are only used as global fallbacks.
Multiple documents can have different formats and languages.
Args:
filledStructure: Gefüllte Struktur mit elements
outputFormat: Ziel-Format (pdf, docx, html, etc.) - wird für alle Dokumente verwendet
outputFormat: Ziel-Format (pdf, docx, html, etc.) - Global fallback
language: Language (global fallback) - Per-document language extracted from structure
title: Dokument-Titel
userPrompt: User-Anfrage
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
@ -480,6 +487,11 @@ Respond with ONLY a JSON object in this exact format:
List of RenderedDocument objects.
Jedes RenderedDocument repräsentiert ein gerendertes Dokument (Hauptdokument oder unterstützende Datei)
"""
# Language comes from structure (per-document), validated in State 3
# This parameter is only used as global fallback if structure validation fails
# Use validated currentUserLanguage as fallback (always valid)
if not language:
language = self._getUserLanguage() if hasattr(self, '_getUserLanguage') else (self.services.currentUserLanguage if hasattr(self.services, 'currentUserLanguage') else 'en')
# Erstelle Operation-ID für Rendering
renderOperationId = f"{parentOperationId}_rendering"
@ -502,6 +514,7 @@ Respond with ONLY a JSON object in this exact format:
renderedDocuments = await generationService.renderReport(
filledStructure,
outputFormat,
language, # Pass language (global fallback, per-document extracted in renderReport)
title,
userPrompt,
self,
@ -577,18 +590,18 @@ Respond with ONLY a JSON object in this exact format:
aiOperationId = f"ai_content_{workflowId}_{int(time.time())}"
# Starte Progress-Tracking mit Parent-Referenz
formatDisplay = outputFormat if outputFormat else "auto-determined"
self.services.chat.progressLogStart(
aiOperationId,
"AI content processing",
"Content Processing",
f"Format: {outputFormat or 'text'}",
f"Format: {formatDisplay}",
parentOperationId=parentOperationId
)
try:
# Initialisiere Defaults
if not outputFormat:
outputFormat = "txt"
# outputFormat is optional - if None, formats determined from prompt by AI
# No default fallback here - let AI service handle it
opType = getattr(options, "operationType", None)
if not opType:
@ -652,19 +665,11 @@ Respond with ONLY a JSON object in this exact format:
parentOperationId=parentOperationId
)
# Other operation types (DATA_ANALYSE, etc.) - existing logic
# Fallback to document generation for backward compatibility (should not happen)
logger.warning(f"Unhandled operation type: {opType}, falling back to document generation")
return await self._handleDocumentGeneration(
prompt=prompt,
options=options,
documentList=documentList,
documentIntents=documentIntents,
contentParts=contentParts,
outputFormat=outputFormat,
title=title,
parentOperationId=parentOperationId
)
# Other operation types (DATA_ANALYSE, etc.) - not supported
errorMsg = f"Unsupported operation type: {opType}. Supported types: IMAGE_GENERATE, DATA_GENERATE, DATA_EXTRACT"
logger.error(errorMsg)
self.services.chat.progressLogFinish(aiOperationId, False)
raise ValueError(errorMsg)
except Exception as e:
logger.error(f"Error in callAiContent: {str(e)}")
@ -707,6 +712,31 @@ Respond with ONLY a JSON object in this exact format:
if documentList:
documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
# Filter: Remove original documents if already covered by pre-extracted JSONs
# (to prevent duplicate ContentParts - pre-extracted JSONs contain already extracted ContentParts)
if documents:
# Step 1: Identify all original document IDs covered by pre-extracted JSONs
originalDocIdsCoveredByPreExtracted = set()
for doc in documents:
preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(doc)
if preExtracted:
originalDocId = preExtracted["originalDocument"]["id"]
originalDocIdsCoveredByPreExtracted.add(originalDocId)
logger.debug(f"Found pre-extracted JSON {doc.id} covering original document {originalDocId}")
# Step 2: Filter documents - remove originals covered by pre-extracted JSONs
filteredDocuments = []
for doc in documents:
preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(doc)
if preExtracted:
filteredDocuments.append(doc) # Keep pre-extracted JSON
elif doc.id in originalDocIdsCoveredByPreExtracted:
logger.info(f"Skipping original document {doc.id} ({doc.fileName}) - already covered by pre-extracted JSON")
else:
filteredDocuments.append(doc) # Keep regular document
documents = filteredDocuments # Use filtered list
# Step 2: Clarify document intents (if not provided) - REQUIRED for all documents
if not documentIntents and documents:
documentIntents = await self.clarifyDocumentIntents(

View file

@ -408,10 +408,28 @@ class ContentExtractor:
"content_extraction_result"
)
# State 2 Validation: Validate and auto-fix ContentParts
validatedParts = []
for part in allContentParts:
# Validation 2.1: Skip ContentParts without documentId
if not part.metadata.get("documentId"):
logger.warning(f"Skipping ContentPart {part.id} - missing documentId in metadata")
continue
# Validation 2.2: Skip ContentParts with invalid contentFormat
contentFormat = part.metadata.get("contentFormat")
if contentFormat not in ["extracted", "object", "reference"]:
logger.warning(
f"Skipping ContentPart {part.id} - invalid contentFormat: {contentFormat}"
)
continue
validatedParts.append(part)
# ChatLog abschließen
self.services.chat.progressLogFinish(extractionOperationId, True)
return allContentParts
return validatedParts
except Exception as e:
self.services.chat.progressLogFinish(extractionOperationId, False)

View file

@ -109,6 +109,21 @@ class DocumentIntentAnalyzer:
"document_intent_analysis_result"
)
# State 1 Validation: Validate and auto-fix document intents
documentIds = {d.id for d in documents}
validatedIntents = []
for intent in documentIntents:
# Validation 1.2: Skip intents for unknown documents
if intent.documentId not in documentIds:
logger.warning(f"Skipping intent for unknown document: {intent.documentId}")
continue
validatedIntents.append(intent)
# Validation 1.1: Documents without intents are OK (not needed)
# Intents for non-existing documents are already filtered above
documentIntents = validatedIntents
# ChatLog abschließen
self.services.chat.progressLogFinish(intentOperationId, True)
@ -245,8 +260,13 @@ class DocumentIntentAnalyzer:
outputFormat = actionParameters.get("outputFormat", "txt")
prompt = f"""USER REQUEST:
# FENCE user input to prevent prompt injection
fencedUserPrompt = f"""```user_request
{userPrompt}
```"""
prompt = f"""USER REQUEST:
{fencedUserPrompt}
DOCUMENTS TO ANALYZE:
{docListText}
@ -256,20 +276,25 @@ TASK: For each document, determine its intents (can be multiple):
- "render": Image/binary should be rendered as-is (visual element)
- "reference": Document reference/attachment (no extraction, just reference)
OUTPUT FORMAT: {outputFormat}
TASK: For each document, determine:
1. Intents (can be multiple): "extract", "render", "reference"
Note: Output format and language are NOT determined here - they will be
determined during structure generation (Phase 3) in the chapter structure JSON
OUTPUT FORMAT: {outputFormat} (global fallback - for reference only)
RETURN JSON:
{{
"intents": [
{{
"documentId": "doc_1",
"intents": ["extract"], # Array - can contain multiple!
"intents": ["extract"],
"extractionPrompt": "Extract all text content, preserving structure",
"reasoning": "User needs text content for document generation"
}},
{{
"documentId": "doc_2",
"intents": ["extract", "render"], # Both! Image needs text extraction AND visual rendering
"intents": ["extract", "render"],
"extractionPrompt": "Extract text content from image using vision AI",
"reasoning": "Image contains text that needs extraction, but also should be rendered visually"
}},

View file

@ -198,6 +198,31 @@ class StructureFiller:
# Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung)
flattenedStructure = self._addContentPartsMetadata(flattenedStructure, contentParts)
# State 4 Validation: Validate and auto-fix filled structure
# Validation 4.1: Filled structure missing 'documents' field
if "documents" not in flattenedStructure:
raise ValueError("Filled structure missing 'documents' field - cannot auto-fix")
for doc in flattenedStructure["documents"]:
# Validation 4.4: Verify language is preserved from input structure
# Language MUST be preserved from Phase 3 structure (validated in State 3)
if "language" not in doc:
raise ValueError(f"Document {doc.get('id')} missing language in filled structure - should have been preserved from Phase 3")
# Validate language format
if not isinstance(doc["language"], str) or len(doc["language"]) != 2:
raise ValueError(f"Document {doc.get('id')} has invalid language format in filled structure: {doc['language']} - should be 2-character ISO 639-1 code")
for chapter in doc.get("chapters", []):
for section in chapter.get("sections", []):
# Validation 4.2: Section missing 'elements' field
if "elements" not in section:
section["elements"] = []
logger.info(f"Section {section.get('id')} missing 'elements' - created empty list")
# Validation 4.3: Section has empty elements list - ALLOW (intentionally empty is OK)
# No action needed - empty elements are allowed
# ChatLog abschließen
self.services.chat.progressLogFinish(fillOperationId, True)
@ -1783,6 +1808,8 @@ The JSON should be a fragment that can be merged with the previous response."""
"id": doc.get("id"),
"title": doc.get("title"),
"filename": doc.get("filename"),
"outputFormat": doc.get("outputFormat"), # Preserve from Phase 3
"language": doc.get("language"), # Preserve from Phase 3
"sections": []
}

View file

@ -44,8 +44,8 @@ class StructureGenerator:
self,
userPrompt: str,
contentParts: List[ContentPart],
outputFormat: str,
parentOperationId: str
outputFormat: Optional[str] = None,
parentOperationId: str = None
) -> Dict[str, Any]:
"""
Phase 5C: Generiert Chapter-Struktur (Table of Contents).
@ -54,24 +54,37 @@ class StructureGenerator:
- contentParts (unified object with instruction and/or caption per part)
- generationHint
Generate document structure with per-document format determination.
Multiple documents can be produced with different formats (e.g., one PDF, one HTML).
AI determines formats per-document from user prompt. The outputFormat parameter is
only a validation fallback - used if AI doesn't return format per document.
Args:
userPrompt: User-Anfrage
contentParts: Alle vorbereiteten ContentParts mit Metadaten
outputFormat: Ziel-Format (html, docx, pdf, etc.)
outputFormat: Optional global format fallback. If omitted, formats are determined
from user prompt by AI. Used as validation fallback if AI doesn't
return format per document. Defaults to "txt" if not provided.
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
Returns:
Struktur-Dict mit documents und chapters (nicht sections!)
"""
# If outputFormat not provided, use "txt" as fallback for validation
# AI will determine formats per document from user prompt
if not outputFormat:
outputFormat = "txt"
logger.debug("outputFormat not provided - using 'txt' as validation fallback, formats determined from prompt")
# Erstelle Operation-ID für Struktur-Generierung
structureOperationId = f"{parentOperationId}_structure_generation"
# Starte ChatLog mit Parent-Referenz
formatDisplay = outputFormat if outputFormat else "auto-determined"
self.services.chat.progressLogStart(
structureOperationId,
"Chapter Structure Generation",
"Structure",
f"Generating chapter structure for {outputFormat}",
f"Generating chapter structure (format: {formatDisplay})",
parentOperationId=parentOperationId
)
@ -181,6 +194,72 @@ Continue generating the remaining chapters now.
else:
structure = parsedJson
# State 3 Validation: Validate and auto-fix structure
# Validation 3.1: Structure missing 'documents' field
if "documents" not in structure:
raise ValueError("Structure missing 'documents' field - cannot auto-fix")
documents = structure["documents"]
# Validation 3.2: Structure has no documents
if not isinstance(documents, list) or len(documents) == 0:
raise ValueError("Structure has no documents - cannot generate without documents")
# Import renderer registry for format validation (existing infrastructure)
from modules.services.serviceGeneration.renderers.registry import getRenderer
# Validate and fix each document
for doc in documents:
# Validation 3.3 & 3.4: Document outputFormat
# outputFormat parameter is optional - if omitted, formats determined from prompt by AI
# Use as fallback only if AI doesn't return format per document
# Multiple documents can have different formats (e.g., one PDF, one HTML)
globalFormatFallback = outputFormat or "txt" # Fallback for validation
if "outputFormat" not in doc or not doc["outputFormat"]:
# AI didn't return format or returned empty - use global fallback
doc["outputFormat"] = globalFormatFallback
logger.warning(f"Document {doc.get('id')} missing outputFormat - using fallback: {doc['outputFormat']}")
else:
# AI returned format - validate using existing renderer registry
formatName = str(doc["outputFormat"]).lower().strip()
renderer = getRenderer(formatName) # Uses existing infrastructure
if not renderer:
# Format doesn't match any renderer - use txt (simple approach)
logger.warning(f"Document {doc.get('id')} has format without renderer: {formatName}, using 'txt'")
doc["outputFormat"] = "txt"
else:
# Valid format with renderer - normalize and keep AI result
doc["outputFormat"] = formatName
logger.debug(f"Document {doc.get('id')} using AI-determined format: {formatName}")
# Validation 3.5 & 3.6: Document language
# Use validated currentUserLanguage (always valid, validated during user intention analysis)
# Access via _getUserLanguage() which uses self.services.currentUserLanguage
userPromptLanguage = self._getUserLanguage() # Uses validated currentUserLanguage infrastructure
if "language" not in doc or not isinstance(doc["language"], str) or len(doc["language"]) != 2:
# AI didn't return language or invalid format - use validated currentUserLanguage
doc["language"] = userPromptLanguage
if "language" not in doc:
logger.warning(f"Document {doc.get('id')} missing language - using currentUserLanguage: {userPromptLanguage}")
else:
logger.warning(f"Document {doc.get('id')} has invalid language format from AI: {doc['language']}, using currentUserLanguage")
else:
# AI returned valid language format - normalize
doc["language"] = doc["language"].lower().strip()[:2]
logger.debug(f"Document {doc.get('id')} using AI-determined language: {doc['language']}")
# Validation 3.7: Document missing 'chapters' field
if "chapters" not in doc:
raise ValueError(f"Document {doc.get('id')} missing 'chapters' field - cannot auto-fix")
# Validation 3.8: Chapter missing 'contentParts' field
for chapter in doc["chapters"]:
if "contentParts" not in chapter:
raise ValueError(f"Chapter {chapter.get('id')} missing 'contentParts' field - cannot auto-fix")
# ChatLog abschließen
self.services.chat.progressLogFinish(structureOperationId, True)
@ -261,15 +340,13 @@ This is a PLANNING task. Return EXACTLY ONE complete JSON object. Do not generat
{userPrompt}
```
DEFAULT LANGUAGE: If no language is specified for a document, use "{language}" (from user prompt). Each document can have its own language specified in the "language" field. Use ISO 639-1 language codes in lowercase (e.g., "de", "en", "fr", "it").
## AVAILABLE CONTENT PARTS
{contentPartsIndex}
## CONTENT ASSIGNMENT RULE - CRITICAL
## CONTENT ASSIGNMENT RULE
If the user request mentions documents/images/data, then EVERY chapter that generates content related to those references MUST assign the relevant ContentParts explicitly.
**Assignment logic:**
Assignment logic:
- If chapter DISPLAYS a document/image assign "object" format ContentPart with "caption"
- If chapter generates text content ABOUT a document/image/data assign ContentPart with "instruction":
- Prefer "extracted" format if available (contains analyzed/extracted content)
@ -279,14 +356,11 @@ If the user request mentions documents/images/data, then EVERY chapter that gene
- Use ContentPart IDs exactly as listed in AVAILABLE CONTENT PARTS above
- Empty contentParts are only allowed if chapter generates content WITHOUT referencing any documents/images/data from the user request
**CRITICAL RULE**: If the user request mentions BOTH:
CRITICAL RULE: If the user request mentions BOTH:
a) Documents/images/data (listed in AVAILABLE CONTENT PARTS above), AND
b) Generic content types (article text, main content, body text, etc.)
Then chapters that generate those generic content types MUST assign the relevant ContentParts, because the content should relate to or be based on the provided documents/images/data.
## FORMATTING
- Formatting is handled automatically - focus on content and structure only
## CHAPTER STRUCTURE REQUIREMENTS
- Generate chapters based on USER REQUEST - analyze what structure the user wants
- Each chapter needs: id, level (1, 2, 3, etc.), title
@ -294,15 +368,38 @@ Then chapters that generate those generic content types MUST assign the relevant
- generationHint: Description of what content to generate for this chapter
- The number of chapters depends on the user request - create only what is requested
## DOCUMENT LANGUAGE
- Each document can have its own language (ISO 639-1 code in lowercase: "de", "en", "fr", "it", etc.)
- If no language is specified for a document, use the user prompt language: "{language}"
- The language determines in which language the content of that document will be generated
- Multiple documents can have different languages if needed
- Always use lowercase ISO 639-1 codes in the JSON output (e.g., "de", not "DE")
## DOCUMENT OUTPUT FORMAT
For each document, determine the output format by analyzing the USER REQUEST:
- Look for explicit format mentions
- Infer from document purpose
- Infer from content type
- If format cannot be determined from the prompt, use: "{outputFormat}"
- Include "outputFormat" field in each document in the JSON structure
- Multiple documents can have different formats
## OUTPUT FORMAT
Generate the chapter structure based on the USER REQUEST above. The number and types of chapters depend entirely on what the user requested - do NOT copy the example structure below.
## DOCUMENT LANGUAGE
For each document, determine the language by analyzing the USER REQUEST:
- Look for explicit language mentions
- Map language names to ISO 639-1 codes
- If language cannot be determined from the prompt, use: "{language}"
- Include "language" field in each document in the JSON structure
- Multiple documents can have different languages
## JSON STRUCTURE REQUIREMENTS
- metadata: {{"title": "...", "language": "..."}}
- documents: Array of document objects, each with:
- id: Unique document identifier (e.g., "doc_1")
- title: Document title
- filename: Output filename with extension (e.g., "document.docx")
- outputFormat: Format code (e.g., "docx", "pdf", "html", "xlsx", "pptx", "txt")
- language: ISO 639-1 language code (e.g., "de", "en", "fr", "it")
- chapters: Array of chapter objects, each with:
- id: Unique chapter identifier (e.g., "chapter_1")
- level: Heading level (1, 2, 3, etc.)
- title: Chapter title
- contentParts: Object mapping ContentPart IDs to usage instructions {{"partId": {{"instruction": "..."}} or {{"caption": "..."}}}}
- generationHint: Description of what content to generate
- sections: Empty array []
EXAMPLE STRUCTURE (for reference only - adapt to user request):
{{
@ -314,6 +411,7 @@ EXAMPLE STRUCTURE (for reference only - adapt to user request):
"id": "doc_1",
"title": "Document Title",
"filename": "document.{outputFormat}",
"outputFormat": "{outputFormat}",
"language": "{language}",
"chapters": [
{{
@ -337,8 +435,10 @@ CRITICAL INSTRUCTIONS:
- The example shows the JSON structure format, NOT the required chapters
- Create only the chapters that match the user's request
- Adapt chapter titles and structure to match the user's specific request
- Determine outputFormat and language for each document by analyzing the USER REQUEST above
- The example shows placeholders "{outputFormat}" and "{language}" - YOU MUST REPLACE THESE with actual values determined from the USER REQUEST
**MANDATORY CONTENT ASSIGNMENT CHECK:**
MANDATORY CONTENT ASSIGNMENT CHECK:
For each chapter, verify:
1. Does the user request mention documents/images/data? (e.g., "photo", "image", "document", "data", "based on", "about")
2. Does this chapter's generationHint, title, or purpose relate to those documents/images/data mentioned in step 1?

View file

@ -346,16 +346,19 @@ class GenerationService:
'workflowId': 'unknown'
}
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]:
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]:
"""
Render extracted JSON content to the specified output format.
Processes EACH document separately and calls renderer for each.
Each renderer can return 1..n documents (e.g., HTML + images).
Per-document format and language are extracted from structure (validated in State 3).
Multiple documents can have different formats and languages.
Args:
extractedContent: Structured JSON document with documents array
outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
In future, each document can have its own format
outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx) - Global fallback
language: Language (global fallback) - Per-document language extracted from structure
title: Report title
userPrompt: User's original prompt for report generation
aiService: AI service instance for generation prompt creation
@ -392,9 +395,17 @@ class GenerationService:
continue
# Determine format for this document
# TODO: In future, each document can have its own format field
# For now, use the global outputFormat
docFormat = doc.get("format", outputFormat)
# Check outputFormat field first (per-document), then format field (legacy), then global fallback
docFormat = doc.get("outputFormat") or doc.get("format") or outputFormat
# Determine language for this document
# Extract per-document language from structure (validated in State 3), fallback to global
docLanguage = doc.get("language") or language
# Validate language format (should be 2-character ISO code, validated in State 3)
if not isinstance(docLanguage, str) or len(docLanguage) != 2:
logger.warning(f"Document {doc.get('id')} has invalid language format: {docLanguage}, using fallback")
docLanguage = language # Use global fallback
# Get renderer for this document's format
renderer = self._getFormatRenderer(docFormat)
@ -404,7 +415,7 @@ class GenerationService:
# Create JSON structure with single document (preserving metadata)
singleDocContent = {
"metadata": metadata,
"metadata": {**metadata, "language": docLanguage}, # Add per-document language to metadata
"documents": [doc] # Only this document
}

View file

@ -148,9 +148,15 @@ class DocumentGenerationPath:
# Schritt 5E: Rendere Resultat
# Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder)
# Language is already validated in structure (State 3) and preserved in filled structure (State 4)
# Per-document language will be extracted in renderReport() from filledStructure
# Use validated currentUserLanguage as global fallback (always valid infrastructure)
language = self.services.currentUserLanguage if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage else "en"
renderedDocuments = await self.services.ai.renderResult(
filledStructure,
outputFormat,
language, # Global fallback (per-document language extracted from structure in renderReport)
title or "Generated Document",
userPrompt,
docOperationId

View file

@ -42,6 +42,7 @@ class FrontendType(str, Enum):
JSON = "json"
MULTILINGUAL = "multilingual"
FILE = "file"
HIDDEN = "hidden"
# Custom Types for Actions
USER_CONNECTION = "userConnection"

View file

@ -78,55 +78,21 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
# Phase 7.3: Extract content first if documents provided, then use contentParts
# Check if contentParts are already provided (preferred path)
# Phase 7.3: Pass both documentList and contentParts to AI service
# (Extraction logic removed - handled by AI service)
contentParts: Optional[List[ContentPart]] = None
if "contentParts" in parameters:
contentParts = parameters.get("contentParts")
if contentParts and not isinstance(contentParts, list):
# Try to extract from ContentExtracted if it's an ActionDocument
if hasattr(contentParts, 'parts'):
contentParts = contentParts.parts
contentPartsParam = parameters.get("contentParts")
if contentPartsParam:
if isinstance(contentPartsParam, list):
contentParts = contentPartsParam
elif hasattr(contentPartsParam, 'parts'):
# Extract from ContentExtracted if it's an ActionDocument
contentParts = contentPartsParam.parts
else:
logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
logger.warning(f"Invalid contentParts type: {type(contentPartsParam)}, treating as empty")
contentParts = None
# If contentParts not provided but documentList is, extract content first
if not contentParts and documentList.references:
self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")
# Get ChatDocuments
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
if not chatDocuments:
logger.warning("No documents found in documentList")
else:
logger.info(f"Extracting content from {len(chatDocuments)} documents")
# Prepare extraction options (use defaults if not provided)
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
extractionOptions = parameters.get("extractionOptions")
if not extractionOptions:
extractionOptions = ExtractionOptions(
prompt="Extract all content from the document",
mergeStrategy=MergeStrategy(
mergeType="concatenate",
groupBy="typeGroup",
orderBy="id"
),
processDocumentsIndividually=True
)
# Extract content using extraction service
extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions)
# Combine all ContentParts from all extracted results
contentParts = []
for extracted in extractedResults:
if extracted.parts:
contentParts.extend(extracted.parts)
logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")
# Update progress - preparing AI call
self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
@ -136,8 +102,9 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
# Build options with correct operationType
from modules.datamodels.datamodelAi import OperationTypeEnum
# resultFormat in options can be None - formats will be determined by AI if not provided
options = AiCallOptions(
resultFormat=output_format or "txt", # Fallback for options, but outputFormat can be None for callAiContent
resultFormat=output_format, # Can be None - formats determined by AI
operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE
)
@ -149,13 +116,14 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
# Update progress - calling AI
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
# Use unified callAiContent method with contentParts (extraction is now separate)
# ContentParts are already extracted above (or None if no documents)
# Use unified callAiContent method with BOTH documentList and contentParts
# Extraction is handled by AI service - no extraction here
# outputFormat: Optional - if None, formats determined from prompt by AI
aiResponse = await self.services.ai.callAiContent(
prompt=aiPrompt,
options=options,
contentParts=contentParts, # Already extracted (or None if no documents)
documentList=documentList, # Pass documentList - AI service handles extraction
contentParts=contentParts, # Pass contentParts if provided (or None)
outputFormat=output_format, # Can be None - AI determines from prompt
parentOperationId=operationId,
generationIntent=generationIntent # REQUIRED for DATA_GENERATE
@ -186,6 +154,7 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
final_documents = action_documents
else:
# Text response - create document from content
# If no extension provided, use "txt" (required for filename)
extension = output_extension.lstrip('.') if output_extension else "txt"
meaningful_name = self._generateMeaningfulFileName(
base_name="ai",
@ -194,8 +163,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
)
validationMetadata = {
"actionType": "ai.process",
"resultType": normalized_result_type or "auto",
"outputFormat": output_format or "auto",
"resultType": normalized_result_type if normalized_result_type else None,
"outputFormat": output_format if output_format else None,
"hasDocuments": False,
"contentType": "text"
}

View file

@ -36,7 +36,7 @@ class MethodAi(MethodBase):
self._actions = {
"process": WorkflowActionDefinition(
actionId="ai.process",
description="Universal AI document processing action - accepts multiple input documents in any format and processes them together with a prompt",
description="Universal AI document processing action - accepts multiple input documents in any format and processes them together with a prompt. If the prompt specifies document formats to deliver, include them in the prompt",
dynamicMode=True,
parameters={
"aiPrompt": WorkflowActionParameter(
@ -70,7 +70,14 @@ class MethodAi(MethodBase):
required=False,
default="document",
description="Explicit generation intent (\"document\" | \"code\" | \"image\"). Required for DATA_GENERATE operations. Defaults to \"document\" if not provided. For code generation, use ai.generateCode action or explicitly pass generationIntent=\"code\". For IMAGE_GENERATE operations, this parameter is ignored."
)
),
"contentParts": WorkflowActionParameter(
name="contentParts",
type="List[ContentPart]",
frontendType=FrontendType.HIDDEN,
required=False,
description="Pre-extracted content parts (internal parameter, typically passed between actions). If provided, these will be used instead of extracting from documentList. Can be a list of ContentPart objects or an object with a 'parts' attribute."
),
},
execute=process.__get__(self, self.__class__)
),
@ -122,7 +129,7 @@ class MethodAi(MethodBase):
),
"summarizeDocument": WorkflowActionDefinition(
actionId="ai.summarizeDocument",
description="Summarize one or more documents, extracting key points and main ideas",
description="Summarize one or more documents, extracting key points and main ideas. If the prompt specifies document formats to deliver, include them in the prompt",
dynamicMode=True,
parameters={
"documentList": WorkflowActionParameter(
@ -237,7 +244,7 @@ class MethodAi(MethodBase):
),
"generateDocument": WorkflowActionDefinition(
actionId="ai.generateDocument",
description="Generate documents from scratch or based on templates/inputs",
description="Generate documents from scratch or based on templates/inputs. If the prompt specifies document formats to deliver, include them in the prompt",
dynamicMode=True,
parameters={
"prompt": WorkflowActionParameter(
@ -275,7 +282,7 @@ class MethodAi(MethodBase):
),
"generateCode": WorkflowActionDefinition(
actionId="ai.generateCode",
description="Generate code files - explicitly sets intent to 'code'",
description="Generate code files - explicitly sets intent to 'code'. If the prompt specifies file formats to deliver, include them in the prompt",
dynamicMode=True,
parameters={
"prompt": WorkflowActionParameter(