Enhanced the AI flow for multi-language handling (e.g. prompt written in German, user language English, documents delivered in French) and for delivering documents in different output formats
This commit is contained in:
parent
3408e7b463
commit
1362470f00
10 changed files with 302 additions and 108 deletions
|
|
@ -437,8 +437,8 @@ Respond with ONLY a JSON object in this exact format:
|
|||
self,
|
||||
userPrompt: str,
|
||||
contentParts: List[ContentPart],
|
||||
outputFormat: str,
|
||||
parentOperationId: str
|
||||
outputFormat: Optional[str] = None,
|
||||
parentOperationId: str = None
|
||||
) -> Dict[str, Any]:
|
||||
"""Public method: Delegate to StructureGenerator."""
|
||||
return await self.structureGenerator.generateStructure(
|
||||
|
|
@ -461,6 +461,7 @@ Respond with ONLY a JSON object in this exact format:
|
|||
self,
|
||||
filledStructure: Dict[str, Any],
|
||||
outputFormat: str,
|
||||
language: str,
|
||||
title: str,
|
||||
userPrompt: str,
|
||||
parentOperationId: str
|
||||
|
|
@ -469,9 +470,15 @@ Respond with ONLY a JSON object in this exact format:
|
|||
Phase 5E: Rendert gefüllte Struktur zum Ziel-Format.
|
||||
Jedes Dokument wird einzeln gerendert, jeder Renderer kann 1..n Dokumente zurückgeben.
|
||||
|
||||
Render filled structure to documents.
|
||||
Per-document format and language are extracted from structure (validated in State 3).
|
||||
The outputFormat and language parameters are only used as global fallbacks.
|
||||
Multiple documents can have different formats and languages.
|
||||
|
||||
Args:
|
||||
filledStructure: Gefüllte Struktur mit elements
|
||||
outputFormat: Ziel-Format (pdf, docx, html, etc.) - wird für alle Dokumente verwendet
|
||||
outputFormat: Ziel-Format (pdf, docx, html, etc.) - Global fallback
|
||||
language: Language (global fallback) - Per-document language extracted from structure
|
||||
title: Dokument-Titel
|
||||
userPrompt: User-Anfrage
|
||||
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
|
||||
|
|
@ -480,6 +487,11 @@ Respond with ONLY a JSON object in this exact format:
|
|||
List of RenderedDocument objects.
|
||||
Jedes RenderedDocument repräsentiert ein gerendertes Dokument (Hauptdokument oder unterstützende Datei)
|
||||
"""
|
||||
# Language comes from structure (per-document), validated in State 3
|
||||
# This parameter is only used as global fallback if structure validation fails
|
||||
# Use validated currentUserLanguage as fallback (always valid)
|
||||
if not language:
|
||||
language = self._getUserLanguage() if hasattr(self, '_getUserLanguage') else (self.services.currentUserLanguage if hasattr(self.services, 'currentUserLanguage') else 'en')
|
||||
# Erstelle Operation-ID für Rendering
|
||||
renderOperationId = f"{parentOperationId}_rendering"
|
||||
|
||||
|
|
@ -502,6 +514,7 @@ Respond with ONLY a JSON object in this exact format:
|
|||
renderedDocuments = await generationService.renderReport(
|
||||
filledStructure,
|
||||
outputFormat,
|
||||
language, # Pass language (global fallback, per-document extracted in renderReport)
|
||||
title,
|
||||
userPrompt,
|
||||
self,
|
||||
|
|
@ -577,18 +590,18 @@ Respond with ONLY a JSON object in this exact format:
|
|||
aiOperationId = f"ai_content_{workflowId}_{int(time.time())}"
|
||||
|
||||
# Starte Progress-Tracking mit Parent-Referenz
|
||||
formatDisplay = outputFormat if outputFormat else "auto-determined"
|
||||
self.services.chat.progressLogStart(
|
||||
aiOperationId,
|
||||
"AI content processing",
|
||||
"Content Processing",
|
||||
f"Format: {outputFormat or 'text'}",
|
||||
f"Format: {formatDisplay}",
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
try:
|
||||
# Initialisiere Defaults
|
||||
if not outputFormat:
|
||||
outputFormat = "txt"
|
||||
# outputFormat is optional - if None, formats determined from prompt by AI
|
||||
# No default fallback here - let AI service handle it
|
||||
|
||||
opType = getattr(options, "operationType", None)
|
||||
if not opType:
|
||||
|
|
@ -652,19 +665,11 @@ Respond with ONLY a JSON object in this exact format:
|
|||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
# Other operation types (DATA_ANALYSE, etc.) - existing logic
|
||||
# Fallback to document generation for backward compatibility (should not happen)
|
||||
logger.warning(f"Unhandled operation type: {opType}, falling back to document generation")
|
||||
return await self._handleDocumentGeneration(
|
||||
prompt=prompt,
|
||||
options=options,
|
||||
documentList=documentList,
|
||||
documentIntents=documentIntents,
|
||||
contentParts=contentParts,
|
||||
outputFormat=outputFormat,
|
||||
title=title,
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
# Other operation types (DATA_ANALYSE, etc.) - not supported
|
||||
errorMsg = f"Unsupported operation type: {opType}. Supported types: IMAGE_GENERATE, DATA_GENERATE, DATA_EXTRACT"
|
||||
logger.error(errorMsg)
|
||||
self.services.chat.progressLogFinish(aiOperationId, False)
|
||||
raise ValueError(errorMsg)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in callAiContent: {str(e)}")
|
||||
|
|
@ -707,6 +712,31 @@ Respond with ONLY a JSON object in this exact format:
|
|||
if documentList:
|
||||
documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)
|
||||
|
||||
# Filter: Remove original documents if already covered by pre-extracted JSONs
|
||||
# (to prevent duplicate ContentParts - pre-extracted JSONs contain already extracted ContentParts)
|
||||
if documents:
|
||||
# Step 1: Identify all original document IDs covered by pre-extracted JSONs
|
||||
originalDocIdsCoveredByPreExtracted = set()
|
||||
for doc in documents:
|
||||
preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(doc)
|
||||
if preExtracted:
|
||||
originalDocId = preExtracted["originalDocument"]["id"]
|
||||
originalDocIdsCoveredByPreExtracted.add(originalDocId)
|
||||
logger.debug(f"Found pre-extracted JSON {doc.id} covering original document {originalDocId}")
|
||||
|
||||
# Step 2: Filter documents - remove originals covered by pre-extracted JSONs
|
||||
filteredDocuments = []
|
||||
for doc in documents:
|
||||
preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(doc)
|
||||
if preExtracted:
|
||||
filteredDocuments.append(doc) # Keep pre-extracted JSON
|
||||
elif doc.id in originalDocIdsCoveredByPreExtracted:
|
||||
logger.info(f"Skipping original document {doc.id} ({doc.fileName}) - already covered by pre-extracted JSON")
|
||||
else:
|
||||
filteredDocuments.append(doc) # Keep regular document
|
||||
|
||||
documents = filteredDocuments # Use filtered list
|
||||
|
||||
# Step 2: Clarify document intents (if not provided) - REQUIRED for all documents
|
||||
if not documentIntents and documents:
|
||||
documentIntents = await self.clarifyDocumentIntents(
|
||||
|
|
|
|||
|
|
@ -408,10 +408,28 @@ class ContentExtractor:
|
|||
"content_extraction_result"
|
||||
)
|
||||
|
||||
# State 2 Validation: Validate and auto-fix ContentParts
|
||||
validatedParts = []
|
||||
for part in allContentParts:
|
||||
# Validation 2.1: Skip ContentParts without documentId
|
||||
if not part.metadata.get("documentId"):
|
||||
logger.warning(f"Skipping ContentPart {part.id} - missing documentId in metadata")
|
||||
continue
|
||||
|
||||
# Validation 2.2: Skip ContentParts with invalid contentFormat
|
||||
contentFormat = part.metadata.get("contentFormat")
|
||||
if contentFormat not in ["extracted", "object", "reference"]:
|
||||
logger.warning(
|
||||
f"Skipping ContentPart {part.id} - invalid contentFormat: {contentFormat}"
|
||||
)
|
||||
continue
|
||||
|
||||
validatedParts.append(part)
|
||||
|
||||
# ChatLog abschließen
|
||||
self.services.chat.progressLogFinish(extractionOperationId, True)
|
||||
|
||||
return allContentParts
|
||||
return validatedParts
|
||||
|
||||
except Exception as e:
|
||||
self.services.chat.progressLogFinish(extractionOperationId, False)
|
||||
|
|
|
|||
|
|
@ -109,6 +109,21 @@ class DocumentIntentAnalyzer:
|
|||
"document_intent_analysis_result"
|
||||
)
|
||||
|
||||
# State 1 Validation: Validate and auto-fix document intents
|
||||
documentIds = {d.id for d in documents}
|
||||
validatedIntents = []
|
||||
|
||||
for intent in documentIntents:
|
||||
# Validation 1.2: Skip intents for unknown documents
|
||||
if intent.documentId not in documentIds:
|
||||
logger.warning(f"Skipping intent for unknown document: {intent.documentId}")
|
||||
continue
|
||||
validatedIntents.append(intent)
|
||||
|
||||
# Validation 1.1: Documents without intents are OK (not needed)
|
||||
# Intents for non-existing documents are already filtered above
|
||||
documentIntents = validatedIntents
|
||||
|
||||
# ChatLog abschließen
|
||||
self.services.chat.progressLogFinish(intentOperationId, True)
|
||||
|
||||
|
|
@ -245,8 +260,13 @@ class DocumentIntentAnalyzer:
|
|||
|
||||
outputFormat = actionParameters.get("outputFormat", "txt")
|
||||
|
||||
prompt = f"""USER REQUEST:
|
||||
# FENCE user input to prevent prompt injection
|
||||
fencedUserPrompt = f"""```user_request
|
||||
{userPrompt}
|
||||
```"""
|
||||
|
||||
prompt = f"""USER REQUEST:
|
||||
{fencedUserPrompt}
|
||||
|
||||
DOCUMENTS TO ANALYZE:
|
||||
{docListText}
|
||||
|
|
@ -256,20 +276,25 @@ TASK: For each document, determine its intents (can be multiple):
|
|||
- "render": Image/binary should be rendered as-is (visual element)
|
||||
- "reference": Document reference/attachment (no extraction, just reference)
|
||||
|
||||
OUTPUT FORMAT: {outputFormat}
|
||||
TASK: For each document, determine:
|
||||
1. Intents (can be multiple): "extract", "render", "reference"
|
||||
Note: Output format and language are NOT determined here - they will be
|
||||
determined during structure generation (Phase 3) in the chapter structure JSON
|
||||
|
||||
OUTPUT FORMAT: {outputFormat} (global fallback - for reference only)
|
||||
|
||||
RETURN JSON:
|
||||
{{
|
||||
"intents": [
|
||||
{{
|
||||
"documentId": "doc_1",
|
||||
"intents": ["extract"], # Array - can contain multiple!
|
||||
"intents": ["extract"],
|
||||
"extractionPrompt": "Extract all text content, preserving structure",
|
||||
"reasoning": "User needs text content for document generation"
|
||||
}},
|
||||
{{
|
||||
"documentId": "doc_2",
|
||||
"intents": ["extract", "render"], # Both! Image needs text extraction AND visual rendering
|
||||
"intents": ["extract", "render"],
|
||||
"extractionPrompt": "Extract text content from image using vision AI",
|
||||
"reasoning": "Image contains text that needs extraction, but also should be rendered visually"
|
||||
}},
|
||||
|
|
|
|||
|
|
@ -198,6 +198,31 @@ class StructureFiller:
|
|||
# Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung)
|
||||
flattenedStructure = self._addContentPartsMetadata(flattenedStructure, contentParts)
|
||||
|
||||
# State 4 Validation: Validate and auto-fix filled structure
|
||||
# Validation 4.1: Filled structure missing 'documents' field
|
||||
if "documents" not in flattenedStructure:
|
||||
raise ValueError("Filled structure missing 'documents' field - cannot auto-fix")
|
||||
|
||||
for doc in flattenedStructure["documents"]:
|
||||
# Validation 4.4: Verify language is preserved from input structure
|
||||
# Language MUST be preserved from Phase 3 structure (validated in State 3)
|
||||
if "language" not in doc:
|
||||
raise ValueError(f"Document {doc.get('id')} missing language in filled structure - should have been preserved from Phase 3")
|
||||
|
||||
# Validate language format
|
||||
if not isinstance(doc["language"], str) or len(doc["language"]) != 2:
|
||||
raise ValueError(f"Document {doc.get('id')} has invalid language format in filled structure: {doc['language']} - should be 2-character ISO 639-1 code")
|
||||
|
||||
for chapter in doc.get("chapters", []):
|
||||
for section in chapter.get("sections", []):
|
||||
# Validation 4.2: Section missing 'elements' field
|
||||
if "elements" not in section:
|
||||
section["elements"] = []
|
||||
logger.info(f"Section {section.get('id')} missing 'elements' - created empty list")
|
||||
|
||||
# Validation 4.3: Section has empty elements list - ALLOW (intentionally empty is OK)
|
||||
# No action needed - empty elements are allowed
|
||||
|
||||
# ChatLog abschließen
|
||||
self.services.chat.progressLogFinish(fillOperationId, True)
|
||||
|
||||
|
|
@ -1783,6 +1808,8 @@ The JSON should be a fragment that can be merged with the previous response."""
|
|||
"id": doc.get("id"),
|
||||
"title": doc.get("title"),
|
||||
"filename": doc.get("filename"),
|
||||
"outputFormat": doc.get("outputFormat"), # Preserve from Phase 3
|
||||
"language": doc.get("language"), # Preserve from Phase 3
|
||||
"sections": []
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -44,8 +44,8 @@ class StructureGenerator:
|
|||
self,
|
||||
userPrompt: str,
|
||||
contentParts: List[ContentPart],
|
||||
outputFormat: str,
|
||||
parentOperationId: str
|
||||
outputFormat: Optional[str] = None,
|
||||
parentOperationId: str = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Phase 5C: Generiert Chapter-Struktur (Table of Contents).
|
||||
|
|
@ -54,24 +54,37 @@ class StructureGenerator:
|
|||
- contentParts (unified object with instruction and/or caption per part)
|
||||
- generationHint
|
||||
|
||||
Generate document structure with per-document format determination.
|
||||
Multiple documents can be produced with different formats (e.g., one PDF, one HTML).
|
||||
AI determines formats per-document from user prompt. The outputFormat parameter is
|
||||
only a validation fallback - used if AI doesn't return format per document.
|
||||
|
||||
Args:
|
||||
userPrompt: User-Anfrage
|
||||
contentParts: Alle vorbereiteten ContentParts mit Metadaten
|
||||
outputFormat: Ziel-Format (html, docx, pdf, etc.)
|
||||
outputFormat: Optional global format fallback. If omitted, formats are determined
|
||||
from user prompt by AI. Used as validation fallback if AI doesn't
|
||||
return format per document. Defaults to "txt" if not provided.
|
||||
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
|
||||
|
||||
Returns:
|
||||
Struktur-Dict mit documents und chapters (nicht sections!)
|
||||
"""
|
||||
# If outputFormat not provided, use "txt" as fallback for validation
|
||||
# AI will determine formats per document from user prompt
|
||||
if not outputFormat:
|
||||
outputFormat = "txt"
|
||||
logger.debug("outputFormat not provided - using 'txt' as validation fallback, formats determined from prompt")
|
||||
# Erstelle Operation-ID für Struktur-Generierung
|
||||
structureOperationId = f"{parentOperationId}_structure_generation"
|
||||
|
||||
# Starte ChatLog mit Parent-Referenz
|
||||
formatDisplay = outputFormat if outputFormat else "auto-determined"
|
||||
self.services.chat.progressLogStart(
|
||||
structureOperationId,
|
||||
"Chapter Structure Generation",
|
||||
"Structure",
|
||||
f"Generating chapter structure for {outputFormat}",
|
||||
f"Generating chapter structure (format: {formatDisplay})",
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
|
|
@ -181,6 +194,72 @@ Continue generating the remaining chapters now.
|
|||
else:
|
||||
structure = parsedJson
|
||||
|
||||
# State 3 Validation: Validate and auto-fix structure
|
||||
# Validation 3.1: Structure missing 'documents' field
|
||||
if "documents" not in structure:
|
||||
raise ValueError("Structure missing 'documents' field - cannot auto-fix")
|
||||
|
||||
documents = structure["documents"]
|
||||
|
||||
# Validation 3.2: Structure has no documents
|
||||
if not isinstance(documents, list) or len(documents) == 0:
|
||||
raise ValueError("Structure has no documents - cannot generate without documents")
|
||||
|
||||
# Import renderer registry for format validation (existing infrastructure)
|
||||
from modules.services.serviceGeneration.renderers.registry import getRenderer
|
||||
|
||||
# Validate and fix each document
|
||||
for doc in documents:
|
||||
# Validation 3.3 & 3.4: Document outputFormat
|
||||
# outputFormat parameter is optional - if omitted, formats determined from prompt by AI
|
||||
# Use as fallback only if AI doesn't return format per document
|
||||
# Multiple documents can have different formats (e.g., one PDF, one HTML)
|
||||
globalFormatFallback = outputFormat or "txt" # Fallback for validation
|
||||
|
||||
if "outputFormat" not in doc or not doc["outputFormat"]:
|
||||
# AI didn't return format or returned empty - use global fallback
|
||||
doc["outputFormat"] = globalFormatFallback
|
||||
logger.warning(f"Document {doc.get('id')} missing outputFormat - using fallback: {doc['outputFormat']}")
|
||||
else:
|
||||
# AI returned format - validate using existing renderer registry
|
||||
formatName = str(doc["outputFormat"]).lower().strip()
|
||||
renderer = getRenderer(formatName) # Uses existing infrastructure
|
||||
|
||||
if not renderer:
|
||||
# Format doesn't match any renderer - use txt (simple approach)
|
||||
logger.warning(f"Document {doc.get('id')} has format without renderer: {formatName}, using 'txt'")
|
||||
doc["outputFormat"] = "txt"
|
||||
else:
|
||||
# Valid format with renderer - normalize and keep AI result
|
||||
doc["outputFormat"] = formatName
|
||||
logger.debug(f"Document {doc.get('id')} using AI-determined format: {formatName}")
|
||||
|
||||
# Validation 3.5 & 3.6: Document language
|
||||
# Use validated currentUserLanguage (always valid, validated during user intention analysis)
|
||||
# Access via _getUserLanguage() which uses self.services.currentUserLanguage
|
||||
userPromptLanguage = self._getUserLanguage() # Uses validated currentUserLanguage infrastructure
|
||||
|
||||
if "language" not in doc or not isinstance(doc["language"], str) or len(doc["language"]) != 2:
|
||||
# AI didn't return language or invalid format - use validated currentUserLanguage
|
||||
doc["language"] = userPromptLanguage
|
||||
if "language" not in doc:
|
||||
logger.warning(f"Document {doc.get('id')} missing language - using currentUserLanguage: {userPromptLanguage}")
|
||||
else:
|
||||
logger.warning(f"Document {doc.get('id')} has invalid language format from AI: {doc['language']}, using currentUserLanguage")
|
||||
else:
|
||||
# AI returned valid language format - normalize
|
||||
doc["language"] = doc["language"].lower().strip()[:2]
|
||||
logger.debug(f"Document {doc.get('id')} using AI-determined language: {doc['language']}")
|
||||
|
||||
# Validation 3.7: Document missing 'chapters' field
|
||||
if "chapters" not in doc:
|
||||
raise ValueError(f"Document {doc.get('id')} missing 'chapters' field - cannot auto-fix")
|
||||
|
||||
# Validation 3.8: Chapter missing 'contentParts' field
|
||||
for chapter in doc["chapters"]:
|
||||
if "contentParts" not in chapter:
|
||||
raise ValueError(f"Chapter {chapter.get('id')} missing 'contentParts' field - cannot auto-fix")
|
||||
|
||||
# ChatLog abschließen
|
||||
self.services.chat.progressLogFinish(structureOperationId, True)
|
||||
|
||||
|
|
@ -261,15 +340,13 @@ This is a PLANNING task. Return EXACTLY ONE complete JSON object. Do not generat
|
|||
{userPrompt}
|
||||
```
|
||||
|
||||
DEFAULT LANGUAGE: If no language is specified for a document, use "{language}" (from user prompt). Each document can have its own language specified in the "language" field. Use ISO 639-1 language codes in lowercase (e.g., "de", "en", "fr", "it").
|
||||
|
||||
## AVAILABLE CONTENT PARTS
|
||||
{contentPartsIndex}
|
||||
|
||||
## CONTENT ASSIGNMENT RULE - CRITICAL
|
||||
## CONTENT ASSIGNMENT RULE
|
||||
If the user request mentions documents/images/data, then EVERY chapter that generates content related to those references MUST assign the relevant ContentParts explicitly.
|
||||
|
||||
**Assignment logic:**
|
||||
Assignment logic:
|
||||
- If chapter DISPLAYS a document/image → assign "object" format ContentPart with "caption"
|
||||
- If chapter generates text content ABOUT a document/image/data → assign ContentPart with "instruction":
|
||||
- Prefer "extracted" format if available (contains analyzed/extracted content)
|
||||
|
|
@ -279,14 +356,11 @@ If the user request mentions documents/images/data, then EVERY chapter that gene
|
|||
- Use ContentPart IDs exactly as listed in AVAILABLE CONTENT PARTS above
|
||||
- Empty contentParts are only allowed if chapter generates content WITHOUT referencing any documents/images/data from the user request
|
||||
|
||||
**CRITICAL RULE**: If the user request mentions BOTH:
|
||||
CRITICAL RULE: If the user request mentions BOTH:
|
||||
a) Documents/images/data (listed in AVAILABLE CONTENT PARTS above), AND
|
||||
b) Generic content types (article text, main content, body text, etc.)
|
||||
Then chapters that generate those generic content types MUST assign the relevant ContentParts, because the content should relate to or be based on the provided documents/images/data.
|
||||
|
||||
## FORMATTING
|
||||
- Formatting is handled automatically - focus on content and structure only
|
||||
|
||||
## CHAPTER STRUCTURE REQUIREMENTS
|
||||
- Generate chapters based on USER REQUEST - analyze what structure the user wants
|
||||
- Each chapter needs: id, level (1, 2, 3, etc.), title
|
||||
|
|
@ -294,15 +368,38 @@ Then chapters that generate those generic content types MUST assign the relevant
|
|||
- generationHint: Description of what content to generate for this chapter
|
||||
- The number of chapters depends on the user request - create only what is requested
|
||||
|
||||
## DOCUMENT LANGUAGE
|
||||
- Each document can have its own language (ISO 639-1 code in lowercase: "de", "en", "fr", "it", etc.)
|
||||
- If no language is specified for a document, use the user prompt language: "{language}"
|
||||
- The language determines in which language the content of that document will be generated
|
||||
- Multiple documents can have different languages if needed
|
||||
- Always use lowercase ISO 639-1 codes in the JSON output (e.g., "de", not "DE")
|
||||
## DOCUMENT OUTPUT FORMAT
|
||||
For each document, determine the output format by analyzing the USER REQUEST:
|
||||
- Look for explicit format mentions
|
||||
- Infer from document purpose
|
||||
- Infer from content type
|
||||
- If format cannot be determined from the prompt, use: "{outputFormat}"
|
||||
- Include "outputFormat" field in each document in the JSON structure
|
||||
- Multiple documents can have different formats
|
||||
|
||||
## OUTPUT FORMAT
|
||||
Generate the chapter structure based on the USER REQUEST above. The number and types of chapters depend entirely on what the user requested - do NOT copy the example structure below.
|
||||
## DOCUMENT LANGUAGE
|
||||
For each document, determine the language by analyzing the USER REQUEST:
|
||||
- Look for explicit language mentions
|
||||
- Map language names to ISO 639-1 codes
|
||||
- If language cannot be determined from the prompt, use: "{language}"
|
||||
- Include "language" field in each document in the JSON structure
|
||||
- Multiple documents can have different languages
|
||||
|
||||
## JSON STRUCTURE REQUIREMENTS
|
||||
- metadata: {{"title": "...", "language": "..."}}
|
||||
- documents: Array of document objects, each with:
|
||||
- id: Unique document identifier (e.g., "doc_1")
|
||||
- title: Document title
|
||||
- filename: Output filename with extension (e.g., "document.docx")
|
||||
- outputFormat: Format code (e.g., "docx", "pdf", "html", "xlsx", "pptx", "txt")
|
||||
- language: ISO 639-1 language code (e.g., "de", "en", "fr", "it")
|
||||
- chapters: Array of chapter objects, each with:
|
||||
- id: Unique chapter identifier (e.g., "chapter_1")
|
||||
- level: Heading level (1, 2, 3, etc.)
|
||||
- title: Chapter title
|
||||
- contentParts: Object mapping ContentPart IDs to usage instructions {{"partId": {{"instruction": "..."}} or {{"caption": "..."}}}}
|
||||
- generationHint: Description of what content to generate
|
||||
- sections: Empty array []
|
||||
|
||||
EXAMPLE STRUCTURE (for reference only - adapt to user request):
|
||||
{{
|
||||
|
|
@ -314,6 +411,7 @@ EXAMPLE STRUCTURE (for reference only - adapt to user request):
|
|||
"id": "doc_1",
|
||||
"title": "Document Title",
|
||||
"filename": "document.{outputFormat}",
|
||||
"outputFormat": "{outputFormat}",
|
||||
"language": "{language}",
|
||||
"chapters": [
|
||||
{{
|
||||
|
|
@ -337,8 +435,10 @@ CRITICAL INSTRUCTIONS:
|
|||
- The example shows the JSON structure format, NOT the required chapters
|
||||
- Create only the chapters that match the user's request
|
||||
- Adapt chapter titles and structure to match the user's specific request
|
||||
- Determine outputFormat and language for each document by analyzing the USER REQUEST above
|
||||
- The example shows placeholders "{outputFormat}" and "{language}" - YOU MUST REPLACE THESE with actual values determined from the USER REQUEST
|
||||
|
||||
**MANDATORY CONTENT ASSIGNMENT CHECK:**
|
||||
MANDATORY CONTENT ASSIGNMENT CHECK:
|
||||
For each chapter, verify:
|
||||
1. Does the user request mention documents/images/data? (e.g., "photo", "image", "document", "data", "based on", "about")
|
||||
2. Does this chapter's generationHint, title, or purpose relate to those documents/images/data mentioned in step 1?
|
||||
|
|
|
|||
|
|
@ -346,16 +346,19 @@ class GenerationService:
|
|||
'workflowId': 'unknown'
|
||||
}
|
||||
|
||||
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]:
|
||||
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]:
|
||||
"""
|
||||
Render extracted JSON content to the specified output format.
|
||||
Processes EACH document separately and calls renderer for each.
|
||||
Each renderer can return 1..n documents (e.g., HTML + images).
|
||||
|
||||
Per-document format and language are extracted from structure (validated in State 3).
|
||||
Multiple documents can have different formats and languages.
|
||||
|
||||
Args:
|
||||
extractedContent: Structured JSON document with documents array
|
||||
outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
|
||||
In future, each document can have its own format
|
||||
outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx) - Global fallback
|
||||
language: Language (global fallback) - Per-document language extracted from structure
|
||||
title: Report title
|
||||
userPrompt: User's original prompt for report generation
|
||||
aiService: AI service instance for generation prompt creation
|
||||
|
|
@ -392,9 +395,17 @@ class GenerationService:
|
|||
continue
|
||||
|
||||
# Determine format for this document
|
||||
# TODO: In future, each document can have its own format field
|
||||
# For now, use the global outputFormat
|
||||
docFormat = doc.get("format", outputFormat)
|
||||
# Check outputFormat field first (per-document), then format field (legacy), then global fallback
|
||||
docFormat = doc.get("outputFormat") or doc.get("format") or outputFormat
|
||||
|
||||
# Determine language for this document
|
||||
# Extract per-document language from structure (validated in State 3), fallback to global
|
||||
docLanguage = doc.get("language") or language
|
||||
|
||||
# Validate language format (should be 2-character ISO code, validated in State 3)
|
||||
if not isinstance(docLanguage, str) or len(docLanguage) != 2:
|
||||
logger.warning(f"Document {doc.get('id')} has invalid language format: {docLanguage}, using fallback")
|
||||
docLanguage = language # Use global fallback
|
||||
|
||||
# Get renderer for this document's format
|
||||
renderer = self._getFormatRenderer(docFormat)
|
||||
|
|
@ -404,7 +415,7 @@ class GenerationService:
|
|||
|
||||
# Create JSON structure with single document (preserving metadata)
|
||||
singleDocContent = {
|
||||
"metadata": metadata,
|
||||
"metadata": {**metadata, "language": docLanguage}, # Add per-document language to metadata
|
||||
"documents": [doc] # Only this document
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -148,9 +148,15 @@ class DocumentGenerationPath:
|
|||
|
||||
# Schritt 5E: Rendere Resultat
|
||||
# Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder)
|
||||
# Language is already validated in structure (State 3) and preserved in filled structure (State 4)
|
||||
# Per-document language will be extracted in renderReport() from filledStructure
|
||||
# Use validated currentUserLanguage as global fallback (always valid infrastructure)
|
||||
language = self.services.currentUserLanguage if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage else "en"
|
||||
|
||||
renderedDocuments = await self.services.ai.renderResult(
|
||||
filledStructure,
|
||||
outputFormat,
|
||||
language, # Global fallback (per-document language extracted from structure in renderReport)
|
||||
title or "Generated Document",
|
||||
userPrompt,
|
||||
docOperationId
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ class FrontendType(str, Enum):
|
|||
JSON = "json"
|
||||
MULTILINGUAL = "multilingual"
|
||||
FILE = "file"
|
||||
HIDDEN = "hidden"
|
||||
|
||||
# Custom Types for Actions
|
||||
USER_CONNECTION = "userConnection"
|
||||
|
|
|
|||
|
|
@ -78,55 +78,21 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
|
||||
output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
|
||||
|
||||
# Phase 7.3: Extract content first if documents provided, then use contentParts
|
||||
# Check if contentParts are already provided (preferred path)
|
||||
# Phase 7.3: Pass both documentList and contentParts to AI service
|
||||
# (Extraction logic removed - handled by AI service)
|
||||
contentParts: Optional[List[ContentPart]] = None
|
||||
if "contentParts" in parameters:
|
||||
contentParts = parameters.get("contentParts")
|
||||
if contentParts and not isinstance(contentParts, list):
|
||||
# Try to extract from ContentExtracted if it's an ActionDocument
|
||||
if hasattr(contentParts, 'parts'):
|
||||
contentParts = contentParts.parts
|
||||
contentPartsParam = parameters.get("contentParts")
|
||||
if contentPartsParam:
|
||||
if isinstance(contentPartsParam, list):
|
||||
contentParts = contentPartsParam
|
||||
elif hasattr(contentPartsParam, 'parts'):
|
||||
# Extract from ContentExtracted if it's an ActionDocument
|
||||
contentParts = contentPartsParam.parts
|
||||
else:
|
||||
logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty")
|
||||
logger.warning(f"Invalid contentParts type: {type(contentPartsParam)}, treating as empty")
|
||||
contentParts = None
|
||||
|
||||
# If contentParts not provided but documentList is, extract content first
|
||||
if not contentParts and documentList.references:
|
||||
self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents")
|
||||
|
||||
# Get ChatDocuments
|
||||
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
|
||||
if not chatDocuments:
|
||||
logger.warning("No documents found in documentList")
|
||||
else:
|
||||
logger.info(f"Extracting content from {len(chatDocuments)} documents")
|
||||
|
||||
# Prepare extraction options (use defaults if not provided)
|
||||
from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy
|
||||
extractionOptions = parameters.get("extractionOptions")
|
||||
if not extractionOptions:
|
||||
extractionOptions = ExtractionOptions(
|
||||
prompt="Extract all content from the document",
|
||||
mergeStrategy=MergeStrategy(
|
||||
mergeType="concatenate",
|
||||
groupBy="typeGroup",
|
||||
orderBy="id"
|
||||
),
|
||||
processDocumentsIndividually=True
|
||||
)
|
||||
|
||||
# Extract content using extraction service
|
||||
extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions)
|
||||
|
||||
# Combine all ContentParts from all extracted results
|
||||
contentParts = []
|
||||
for extracted in extractedResults:
|
||||
if extracted.parts:
|
||||
contentParts.extend(extracted.parts)
|
||||
|
||||
logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents")
|
||||
|
||||
# Update progress - preparing AI call
|
||||
self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call")
|
||||
|
||||
|
|
@ -136,8 +102,9 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
|
||||
# Build options with correct operationType
|
||||
from modules.datamodels.datamodelAi import OperationTypeEnum
|
||||
# resultFormat in options can be None - formats will be determined by AI if not provided
|
||||
options = AiCallOptions(
|
||||
resultFormat=output_format or "txt", # Fallback for options, but outputFormat can be None for callAiContent
|
||||
resultFormat=output_format, # Can be None - formats determined by AI
|
||||
operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE
|
||||
)
|
||||
|
||||
|
|
@ -149,13 +116,14 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
# Update progress - calling AI
|
||||
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI")
|
||||
|
||||
# Use unified callAiContent method with contentParts (extraction is now separate)
|
||||
# ContentParts are already extracted above (or None if no documents)
|
||||
# Use unified callAiContent method with BOTH documentList and contentParts
|
||||
# Extraction is handled by AI service - no extraction here
|
||||
# outputFormat: Optional - if None, formats determined from prompt by AI
|
||||
aiResponse = await self.services.ai.callAiContent(
|
||||
prompt=aiPrompt,
|
||||
options=options,
|
||||
contentParts=contentParts, # Already extracted (or None if no documents)
|
||||
documentList=documentList, # Pass documentList - AI service handles extraction
|
||||
contentParts=contentParts, # Pass contentParts if provided (or None)
|
||||
outputFormat=output_format, # Can be None - AI determines from prompt
|
||||
parentOperationId=operationId,
|
||||
generationIntent=generationIntent # REQUIRED for DATA_GENERATE
|
||||
|
|
@ -186,6 +154,7 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
final_documents = action_documents
|
||||
else:
|
||||
# Text response - create document from content
|
||||
# If no extension provided, use "txt" (required for filename)
|
||||
extension = output_extension.lstrip('.') if output_extension else "txt"
|
||||
meaningful_name = self._generateMeaningfulFileName(
|
||||
base_name="ai",
|
||||
|
|
@ -194,8 +163,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
|||
)
|
||||
validationMetadata = {
|
||||
"actionType": "ai.process",
|
||||
"resultType": normalized_result_type or "auto",
|
||||
"outputFormat": output_format or "auto",
|
||||
"resultType": normalized_result_type if normalized_result_type else None,
|
||||
"outputFormat": output_format if output_format else None,
|
||||
"hasDocuments": False,
|
||||
"contentType": "text"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ class MethodAi(MethodBase):
|
|||
self._actions = {
|
||||
"process": WorkflowActionDefinition(
|
||||
actionId="ai.process",
|
||||
description="Universal AI document processing action - accepts multiple input documents in any format and processes them together with a prompt",
|
||||
description="Universal AI document processing action - accepts multiple input documents in any format and processes them together with a prompt. If the prompt specifies document formats to deliver, include them in the prompt",
|
||||
dynamicMode=True,
|
||||
parameters={
|
||||
"aiPrompt": WorkflowActionParameter(
|
||||
|
|
@ -70,7 +70,14 @@ class MethodAi(MethodBase):
|
|||
required=False,
|
||||
default="document",
|
||||
description="Explicit generation intent (\"document\" | \"code\" | \"image\"). Required for DATA_GENERATE operations. Defaults to \"document\" if not provided. For code generation, use ai.generateCode action or explicitly pass generationIntent=\"code\". For IMAGE_GENERATE operations, this parameter is ignored."
|
||||
)
|
||||
),
|
||||
"contentParts": WorkflowActionParameter(
|
||||
name="contentParts",
|
||||
type="List[ContentPart]",
|
||||
frontendType=FrontendType.HIDDEN,
|
||||
required=False,
|
||||
description="Pre-extracted content parts (internal parameter, typically passed between actions). If provided, these will be used instead of extracting from documentList. Can be a list of ContentPart objects or an object with a 'parts' attribute."
|
||||
),
|
||||
},
|
||||
execute=process.__get__(self, self.__class__)
|
||||
),
|
||||
|
|
@ -122,7 +129,7 @@ class MethodAi(MethodBase):
|
|||
),
|
||||
"summarizeDocument": WorkflowActionDefinition(
|
||||
actionId="ai.summarizeDocument",
|
||||
description="Summarize one or more documents, extracting key points and main ideas",
|
||||
description="Summarize one or more documents, extracting key points and main ideas. If the prompt specifies document formats to deliver, include them in the prompt",
|
||||
dynamicMode=True,
|
||||
parameters={
|
||||
"documentList": WorkflowActionParameter(
|
||||
|
|
@ -237,7 +244,7 @@ class MethodAi(MethodBase):
|
|||
),
|
||||
"generateDocument": WorkflowActionDefinition(
|
||||
actionId="ai.generateDocument",
|
||||
description="Generate documents from scratch or based on templates/inputs",
|
||||
description="Generate documents from scratch or based on templates/inputs. If the prompt specifies document formats to deliver, include them in the prompt",
|
||||
dynamicMode=True,
|
||||
parameters={
|
||||
"prompt": WorkflowActionParameter(
|
||||
|
|
@ -275,7 +282,7 @@ class MethodAi(MethodBase):
|
|||
),
|
||||
"generateCode": WorkflowActionDefinition(
|
||||
actionId="ai.generateCode",
|
||||
description="Generate code files - explicitly sets intent to 'code'",
|
||||
description="Generate code files - explicitly sets intent to 'code'. If the prompt specifies file formats to deliver, include them in the prompt",
|
||||
dynamicMode=True,
|
||||
parameters={
|
||||
"prompt": WorkflowActionParameter(
|
||||
|
|
|
|||
Loading…
Reference in a new issue