Fixed generation issue; AI calls are now made only for extracted content
This commit is contained in:
parent
723f98ea7a
commit
db456f1667
4 changed files with 138 additions and 95 deletions
|
|
@ -134,23 +134,13 @@ class StructureFiller:
|
|||
userPrompt=userPrompt
|
||||
)
|
||||
|
||||
# Debug: Log Prompt
|
||||
self.services.utils.writeDebugFile(
|
||||
chapterPrompt,
|
||||
f"chapter_structure_{chapterId}_prompt"
|
||||
)
|
||||
|
||||
# AI-Call für Chapter-Struktur-Generierung
|
||||
# Note: Debug logging is handled by callAiPlanning
|
||||
aiResponse = await self.aiService.callAiPlanning(
|
||||
prompt=chapterPrompt,
|
||||
debugType=f"chapter_structure_{chapterId}"
|
||||
)
|
||||
|
||||
# Debug: Log Response
|
||||
self.services.utils.writeDebugFile(
|
||||
aiResponse,
|
||||
f"chapter_structure_{chapterId}_response"
|
||||
)
|
||||
|
||||
sectionsStructure = json.loads(
|
||||
self.services.utils.jsonExtractString(aiResponse)
|
||||
)
|
||||
|
|
@ -158,20 +148,39 @@ class StructureFiller:
|
|||
chapter["sections"] = sectionsStructure.get("sections", [])
|
||||
|
||||
# Setze useAiCall Flag (falls nicht von AI gesetzt)
|
||||
# WICHTIG: useAiCall kann nur true sein, wenn mindestens ein ContentPart Format "extracted" hat!
|
||||
# "object" und "reference" Formate werden direkt als Elemente hinzugefügt, benötigen kein AI.
|
||||
for section in chapter["sections"]:
|
||||
if "useAiCall" not in section:
|
||||
contentType = section.get("content_type", "paragraph")
|
||||
useAiCall = contentType != "paragraph"
|
||||
contentPartIds = section.get("contentPartIds", [])
|
||||
|
||||
# Prüfe contentPartInstructions
|
||||
if not useAiCall:
|
||||
for partId in section.get("contentPartIds", []):
|
||||
instruction = contentPartInstructions.get(partId, {}).get("instruction", "")
|
||||
if instruction and instruction.lower() not in ["include full text", "include all content", "use full extracted text"]:
|
||||
useAiCall = True
|
||||
# Prüfe ob mindestens ein ContentPart Format "extracted" hat
|
||||
hasExtractedPart = False
|
||||
for partId in contentPartIds:
|
||||
part = self._findContentPartById(partId, contentParts)
|
||||
if part:
|
||||
contentFormat = part.metadata.get("contentFormat", "unknown")
|
||||
if contentFormat == "extracted":
|
||||
hasExtractedPart = True
|
||||
break
|
||||
|
||||
# useAiCall kann nur true sein, wenn extracted Parts vorhanden sind
|
||||
useAiCall = False
|
||||
if hasExtractedPart:
|
||||
# Prüfe ob Transformation nötig ist
|
||||
useAiCall = contentType != "paragraph"
|
||||
|
||||
# Prüfe contentPartInstructions für Transformation
|
||||
if not useAiCall:
|
||||
for partId in contentPartIds:
|
||||
instruction = contentPartInstructions.get(partId, {}).get("instruction", "")
|
||||
if instruction and instruction.lower() not in ["include full text", "include all content", "use full extracted text"]:
|
||||
useAiCall = True
|
||||
break
|
||||
|
||||
section["useAiCall"] = useAiCall
|
||||
logger.debug(f"Section {section.get('id')}: useAiCall={useAiCall} (hasExtractedPart={hasExtractedPart}, contentType={contentType})")
|
||||
|
||||
return chapterStructure
|
||||
|
||||
|
|
@ -200,10 +209,16 @@ class StructureFiller:
|
|||
sectionId = section.get("id")
|
||||
contentPartIds = section.get("contentPartIds", [])
|
||||
contentFormats = section.get("contentFormats", {})
|
||||
generationHint = section.get("generation_hint")
|
||||
# Check both camelCase and snake_case for generationHint
|
||||
generationHint = section.get("generationHint") or section.get("generation_hint")
|
||||
contentType = section.get("content_type", "paragraph")
|
||||
useAiCall = section.get("useAiCall", False)
|
||||
|
||||
# WICHTIG: Wenn keine ContentParts vorhanden sind, kann kein AI-Call gemacht werden
|
||||
if len(contentPartIds) == 0:
|
||||
useAiCall = False
|
||||
logger.debug(f"Section {sectionId}: No content parts, setting useAiCall=False")
|
||||
|
||||
elements = []
|
||||
|
||||
# Prüfe ob Aggregation nötig ist
|
||||
|
|
@ -212,6 +227,8 @@ class StructureFiller:
|
|||
contentPartCount=len(contentPartIds)
|
||||
)
|
||||
|
||||
logger.info(f"Processing section {sectionId}: contentType={contentType}, contentPartCount={len(contentPartIds)}, useAiCall={useAiCall}, needsAggregation={needsAggregation}, hasGenerationHint={bool(generationHint)}")
|
||||
|
||||
if needsAggregation and useAiCall:
|
||||
# Aggregation: Alle Parts zusammen verarbeiten
|
||||
sectionParts = [
|
||||
|
|
@ -251,6 +268,7 @@ class StructureFiller:
|
|||
|
||||
# Aggregiere extracted Parts mit AI
|
||||
if extractedParts:
|
||||
logger.debug(f"Section {sectionId}: Aggregating {len(extractedParts)} extracted parts with AI")
|
||||
generationPrompt = self._buildSectionGenerationPrompt(
|
||||
section=section,
|
||||
contentParts=extractedParts, # ALLE PARTS für Aggregation!
|
||||
|
|
@ -279,6 +297,7 @@ class StructureFiller:
|
|||
generationPrompt,
|
||||
f"section_content_{sectionId}_prompt"
|
||||
)
|
||||
logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt (aggregation)")
|
||||
|
||||
# Verwende callAi für ContentParts-Unterstützung (nicht callAiPlanning!)
|
||||
request = AiCallRequest(
|
||||
|
|
@ -297,6 +316,7 @@ class StructureFiller:
|
|||
aiResponse.content,
|
||||
f"section_content_{sectionId}_response"
|
||||
)
|
||||
logger.debug(f"Logged section response: section_content_{sectionId}_response (aggregation)")
|
||||
|
||||
# Parse und füge zu elements hinzu
|
||||
generatedElements = json.loads(
|
||||
|
|
@ -348,8 +368,10 @@ class StructureFiller:
|
|||
})
|
||||
|
||||
elif contentFormat == "extracted":
|
||||
if generationHint:
|
||||
# WICHTIG: Prüfe sowohl useAiCall als auch generationHint
|
||||
if useAiCall and generationHint:
|
||||
# AI-Call mit einzelnen ContentPart
|
||||
logger.debug(f"Processing section {sectionId}: Single extracted part with AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)})")
|
||||
generationPrompt = self._buildSectionGenerationPrompt(
|
||||
section=section,
|
||||
contentParts=[part], # EIN PART
|
||||
|
|
@ -378,6 +400,7 @@ class StructureFiller:
|
|||
generationPrompt,
|
||||
f"section_content_{sectionId}_prompt"
|
||||
)
|
||||
logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt")
|
||||
|
||||
# Verwende callAi für ContentParts-Unterstützung
|
||||
request = AiCallRequest(
|
||||
|
|
@ -396,6 +419,7 @@ class StructureFiller:
|
|||
aiResponse.content,
|
||||
f"section_content_{sectionId}_response"
|
||||
)
|
||||
logger.debug(f"Logged section response: section_content_{sectionId}_response")
|
||||
|
||||
# Parse und füge zu elements hinzu
|
||||
generatedElements = json.loads(
|
||||
|
|
@ -421,6 +445,7 @@ class StructureFiller:
|
|||
# NICHT raise - Section wird mit Fehlermeldung gerendert
|
||||
else:
|
||||
# Füge extrahierten Text direkt hinzu (kein AI-Call)
|
||||
logger.debug(f"Processing section {sectionId}: Single extracted part WITHOUT AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)}) - adding extracted text directly")
|
||||
elements.append({
|
||||
"type": "extracted_text",
|
||||
"content": part.data,
|
||||
|
|
@ -566,8 +591,15 @@ class StructureFiller:
|
|||
sectionId = section.get("id")
|
||||
contentPartIds = section.get("contentPartIds", [])
|
||||
contentFormats = section.get("contentFormats", {})
|
||||
generationHint = section.get("generation_hint")
|
||||
# Check both camelCase and snake_case for generationHint
|
||||
generationHint = section.get("generationHint") or section.get("generation_hint")
|
||||
contentType = section.get("content_type", "paragraph")
|
||||
useAiCall = section.get("useAiCall", False)
|
||||
|
||||
# WICHTIG: Wenn keine ContentParts vorhanden sind, kann kein AI-Call gemacht werden
|
||||
if len(contentPartIds) == 0:
|
||||
useAiCall = False
|
||||
logger.debug(f"Section {sectionId} (legacy): No content parts, setting useAiCall=False")
|
||||
|
||||
elements = []
|
||||
|
||||
|
|
@ -577,7 +609,9 @@ class StructureFiller:
|
|||
contentPartCount=len(contentPartIds)
|
||||
)
|
||||
|
||||
if needsAggregation and generationHint:
|
||||
logger.info(f"Processing section {sectionId} (legacy): contentType={contentType}, contentPartCount={len(contentPartIds)}, useAiCall={useAiCall}, needsAggregation={needsAggregation}, hasGenerationHint={bool(generationHint)}")
|
||||
|
||||
if needsAggregation and useAiCall and generationHint:
|
||||
# Aggregation: Alle Parts zusammen verarbeiten
|
||||
sectionParts = [
|
||||
self._findContentPartById(pid, contentParts)
|
||||
|
|
@ -702,8 +736,10 @@ class StructureFiller:
|
|||
})
|
||||
|
||||
elif contentFormat == "extracted":
|
||||
if generationHint:
|
||||
# WICHTIG: Prüfe sowohl useAiCall als auch generationHint
|
||||
if useAiCall and generationHint:
|
||||
# AI-Call mit einzelnen ContentPart
|
||||
logger.debug(f"Processing section {sectionId}: Single extracted part with AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)})")
|
||||
generationPrompt = self._buildSectionGenerationPrompt(
|
||||
section=section,
|
||||
contentParts=[part],
|
||||
|
|
@ -729,6 +765,7 @@ class StructureFiller:
|
|||
generationPrompt,
|
||||
f"section_content_{sectionId}_prompt"
|
||||
)
|
||||
logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt")
|
||||
|
||||
request = AiCallRequest(
|
||||
prompt=generationPrompt,
|
||||
|
|
@ -745,6 +782,7 @@ class StructureFiller:
|
|||
aiResponse.content,
|
||||
f"section_content_{sectionId}_response"
|
||||
)
|
||||
logger.debug(f"Logged section response: section_content_{sectionId}_response")
|
||||
|
||||
generatedElements = json.loads(
|
||||
self.services.utils.jsonExtractString(aiResponse.content)
|
||||
|
|
@ -765,6 +803,8 @@ class StructureFiller:
|
|||
})
|
||||
logger.error(f"Error generating section {sectionId}: {str(e)}")
|
||||
else:
|
||||
# Füge extrahierten Text direkt hinzu (kein AI-Call)
|
||||
logger.debug(f"Processing section {sectionId}: Single extracted part WITHOUT AI call (useAiCall={useAiCall}, generationHint={bool(generationHint)}) - adding extracted text directly")
|
||||
elements.append({
|
||||
"type": "extracted_text",
|
||||
"content": part.data,
|
||||
|
|
@ -817,35 +857,44 @@ class StructureFiller:
|
|||
|
||||
prompt = f"""TASK: Generate Chapter Sections Structure
|
||||
|
||||
CHAPTER METADATA:
|
||||
- Chapter ID: {chapterId}
|
||||
- Chapter Level: {chapterLevel}
|
||||
- Chapter Title: {chapterTitle}
|
||||
- Generation Hint: {generationHint}
|
||||
CHAPTER: {chapterTitle} (Level {chapterLevel}, ID: {chapterId})
|
||||
GENERATION HINT: {generationHint}
|
||||
|
||||
WICHTIG: Chapter hat bereits vordefinierte Heading-Section.
|
||||
Generiere NICHT eine Heading-Section für Chapter-Title!
|
||||
NOTE: Chapter already has a heading section. Do NOT generate a heading for the chapter title.
|
||||
|
||||
AVAILABLE CONTENT PARTS:
|
||||
{contentPartsIndex}
|
||||
|
||||
STANDARD JSON SCHEMA FOR SECTIONS:
|
||||
Supported content_types: table, bullet_list, heading, paragraph, code_block, image
|
||||
CONTENT TYPES: table, bullet_list, heading, paragraph, code_block, image
|
||||
|
||||
Return JSON:
|
||||
useAiCall RULES:
|
||||
- useAiCall: true ONLY if ContentPart Format is "extracted" AND transformation needed
|
||||
- useAiCall: false if Format is "object" or "reference" (direct insertion)
|
||||
- useAiCall: false if Format is "extracted" AND simple "include full text" instruction
|
||||
|
||||
RETURN JSON:
|
||||
{{
|
||||
"sections": [
|
||||
{{
|
||||
"id": "section_1",
|
||||
"content_type": "paragraph",
|
||||
"contentPartIds": ["part_ext_1"],
|
||||
"generationHint": "...",
|
||||
"contentPartIds": ["extracted_part_1"],
|
||||
"generationHint": "Include full text",
|
||||
"useAiCall": false,
|
||||
"elements": []
|
||||
}}
|
||||
]
|
||||
}}
|
||||
|
||||
EXAMPLES (all content types):
|
||||
- paragraph: {{"id": "s1", "content_type": "paragraph", "contentPartIds": ["extracted_1"], "generationHint": "Include full text", "useAiCall": false, "elements": []}}
|
||||
- bullet_list: {{"id": "s2", "content_type": "bullet_list", "contentPartIds": ["extracted_1"], "generationHint": "Create bullet list", "useAiCall": true, "elements": []}}
|
||||
- table: {{"id": "s3", "content_type": "table", "contentPartIds": ["extracted_1", "extracted_2"], "generationHint": "Create table", "useAiCall": true, "elements": []}}
|
||||
- heading: {{"id": "s4", "content_type": "heading", "contentPartIds": ["extracted_1"], "generationHint": "Extract heading", "useAiCall": true, "elements": []}}
|
||||
- code_block: {{"id": "s5", "content_type": "code_block", "contentPartIds": ["extracted_1"], "generationHint": "Format code", "useAiCall": true, "elements": []}}
|
||||
- image: {{"id": "s6", "content_type": "image", "contentPartIds": ["obj_1"], "generationHint": "Display image", "useAiCall": false, "elements": []}}
|
||||
- reference: {{"id": "s7", "content_type": "paragraph", "contentPartIds": ["ref_1"], "generationHint": "Reference", "useAiCall": false, "elements": []}}
|
||||
|
||||
CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON.
|
||||
"""
|
||||
return prompt
|
||||
|
|
|
|||
|
|
@ -68,24 +68,13 @@ class StructureGenerator:
|
|||
outputFormat=outputFormat
|
||||
)
|
||||
|
||||
# Debug: Log Prompt
|
||||
self.services.utils.writeDebugFile(
|
||||
structurePrompt,
|
||||
"chapter_structure_generation_prompt"
|
||||
)
|
||||
|
||||
# AI-Call für Chapter-Struktur-Generierung
|
||||
# Note: Debug logging is handled by callAiPlanning
|
||||
aiResponse = await self.aiService.callAiPlanning(
|
||||
prompt=structurePrompt,
|
||||
debugType="chapter_structure_generation"
|
||||
)
|
||||
|
||||
# Debug: Log Response
|
||||
self.services.utils.writeDebugFile(
|
||||
aiResponse,
|
||||
"chapter_structure_generation_response"
|
||||
)
|
||||
|
||||
# Parse Struktur
|
||||
structure = json.loads(self.services.utils.jsonExtractString(aiResponse))
|
||||
|
||||
|
|
@ -143,34 +132,6 @@ class StructureGenerator:
|
|||
# Baue Index nur für gültige Parts
|
||||
for i, part in enumerate(validParts, 1):
|
||||
contentFormat = part.metadata.get("contentFormat", "unknown")
|
||||
dataPreview = ""
|
||||
|
||||
if contentFormat == "extracted":
|
||||
# Für Image-Parts: Zeige dass es ein Image ist
|
||||
if part.typeGroup == "image":
|
||||
dataLength = len(part.data) if part.data else 0
|
||||
mimeType = part.mimeType or "image"
|
||||
dataPreview = f"Image data ({mimeType}, {dataLength} chars) - base64 encoded image content"
|
||||
elif part.typeGroup == "container":
|
||||
# Container ohne Daten überspringen wir bereits oben
|
||||
dataPreview = "Container structure (no text content)"
|
||||
else:
|
||||
# Zeige Preview von extrahiertem Text
|
||||
if part.data:
|
||||
preview = part.data[:200] + "..." if len(part.data) > 200 else part.data
|
||||
dataPreview = preview
|
||||
else:
|
||||
dataPreview = "(empty)"
|
||||
elif contentFormat == "object":
|
||||
dataLength = len(part.data) if part.data else 0
|
||||
mimeType = part.mimeType or "binary"
|
||||
if part.typeGroup == "image":
|
||||
dataPreview = f"Base64 encoded image ({mimeType}, {dataLength} chars)"
|
||||
else:
|
||||
dataPreview = f"Base64 encoded binary ({mimeType}, {dataLength} chars)"
|
||||
elif contentFormat == "reference":
|
||||
dataPreview = part.metadata.get("documentReference", "reference")
|
||||
|
||||
originalFileName = part.metadata.get('originalFileName', 'N/A')
|
||||
|
||||
contentPartsIndex += f"\n{i}. ContentPart ID: {part.id}\n"
|
||||
|
|
@ -180,7 +141,6 @@ class StructureGenerator:
|
|||
contentPartsIndex += f" Source: {part.metadata.get('documentId', 'unknown')}\n"
|
||||
contentPartsIndex += f" Original file name: {originalFileName}\n"
|
||||
contentPartsIndex += f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n"
|
||||
contentPartsIndex += f" Data preview: {dataPreview}\n"
|
||||
|
||||
if not contentPartsIndex:
|
||||
contentPartsIndex = "\n(No content parts available)"
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ Base renderer class for all format renderers.
|
|||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Any, List
|
||||
from typing import Dict, Any, List, Tuple
|
||||
from modules.datamodels.datamodelJson import supportedSectionTypes
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
import json
|
||||
|
|
@ -201,9 +201,15 @@ class BaseRenderer(ABC):
|
|||
def _extractTableData(self, sectionData: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
|
||||
"""Extract table headers and rows from section data."""
|
||||
# Normalize when elements array was passed in
|
||||
if isinstance(sectionData, list) and sectionData:
|
||||
candidate = sectionData[0]
|
||||
sectionData = candidate if isinstance(candidate, dict) else {}
|
||||
if isinstance(sectionData, list):
|
||||
if sectionData and isinstance(sectionData[0], dict):
|
||||
sectionData = sectionData[0]
|
||||
else:
|
||||
# Empty list or invalid structure - return empty table
|
||||
return [], []
|
||||
# Ensure sectionData is a dict before calling .get()
|
||||
if not isinstance(sectionData, dict):
|
||||
return [], []
|
||||
headers = sectionData.get("headers", [])
|
||||
rows = sectionData.get("rows", [])
|
||||
return headers, rows
|
||||
|
|
@ -227,8 +233,15 @@ class BaseRenderer(ABC):
|
|||
def _extractHeadingData(self, sectionData: Dict[str, Any]) -> Tuple[int, str]:
|
||||
"""Extract heading level and text from section data."""
|
||||
# Normalize when elements array was passed in
|
||||
if isinstance(sectionData, list) and sectionData:
|
||||
sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
|
||||
if isinstance(sectionData, list):
|
||||
if sectionData and isinstance(sectionData[0], dict):
|
||||
sectionData = sectionData[0]
|
||||
else:
|
||||
# Empty list or invalid structure - return default
|
||||
return 1, ""
|
||||
# Ensure sectionData is a dict before calling .get()
|
||||
if not isinstance(sectionData, dict):
|
||||
return 1, ""
|
||||
level = sectionData.get("level", 1)
|
||||
text = sectionData.get("text", "")
|
||||
return level, text
|
||||
|
|
@ -249,8 +262,15 @@ class BaseRenderer(ABC):
|
|||
def _extractCodeBlockData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]:
|
||||
"""Extract code and language from section data."""
|
||||
# Normalize when elements array was passed in
|
||||
if isinstance(sectionData, list) and sectionData:
|
||||
sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
|
||||
if isinstance(sectionData, list):
|
||||
if sectionData and isinstance(sectionData[0], dict):
|
||||
sectionData = sectionData[0]
|
||||
else:
|
||||
# Empty list or invalid structure - return default
|
||||
return "", ""
|
||||
# Ensure sectionData is a dict before calling .get()
|
||||
if not isinstance(sectionData, dict):
|
||||
return "", ""
|
||||
code = sectionData.get("code", "")
|
||||
language = sectionData.get("language", "")
|
||||
return code, language
|
||||
|
|
@ -258,8 +278,15 @@ class BaseRenderer(ABC):
|
|||
def _extractImageData(self, sectionData: Dict[str, Any]) -> Tuple[str, str]:
|
||||
"""Extract base64 data and alt text from section data."""
|
||||
# Normalize when elements array was passed in
|
||||
if isinstance(sectionData, list) and sectionData:
|
||||
sectionData = sectionData[0] if isinstance(sectionData[0], dict) else {}
|
||||
if isinstance(sectionData, list):
|
||||
if sectionData and isinstance(sectionData[0], dict):
|
||||
sectionData = sectionData[0]
|
||||
else:
|
||||
# Empty list or invalid structure - return default
|
||||
return "", "Image"
|
||||
# Ensure sectionData is a dict before calling .get()
|
||||
if not isinstance(sectionData, dict):
|
||||
return "", "Image"
|
||||
base64Data = sectionData.get("base64Data", "")
|
||||
altText = sectionData.get("altText", "Image")
|
||||
return base64Data, altText
|
||||
|
|
|
|||
|
|
@ -396,7 +396,7 @@ class RendererHtml(BaseRenderer):
|
|||
source = element.get("source", "")
|
||||
if content:
|
||||
source_text = f' <small><em>(Source: {source})</em></small>' if source else ''
|
||||
htmlParts.append(f'<p class="extracted-text">{content}{source_text}</p>')
|
||||
htmlParts.append(f'<p>{content}{source_text}</p>')
|
||||
elif isinstance(element, dict):
|
||||
# Regular paragraph element
|
||||
text = element.get("text", element.get("content", ""))
|
||||
|
|
@ -432,7 +432,7 @@ class RendererHtml(BaseRenderer):
|
|||
source = element.get("source", "")
|
||||
if content:
|
||||
source_text = f' <small><em>(Source: {source})</em></small>' if source else ''
|
||||
htmlParts.append(f'<p class="extracted-text">{content}{source_text}</p>')
|
||||
htmlParts.append(f'<p>{content}{source_text}</p>')
|
||||
|
||||
if htmlParts:
|
||||
return '\n'.join(htmlParts)
|
||||
|
|
@ -577,18 +577,23 @@ class RendererHtml(BaseRenderer):
|
|||
def _renderJsonImage(self, imageData: Dict[str, Any], styles: Dict[str, Any]) -> str:
|
||||
"""Render a JSON image to HTML with placeholder for later replacement."""
|
||||
try:
|
||||
import html
|
||||
base64Data = imageData.get("base64Data", "")
|
||||
altText = imageData.get("altText", "Image")
|
||||
caption = imageData.get("caption", "")
|
||||
|
||||
# Escape HTML in altText and caption to prevent injection
|
||||
altTextEscaped = html.escape(str(altText))
|
||||
captionEscaped = html.escape(str(caption)) if caption else ""
|
||||
|
||||
if base64Data:
|
||||
# Use data URI as placeholder - will be replaced with file path in _replaceImageDataUris
|
||||
# Include a marker so we can find and replace it
|
||||
imageMarker = f"<!--IMAGE_MARKER:{len(base64Data)}:{altText[:50]}-->"
|
||||
imgTag = f'<img src="data:image/png;base64,{base64Data}" alt="{altText}">'
|
||||
imageMarker = f"<!--IMAGE_MARKER:{len(base64Data)}:{altTextEscaped[:50]}-->"
|
||||
imgTag = f'<img src="data:image/png;base64,{base64Data}" alt="{altTextEscaped}">'
|
||||
|
||||
if caption:
|
||||
return f'{imageMarker}<figure>{imgTag}<figcaption>{caption}</figcaption></figure>'
|
||||
if captionEscaped:
|
||||
return f'{imageMarker}<figure>{imgTag}<figcaption>{captionEscaped}</figcaption></figure>'
|
||||
else:
|
||||
return f'{imageMarker}{imgTag}'
|
||||
|
||||
|
|
@ -712,12 +717,14 @@ class RendererHtml(BaseRenderer):
|
|||
break
|
||||
|
||||
if matchingImage:
|
||||
import html
|
||||
# Use filename from image data (generated from section ID)
|
||||
filename = matchingImage.get("filename", f"image_{images.index(matchingImage) + 1}.png")
|
||||
|
||||
# Replace with relative path (ohne Pfad, nur Dateiname)
|
||||
altText = matchingImage.get("altText", "Image")
|
||||
caption = matchingImage.get("caption", "")
|
||||
# Escape HTML in altText and caption to prevent injection
|
||||
altText = html.escape(str(matchingImage.get("altText", "Image")))
|
||||
caption = html.escape(str(matchingImage.get("caption", ""))) if matchingImage.get("caption") else ""
|
||||
|
||||
# Entferne IMAGE_MARKER Kommentar falls vorhanden
|
||||
imgTag = f'<img src="{filename}" alt="{altText}">'
|
||||
|
|
|
|||
Loading…
Reference in a new issue