diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py index e0cdfc53..7089103c 100644 --- a/modules/services/serviceAi/subStructureFilling.py +++ b/modules/services/serviceAi/subStructureFilling.py @@ -114,15 +114,28 @@ class StructureFiller: Phase 5D.1: Generiert Sections-Struktur für jedes Chapter (ohne Content). Sections enthalten: content_type, contentPartIds, generationHint, useAiCall """ + # Count total chapters for progress tracking + totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", [])) + chapterIndex = 0 + for doc in chapterStructure.get("documents", []): for chapter in doc.get("chapters", []): + chapterIndex += 1 chapterId = chapter.get("id", "unknown") chapterLevel = chapter.get("level", 1) - chapterTitle = chapter.get("title", "") + chapterTitle = chapter.get("title", "Untitled Chapter") generationHint = chapter.get("generationHint", "") contentPartIds = chapter.get("contentPartIds", []) contentPartInstructions = chapter.get("contentPartInstructions", {}) + # Update progress for chapter structure generation + progress = chapterIndex / totalChapters if totalChapters > 0 else 1.0 + self.services.chat.progressLogUpdate( + parentOperationId, + progress, + f"Generating sections for Chapter {chapterIndex}/{totalChapters}: {chapterTitle}" + ) + chapterPrompt = self._buildChapterSectionsStructurePrompt( chapterId=chapterId, chapterLevel=chapterLevel, @@ -194,133 +207,384 @@ class StructureFiller: """ Phase 5D.2: Füllt Sections mit ContentParts. """ - # Sammle alle Sections für sequenzielle Verarbeitung - sections_to_process = [] - all_sections_list = [] # Für Kontext-Informationen + # Sammle alle Sections für Kontext-Informationen (für alle Sections) + all_sections_list = [] for doc in chapterStructure.get("documents", []): for chapter in doc.get("chapters", []): for section in chapter.get("sections", []): all_sections_list.append(section) - sections_to_process.append((doc, chapter, section)) - # Sequenzielle Section-Generierung + # Berechne Gesamtanzahl Chapters für Progress-Tracking + totalChapters = sum(len(doc.get("chapters", [])) for doc in chapterStructure.get("documents", [])) fillOperationId = parentOperationId - for sectionIndex, (doc, chapter, section) in enumerate(sections_to_process): - sectionId = section.get("id") - contentPartIds = section.get("contentPartIds", []) - contentFormats = section.get("contentFormats", {}) - # Check both camelCase and snake_case for generationHint - generationHint = section.get("generationHint") or section.get("generation_hint") - contentType = section.get("content_type", "paragraph") - useAiCall = section.get("useAiCall", False) + + # Helper function to calculate overall progress + def calculateOverallProgress(chapterIndex, totalChapters, sectionIndex, totalSections): + """Calculate overall progress: 0.0 to 1.0""" + if totalChapters == 0: + return 1.0 - # WICHTIG: Wenn keine ContentParts vorhanden sind UND kein generationHint, kann kein AI-Call gemacht werden - # Aber: Wenn generationHint vorhanden ist, SOLLTE AI verwendet werden, auch wenn useAiCall=false gesetzt ist - # (z.B. wenn AI die Struktur generiert hat, aber useAiCall falsch gesetzt wurde) - if len(contentPartIds) == 0 and not generationHint: - useAiCall = False - logger.debug(f"Section {sectionId}: No content parts and no generation hint, setting useAiCall=False") - elif len(contentPartIds) == 0 and generationHint and not useAiCall: - # Override: If there's a generationHint but no content parts, we should use AI - # This handles cases where structure generation set useAiCall=false incorrectly - useAiCall = True - logger.info(f"Section {sectionId}: Overriding useAiCall=True (has generationHint but no content parts)") + # Progress from completed chapters (0 to chapterIndex-1) + completedChaptersProgress = chapterIndex / totalChapters - elements = [] + # Progress from current chapter (sectionIndex / totalSections) + currentChapterProgress = (sectionIndex / totalSections) / totalChapters if totalSections > 0 else 0 - # Prüfe ob Aggregation nötig ist - needsAggregation = self._needsAggregation( - contentType=contentType, - contentPartCount=len(contentPartIds) - ) - - logger.info(f"Processing section {sectionId}: contentType={contentType}, contentPartCount={len(contentPartIds)}, useAiCall={useAiCall}, needsAggregation={needsAggregation}, hasGenerationHint={bool(generationHint)}") - - if needsAggregation and useAiCall: - # Aggregation: Alle Parts zusammen verarbeiten - sectionParts = [ - self._findContentPartById(pid, contentParts) - for pid in contentPartIds - ] - sectionParts = [p for p in sectionParts if p is not None] + return min(1.0, completedChaptersProgress + currentChapterProgress) + + # Process chapters sequentially with chapter-level progress + chapterIndex = 0 + for doc in chapterStructure.get("documents", []): + for chapter in doc.get("chapters", []): + chapterIndex += 1 + chapterId = chapter.get("id", "unknown") + chapterTitle = chapter.get("title", "Untitled Chapter") + sections = chapter.get("sections", []) + totalSections = len(sections) + + # Start chapter operation + chapterOperationId = f"{fillOperationId}_chapter_{chapterId}" + self.services.chat.progressLogStart( + chapterOperationId, + "Chapter Generation", + f"Chapter {chapterIndex}/{totalChapters}", + chapterTitle, + parentOperationId=fillOperationId + ) + + # Process sections within chapter + for sectionIndex, section in enumerate(sections): + sectionId = section.get("id") + sectionTitle = section.get("title", sectionId) + contentPartIds = section.get("contentPartIds", []) + contentFormats = section.get("contentFormats", {}) + # Check both camelCase and snake_case for generationHint + generationHint = section.get("generationHint") or section.get("generation_hint") + contentType = section.get("content_type", "paragraph") + useAiCall = section.get("useAiCall", False) - if sectionParts: - # Filtere nur extracted Parts für Aggregation (reference/object werden separat behandelt) - extractedParts = [ - p for p in sectionParts - if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted" - ] - nonExtractedParts = [ - p for p in sectionParts - if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted" + # Update overall progress at start of section + overallProgress = calculateOverallProgress(chapterIndex - 1, totalChapters, sectionIndex, totalSections) + self.services.chat.progressLogUpdate( + fillOperationId, + overallProgress, + f"Chapter {chapterIndex}/{totalChapters}, Section {sectionIndex + 1}/{totalSections}: {sectionTitle}" + ) + + # WICHTIG: Wenn keine ContentParts vorhanden sind UND kein generationHint, kann kein AI-Call gemacht werden + # Aber: Wenn generationHint vorhanden ist, SOLLTE AI verwendet werden, auch wenn useAiCall=false gesetzt ist + # (z.B. wenn AI die Struktur generiert hat, aber useAiCall falsch gesetzt wurde) + if len(contentPartIds) == 0 and not generationHint: + useAiCall = False + logger.debug(f"Section {sectionId}: No content parts and no generation hint, setting useAiCall=False") + elif len(contentPartIds) == 0 and generationHint and not useAiCall: + # Override: If there's a generationHint but no content parts, we should use AI + # This handles cases where structure generation set useAiCall=false incorrectly + useAiCall = True + logger.info(f"Section {sectionId}: Overriding useAiCall=True (has generationHint but no content parts)") + + elements = [] + + # Prüfe ob Aggregation nötig ist + needsAggregation = self._needsAggregation( + contentType=contentType, + contentPartCount=len(contentPartIds) + ) + + logger.info(f"Processing section {sectionId}: contentType={contentType}, contentPartCount={len(contentPartIds)}, useAiCall={useAiCall}, needsAggregation={needsAggregation}, hasGenerationHint={bool(generationHint)}") + + if needsAggregation and useAiCall: + # Aggregation: Alle Parts zusammen verarbeiten + sectionParts = [ + self._findContentPartById(pid, contentParts) + for pid in contentPartIds ] + sectionParts = [p for p in sectionParts if p is not None] - # Verarbeite non-extracted Parts separat (reference, object) - for part in nonExtractedParts: - contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat")) + if sectionParts: + # Filtere nur extracted Parts für Aggregation (reference/object werden separat behandelt) + extractedParts = [ + p for p in sectionParts + if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted" + ] + nonExtractedParts = [ + p for p in sectionParts + if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted" + ] - if contentFormat == "reference": + # Verarbeite non-extracted Parts separat (reference, object) + for part in nonExtractedParts: + contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat")) + + if contentFormat == "reference": + elements.append({ + "type": "reference", + "documentReference": part.metadata.get("documentReference"), + "label": part.metadata.get("usageHint", part.label) + }) + elif contentFormat == "object": + # Nested content structure for objects + if part.typeGroup == "image": + elements.append({ + "type": "image", + "content": { + "base64Data": part.data, + "altText": part.metadata.get("usageHint", part.label), + "caption": part.metadata.get("caption", "") + } + }) + else: + elements.append({ + "type": part.typeGroup, + "content": { + "data": part.data, + "mimeType": part.mimeType, + "label": part.metadata.get("usageHint", part.label) + } + }) + + # Aggregiere extracted Parts mit AI + if extractedParts: + logger.debug(f"Section {sectionId}: Aggregating {len(extractedParts)} extracted parts with AI") + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=extractedParts, # ALLE PARTS für Aggregation! + userPrompt=userPrompt, + generationHint=generationHint, + allSections=all_sections_list, + sectionIndex=sectionIndex, + isAggregation=True + ) + + # Erstelle Operation-ID für Section-Generierung + sectionOperationId = f"{fillOperationId}_section_{sectionId}" + + # Starte ChatLog mit Parent-Referenz (chapter, not fillOperationId) + self.services.chat.progressLogStart( + sectionOperationId, + "Section Generation (Aggregation)", + f"Section {sectionIndex + 1}/{totalSections}", + f"{sectionTitle} ({len(extractedParts)} parts)", + parentOperationId=chapterOperationId + ) + + try: + # Update: Building prompt + self.services.chat.progressLogUpdate(sectionOperationId, 0.2, "Building generation prompt") + + # Debug: Log Prompt + self.services.utils.writeDebugFile( + generationPrompt, + f"{chapterId}_section_{sectionId}_prompt" + ) + logger.debug(f"Logged section prompt: {chapterId}_section_{sectionId}_prompt (aggregation)") + + # Update: Calling AI + self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation") + + # Verwende callAi für ContentParts-Unterstützung (nicht callAiPlanning!) + # Use IMAGE_GENERATE for image content type + operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE + + # For IMAGE_GENERATE, truncate prompt to 4000 chars (DALL-E limit) + if operationType == OperationTypeEnum.IMAGE_GENERATE: + maxPromptLength = 4000 + if len(generationPrompt) > maxPromptLength: + logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters") + # Keep the beginning (task, metadata, generation hint) and truncate from end + generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0] # Truncate at last newline + + # For IMAGE_GENERATE, don't pass contentParts - image generation uses prompt only, not content chunks + contentPartsForCall = [] if operationType == OperationTypeEnum.IMAGE_GENERATE else extractedParts + request = AiCallRequest( + prompt=generationPrompt, + contentParts=contentPartsForCall, # Empty for IMAGE_GENERATE, all parts for others + options=AiCallOptions( + operationType=operationType, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED + ) + ) + aiResponse = await self.aiService.callAi(request) + + # Update: Processing response + self.services.chat.progressLogUpdate(sectionOperationId, 0.6, "Processing AI response") + + # Debug: Log Response + self.services.utils.writeDebugFile( + aiResponse.content, + f"{chapterId}_section_{sectionId}_response" + ) + logger.debug(f"Logged section response: {chapterId}_section_{sectionId}_response (aggregation)") + + # Update: Validating content + self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content") + + # Handle IMAGE_GENERATE differently - returns image data directly + if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: + import base64 + base64Data = "" + + # Convert image data to base64 string if needed + if isinstance(aiResponse.content, bytes): + base64Data = base64.b64encode(aiResponse.content).decode('utf-8') + elif isinstance(aiResponse.content, str): + # Check if it's already a JSON structure + try: + # Try to parse as JSON first + jsonContent = json.loads(self.services.utils.jsonExtractString(aiResponse.content)) + # If it's already a proper JSON structure with image element, use it + if isinstance(jsonContent, dict) and jsonContent.get("type") == "image": + elements.append(jsonContent) + logger.debug("AI returned proper JSON image structure") + continue + elif isinstance(jsonContent, list) and len(jsonContent) > 0: + # Check if first element is an image + if isinstance(jsonContent[0], dict) and jsonContent[0].get("type") == "image": + elements.extend(jsonContent) + logger.debug("AI returned proper JSON image structure in list") + continue + except (json.JSONDecodeError, ValueError, AttributeError): + # Not JSON, treat as base64 string or data URI + pass + + # Already base64 string or data URI + if aiResponse.content.startswith("data:image/"): + # Extract base64 from data URI + base64Data = aiResponse.content.split(",", 1)[1] + else: + # Check if it looks like base64 (alphanumeric + / + =) + content_stripped = aiResponse.content.strip() + if len(content_stripped) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r\t " for c in content_stripped[:200]): + # Looks like base64, use it + base64Data = content_stripped.replace("\n", "").replace("\r", "").replace("\t", "").replace(" ", "") + else: + base64Data = aiResponse.content + else: + base64Data = "" + + # Always create proper JSON structure for images + if base64Data: + elements.append({ + "type": "image", + "content": { + "base64Data": base64Data, + "altText": generationHint or "Generated image", + "caption": "" + } + }) + logger.debug(f"Created proper JSON image structure with base64Data length: {len(base64Data)}") + else: + logger.warning(f"IMAGE_GENERATE returned empty or invalid content for section {sectionId}") + elements.append({ + "type": "error", + "message": f"Image generation returned empty or invalid content", + "sectionId": sectionId + }) + else: + # Parse JSON response for other content types + try: + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + elif isinstance(generatedElements, dict) and generatedElements.get("type"): + # Single element in dict format + elements.append(generatedElements) + except (json.JSONDecodeError, ValueError) as json_error: + logger.error(f"Error parsing JSON response for section {sectionId}: {str(json_error)}") + # Try to extract any image data that might be in the response + if contentType == "image": + # Check if response content might be base64 image data + content_str = str(aiResponse.content) + if len(content_str) > 100: + elements.append({ + "type": "error", + "message": f"Failed to parse image generation response: {str(json_error)}", + "sectionId": sectionId + }) + else: + elements.append({ + "type": "error", + "message": f"Failed to parse JSON response: {str(json_error)}", + "sectionId": sectionId + }) + + # ChatLog abschließen + self.services.chat.progressLogFinish(sectionOperationId, True) + + # Update chapter progress after section completion + chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 + self.services.chat.progressLogUpdate( + chapterOperationId, + chapterProgress, + f"Section {sectionIndex + 1}/{totalSections} completed" + ) + + except Exception as e: + # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) + self.services.chat.progressLogFinish(sectionOperationId, False) elements.append({ - "type": "reference", - "documentReference": part.metadata.get("documentReference"), - "label": part.metadata.get("usageHint", part.label) + "type": "error", + "message": f"Error generating section {sectionId}: {str(e)}", + "sectionId": sectionId }) - elif contentFormat == "object": - # Nested content structure for objects - if part.typeGroup == "image": - elements.append({ - "type": "image", - "content": { - "base64Data": part.data, - "altText": part.metadata.get("usageHint", part.label), - "caption": part.metadata.get("caption", "") - } - }) - else: - elements.append({ - "type": part.typeGroup, - "content": { - "data": part.data, - "mimeType": part.mimeType, - "label": part.metadata.get("usageHint", part.label) - } - }) - - # Aggregiere extracted Parts mit AI - if extractedParts: - logger.debug(f"Section {sectionId}: Aggregating {len(extractedParts)} extracted parts with AI") + logger.error(f"Error generating section {sectionId}: {str(e)}") + # Still update chapter progress even on error + chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 + self.services.chat.progressLogUpdate( + chapterOperationId, + chapterProgress, + f"Section {sectionIndex + 1}/{totalSections} completed (with errors)" + ) + # NICHT raise - Section wird mit Fehlermeldung gerendert + + else: + # Einzelverarbeitung: Jeder Part einzeln ODER Generation ohne ContentParts + # Handle case where no content parts but generationHint exists (e.g., Executive Summary) + if len(contentPartIds) == 0 and useAiCall and generationHint: + # Generate content from scratch using only generationHint + logger.debug(f"Processing section {sectionId}: No content parts, generating from generationHint only") generationPrompt = self._buildSectionGenerationPrompt( section=section, - contentParts=extractedParts, # ALLE PARTS für Aggregation! + contentParts=[], # NO PARTS userPrompt=userPrompt, generationHint=generationHint, allSections=all_sections_list, sectionIndex=sectionIndex, - isAggregation=True + isAggregation=False ) # Erstelle Operation-ID für Section-Generierung sectionOperationId = f"{fillOperationId}_section_{sectionId}" - # Starte ChatLog mit Parent-Referenz + # Starte ChatLog mit Parent-Referenz (chapter, not fillOperationId) self.services.chat.progressLogStart( sectionOperationId, - "Section Generation (Aggregation)", - "Section", - f"Generating section {sectionId} with {len(extractedParts)} parts", - parentOperationId=fillOperationId + "Section Generation", + f"Section {sectionIndex + 1}/{totalSections}", + f"{sectionTitle} (from generationHint)", + parentOperationId=chapterOperationId ) try: + # Update: Building prompt + self.services.chat.progressLogUpdate(sectionOperationId, 0.2, "Building generation prompt") + # Debug: Log Prompt self.services.utils.writeDebugFile( generationPrompt, - f"section_content_{sectionId}_prompt" + f"{chapterId}_section_{sectionId}_prompt" ) - logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt (aggregation)") + logger.debug(f"Logged section prompt: {chapterId}_section_{sectionId}_prompt") - # Verwende callAi für ContentParts-Unterstützung (nicht callAiPlanning!) - # Use IMAGE_GENERATE for image content type + # Update: Calling AI + self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation") + + # Verwende callAi ohne ContentParts operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE # For IMAGE_GENERATE, truncate prompt to 4000 chars (DALL-E limit) @@ -331,11 +595,9 @@ class StructureFiller: # Keep the beginning (task, metadata, generation hint) and truncate from end generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0] # Truncate at last newline - # For IMAGE_GENERATE, don't pass contentParts - image generation uses prompt only, not content chunks - contentPartsForCall = [] if operationType == OperationTypeEnum.IMAGE_GENERATE else extractedParts request = AiCallRequest( prompt=generationPrompt, - contentParts=contentPartsForCall, # Empty for IMAGE_GENERATE, all parts for others + contentParts=[], # NO PARTS options=AiCallOptions( operationType=operationType, priority=PriorityEnum.BALANCED, @@ -344,49 +606,103 @@ class StructureFiller: ) aiResponse = await self.aiService.callAi(request) + # Update: Processing response + self.services.chat.progressLogUpdate(sectionOperationId, 0.6, "Processing AI response") + # Debug: Log Response self.services.utils.writeDebugFile( aiResponse.content, - f"section_content_{sectionId}_response" + f"{chapterId}_section_{sectionId}_response" ) - logger.debug(f"Logged section response: section_content_{sectionId}_response (aggregation)") + logger.debug(f"Logged section response: {chapterId}_section_{sectionId}_response") + + # Update: Validating content + self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content") # Handle IMAGE_GENERATE differently - returns image data directly if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: import base64 + base64Data = "" + # Convert image data to base64 string if needed if isinstance(aiResponse.content, bytes): base64Data = base64.b64encode(aiResponse.content).decode('utf-8') elif isinstance(aiResponse.content, str): + # Check if it's already a JSON structure + try: + jsonContent = json.loads(self.services.utils.jsonExtractString(aiResponse.content)) + if isinstance(jsonContent, dict) and jsonContent.get("type") == "image": + elements.append(jsonContent) + logger.debug("AI returned proper JSON image structure") + continue + elif isinstance(jsonContent, list) and len(jsonContent) > 0: + if isinstance(jsonContent[0], dict) and jsonContent[0].get("type") == "image": + elements.extend(jsonContent) + logger.debug("AI returned proper JSON image structure in list") + continue + except (json.JSONDecodeError, ValueError, AttributeError): + pass + # Already base64 string or data URI if aiResponse.content.startswith("data:image/"): - # Extract base64 from data URI base64Data = aiResponse.content.split(",", 1)[1] else: - base64Data = aiResponse.content + content_stripped = aiResponse.content.strip() + if len(content_stripped) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r\t " for c in content_stripped[:200]): + base64Data = content_stripped.replace("\n", "").replace("\r", "").replace("\t", "").replace(" ", "") + else: + base64Data = aiResponse.content else: base64Data = "" - elements.append({ - "type": "image", - "content": { - "base64Data": base64Data, - "altText": generationHint or "Generated image", - "caption": "" - } - }) + # Always create proper JSON structure for images + if base64Data: + elements.append({ + "type": "image", + "content": { + "base64Data": base64Data, + "altText": generationHint or "Generated image", + "caption": "" + } + }) + logger.debug(f"Created proper JSON image structure with base64Data length: {len(base64Data)}") + else: + logger.warning(f"IMAGE_GENERATE returned empty content for section {sectionId}") + elements.append({ + "type": "error", + "message": f"Image generation returned empty content", + "sectionId": sectionId + }) else: # Parse JSON response for other content types - generatedElements = json.loads( - self.services.utils.jsonExtractString(aiResponse.content) - ) - if isinstance(generatedElements, list): - elements.extend(generatedElements) - elif isinstance(generatedElements, dict) and "elements" in generatedElements: - elements.extend(generatedElements["elements"]) + try: + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + elif isinstance(generatedElements, dict) and generatedElements.get("type"): + elements.append(generatedElements) + except (json.JSONDecodeError, ValueError) as json_error: + logger.error(f"Error parsing JSON response for section {sectionId}: {str(json_error)}") + elements.append({ + "type": "error", + "message": f"Failed to parse JSON response: {str(json_error)}", + "sectionId": sectionId + }) # ChatLog abschließen self.services.chat.progressLogFinish(sectionOperationId, True) + + # Update chapter progress after section completion + chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 + self.services.chat.progressLogUpdate( + chapterOperationId, + chapterProgress, + f"Section {sectionIndex + 1}/{totalSections} completed" + ) except Exception as e: # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) @@ -397,119 +713,13 @@ class StructureFiller: "sectionId": sectionId }) logger.error(f"Error generating section {sectionId}: {str(e)}") - # NICHT raise - Section wird mit Fehlermeldung gerendert - - else: - # Einzelverarbeitung: Jeder Part einzeln ODER Generation ohne ContentParts - # Handle case where no content parts but generationHint exists (e.g., Executive Summary) - if len(contentPartIds) == 0 and useAiCall and generationHint: - # Generate content from scratch using only generationHint - logger.debug(f"Processing section {sectionId}: No content parts, generating from generationHint only") - generationPrompt = self._buildSectionGenerationPrompt( - section=section, - contentParts=[], # NO PARTS - userPrompt=userPrompt, - generationHint=generationHint, - allSections=all_sections_list, - sectionIndex=sectionIndex, - isAggregation=False - ) - - # Erstelle Operation-ID für Section-Generierung - sectionOperationId = f"{fillOperationId}_section_{sectionId}" - - # Starte ChatLog mit Parent-Referenz - self.services.chat.progressLogStart( - sectionOperationId, - "Section Generation", - "Section", - f"Generating section {sectionId} from generationHint", - parentOperationId=fillOperationId - ) - - try: - # Debug: Log Prompt - self.services.utils.writeDebugFile( - generationPrompt, - f"section_content_{sectionId}_prompt" - ) - logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt") - - # Verwende callAi ohne ContentParts - operationType = OperationTypeEnum.IMAGE_GENERATE if contentType == "image" else OperationTypeEnum.DATA_ANALYSE - - # For IMAGE_GENERATE, truncate prompt to 4000 chars (DALL-E limit) - if operationType == OperationTypeEnum.IMAGE_GENERATE: - maxPromptLength = 4000 - if len(generationPrompt) > maxPromptLength: - logger.warning(f"Truncating DALL-E prompt from {len(generationPrompt)} to {maxPromptLength} characters") - # Keep the beginning (task, metadata, generation hint) and truncate from end - generationPrompt = generationPrompt[:maxPromptLength].rsplit('\n', 1)[0] # Truncate at last newline - - request = AiCallRequest( - prompt=generationPrompt, - contentParts=[], # NO PARTS - options=AiCallOptions( - operationType=operationType, - priority=PriorityEnum.BALANCED, - processingMode=ProcessingModeEnum.DETAILED + # Still update chapter progress even on error + chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 + self.services.chat.progressLogUpdate( + chapterOperationId, + chapterProgress, + f"Section {sectionIndex + 1}/{totalSections} completed (with errors)" ) - ) - aiResponse = await self.aiService.callAi(request) - - # Debug: Log Response - self.services.utils.writeDebugFile( - aiResponse.content, - f"section_content_{sectionId}_response" - ) - logger.debug(f"Logged section response: section_content_{sectionId}_response") - - # Handle IMAGE_GENERATE differently - returns image data directly - if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: - import base64 - # Convert image data to base64 string if needed - if isinstance(aiResponse.content, bytes): - base64Data = base64.b64encode(aiResponse.content).decode('utf-8') - elif isinstance(aiResponse.content, str): - # Already base64 string or data URI - if aiResponse.content.startswith("data:image/"): - # Extract base64 from data URI - base64Data = aiResponse.content.split(",", 1)[1] - else: - base64Data = aiResponse.content - else: - base64Data = "" - - elements.append({ - "type": "image", - "content": { - "base64Data": base64Data, - "altText": generationHint or "Generated image", - "caption": "" - } - }) - else: - # Parse JSON response for other content types - generatedElements = json.loads( - self.services.utils.jsonExtractString(aiResponse.content) - ) - if isinstance(generatedElements, list): - elements.extend(generatedElements) - elif isinstance(generatedElements, dict) and "elements" in generatedElements: - elements.extend(generatedElements["elements"]) - - # ChatLog abschließen - self.services.chat.progressLogFinish(sectionOperationId, True) - - except Exception as e: - # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) - self.services.chat.progressLogFinish(sectionOperationId, False) - elements.append({ - "type": "error", - "message": f"Error generating section {sectionId}: {str(e)}", - "sectionId": sectionId - }) - logger.error(f"Error generating section {sectionId}: {str(e)}") # Einzelverarbeitung: Jeder Part einzeln for partId in contentPartIds: @@ -567,22 +777,28 @@ class StructureFiller: # Erstelle Operation-ID für Section-Generierung sectionOperationId = f"{fillOperationId}_section_{sectionId}" - # Starte ChatLog mit Parent-Referenz + # Starte ChatLog mit Parent-Referenz (chapter, not fillOperationId) self.services.chat.progressLogStart( sectionOperationId, "Section Generation", - "Section", - f"Generating section {sectionId}", - parentOperationId=fillOperationId + f"Section {sectionIndex + 1}/{totalSections}", + f"{sectionTitle} (single part)", + parentOperationId=chapterOperationId ) try: + # Update: Building prompt + self.services.chat.progressLogUpdate(sectionOperationId, 0.2, "Building generation prompt") + # Debug: Log Prompt self.services.utils.writeDebugFile( generationPrompt, - f"section_content_{sectionId}_prompt" + f"{chapterId}_section_{sectionId}_prompt" ) - logger.debug(f"Logged section prompt: section_content_{sectionId}_prompt") + logger.debug(f"Logged section prompt: {chapterId}_section_{sectionId}_prompt") + + # Update: Calling AI + self.services.chat.progressLogUpdate(sectionOperationId, 0.4, "Calling AI for content generation") # Verwende callAi für ContentParts-Unterstützung # Use IMAGE_GENERATE for image content type @@ -609,49 +825,103 @@ class StructureFiller: ) aiResponse = await self.aiService.callAi(request) + # Update: Processing response + self.services.chat.progressLogUpdate(sectionOperationId, 0.6, "Processing AI response") + # Debug: Log Response self.services.utils.writeDebugFile( aiResponse.content, - f"section_content_{sectionId}_response" + f"{chapterId}_section_{sectionId}_response" ) - logger.debug(f"Logged section response: section_content_{sectionId}_response") + logger.debug(f"Logged section response: {chapterId}_section_{sectionId}_response") + + # Update: Validating content + self.services.chat.progressLogUpdate(sectionOperationId, 0.8, "Validating generated content") # Handle IMAGE_GENERATE differently - returns image data directly if contentType == "image" and operationType == OperationTypeEnum.IMAGE_GENERATE: import base64 + base64Data = "" + # Convert image data to base64 string if needed if isinstance(aiResponse.content, bytes): base64Data = base64.b64encode(aiResponse.content).decode('utf-8') elif isinstance(aiResponse.content, str): + # Check if it's already a JSON structure + try: + jsonContent = json.loads(self.services.utils.jsonExtractString(aiResponse.content)) + if isinstance(jsonContent, dict) and jsonContent.get("type") == "image": + elements.append(jsonContent) + logger.debug("AI returned proper JSON image structure") + continue + elif isinstance(jsonContent, list) and len(jsonContent) > 0: + if isinstance(jsonContent[0], dict) and jsonContent[0].get("type") == "image": + elements.extend(jsonContent) + logger.debug("AI returned proper JSON image structure in list") + continue + except (json.JSONDecodeError, ValueError, AttributeError): + pass + # Already base64 string or data URI if aiResponse.content.startswith("data:image/"): - # Extract base64 from data URI base64Data = aiResponse.content.split(",", 1)[1] else: - base64Data = aiResponse.content + content_stripped = aiResponse.content.strip() + if len(content_stripped) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\n\r\t " for c in content_stripped[:200]): + base64Data = content_stripped.replace("\n", "").replace("\r", "").replace("\t", "").replace(" ", "") + else: + base64Data = aiResponse.content else: base64Data = "" - elements.append({ - "type": "image", - "content": { - "base64Data": base64Data, - "altText": generationHint or "Generated image", - "caption": "" - } - }) + # Always create proper JSON structure for images + if base64Data: + elements.append({ + "type": "image", + "content": { + "base64Data": base64Data, + "altText": generationHint or "Generated image", + "caption": "" + } + }) + logger.debug(f"Created proper JSON image structure with base64Data length: {len(base64Data)}") + else: + logger.warning(f"IMAGE_GENERATE returned empty content for section {sectionId}") + elements.append({ + "type": "error", + "message": f"Image generation returned empty content", + "sectionId": sectionId + }) else: # Parse JSON response for other content types - generatedElements = json.loads( - self.services.utils.jsonExtractString(aiResponse.content) - ) - if isinstance(generatedElements, list): - elements.extend(generatedElements) - elif isinstance(generatedElements, dict) and "elements" in generatedElements: - elements.extend(generatedElements["elements"]) + try: + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + elif isinstance(generatedElements, dict) and generatedElements.get("type"): + elements.append(generatedElements) + except (json.JSONDecodeError, ValueError) as json_error: + logger.error(f"Error parsing JSON response for section {sectionId}: {str(json_error)}") + elements.append({ + "type": "error", + "message": f"Failed to parse JSON response: {str(json_error)}", + "sectionId": sectionId + }) # ChatLog abschließen self.services.chat.progressLogFinish(sectionOperationId, True) + + # Update chapter progress after section completion + chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 + self.services.chat.progressLogUpdate( + chapterOperationId, + chapterProgress, + f"Section {sectionIndex + 1}/{totalSections} completed" + ) except Exception as e: # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) @@ -662,6 +932,13 @@ class StructureFiller: "sectionId": sectionId }) logger.error(f"Error generating section {sectionId}: {str(e)}") + # Still update chapter progress even on error + chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 + self.services.chat.progressLogUpdate( + chapterOperationId, + chapterProgress, + f"Section {sectionIndex + 1}/{totalSections} completed (with errors)" + ) # NICHT raise - Section wird mit Fehlermeldung gerendert else: # Füge extrahierten Content direkt hinzu (kein AI-Call) @@ -687,7 +964,35 @@ class StructureFiller: "extractionPrompt": part.metadata.get("extractionPrompt") }) - section["elements"] = elements + # Assign elements to section (for all processing paths) + section["elements"] = elements + + # Update chapter progress after section completion (for all sections, including non-AI) + chapterProgress = (sectionIndex + 1) / totalSections if totalSections > 0 else 1.0 + self.services.chat.progressLogUpdate( + chapterOperationId, + chapterProgress, + f"Section {sectionIndex + 1}/{totalSections} completed" + ) + + # Update overall progress after section completion + overallProgress = calculateOverallProgress(chapterIndex - 1, totalChapters, sectionIndex + 1, totalSections) + self.services.chat.progressLogUpdate( + fillOperationId, + overallProgress, + f"Chapter {chapterIndex}/{totalChapters}, Section {sectionIndex + 1}/{totalSections} completed" + ) + + # Finish chapter operation after all sections processed + self.services.chat.progressLogFinish(chapterOperationId, True) + + # Update overall progress after chapter completion + overallProgress = chapterIndex / totalChapters if totalChapters > 0 else 1.0 + self.services.chat.progressLogUpdate( + fillOperationId, + overallProgress, + f"Chapter {chapterIndex}/{totalChapters} completed: {chapterTitle}" + ) return chapterStructure @@ -744,7 +1049,10 @@ class StructureFiller: ) -> Dict[str, Any]: """ Flattening: Konvertiert Chapters zu finaler Section-Struktur. - Jedes Chapter wird zu einer Heading-Section + dessen Sections. + Jedes Chapter wird zu einer Heading-Section (Level 1) + dessen Sections. + + IMPORTANT: Chapters are the main structure elements (heading level 1). + All section headings with level < 2 are adjusted to level 2. """ result = { "metadata": chapterStructure.get("metadata", {}), @@ -760,7 +1068,7 @@ class StructureFiller: } for chapter in doc.get("chapters", []): - # 1. Vordefinierte Heading-Section für Chapter-Title + # 1. Vordefinierte Heading-Section für Chapter-Title (ALWAYS Level 1) heading_section = { "id": f"{chapter['id']}_heading", "content_type": "heading", @@ -768,19 +1076,42 @@ class StructureFiller: "type": "heading", "content": { "text": chapter.get("title", ""), - "level": chapter.get("level", 1) + "level": 1 # Chapters are always level 1 } }] } flattened_doc["sections"].append(heading_section) - # 2. Generierte Sections - flattened_doc["sections"].extend(chapter.get("sections", [])) + # 2. Generierte Sections - adjust heading levels + for section in chapter.get("sections", []): + adjusted_section = self._adjustSectionHeadingLevels(section) + flattened_doc["sections"].append(adjusted_section) result["documents"].append(flattened_doc) return result + def _adjustSectionHeadingLevels(self, section: Dict[str, Any]) -> Dict[str, Any]: + """ + Adjust heading levels in sections: sections with type heading and level < 2 are changed to level 2. + Only chapter headings have level 1. + """ + adjusted_section = copy.deepcopy(section) + + # Check if this is a heading section + if adjusted_section.get("content_type") == "heading": + elements = adjusted_section.get("elements", []) + for element in elements: + if isinstance(element, dict) and element.get("type") == "heading": + content = element.get("content", {}) + if isinstance(content, dict): + level = content.get("level", 1) + # If level < 2, change to level 2 (only chapters have level 1) + if level < 2: + content["level"] = 2 + + return adjusted_section + def _buildChapterSectionsStructurePrompt( self, chapterId: str, @@ -975,6 +1306,9 @@ CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside th contentStructureExample = self._getContentStructureExample(contentType) + # Special handling for image content type with IMAGE_GENERATE + isImageGeneration = contentType == "image" and len(validParts) == 0 + if isAggregation: prompt = f"""# TASK: Generate Section Content (Aggregation) @@ -982,22 +1316,10 @@ CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside th - Section ID: {sectionId} - Content Type: {contentType} - Generation Hint: {generationHint} -{contextText} - -## USER REQUEST (for context) -``` -{userPrompt} -``` ## AVAILABLE CONTENT FOR THIS SECTION {contentPartsText if contentPartsText else "(No content parts specified for this section)"} -## IMPORTANT - SECTION INDEPENDENCE: -- This section is independent and self-contained -- You do NOT have information about other sections' content -- Provide all necessary context within this section -- Context above is for logical flow only, NOT for content dependencies - ## INSTRUCTIONS 1. Generate content for section "{sectionId}" based on the generation hint above 2. **AGGREGATION**: Combine ALL provided ContentParts into ONE element (e.g., one table with all data) @@ -1007,6 +1329,10 @@ CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside th 6. Ensure the generated content is self-contained and understandable independently 7. Return ONLY a JSON object with an "elements" array 8. Each element should match the content_type: {contentType} +9. CRITICAL - NO HTML/STYLING: Do NOT include HTML tags, CSS styles, or any formatting markup in text content. Return plain text only. Formatting is handled automatically by the renderer. +10. For paragraphs: Return plain text only, no HTML tags like
, or style attributes +11. For headings: Return plain text only, no HTML tags or styling +12. For images: Do NOT include base64 data in JSON - images are handled separately ## OUTPUT FORMAT Return a JSON object with this structure: @@ -1020,7 +1346,16 @@ Return a JSON object with this structure: ] }} -CRITICAL: "content" MUST always be an object (never a string). Return ONLY valid JSON. Do not include any explanatory text outside the JSON. +CRITICAL: +- "content" MUST always be an object (never a string) +- For text content: Return plain text only, NO HTML tags, NO CSS styles, NO formatting markup +- Return ONLY valid JSON. Do not include any explanatory text outside the JSON. + +## CONTEXT (for reference only) +{contextText if contextText else ""} +``` +{userPrompt} +``` """ else: prompt = f"""# TASK: Generate Section Content @@ -1029,30 +1364,21 @@ CRITICAL: "content" MUST always be an object (never a string). Return ONLY valid - Section ID: {sectionId} - Content Type: {contentType} - Generation Hint: {generationHint} -{contextText} - -## USER REQUEST (for context) -``` -{userPrompt} -``` ## AVAILABLE CONTENT FOR THIS SECTION {contentPartsText if contentPartsText else "(No content parts specified for this section)"} -## IMPORTANT - SECTION INDEPENDENCE: -- This section is independent and self-contained -- You do NOT have information about other sections' content -- Provide all necessary context within this section -- Context above is for logical flow only, NOT for content dependencies - ## INSTRUCTIONS 1. Generate content for section "{sectionId}" based on the generation hint above 2. Use the available content parts to populate this section -3. For images: Use data URI format (data:image/[type];base64,[data]) when embedding base64 image data -4. For extracted text: Format appropriately based on content_type ({contentType}) -5. Ensure the generated content is self-contained and understandable independently -6. Return ONLY a JSON object with an "elements" array -7. Each element should match the content_type: {contentType} +3. For extracted text: Format appropriately based on content_type ({contentType}) +4. Ensure the generated content is self-contained and understandable independently +5. Return ONLY a JSON object with an "elements" array +6. Each element should match the content_type: {contentType} +7. CRITICAL - NO HTML/STYLING: Do NOT include HTML tags, CSS styles, or any formatting markup in text content. Return plain text only. Formatting is handled automatically by the renderer. +8. For paragraphs: Return plain text only, no HTML tags like
, or style attributes +9. For headings: Return plain text only, no HTML tags or styling +10. For images: If you need to reference an image, describe it in altText. Do NOT include base64 data - images are handled separately ## OUTPUT FORMAT Return a JSON object with this structure: @@ -1066,7 +1392,16 @@ Return a JSON object with this structure: ] }} -CRITICAL: "content" MUST always be an object (never a string). Return ONLY valid JSON. Do not include any explanatory text outside the JSON. +CRITICAL: +- "content" MUST always be an object (never a string) +- For text content: Return plain text only, NO HTML tags, NO CSS styles, NO formatting markup +- Return ONLY valid JSON. Do not include any explanatory text outside the JSON + +## CONTEXT (for reference only) +{contextText if contextText else ""} +``` +{userPrompt} +``` """ return prompt diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py index 84e659a4..d3b46e0e 100644 --- a/modules/services/serviceAi/subStructureGeneration.py +++ b/modules/services/serviceAi/subStructureGeneration.py @@ -160,18 +160,30 @@ IMPORTANT - CHAPTER INDEPENDENCE: - One chapter does NOT have information about another chapter - Each chapter must provide its own context and be understandable alone +CRITICAL - CONTENT ASSIGNMENT TO CHAPTERS: +- You MUST assign available ContentParts to chapters using contentPartIds +- Based on the user request, determine which content should be used in which chapter +- If the user request mentions specific content, assign the corresponding ContentPart to the appropriate chapter +- Chapters WITHOUT contentPartIds can only generate generic content, NOT document-specific analysis +- To include document content analysis, chapters MUST have contentPartIds assigned +- Review the user request carefully to match ContentParts to chapters based on context and purpose + CRITICAL - CHAPTERS WITHOUT CONTENT PARTS: - If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch - Include: what to generate, what information to include, purpose, specific details -- Without content parts, AI relies ENTIRELY on generationHint -- GOOD: "Create [specific content] with [details]. Include [information]. Purpose: [explanation]." -- BAD: "Create title" or "Add section" (too vague) +- Without content parts, AI relies ENTIRELY on generationHint and CANNOT analyze document content + +IMPORTANT - FORMATTING: +- Formatting (fonts, colors, layouts, styles) is handled AUTOMATICALLY by the renderer +- Do NOT specify formatting details in generationHint unless it's content-specific (e.g., "pie chart with 3 segments") +- Focus on CONTENT and STRUCTURE, not visual formatting +- The renderer will apply appropriate styling based on the output format ({outputFormat}) For each chapter: - chapter id - level (1, 2, 3, etc.) - title -- contentPartIds: [List of ContentPart IDs] +- contentPartIds: [List of ContentPart IDs] - ASSIGN content based on user request and chapter purpose - contentPartInstructions: {{ "partId": {{ "instruction": "How content should be structured" @@ -179,6 +191,7 @@ For each chapter: }} - generationHint: Description of the content (must be self-contained with all necessary context) * If contentPartIds is EMPTY, generationHint MUST be VERY DETAILED with all context needed to generate content from scratch + * Focus on content and structure, NOT formatting details OUTPUT FORMAT: {outputFormat} diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index 9e6f41c9..5525ae89 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -82,205 +82,119 @@ class RendererPptx(BaseRenderer): logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - sections: {len(slide_sections)}, images: {len(slide_images)}, content: {len(slide_content)} chars") - # Determine layout: first slide (i==0) uses title slide layout - # For image-only slides, use blank layout to avoid placeholder interference - # Otherwise use title+content layout - if i == 0: - slideLayoutIndex = 0 # Title slide layout - elif hasImages and not hasSections and not slide_content: - # Image-only slide: use blank layout (typically index 6, fallback to 5 if not available) - try: - slideLayoutIndex = 6 # Blank layout - # Verify layout exists, fallback if not - if slideLayoutIndex >= len(prs.slide_layouts): - slideLayoutIndex = 5 # Alternative blank layout - except (AttributeError, IndexError): - slideLayoutIndex = 1 # Fallback to title+content - else: - slideLayoutIndex = 1 # Title and content layout + # Use blank layout for all slides to avoid placeholder interference + # Find blank layout (typically index 6, fallback to 5) + slideLayoutIndex = None + for idx in [6, 5]: + if idx < len(prs.slide_layouts): + try: + layout = prs.slide_layouts[idx] + # Check if it's a blank layout (no placeholders) + if len(layout.placeholders) == 0: + slideLayoutIndex = idx + break + except (AttributeError, IndexError): + continue + + # If no blank layout found, use layout with fewest placeholders + if slideLayoutIndex is None: + min_placeholders = float('inf') + for idx in range(len(prs.slide_layouts)): + try: + layout = prs.slide_layouts[idx] + placeholder_count = len(layout.placeholders) if hasattr(layout, 'placeholders') else 0 + if placeholder_count < min_placeholders: + min_placeholders = placeholder_count + slideLayoutIndex = idx + except: + continue + + # Fallback to first layout if still None + if slideLayoutIndex is None: + slideLayoutIndex = 0 slide_layout = prs.slide_layouts[slideLayoutIndex] slide = prs.slides.add_slide(slide_layout) - # Set title with AI-generated styling - # For blank layouts, add title as textbox since there's no title placeholder + # Clear placeholder text instead of removing placeholders (safer approach) + # This avoids corrupting the PPTX file structure try: - title_shape = slide.shapes.title - title_shape.text = slide_data.get("title", "Slide") - - # Apply title styling - LEFT ALIGNED by default - title_style = styles.get("title", {}) - if title_shape.text_frame.paragraphs[0].font: - title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44)) - title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True) - title_color = self._getSafeColor(title_style.get("color", (31, 78, 121))) - title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color) - # Set left alignment for title - title_shape.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT - except AttributeError: - # Blank layout has no title placeholder - add title as textbox - from pptx.util import Inches - titleBox = slide.shapes.add_textbox(Inches(0.5), Inches(0.3), prs.slide_width - Inches(1), Inches(0.8)) - titleFrame = titleBox.text_frame - titleFrame.text = slide_data.get("title", "Slide") - title_style = styles.get("title", {}) - titleFrame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44)) - titleFrame.paragraphs[0].font.bold = title_style.get("bold", True) - title_color = self._getSafeColor(title_style.get("color", (31, 78, 121))) - titleFrame.paragraphs[0].font.color.rgb = RGBColor(*title_color) - titleFrame.paragraphs[0].alignment = PP_ALIGN.LEFT + for shape in slide.shapes: + if hasattr(shape, 'is_placeholder') and shape.is_placeholder: + try: + if hasattr(shape, 'text_frame'): + shape.text_frame.clear() + # Set text to empty string to remove "Click to add text" + if len(shape.text_frame.paragraphs) > 0: + shape.text_frame.paragraphs[0].text = "" + except: + pass + except Exception as placeholder_error: + logger.warning(f"Could not clear placeholders: {str(placeholder_error)}") + + # Add title as textbox (smaller size for slides) + from pptx.util import Inches + titleBox = slide.shapes.add_textbox(Inches(0.5), Inches(0.2), prs.slide_width - Inches(1), Inches(0.6)) + titleFrame = titleBox.text_frame + titleFrame.text = slide_data.get("title", "Slide") + title_style = styles.get("title", {}) + # Smaller title size for slides (default 32 instead of 44) + title_font_size = title_style.get("font_size", 32) + # Reduce further for slides (max 32pt, min 10pt for readability) + title_font_size = max(10, min(title_font_size, 32)) + titleFrame.paragraphs[0].font.size = Pt(title_font_size) + titleFrame.paragraphs[0].font.bold = title_style.get("bold", True) + title_color = self._getSafeColor(title_style.get("color", (31, 78, 121))) + titleFrame.paragraphs[0].font.color.rgb = RGBColor(*title_color) + titleFrame.paragraphs[0].alignment = PP_ALIGN.LEFT + titleFrame.word_wrap = True # Render sections with proper PowerPoint objects (tables, lists, etc.) + # Organize content into frames for better layout if hasSections: - # Use content placeholder for structured content (only if layout has placeholder[1]) - try: - content_shape = slide.placeholders[1] - text_frame = content_shape.text_frame - text_frame.clear() - except (AttributeError, IndexError): - # Layout might not have placeholder[1], create textbox instead - from pptx.util import Inches - left = Inches(0.5) - top = Inches(1.5) - width = prs.slide_width - Inches(1) - height = prs.slide_height - top - Inches(0.5) - textbox = slide.shapes.add_textbox(left, top, width, height) - text_frame = textbox.text_frame - text_frame.word_wrap = True - - # Track vertical position for multiple content types - current_y = Inches(1.5) # Start below title - - for section in slide_sections: - section_type = section.get("content_type", "paragraph") - elements = section.get("elements", []) - - # Check if section has image content_type - if section_type == "image": - # Extract images from this section - for element in elements: - if isinstance(element, dict) and element.get("type") == "image": - content = element.get("content", {}) - if isinstance(content, dict): - base64Data = content.get("base64Data") - if base64Data: - slide_images.append({ - "base64Data": base64Data, - "altText": content.get("altText", "Image"), - "caption": content.get("caption", "") - }) - continue # Skip rendering image sections as text - - # Handle sections without elements (e.g., headings that create slides) - if not elements: - continue - - for element in elements: - if not isinstance(element, dict): - continue - - # Check element type first, fall back to section type - element_type = element.get("type", "") - if not element_type: - element_type = section_type - - # Skip image elements - they're handled separately - if element_type == "image": - content = element.get("content", {}) - if isinstance(content, dict): - base64Data = content.get("base64Data") - if base64Data: - slide_images.append({ - "base64Data": base64Data, - "altText": content.get("altText", "Image"), - "caption": content.get("caption", "") - }) - continue - - if element_type == "table": - # Render as actual PowerPoint table - self._addTableToSlide(slide, element, styles, current_y) - current_y += Inches(2) # Space for table - elif element_type == "bullet_list" or element_type == "list": - # Render as actual PowerPoint bullet list - if text_frame: - self._addBulletListToSlide(slide, element, styles, text_frame) - elif element_type == "heading": - # Render as heading in text frame - if text_frame: - self._addHeadingToSlide(slide, element, styles, text_frame) - elif element_type == "paragraph": - # Render as paragraph in text frame - if text_frame: - self._addParagraphToSlide(slide, element, styles, text_frame) - elif element_type == "code_block" or element_type == "code": - # Render as formatted code block - if text_frame: - self._addCodeBlockToSlide(slide, element, styles, text_frame) - elif element_type == "extracted_text": - # Render extracted text as paragraph with styling - if text_frame: - content = element.get("content", "") - source = element.get("source", "") - if content: - paragraph_style = styles.get("paragraph", {}) - p = text_frame.add_paragraph() - p.text = content - p.font.size = Pt(paragraph_style.get("font_size", 18)) - p.font.bold = paragraph_style.get("bold", False) - p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) - p.alignment = PP_ALIGN.LEFT # Left align by default - if source: - p.add_run(f" (Source: {source})").font.italic = True - elif element_type == "reference": - # Render reference - if text_frame: - label = element.get("label", "Reference") - p = text_frame.add_paragraph() - p.text = f"[Reference: {label}]" - p.font.italic = True - p.alignment = PP_ALIGN.LEFT - else: - # Fallback: try to render as paragraph - if text_frame: - content = element.get("content", "") - if isinstance(content, dict): - text = content.get("text", "") - elif isinstance(content, str): - text = content - else: - text = "" - - if text: - self._addParagraphToSlide(slide, element, styles, text_frame) + # Organize sections into content groups for frame-based layout + # Images are handled within the frame rendering method + self._renderSlideContentWithFrames(slide, slide_sections, slide_images, styles, prs) - # Handle images after processing sections (images may have been extracted from sections) - # Update hasImages in case images were added during section processing - hasImages = len(slide_images) > 0 - if hasImages: - self._addImagesToSlide(slide, slide_images, styles) - - # Fallback: if no sections but has content text, render as before + # Fallback: if no sections but has content text, render in textbox elif slide_content and not hasImages: - content_shape = slide.placeholders[1] - text_frame = content_shape.text_frame - text_frame.clear() + # Create textbox for content (no placeholders in blank layout) + from pptx.util import Inches + title_height_used = Inches(1.0) # Title height for blank slides + content_left = Inches(0.5) + content_top = title_height_used + Inches(0.3) + content_width = prs.slide_width - Inches(1) + content_height = prs.slide_height - content_top - Inches(0.5) + content_textbox = slide.shapes.add_textbox(content_left, content_top, content_width, content_height) + text_frame = content_textbox.text_frame + text_frame.word_wrap = True + text_frame.auto_size = None # Split content into paragraphs paragraphs = slide_content.split('\n\n') - for paraIdx, paragraph in enumerate(paragraphs): + for paragraph in paragraphs: if paragraph.strip(): - if paraIdx == 0: - p = text_frame.paragraphs[0] - else: - p = text_frame.add_paragraph() - + p = text_frame.add_paragraph() p.text = paragraph.strip() - # Apply AI-generated styling + # Apply AI-generated styling with adaptive sizing paragraph_style = styles.get("paragraph", {}) - p.font.size = Pt(paragraph_style.get("font_size", 18)) + base_font_size = paragraph_style.get("font_size", 18) + # Calculate adaptive font size based on content length + try: + total_chars = len(slide_content) + chars_per_line = max(1, int(content_width / Pt(10))) + lines_needed = total_chars / chars_per_line + available_lines = max(1, int(content_height / Pt(14))) + font_multiplier = 1.0 + if available_lines > 0 and lines_needed > available_lines: + font_multiplier = max(0.6, min(1.0, (available_lines / lines_needed) * 1.1)) + calculated_size = max(6, int(base_font_size * font_multiplier)) # Minimum 6pt + except (ZeroDivisionError, ValueError, TypeError): + calculated_size = max(6, base_font_size) # Fallback to base size with minimum + + p.font.size = Pt(calculated_size) p.font.bold = paragraph_style.get("bold", False) paragraph_color = self._getSafeColor(paragraph_style.get("color", (47, 47, 47))) p.font.color.rgb = RGBColor(*paragraph_color) @@ -567,11 +481,11 @@ class RendererPptx(BaseRenderer): def _getDefaultStyleSet(self) -> Dict[str, Any]: """Default PowerPoint style set - used when no style instructions present.""" return { - "title": {"font_size": 52, "color": "#1B365D", "bold": True, "align": "center"}, - "heading": {"font_size": 36, "color": "#2C5F2D", "bold": True, "align": "left"}, - "subheading": {"font_size": 28, "color": "#4A90E2", "bold": True, "align": "left"}, - "paragraph": {"font_size": 20, "color": "#2F2F2F", "bold": False, "align": "left"}, - "bullet_list": {"font_size": 20, "color": "#2F2F2F", "indent": 20}, + "title": {"font_size": 32, "color": "#1B365D", "bold": True, "align": "left"}, + "heading": {"font_size": 24, "color": "#1B365D", "bold": True, "align": "left"}, + "subheading": {"font_size": 20, "color": "#4A90E2", "bold": True, "align": "left"}, + "paragraph": {"font_size": 14, "color": "#2F2F2F", "bold": False, "align": "left"}, + "bullet_list": {"font_size": 14, "color": "#2F2F2F", "indent": 20}, "table_header": {"font_size": 18, "color": "#FFFFFF", "bold": True, "background": "#1B365D"}, "table_cell": {"font_size": 16, "color": "#2F2F2F", "bold": False, "background": "#F8F9FA"}, "slide_size": "16:9", @@ -724,11 +638,15 @@ JSON ONLY. NO OTHER TEXT.""" # Get section title from data or use default section_title = "Untitled Section" if section.get("content_type") == "heading": - # Extract text from elements array + # Extract text from elements array - use nested content structure for element in section.get("elements", []): - if isinstance(element, dict) and "text" in element: - section_title = element.get("text", "Untitled Section") - break + if isinstance(element, dict): + content = element.get("content", {}) + if isinstance(content, dict): + text = content.get("text", "") + if text: + section_title = text + break elif section.get("title"): section_title = section.get("title") @@ -738,7 +656,10 @@ JSON ONLY. NO OTHER TEXT.""" # Check for three content formats from Phase 5D in elements content_parts = [] for element in elements: - element_type = element.get("type", "") if isinstance(element, dict) else "" + if not isinstance(element, dict): + continue + + element_type = element.get("type", "") # Support three content formats from Phase 5D if element_type == "reference": @@ -782,25 +703,47 @@ JSON ONLY. NO OTHER TEXT.""" }) return { - "title": section_title or (elements[0].get("altText", "Image") if elements else "Image"), + "title": section_title or (elements[0].get("content", {}).get("altText", "Image") if elements and isinstance(elements[0], dict) else "Image"), "content": "\n\n".join(content_parts) if content_parts else "", # Include reference/extracted_text if present "images": images } - # Build slide content based on section type + # Build slide content based on section type - iterate over elements and format each if not content_parts: # Only if we didn't process reference/extracted_text above - if content_type == "table": - content_parts.append(self._formatTableForSlide(elements)) - elif content_type == "list": - content_parts.append(self._formatListForSlide(elements)) - elif content_type == "heading": - content_parts.append(self._formatHeadingForSlide(elements)) - elif content_type == "paragraph": - content_parts.append(self._formatParagraphForSlide(elements)) - elif content_type == "code": - content_parts.append(self._formatCodeForSlide(elements)) - else: - content_parts.append(self._formatParagraphForSlide(elements)) + for element in elements: + if not isinstance(element, dict): + continue + + element_type = element.get("type", "") + # Use element type if available, otherwise fall back to section content_type + if not element_type: + element_type = content_type + + if element_type == "table": + formatted = self._formatTableForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "bullet_list" or element_type == "list": + formatted = self._formatListForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "heading": + formatted = self._formatHeadingForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "paragraph": + formatted = self._formatParagraphForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "code_block" or element_type == "code": + formatted = self._formatCodeForSlide(element) + if formatted: + content_parts.append(formatted) + else: + # Fallback to paragraph formatting + formatted = self._formatParagraphForSlide(element) + if formatted: + content_parts.append(formatted) # Combine content parts slide_content = "\n\n".join(filter(None, content_parts)) @@ -1002,7 +945,7 @@ JSON ONLY. NO OTHER TEXT.""" return 1 # Default to title and content layout def _createSlidesFromSections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]: - """Create slides from sections: each heading creates a new slide, content accumulates until next heading.""" + """Create slides from sections: each heading level 1 (chapter) creates a new slide, content accumulates until next level 1 heading.""" try: slides = [] current_slide_sections = [] # Store sections (not formatted text) for proper rendering @@ -1017,74 +960,43 @@ JSON ONLY. NO OTHER TEXT.""" continue if section_type == "heading": - # If we have accumulated content, create a slide - if current_slide_sections: - slides.append({ - "title": current_slide_title, - "sections": current_slide_sections.copy(), # Store sections for proper rendering - "images": [] - }) - current_slide_sections = [] - - # Start new slide with heading as title - heading_found = False + # Extract heading level + level = 1 # Default + heading_text = "" for element in elements: if isinstance(element, dict): # Extract from nested content structure content = element.get("content", {}) if isinstance(content, dict): heading_text = content.get("text", "") + level = content.get("level", 1) elif isinstance(content, str): heading_text = content - else: - heading_text = "" - - if heading_text: - current_slide_title = heading_text - heading_found = True - break + level = 1 - # If no heading text found but this is a heading section, use section ID or default - if not heading_found: - current_slide_title = section.get("id", "Untitled Section") + # Only level 1 headings (chapters) create new slides + if level == 1: + # If we have accumulated content, create a slide + if current_slide_sections: + slides.append({ + "title": current_slide_title, + "sections": current_slide_sections.copy(), # Store sections for proper rendering + "images": [] + }) + current_slide_sections = [] + + # Start new slide with heading as title + if heading_text: + current_slide_title = heading_text + else: + # If no heading text found but this is a heading section, use section ID or default + current_slide_title = section.get("id", "Untitled Section") + else: + # Level 2+ headings are added as sections to current slide + current_slide_sections.append(section) elif section_type == "image": - # Create separate slide for image - if current_slide_sections: - slides.append({ - "title": current_slide_title, - "sections": current_slide_sections.copy(), - "images": [] - }) - current_slide_sections = [] - - # Extract image data - imageData = [] - for element in elements: - if isinstance(element, dict): - # Extract from nested content structure - content = element.get("content", {}) - if isinstance(content, dict): - base64Data = content.get("base64Data") - altText = content.get("altText", "Image") - caption = content.get("caption", "") - else: - # Fallback to direct element fields - base64Data = element.get("base64Data") - altText = element.get("altText", "Image") - caption = element.get("caption", "") - - if base64Data: - imageData.append({ - "base64Data": base64Data, - "altText": altText, - "caption": caption - }) - - slides.append({ - "title": section.get("title") or (imageData[0].get("altText", "Image") if imageData else "Image"), - "sections": [], - "images": imageData - }) + # Images are added to current slide (will be organized in frames) + current_slide_sections.append(section) else: # Add section to current slide (will be rendered properly) current_slide_sections.append(section) @@ -1113,21 +1025,42 @@ JSON ONLY. NO OTHER TEXT.""" if content_type == "image": return "" - # Process each element in the section + # Process each element in the section - use element type, not section type content_parts = [] for element in elements: - if content_type == "table": - content_parts.append(self._formatTableForSlide(element)) - elif content_type == "bullet_list" or content_type == "list": - content_parts.append(self._formatListForSlide(element)) - elif content_type == "heading": - content_parts.append(self._formatHeadingForSlide(element)) - elif content_type == "paragraph": - content_parts.append(self._formatParagraphForSlide(element)) - elif content_type == "code_block" or content_type == "code": - content_parts.append(self._formatCodeForSlide(element)) + if not isinstance(element, dict): + continue + + element_type = element.get("type", "") + # Use element type if available, otherwise fall back to section content_type + if not element_type: + element_type = content_type + + if element_type == "table": + formatted = self._formatTableForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "bullet_list" or element_type == "list": + formatted = self._formatListForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "heading": + formatted = self._formatHeadingForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "paragraph": + formatted = self._formatParagraphForSlide(element) + if formatted: + content_parts.append(formatted) + elif element_type == "code_block" or element_type == "code": + formatted = self._formatCodeForSlide(element) + if formatted: + content_parts.append(formatted) else: - content_parts.append(self._formatParagraphForSlide(element)) + # Fallback to paragraph formatting + formatted = self._formatParagraphForSlide(element) + if formatted: + content_parts.append(formatted) return "\n\n".join(filter(None, content_parts)) @@ -1166,80 +1099,80 @@ JSON ONLY. NO OTHER TEXT.""" img = images[0] base64Data = img.get("base64Data") # Validate base64Data is present and not empty - if base64Data and isinstance(base64Data, str) and len(base64Data.strip()) > 0: - try: - imageBytes = base64.b64decode(base64Data) - if len(imageBytes) == 0: - logger.error("Decoded image bytes are empty") - return - imageStream = io.BytesIO(imageBytes) - except Exception as decode_error: - logger.error(f"Failed to decode base64 image data: {str(decode_error)}") - return - else: + if not base64Data or not isinstance(base64Data, str) or len(base64Data.strip()) == 0: logger.error(f"Invalid base64Data: present={bool(base64Data)}, type={type(base64Data)}, length={len(base64Data) if base64Data else 0}") return + + try: + imageBytes = base64.b64decode(base64Data) + if len(imageBytes) == 0: + logger.error("Decoded image bytes are empty") + return + imageStream = io.BytesIO(imageBytes) + except Exception as decode_error: + logger.error(f"Failed to decode base64 image data: {str(decode_error)}") + return + + # Get image dimensions + try: + from PIL import Image as PILImage + pilImage = PILImage.open(imageStream) + imgWidth, imgHeight = pilImage.size - # Get image dimensions - try: - from PIL import Image as PILImage - pilImage = PILImage.open(imageStream) - imgWidth, imgHeight = pilImage.size - - # Scale to fit available space (max 90% of slide for better visibility) - # Convert PIL pixels to PowerPoint points (1 inch = 72 points, typical screen DPI = 96) - # Conversion: pixels * (72/96) = points - imgWidthPoints = imgWidth * (72.0 / 96.0) - imgHeightPoints = imgHeight * (72.0 / 96.0) - - maxWidth = availableWidth * 0.9 - maxHeight = availableHeight * 0.9 - - scale = min(maxWidth / imgWidthPoints, maxHeight / imgHeightPoints, 1.0) - finalWidth = imgWidthPoints * scale - finalHeight = imgHeightPoints * scale - - # Center image - left = (slideWidth - finalWidth) / 2 - top = titleHeight + (availableHeight - finalHeight) / 2 - - imageStream.seek(0) - except Exception: - # Fallback: use default size - finalWidth = Inches(6) - finalHeight = Inches(4.5) - left = (slideWidth - finalWidth) / 2 - top = titleHeight + Inches(1) - imageStream.seek(0) + # Scale to fit available space (max 90% of slide for better visibility) + # Convert PIL pixels to PowerPoint points (1 inch = 72 points, typical screen DPI = 96) + # Conversion: pixels * (72/96) = points + imgWidthPoints = imgWidth * (72.0 / 96.0) + imgHeightPoints = imgHeight * (72.0 / 96.0) - # Add image to slide - try: + maxWidth = availableWidth * 0.9 + maxHeight = availableHeight * 0.9 + + scale = min(maxWidth / imgWidthPoints, maxHeight / imgHeightPoints, 1.0) + finalWidth = imgWidthPoints * scale + finalHeight = imgHeightPoints * scale + + # Center image + left = (slideWidth - finalWidth) / 2 + top = titleHeight + (availableHeight - finalHeight) / 2 + + imageStream.seek(0) + except Exception: + # Fallback: use default size + finalWidth = Inches(6) + finalHeight = Inches(4.5) + left = (slideWidth - finalWidth) / 2 + top = titleHeight + Inches(1) + imageStream.seek(0) + + # Add image to slide + try: + slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight) + except Exception as add_error: + # If add_picture fails, try with explicit format + imageStream.seek(0) + # Ensure we have valid image data + if len(imageBytes) > 0: slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight) - except Exception as add_error: - # If add_picture fails, try with explicit format - imageStream.seek(0) - # Ensure we have valid image data - if len(imageBytes) > 0: - slide.shapes.add_picture(imageStream, left, top, width=finalWidth, height=finalHeight) - else: - raise Exception(f"Empty image data: {add_error}") - - # Add caption if available - caption = img.get("caption") or img.get("altText") - if caption and caption != "Image": - # Add text box below image - captionTop = top + finalHeight + Inches(0.2) - captionBox = slide.shapes.add_textbox( - Inches(1), - captionTop, - slideWidth - Inches(2), - Inches(0.5) - ) - captionFrame = captionBox.text_frame - captionFrame.text = caption - captionFrame.paragraphs[0].font.size = Pt(12) - captionFrame.paragraphs[0].font.italic = True - captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER + else: + raise Exception(f"Empty image data: {add_error}") + + # Add caption if available + caption = img.get("caption") or img.get("altText") + if caption and caption != "Image": + # Add text box below image + captionTop = top + finalHeight + Inches(0.2) + captionBox = slide.shapes.add_textbox( + Inches(1), + captionTop, + slideWidth - Inches(2), + Inches(0.5) + ) + captionFrame = captionBox.text_frame + captionFrame.text = caption + captionFrame.paragraphs[0].font.size = Pt(12) + captionFrame.paragraphs[0].font.italic = True + captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER else: # Multiple images: arrange in grid cols = 2 if len(images) <= 4 else 3 @@ -1267,7 +1200,7 @@ JSON ONLY. NO OTHER TEXT.""" import traceback logger.error(f"Traceback: {traceback.format_exc()}") - def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float) -> None: + def _addTableToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], top: float, max_width: float = None) -> None: """Add a PowerPoint table to slide.""" try: from pptx.util import Inches, Pt @@ -1286,25 +1219,27 @@ JSON ONLY. NO OTHER TEXT.""" return # Calculate table dimensions - num_cols = len(headers) - num_rows = len(rows) + 1 # +1 for header row + num_cols = int(len(headers)) # Ensure integer + num_rows = int(len(rows) + 1) # +1 for header row, ensure integer left = Inches(0.5) # Get presentation from stored reference or slide if hasattr(self, '_currentPresentation'): prs = self._currentPresentation else: prs = slide.presentation - width = prs.slide_width - Inches(1) + width = max_width if max_width is not None else (prs.slide_width - Inches(1)) row_height = Inches(0.4) - # Create table - table_shape = slide.shapes.add_table(num_rows, num_cols, left, top, width, row_height * num_rows) + # Create table - ensure all parameters are proper types + table_height = row_height * num_rows + table_shape = slide.shapes.add_table(num_rows, num_cols, left, top, width, table_height) table = table_shape.table - # Set column widths - col_width = width / num_cols + # Set column widths - width is in EMU, divide evenly + # python-pptx expects EMU values (914400 EMU = 1 inch) + col_width_emu = int(width) // num_cols # Ensure integer division for EMU for col_idx in range(num_cols): - table.columns[col_idx].width = col_width + table.columns[col_idx].width = col_width_emu # Add headers with styling header_style = styles.get("table_header", {}) @@ -1314,20 +1249,33 @@ JSON ONLY. NO OTHER TEXT.""" for col_idx, header in enumerate(headers): cell = table.cell(0, col_idx) - cell.text = str(header) + # Clear existing text and set new text + cell.text_frame.clear() + cell.text = str(header) if header else "" + + # Ensure paragraph exists + if len(cell.text_frame.paragraphs) == 0: + cell.text_frame.add_paragraph() + + # Apply styling cell.fill.solid() cell.fill.fore_color.rgb = RGBColor(*header_bg_color) - cell.text_frame.paragraphs[0].font.bold = header_style.get("bold", True) - cell.text_frame.paragraphs[0].font.size = Pt(header_font_size) - cell.text_frame.paragraphs[0].font.color.rgb = RGBColor(*header_text_color) + para = cell.text_frame.paragraphs[0] + para.font.bold = header_style.get("bold", True) + para.font.size = Pt(header_font_size) + para.font.color.rgb = RGBColor(*header_text_color) align = header_style.get("align", "center") if align == "left": - cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT + para.alignment = PP_ALIGN.LEFT elif align == "right": - cell.text_frame.paragraphs[0].alignment = PP_ALIGN.RIGHT + para.alignment = PP_ALIGN.RIGHT else: - cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER + para.alignment = PP_ALIGN.CENTER + + # Ensure text is set on paragraph + if not para.text: + para.text = str(header) if header else "" # Add data rows with styling cell_style = styles.get("table_cell", {}) @@ -1338,25 +1286,38 @@ JSON ONLY. NO OTHER TEXT.""" for row_idx, row_data in enumerate(rows, 1): for col_idx, cell_data in enumerate(row_data[:num_cols]): cell = table.cell(row_idx, col_idx) - cell.text = str(cell_data) + # Clear existing text and set new text + cell.text_frame.clear() + cell.text = str(cell_data) if cell_data is not None else "" + + # Ensure paragraph exists + if len(cell.text_frame.paragraphs) == 0: + cell.text_frame.add_paragraph() + + # Apply styling cell.fill.solid() cell.fill.fore_color.rgb = RGBColor(*cell_bg_color) - cell.text_frame.paragraphs[0].font.size = Pt(cell_font_size) - cell.text_frame.paragraphs[0].font.bold = cell_style.get("bold", False) - cell.text_frame.paragraphs[0].font.color.rgb = RGBColor(*cell_text_color) + para = cell.text_frame.paragraphs[0] + para.font.size = Pt(cell_font_size) + para.font.bold = cell_style.get("bold", False) + para.font.color.rgb = RGBColor(*cell_text_color) align = cell_style.get("align", "left") if align == "center": - cell.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER + para.alignment = PP_ALIGN.CENTER elif align == "right": - cell.text_frame.paragraphs[0].alignment = PP_ALIGN.RIGHT + para.alignment = PP_ALIGN.RIGHT else: - cell.text_frame.paragraphs[0].alignment = PP_ALIGN.LEFT + para.alignment = PP_ALIGN.LEFT + + # Ensure text is set on paragraph + if not para.text: + para.text = str(cell_data) if cell_data is not None else "" except Exception as e: logger.warning(f"Error adding table to slide: {str(e)}") - def _addBulletListToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + def _addBulletListToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: """Add bullet list to slide text frame.""" try: from pptx.util import Pt @@ -1373,31 +1334,91 @@ JSON ONLY. NO OTHER TEXT.""" return list_style = styles.get("bullet_list", {}) - for item in items: - p = text_frame.add_paragraph() - if isinstance(item, dict): - p.text = item.get("text", "") - else: - p.text = str(item) - - p.level = 0 - p.font.size = Pt(list_style.get("font_size", 18)) - p.font.color.rgb = RGBColor(*self._getSafeColor(list_style.get("color", (47, 47, 47)))) - p.alignment = PP_ALIGN.LEFT # Left align bullet lists - p.space_before = Pt(6) - # Enable bullet points - set bullet type to enable bullets + base_font_size = list_style.get("font_size", 14) + calculated_size = max(10, int(base_font_size * font_size_multiplier)) # Minimum 10pt for readability + + logger.debug(f"Rendering bullet list with {len(items)} items") + + for idx, item in enumerate(items): try: - from pptx.enum.text import MSO_AUTO_NUMBER - p.paragraph_format.bullet.type = MSO_AUTO_NUMBER.BULLET - except (ImportError, AttributeError): - # Fallback: bullets are usually enabled by default when level is set - # Just ensure level is set (already done above) - pass + # Get text content first + if isinstance(item, dict): + item_text = item.get("text", "") + else: + item_text = str(item) + + # Skip empty items + if not item_text or len(item_text.strip()) == 0: + logger.debug(f"Skipping empty bullet item {idx}") + continue + + # Create new paragraph for each bullet item + p = text_frame.add_paragraph() + + # Set level to 1 for bullet points BEFORE setting text + # In python-pptx, setting level > 0 should automatically enable bullets + p.level = 1 + + # Set text content + p.text = item_text + + # Apply formatting first + p.font.size = Pt(calculated_size) + p.font.color.rgb = RGBColor(*self._getSafeColor(list_style.get("color", (47, 47, 47)))) + p.alignment = PP_ALIGN.LEFT # Left align bullet lists + p.space_before = Pt(2) # Small spacing before + p.space_after = Pt(2) # Small spacing after + + # In python-pptx, setting level > 0 should enable bullets automatically + # However, some versions may not support paragraph_format, so we'll use manual bullets as fallback + # Always add manual bullet character to ensure visibility + if not (p.text.startswith('•') or p.text.startswith('-') or p.text.startswith('*') or p.text.startswith('◦')): + p.text = '• ' + p.text + logger.debug(f"Added manual bullet character to item {idx}") + + # Set proper indentation for multiline bullets (hanging indent) + # For multiline bullets: bullet at left margin, text indented, wrapped lines align with text + try: + # Try accessing paragraph_format - it may not exist in all python-pptx versions + if hasattr(p, 'paragraph_format'): + pf = p.paragraph_format + # Left indent: indents the entire paragraph (bullet + text) + pf.left_indent = Pt(18) + # First line indent: negative value creates hanging indent + # This brings the bullet back to the left while keeping text indented + pf.first_line_indent = Pt(-18) # Negative to create hanging indent + logger.debug(f"Set hanging indent for bullet item {idx}") + else: + # Try via _element if paragraph_format not available + try: + from pptx.util import Pt as PtUtil + pPr = p._element.get_or_add_pPr() + # Set left margin (indents entire paragraph) + pPr.left_margin = PtUtil(18) + # Set first line indent (negative for hanging indent) + pPr.first_line_indent = PtUtil(-18) + logger.debug(f"Set hanging indent via XML for bullet item {idx}") + except Exception as xml_error: + logger.debug(f"Could not set hanging indent via XML: {str(xml_error)}") + # Indentation is optional, continue without it + pass + except Exception as indent_error: + logger.debug(f"Could not set indent for item {idx}: {str(indent_error)}") + # Continue without indent - bullets will still show, but multiline won't be properly indented + + logger.debug(f"Successfully added bullet item {idx}: '{item_text[:50]}...'") + + except Exception as item_error: + logger.error(f"Error adding bullet item {idx}: {str(item_error)}", exc_info=True) + # Continue with next item even if one fails + continue + + logger.debug(f"Completed rendering bullet list, added {len(text_frame.paragraphs)} paragraphs") except Exception as e: logger.warning(f"Error adding bullet list to slide: {str(e)}") - def _addHeadingToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + def _addHeadingToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: """Add heading to slide text frame.""" try: from pptx.util import Pt @@ -1414,17 +1435,32 @@ JSON ONLY. NO OTHER TEXT.""" if text: p = text_frame.add_paragraph() p.text = text - p.level = min(level - 1, 2) # PowerPoint supports 0-2 levels + # Headings should be level 0 (no indentation) regardless of heading level + p.level = 0 heading_style = styles.get("heading", {}) - p.font.size = Pt(heading_style.get("font_size", 32)) + # Different font sizes for different heading levels + if level == 1: + base_font_size = heading_style.get("font_size", 28) # Largest for H1 + elif level == 2: + base_font_size = heading_style.get("font_size", 22) # Medium for H2 + elif level == 3: + base_font_size = heading_style.get("font_size", 18) # Smaller for H3 + else: + base_font_size = heading_style.get("font_size", 16) # Default for H4+ + + calculated_size = max(12, int(base_font_size * font_size_multiplier)) # Minimum 12pt for headings + p.font.size = Pt(calculated_size) p.font.bold = heading_style.get("bold", True) - p.font.color.rgb = RGBColor(*self._getSafeColor(heading_style.get("color", (47, 47, 47)))) + p.font.color.rgb = RGBColor(*self._getSafeColor(heading_style.get("color", (31, 78, 121)))) + # Add spacing before and after headings + p.space_before = Pt(12 if level == 1 else 8) # More space before H1 + p.space_after = Pt(6) # Space after heading except Exception as e: logger.warning(f"Error adding heading to slide: {str(e)}") - def _addParagraphToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + def _addParagraphToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: """Add paragraph to slide text frame.""" try: from pptx.util import Pt @@ -1443,12 +1479,28 @@ JSON ONLY. NO OTHER TEXT.""" if text: p = text_frame.add_paragraph() p.text = text + # Explicitly set level to 0 for regular paragraphs (not bullets) + p.level = 0 + + # Ensure no bullet formatting + try: + if hasattr(p, 'paragraph_format'): + p.paragraph_format.bullet.type = None + except (AttributeError, TypeError): + pass paragraph_style = styles.get("paragraph", {}) - p.font.size = Pt(paragraph_style.get("font_size", 18)) + base_font_size = paragraph_style.get("font_size", 14) # Smaller default for better readability + calculated_size = max(10, int(base_font_size * font_size_multiplier)) # Minimum 10pt for readability + p.font.size = Pt(calculated_size) p.font.bold = paragraph_style.get("bold", False) p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) + # Add proper spacing + p.space_before = Pt(6) # Space before paragraph + p.space_after = Pt(6) # Space after paragraph + p.line_spacing = 1.2 # Line spacing for readability + align = paragraph_style.get("align", "left") if align == "center": p.alignment = PP_ALIGN.CENTER @@ -1460,7 +1512,7 @@ JSON ONLY. NO OTHER TEXT.""" except Exception as e: logger.warning(f"Error adding paragraph to slide: {str(e)}") - def _addCodeBlockToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame) -> None: + def _addCodeBlockToSlide(self, slide, element: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: """Add code block to slide text frame.""" try: from pptx.util import Pt @@ -1477,13 +1529,15 @@ JSON ONLY. NO OTHER TEXT.""" if code: code_style = styles.get("code_block", {}) code_font = code_style.get("font", "Courier New") - code_font_size = code_style.get("font_size", 9) + base_code_font_size = code_style.get("font_size", 9) + code_font_size = max(6, int(base_code_font_size * font_size_multiplier)) # Minimum 6pt for code code_color = self._getSafeColor(code_style.get("color", (47, 47, 47))) p = text_frame.add_paragraph() if language: p.text = f"Code ({language}):" p.font.bold = True + p.font.size = Pt(code_font_size) p = text_frame.add_paragraph() p.text = code @@ -1498,3 +1552,593 @@ JSON ONLY. NO OTHER TEXT.""" """Format current timestamp for presentation generation.""" # datetime and UTC are already imported at module level return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC") + + def _renderSlideContentWithFrames(self, slide, slide_sections: List[Dict[str, Any]], slide_images: List[Dict[str, Any]], styles: Dict[str, Any], prs) -> None: + """ + Organize slide content into frames for better layout. + Groups content by type (images, bullet lists, paragraphs, tables) and renders each in appropriately sized frames. + """ + try: + from pptx.util import Inches, Pt + from pptx.enum.text import PP_ALIGN + from pptx.dml.color import RGBColor + + # Extract images from sections first + images_to_render = list(slide_images) if slide_images else [] + text_sections = [] + table_sections = [] + + for section in slide_sections: + section_type = section.get("content_type", "paragraph") + elements = section.get("elements", []) + + if not elements: + # Skip empty sections + continue + + # Extract images from all sections + section_has_images = False + for element in elements: + if isinstance(element, dict) and element.get("type") == "image": + content = element.get("content", {}) + base64Data = None + + # Handle different content formats + if isinstance(content, dict): + base64Data = content.get("base64Data") + altText = content.get("altText", "Image") + caption = content.get("caption", "") + elif isinstance(content, str): + # If content is a string, it might be base64 data directly + # Check if it looks like base64 + if len(content) > 100 and all(c in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" for c in content[:100]): + base64Data = content + altText = "Image" + caption = "" + else: + # Not base64, skip + continue + else: + # Try to get base64Data directly from element + base64Data = element.get("base64Data") + altText = element.get("altText", "Image") + caption = element.get("caption", "") + + if base64Data: + images_to_render.append({ + "base64Data": base64Data, + "altText": altText, + "caption": caption + }) + section_has_images = True + + # Skip image-only sections (they're already added to images_to_render) + if section_type == "image" and section_has_images: + continue + + # Categorize sections (excluding image elements) + has_table = False + non_image_elements = [] + + for element in elements: + if isinstance(element, dict): + element_type = element.get("type", "") + # Skip image elements when categorizing + if element_type == "image": + continue + if element_type == "table" or section_type == "table": + has_table = True + non_image_elements.append(element) + + # Only add sections that have non-image content + if non_image_elements: + if has_table: + # Create a copy of section without image elements for table rendering + table_section = { + **section, + "elements": non_image_elements + } + table_sections.append(table_section) + else: + # Create a copy of section without image elements for text rendering + text_section = { + **section, + "elements": non_image_elements + } + text_sections.append(text_section) + + # Calculate layout dimensions + title_height = Inches(1.5) + available_height = prs.slide_height - title_height - Inches(0.5) # Title + margin + available_width = prs.slide_width - Inches(1) # Margins + margin = Inches(0.5) + + current_y = title_height + Inches(0.3) + + # Determine layout strategy based on content types + has_images = len(images_to_render) > 0 + has_tables = len(table_sections) > 0 + has_text = len(text_sections) > 0 + + # Layout 1: Images + Text (horizontal split for landscape) + if has_images and has_text and not has_tables: + # Horizontal split: images on left, text on right (landscape format) + img_width = available_width * 0.48 + text_width = available_width * 0.48 + img_left = margin + text_left = margin + img_width + Inches(0.2) + + # Render images in left column (full height) + if images_to_render: + img_height = available_height - Inches(0.2) + self._addImagesToSlideInFrame(slide, images_to_render, styles, img_left, current_y, img_width, img_height) + + # Render text in right column (full height, adaptive font size) + if text_sections: + text_height = available_height - Inches(0.2) + self._renderTextSectionsInFrame(slide, text_sections, styles, text_left, current_y, text_width, text_height, adaptiveFontSize=True) + + # Layout 2: Tables + Text (horizontal split for landscape) + elif has_tables and has_text: + # Horizontal split: tables on left, text on right (landscape format) + table_width = available_width * 0.48 + text_width = available_width * 0.48 + table_left = margin + text_left = margin + table_width + Inches(0.2) + + # Render tables in left column (full height) + table_y = current_y + for table_section in table_sections: + elements = table_section.get("elements", []) + for element in elements: + if isinstance(element, dict) and element.get("type") == "table": + try: + self._addTableToSlide(slide, element, styles, table_y, max_width=table_width) + # Calculate actual table height + content = element.get("content", {}) + if isinstance(content, dict): + rows = content.get("rows", []) + num_rows = len(rows) + 1 # +1 for header + actual_height = Inches(0.4) * num_rows + table_y += actual_height + Inches(0.15) + else: + table_y += Inches(2) + except Exception as table_error: + logger.error(f"Error rendering table: {str(table_error)}") + # Continue with next table + break + + # Render text in right column (full height, adaptive font size) + if text_sections: + text_height = available_height - Inches(0.2) + self._renderTextSectionsInFrame(slide, text_sections, styles, text_left, current_y, text_width, text_height, adaptiveFontSize=True) + + # Layout 3: Images + Tables + Text (horizontal split for landscape) + elif has_images and has_tables and has_text: + # Horizontal split: Images (left), Tables (middle), Text (right) + img_width = available_width * 0.31 + table_width = available_width * 0.31 + text_width = available_width * 0.31 + img_left = margin + table_left = margin + img_width + Inches(0.15) + text_left = margin + img_width + table_width + Inches(0.3) + + # Render images in left column (full height) + if images_to_render: + img_height = available_height - Inches(0.2) + self._addImagesToSlideInFrame(slide, images_to_render, styles, img_left, current_y, img_width, img_height) + + # Render tables in middle column (full height) + table_y = current_y + for table_section in table_sections: + elements = table_section.get("elements", []) + for element in elements: + if isinstance(element, dict) and element.get("type") == "table": + try: + self._addTableToSlide(slide, element, styles, table_y, max_width=table_width) + content = element.get("content", {}) + if isinstance(content, dict): + rows = content.get("rows", []) + num_rows = len(rows) + 1 + actual_height = Inches(0.4) * num_rows + table_y += actual_height + Inches(0.15) + else: + table_y += Inches(2) + except Exception as table_error: + logger.error(f"Error rendering table: {str(table_error)}") + break + + # Render text in right column (full height, adaptive font size) + if text_sections: + text_height = available_height - Inches(0.2) + self._renderTextSectionsInFrame(slide, text_sections, styles, text_left, current_y, text_width, text_height, adaptiveFontSize=True) + + # Layout 4: Images only + elif has_images and not has_text and not has_tables: + img_width = available_width * 0.8 + img_height = available_height * 0.8 + img_left = (available_width - img_width) / 2 + margin + self._addImagesToSlideInFrame(slide, images_to_render, styles, img_left, current_y, img_width, img_height) + + # Layout 5: Text only (default, adaptive font size) + elif has_text and not has_images and not has_tables: + text_height = available_height - Inches(0.2) + self._renderTextSectionsInFrame(slide, text_sections, styles, margin, current_y, available_width, text_height, adaptiveFontSize=True) + + # Layout 6: Tables only + elif has_tables and not has_images and not has_text: + table_height = available_height / max(len(table_sections), 1) + table_width = available_width + for table_section in table_sections: + elements = table_section.get("elements", []) + for element in elements: + if isinstance(element, dict) and element.get("type") == "table": + try: + self._addTableToSlide(slide, element, styles, current_y, max_width=table_width) + # Calculate actual table height + content = element.get("content", {}) + if isinstance(content, dict): + rows = content.get("rows", []) + num_rows = len(rows) + 1 # +1 for header + actual_height = min(Inches(0.4) * num_rows, table_height) + current_y += actual_height + Inches(0.2) + else: + current_y += table_height + Inches(0.2) + except Exception as table_error: + logger.error(f"Error rendering table: {str(table_error)}") + # Continue with next table + break + + except Exception as e: + logger.error(f"Error rendering slide content with frames: {str(e)}") + # Fallback to simple rendering + try: + content_shape = slide.placeholders[1] + text_frame = content_shape.text_frame + text_frame.clear() + except (AttributeError, IndexError): + from pptx.util import Inches + left = Inches(0.5) + top = Inches(1.5) + width = prs.slide_width - Inches(1) + height = prs.slide_height - top - Inches(0.5) + textbox = slide.shapes.add_textbox(left, top, width, height) + text_frame = textbox.text_frame + text_frame.word_wrap = True + + # Simple fallback rendering + for section in slide_sections: + self._renderSectionToTextFrame(slide, section, styles, text_frame, font_size_multiplier=1.0) + + def _renderTextSectionsInFrame(self, slide, text_sections: List[Dict[str, Any]], styles: Dict[str, Any], left: float, top: float, width: float, height: float, adaptiveFontSize: bool = False) -> None: + """Render text sections (paragraphs, lists, headings) in a text frame.""" + try: + from pptx.util import Inches, Pt + from pptx.enum.text import PP_ALIGN + from pptx.dml.color import RGBColor + + # Calculate total text length for adaptive font sizing + total_text_length = 0 + if adaptiveFontSize: + for section in text_sections: + elements = section.get("elements", []) + for element in elements: + if isinstance(element, dict): + element_type = element.get("type", "") + if element_type in ["paragraph", "bullet_list", "list", "heading"]: + content = element.get("content", "") + if isinstance(content, dict): + if "text" in content: + total_text_length += len(str(content["text"])) + elif "items" in content: + for item in content.get("items", []): + total_text_length += len(str(item)) + elif isinstance(content, str): + total_text_length += len(content) + + # Calculate adaptive font size multiplier based on text length and frame size + font_size_multiplier = 1.0 + if adaptiveFontSize and total_text_length > 0: + try: + # More accurate calculation: estimate characters per line based on average character width + # Average character width is approximately 0.6 * font_size in points + # For 14pt font, average char width ≈ 8.4pt + avg_char_width_pt = 8.4 # Approximate for 14pt font + chars_per_line = max(1, int(float(width) / avg_char_width_pt)) + + # Estimate lines needed + lines_needed = total_text_length / max(chars_per_line, 1) + + # Available lines based on height (line height ≈ 1.2 * font_size) + line_height_pt = 16.8 # Approximate for 14pt font with 1.2 spacing + available_lines = max(1, int(float(height) / line_height_pt)) + + if available_lines > 0 and lines_needed > available_lines: + # More aggressive scaling for long texts + # Calculate exact scale needed, then add 10% buffer + scale_needed = available_lines / lines_needed + font_size_multiplier = scale_needed * 0.9 # 10% buffer + # Allow scaling down to 50% for very long texts (minimum readable) + font_size_multiplier = max(0.5, min(1.0, font_size_multiplier)) + elif lines_needed <= available_lines * 0.7: + # If text is much shorter than available space, can use slightly larger font + font_size_multiplier = min(1.1, (available_lines / lines_needed) * 0.8) + except (ZeroDivisionError, ValueError, TypeError) as calc_error: + logger.debug(f"Font size calculation error: {str(calc_error)}") + # Fallback to default if calculation fails + font_size_multiplier = 1.0 + + textbox = slide.shapes.add_textbox(left, top, width, height) + text_frame = textbox.text_frame + text_frame.word_wrap = True + text_frame.auto_size = None # Disable auto-size for fixed frame + # Ensure text frame can display bullets + text_frame.margin_left = Pt(0) + text_frame.margin_right = Pt(0) + text_frame.margin_top = Pt(0) + text_frame.margin_bottom = Pt(0) + + # Pass font size multiplier to rendering methods + for section in text_sections: + self._renderSectionToTextFrame(slide, section, styles, text_frame, font_size_multiplier) + + except Exception as e: + logger.warning(f"Error rendering text sections in frame: {str(e)}") + + def _renderSectionToTextFrame(self, slide, section: Dict[str, Any], styles: Dict[str, Any], text_frame, font_size_multiplier: float = 1.0) -> None: + """Render a single section to a text frame.""" + try: + from pptx.util import Pt + from pptx.enum.text import PP_ALIGN + from pptx.dml.color import RGBColor + + section_type = section.get("content_type", "paragraph") + elements = section.get("elements", []) + + if not elements: + return + + for element in elements: + if not isinstance(element, dict): + continue + + element_type = element.get("type", "") + if not element_type: + element_type = section_type + + # Skip images - handled separately + if element_type == "image": + continue + + if element_type == "bullet_list" or element_type == "list": + self._addBulletListToSlide(slide, element, styles, text_frame, font_size_multiplier) + elif element_type == "heading": + self._addHeadingToSlide(slide, element, styles, text_frame, font_size_multiplier) + elif element_type == "paragraph": + self._addParagraphToSlide(slide, element, styles, text_frame, font_size_multiplier) + elif element_type == "code_block" or element_type == "code": + self._addCodeBlockToSlide(slide, element, styles, text_frame, font_size_multiplier) + elif element_type == "extracted_text": + content = element.get("content", "") + source = element.get("source", "") + if content: + paragraph_style = styles.get("paragraph", {}) + p = text_frame.add_paragraph() + p.text = content + base_font_size = paragraph_style.get("font_size", 18) + p.font.size = Pt(int(base_font_size * font_size_multiplier)) + p.font.bold = paragraph_style.get("bold", False) + p.font.color.rgb = RGBColor(*self._getSafeColor(paragraph_style.get("color", (47, 47, 47)))) + p.alignment = PP_ALIGN.LEFT + if source: + p.add_run(f" (Source: {source})").font.italic = True + elif element_type == "reference": + label = element.get("label", "Reference") + p = text_frame.add_paragraph() + p.text = f"[Reference: {label}]" + p.font.italic = True + p.alignment = PP_ALIGN.LEFT + else: + # Fallback to paragraph + content = element.get("content", "") + if isinstance(content, dict): + text = content.get("text", "") + elif isinstance(content, str): + text = content + else: + text = "" + + if text: + self._addParagraphToSlide(slide, element, styles, text_frame, font_size_multiplier=1.0) + + except Exception as e: + logger.warning(f"Error rendering section to text frame: {str(e)}") + + def _addImagesToSlideInFrame(self, slide, images: List[Dict[str, Any]], styles: Dict[str, Any], left: float, top: float, width: float, height: float) -> None: + """Add images to slide within a specific frame area.""" + try: + from pptx.util import Inches, Pt + from pptx.enum.text import PP_ALIGN + import base64 + import io + + if not images: + logger.debug("No images to render in frame") + return + + logger.info(f"Rendering {len(images)} image(s) in frame at ({left}, {top}), size ({width}, {height})") + + # Calculate image dimensions within frame + if len(images) == 1: + # Single image: fit to frame + img = images[0] + base64Data = img.get("base64Data") + + if not base64Data: + logger.warning("Image has no base64Data") + return + + # Clean base64 data (remove data URI prefix if present) + if isinstance(base64Data, str): + if base64Data.startswith("data:image/"): + # Extract base64 from data URI + base64Data = base64Data.split(",", 1)[1] + # Remove any whitespace + base64Data = base64Data.strip() + + try: + # Decode base64 + imageBytes = base64.b64decode(base64Data, validate=True) + if len(imageBytes) == 0: + logger.error("Decoded image bytes are empty") + return + + imageStream = io.BytesIO(imageBytes) + + # Get image dimensions using PIL + imgWidth, imgHeight = None, None + try: + from PIL import Image as PILImage + pilImage = PILImage.open(imageStream) + imgWidth, imgHeight = pilImage.size + imageStream.seek(0) # Reset stream for PowerPoint + + # Validate image dimensions - ensure they're reasonable + if imgWidth <= 1 or imgHeight <= 1: + logger.warning(f"Image has invalid dimensions: {imgWidth}x{imgHeight}, using default size") + imgWidth, imgHeight = 800, 600 + imageStream.seek(0) + elif imgWidth < 100 or imgHeight < 100: + logger.warning(f"Image dimensions very small: {imgWidth}x{imgHeight}, may appear tiny") + except ImportError: + logger.warning("PIL not available, using default image size") + imgWidth, imgHeight = 800, 600 # Default dimensions + except Exception as pil_error: + logger.warning(f"Error getting image dimensions with PIL: {str(pil_error)}, using default size") + imgWidth, imgHeight = 800, 600 + imageStream.seek(0) + + # Ensure we have valid dimensions + if not imgWidth or not imgHeight or imgWidth <= 1 or imgHeight <= 1: + logger.warning("Invalid image dimensions, using default 800x600") + imgWidth, imgHeight = 800, 600 + + # Scale to fit frame while maintaining aspect ratio + # width and height parameters are already in Inches (from pptx.util.Inches) + # Convert PIL pixel dimensions to Inches (assuming 96 DPI for PIL images) + imgWidthInches = Inches(imgWidth / 96.0) + imgHeightInches = Inches(imgHeight / 96.0) + + # Calculate scale to fit within frame + # Inches objects support division, result is a float + try: + scale_width = width / imgWidthInches if imgWidthInches > 0 else 1.0 + scale_height = height / imgHeightInches if imgHeightInches > 0 else 1.0 + scale = min(scale_width, scale_height, 1.0) # Don't scale up, only down + + finalWidth = imgWidthInches * scale + finalHeight = imgHeightInches * scale + + # Ensure minimum size (at least 1 inch) to prevent tiny rendering + minSize = Inches(1) + if finalWidth < minSize or finalHeight < minSize: + # Use minimum size while maintaining aspect ratio + min_scale = max(minSize / imgWidthInches if imgWidthInches > 0 else 1.0, + minSize / imgHeightInches if imgHeightInches > 0 else 1.0) + finalWidth = max(minSize, imgWidthInches * min_scale) + finalHeight = max(minSize, imgHeightInches * min_scale) + + # Ensure we don't exceed frame bounds + if finalWidth > width: + finalWidth = width + finalHeight = imgHeightInches * (width / imgWidthInches) if imgWidthInches > 0 else finalHeight + if finalHeight > height: + finalHeight = height + finalWidth = imgWidthInches * (height / imgHeightInches) if imgHeightInches > 0 else finalWidth + except (ZeroDivisionError, TypeError, AttributeError) as calc_error: + logger.warning(f"Error calculating image size: {str(calc_error)}, using frame size") + finalWidth = width * 0.9 # Use 90% of frame width + finalHeight = height * 0.9 # Use 90% of frame height + + # Center in frame + frame_left = left + (width - finalWidth) / 2 + frame_top = top + (height - finalHeight) / 2 + + # Add image to slide + imageStream.seek(0) + slide.shapes.add_picture(imageStream, frame_left, frame_top, width=finalWidth, height=finalHeight) + logger.info(f"Successfully added image to slide at ({frame_left}, {frame_top}), size ({finalWidth}, {finalHeight})") + + # Add caption if available + caption = img.get("caption") or img.get("altText") + if caption and caption != "Image": + captionTop = frame_top + finalHeight + Inches(0.1) + captionBox = slide.shapes.add_textbox(left, captionTop, width, Inches(0.4)) + captionFrame = captionBox.text_frame + captionFrame.text = caption + captionFrame.paragraphs[0].font.size = Pt(10) + captionFrame.paragraphs[0].font.italic = True + captionFrame.paragraphs[0].alignment = PP_ALIGN.CENTER + except base64.binascii.Error as b64_error: + logger.error(f"Invalid base64 data: {str(b64_error)}") + except Exception as img_error: + logger.error(f"Error adding image to frame: {str(img_error)}", exc_info=True) + else: + # Multiple images: grid layout + cols = 2 if len(images) <= 4 else 3 + rows = (len(images) + cols - 1) // cols + imgWidth = (width - Inches(0.2) * (cols - 1)) / cols + imgHeight = (height - Inches(0.2) * (rows - 1)) / rows + + for idx, img in enumerate(images): + base64Data = img.get("base64Data") + if not base64Data: + logger.warning(f"Image {idx} has no base64Data") + continue + + # Clean base64 data + if isinstance(base64Data, str): + if base64Data.startswith("data:image/"): + base64Data = base64Data.split(",", 1)[1] + base64Data = base64Data.strip().replace("\n", "").replace("\r", "").replace("\t", "").replace(" ", "") + + row = idx // cols + col = idx % cols + img_left = left + col * (imgWidth + Inches(0.2)) + img_top = top + row * (imgHeight + Inches(0.2)) + + try: + imageBytes = base64.b64decode(base64Data, validate=True) + if len(imageBytes) == 0: + logger.error(f"Decoded image {idx} bytes are empty") + continue + + imageStream = io.BytesIO(imageBytes) + + # Try to get dimensions for better scaling + try: + from PIL import Image as PILImage + pilImage = PILImage.open(imageStream) + imgW, imgH = pilImage.size + # Scale to fit grid cell while maintaining aspect ratio + scale = min(imgWidth / (imgW * (72.0 / 96.0)), imgHeight / (imgH * (72.0 / 96.0)), 1.0) + finalW = (imgW * (72.0 / 96.0)) * scale + finalH = (imgH * (72.0 / 96.0)) * scale + # Center in grid cell + cell_left = img_left + (imgWidth - finalW) / 2 + cell_top = img_top + (imgHeight - finalH) / 2 + imageStream.seek(0) + slide.shapes.add_picture(imageStream, cell_left, cell_top, width=finalW, height=finalH) + except (ImportError, Exception): + # Fallback: use grid cell size directly + imageStream.seek(0) + slide.shapes.add_picture(imageStream, img_left, img_top, width=imgWidth, height=imgHeight) + + logger.info(f"Successfully added image {idx+1}/{len(images)} to slide grid") + except base64.binascii.Error as b64_error: + logger.error(f"Invalid base64 data for image {idx}: {str(b64_error)}") + except Exception as img_error: + logger.error(f"Error adding image {idx} to frame: {str(img_error)}", exc_info=True) + + except Exception as e: + logger.error(f"Error adding images to slide frame: {str(e)}", exc_info=True) diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py index c1992f94..24c620d2 100644 --- a/modules/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py @@ -535,6 +535,45 @@ class RendererXlsx(BaseRenderer): self.logger.warning(f"AI styling failed: {str(e)}, using defaults") return defaultStyles + def _getSafeAlignment(self, alignValue: Any) -> str: + """Get safe alignment value for openpyxl. Valid values: 'left', 'general', 'distributed', 'fill', 'justify', 'center', 'right', 'centerContinuous'.""" + if not alignValue: + return "left" + + alignStr = str(alignValue).lower().strip() + + # Map common alignment values to openpyxl values + alignmentMap = { + "left": "left", + "right": "right", + "center": "center", + "centre": "center", + "general": "general", + "distributed": "distributed", + "fill": "fill", + "justify": "justify", + "centercontinuous": "centerContinuous", + "center-continuous": "centerContinuous", + "start": "left", + "end": "right", + "middle": "center" + } + + # Check direct mapping + if alignStr in alignmentMap: + return alignmentMap[alignStr] + + # Check if it contains alignment keywords + if "left" in alignStr or "start" in alignStr: + return "left" + elif "right" in alignStr or "end" in alignStr: + return "right" + elif "center" in alignStr or "centre" in alignStr or "middle" in alignStr: + return "center" + + # Default to left if unknown + return "left" + def _getSafeColor(self, colorValue: str, default: str = "FF000000") -> str: """Get a safe aRGB color value for Excel (without # prefix).""" if not isinstance(colorValue, str): @@ -603,30 +642,34 @@ class RendererXlsx(BaseRenderer): return sanitized[:31] def _generateSheetNamesFromContent(self, jsonContent: Dict[str, Any]) -> List[str]: - """Generate sheet names: each heading section creates a new tab.""" + """Generate sheet names: each heading level 1 (chapter) creates a new tab.""" sections = self._extractSections(jsonContent) # If no sections, create a single sheet if not sections: return ["Content"] - # Simple logic: each heading section creates a new tab + # Only heading level 1 (chapters) create new tabs sheetNames = [] for section in sections: if section.get("content_type") == "heading": - # Extract heading text from elements + # Extract heading text and level from elements elements = section.get("elements", []) if elements and isinstance(elements, list) and len(elements) > 0: headingElement = elements[0] content = headingElement.get("content", {}) if isinstance(content, dict): headingText = content.get("text", "") + level = content.get("level", 1) elif isinstance(content, str): headingText = content + level = 1 else: headingText = "" + level = 1 - if headingText: + # Only level 1 headings (chapters) create tabs + if headingText and level == 1: sanitized_name = self._sanitizeSheetName(headingText) # Ensure unique sheet names if sanitized_name not in sheetNames: @@ -639,7 +682,7 @@ class RendererXlsx(BaseRenderer): counter += 1 sheetNames.append(f"{base_name} ({counter})"[:31]) - # If no headings found, use document title + # If no level 1 headings found, use document title if not sheetNames: documentTitle = jsonContent.get("metadata", {}).get("title", "Document") sheetNames.append(self._sanitizeSheetName(documentTitle)) @@ -647,7 +690,7 @@ class RendererXlsx(BaseRenderer): return sheetNames def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None: - """Populate Excel sheets: each heading creates a new tab, all following content goes in that tab.""" + """Populate Excel sheets: each heading level 1 (chapter) creates a new tab, all following content goes in that tab.""" try: # Get the actual sheet names that were created (keys are lowercase) sheetNames = list(sheets.keys()) @@ -657,7 +700,7 @@ class RendererXlsx(BaseRenderer): sections = self._extractSections(jsonContent) - # Simple logic: iterate through sections, each heading creates a new tab + # Only heading level 1 (chapters) create new tabs currentSheetIndex = 0 currentSheet = None currentRow = 1 @@ -665,17 +708,28 @@ class RendererXlsx(BaseRenderer): for section in sections: contentType = section.get("content_type", "paragraph") - # Heading section: switch to next sheet + # Heading section: check if it's level 1 (chapter) to switch to next sheet if contentType == "heading": - if currentSheetIndex < len(sheetNames): - sheetName = sheetNames[currentSheetIndex] - currentSheet = sheets[sheetName] # sheets dict uses lowercase keys - currentSheetIndex += 1 - currentRow = 1 # Start at row 1 for new sheet - else: - # More headings than sheets - use last sheet - if sheetNames: - currentSheet = sheets[sheetNames[-1]] + # Extract level from heading element + elements = section.get("elements", []) + level = 1 # Default + if elements and isinstance(elements, list) and len(elements) > 0: + headingElement = elements[0] + content = headingElement.get("content", {}) + if isinstance(content, dict): + level = content.get("level", 1) + + # Only level 1 headings (chapters) create new tabs + if level == 1: + if currentSheetIndex < len(sheetNames): + sheetName = sheetNames[currentSheetIndex] + currentSheet = sheets[sheetName] # sheets dict uses lowercase keys + currentSheetIndex += 1 + currentRow = 1 # Start at row 1 for new sheet + else: + # More headings than sheets - use last sheet + if sheetNames: + currentSheet = sheets[sheetNames[-1]] # Render content in current sheet (or first sheet if no headings yet) if currentSheet is None and sheetNames: @@ -695,7 +749,7 @@ class RendererXlsx(BaseRenderer): sheet['A1'] = sheetTitle title_style = styles.get("title", {}) sheet['A1'].font = Font(size=16, bold=True, color=self._getSafeColor(title_style.get("color", "FF1F4E79"))) - sheet['A1'].alignment = Alignment(horizontal=title_style.get("align", "left")) + sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style.get("align", "left"))) # Get table data from elements (canonical JSON format) elements = section.get("elements", []) @@ -707,8 +761,13 @@ class RendererXlsx(BaseRenderer): headers = [] rows = [] else: - headers = content.get("headers", []) - rows = content.get("rows", []) + headers = content.get("headers") or [] + rows = content.get("rows") or [] + # Ensure headers and rows are lists + if not isinstance(headers, list): + headers = [] + if not isinstance(rows, list): + rows = [] else: headers = [] rows = [] @@ -770,11 +829,11 @@ class RendererXlsx(BaseRenderer): try: safe_color = self._getSafeColor(title_style["color"]) sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color) - sheet['A1'].alignment = Alignment(horizontal=title_style["align"]) + sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"])) except Exception as font_error: # Try with a safe color sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color="FF000000") - sheet['A1'].alignment = Alignment(horizontal=title_style["align"]) + sheet['A1'].alignment = Alignment(horizontal=self._getSafeAlignment(title_style["align"])) # Generation info sheet['A3'] = "Generated:" @@ -892,6 +951,8 @@ class RendererXlsx(BaseRenderer): startRow = self._addHeadingToExcel(sheet, element, styles, startRow) elif element_type == "image": startRow = self._addImageToExcel(sheet, element, styles, startRow) + elif element_type == "code_block" or element_type == "code": + startRow = self._addCodeBlockToExcel(sheet, element, styles, startRow) else: # Fallback: if element_type not set, use section_type if section_type == "table": @@ -904,6 +965,8 @@ class RendererXlsx(BaseRenderer): startRow = self._addHeadingToExcel(sheet, element, styles, startRow) elif section_type == "image": startRow = self._addImageToExcel(sheet, element, styles, startRow) + elif section_type == "code_block" or section_type == "code": + startRow = self._addCodeBlockToExcel(sheet, element, styles, startRow) else: startRow = self._addParagraphToExcel(sheet, element, styles, startRow) @@ -943,9 +1006,16 @@ class RendererXlsx(BaseRenderer): content = element.get("content", {}) if not isinstance(content, dict): return startRow + headers = content.get("headers", []) rows = content.get("rows", []) + # Ensure headers and rows are lists + if not isinstance(headers, list): + headers = [] + if not isinstance(rows, list): + rows = [] + if not headers and not rows: return startRow @@ -965,60 +1035,95 @@ class RendererXlsx(BaseRenderer): sanitized_header = self._sanitizeCellValue(header) cell = sheet.cell(row=headerRow, column=col, value=sanitized_header) - # Font styling - cell.font = Font( - bold=header_style.get("bold", True), - color=self._getSafeColor(header_style.get("text_color", "FF000000")) - ) - - # Background color - if header_style.get("background"): - cell.fill = PatternFill( - start_color=self._getSafeColor(header_style["background"]), - end_color=self._getSafeColor(header_style["background"]), - fill_type="solid" + # Apply styling with fallbacks - don't let styling errors prevent data rendering + try: + # Font styling + cell.font = Font( + bold=header_style.get("bold", True), + color=self._getSafeColor(header_style.get("text_color", "FF000000")) ) + except Exception: + # Fallback to default font if styling fails + try: + cell.font = Font(bold=True, color=self._getSafeColor("FF000000")) + except Exception: + pass # Continue even if font fails - # Alignment - cell.alignment = Alignment( - horizontal=header_style.get("align", "left"), - vertical="center" - ) + try: + # Background color + if header_style.get("background"): + cell.fill = PatternFill( + start_color=self._getSafeColor(header_style["background"]), + end_color=self._getSafeColor(header_style["background"]), + fill_type="solid" + ) + except Exception: + pass # Continue without background color if it fails - # Border - cell.border = thin_border + try: + # Alignment + cell.alignment = Alignment( + horizontal=self._getSafeAlignment(header_style.get("align", "left")), + vertical="center" + ) + except Exception: + # Fallback to default alignment if it fails + try: + cell.alignment = Alignment(horizontal="left", vertical="center") + except Exception: + pass # Continue even if alignment fails + + try: + # Border + cell.border = thin_border + except Exception: + pass # Continue without border if it fails startRow += 1 # Add rows with formatting cell_style = styles.get("table_cell", {}) for row_data in rows: - # Handle different row formats - if isinstance(row_data, list): - cell_values = row_data - elif isinstance(row_data, dict) and "cells" in row_data: - cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])] - else: - continue - - for col, cell_value in enumerate(cell_values, 1): - sanitized_value = self._sanitizeCellValue(cell_value) - cell = sheet.cell(row=startRow, column=col, value=sanitized_value) + # Handle different row formats + if isinstance(row_data, list): + cell_values = row_data + elif isinstance(row_data, dict) and "cells" in row_data: + cell_values = [cell_obj.get("value", "") for cell_obj in row_data.get("cells", [])] + else: + continue - # Font styling - if cell_style.get("text_color"): - cell.font = Font(color=self._getSafeColor(cell_style["text_color"])) + for col, cell_value in enumerate(cell_values, 1): + sanitized_value = self._sanitizeCellValue(cell_value) + cell = sheet.cell(row=startRow, column=col, value=sanitized_value) + + # Apply styling with fallbacks - don't let styling errors prevent data rendering + try: + # Font styling + if cell_style.get("text_color"): + cell.font = Font(color=self._getSafeColor(cell_style["text_color"])) + except Exception: + pass # Continue without font color if it fails + + try: + # Alignment + cell.alignment = Alignment( + horizontal=self._getSafeAlignment(cell_style.get("align", "left")), + vertical="center" + ) + except Exception: + # Fallback to default alignment if it fails + try: + cell.alignment = Alignment(horizontal="left", vertical="center") + except Exception: + pass # Continue even if alignment fails + + try: + # Border + cell.border = thin_border + except Exception: + pass # Continue without border if it fails - # Alignment - cell.alignment = Alignment( - horizontal=cell_style.get("align", "left"), - vertical="center" - ) - - # Border - cell.border = thin_border - - startRow += 1 + startRow += 1 # Auto-adjust column widths for col in range(1, len(headers) + 1): @@ -1038,7 +1143,10 @@ class RendererXlsx(BaseRenderer): content = element.get("content", {}) if not isinstance(content, dict): return startRow - list_items = content.get("items", []) + list_items = content.get("items") or [] + # Ensure list_items is a list + if not isinstance(list_items, list): + list_items = [] list_style = styles.get("bullet_list", {}) for item in list_items: @@ -1199,6 +1307,52 @@ class RendererXlsx(BaseRenderer): errorCell = sheet.cell(row=startRow, column=1, value=errorMsg) errorCell.font = Font(color="FFFF0000", italic=True) # Red color return startRow + 1 + + def _addCodeBlockToExcel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], startRow: int) -> int: + """Add a code block element to Excel sheet. Expects nested content structure.""" + try: + # Extract from nested content structure + content = element.get("content", {}) + if not isinstance(content, dict): + return startRow + code = content.get("code", "") + language = content.get("language", "") + + if code: + code_style = styles.get("code_block", {}) + + # Add language label if present + if language: + langCell = sheet.cell(row=startRow, column=1, value=f"Code ({language}):") + langCell.font = Font(bold=True, color=self._getSafeColor(code_style.get("color", "FF000000"))) + startRow += 1 + + # Split code into lines and add each line + code_lines = code.split('\n') + for line in code_lines: + codeCell = sheet.cell(row=startRow, column=1, value=line) + codeCell.font = Font( + name=code_style.get("font", "Courier New"), + size=code_style.get("font_size", 10), + color=self._getSafeColor(code_style.get("color", "FF2F2F2F")) + ) + # Set background color if specified + if code_style.get("background"): + codeCell.fill = PatternFill( + start_color=self._getSafeColor(code_style["background"]), + end_color=self._getSafeColor(code_style["background"]), + fill_type="solid" + ) + startRow += 1 + + # Add spacing after code block + startRow += 1 + + return startRow + + except Exception as e: + self.logger.warning(f"Could not add code block to Excel: {str(e)}") + return startRow + 1 def _formatTimestamp(self) -> str: """Format current timestamp for document generation.""" diff --git a/tests/functional/test10_document_generation_formats.py b/tests/functional/test10_document_generation_formats.py index 05532313..8d963643 100644 --- a/tests/functional/test10_document_generation_formats.py +++ b/tests/functional/test10_document_generation_formats.py @@ -413,10 +413,12 @@ class DocumentGenerationFormatsTester10: async def testAllFormats(self) -> Dict[str, Any]: """Test document generation in DOCX, XLSX, PPTX, PDF, and HTML formats.""" print("\n" + "="*80) - print("TESTING DOCUMENT GENERATION IN DOCX, XLSX, PPTX, PDF, AND HTML FORMATS") + print("TESTING DOCUMENT GENERATION IN HTML FORMAT") print("="*80) - formats = ["docx", "xlsx", "pptx", "pdf", "html"] + # Only test HTML format + formats = ["html"] + # formats = ["docx", "xlsx", "pptx", "pdf", "html"] # Commented out other formats results = {} for format in formats: @@ -469,7 +471,7 @@ class DocumentGenerationFormatsTester10: async def runTest(self): """Run the complete test.""" print("\n" + "="*80) - print("DOCUMENT GENERATION FORMATS TEST 10 - DOCX, XLSX, PPTX, PDF, HTML") + print("DOCUMENT GENERATION FORMATS TEST 10 - HTML ONLY") print("="*80) try: