From e4b633cac66ab1ab22d5be50135c29a2d29f60aa Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Sat, 3 Jan 2026 01:55:25 +0100 Subject: [PATCH] fixed iteration ai calls for documents sections --- .../services/serviceAi/subAiCallLooping.py | 254 ++++++++++++++++-- .../serviceGeneration/paths/codePath.py | 85 +++++- .../renderers/codeRendererBaseTemplate.py | 45 ++++ .../serviceGeneration/renderers/registry.py | 23 +- .../renderers/rendererCodeCsv.py | 125 +++++++++ .../renderers/rendererCodeJson.py | 97 +++++++ .../renderers/rendererCodeXml.py | 113 ++++++++ 7 files changed, 699 insertions(+), 43 deletions(-) create mode 100644 modules/services/serviceGeneration/renderers/codeRendererBaseTemplate.py create mode 100644 modules/services/serviceGeneration/renderers/rendererCodeCsv.py create mode 100644 modules/services/serviceGeneration/renderers/rendererCodeJson.py create mode 100644 modules/services/serviceGeneration/renderers/rendererCodeXml.py diff --git a/modules/services/serviceAi/subAiCallLooping.py b/modules/services/serviceAi/subAiCallLooping.py index 2b71520b..b7fe3f32 100644 --- a/modules/services/serviceAi/subAiCallLooping.py +++ b/modules/services/serviceAi/subAiCallLooping.py @@ -155,14 +155,17 @@ class AiCallLooper: ) # Write the ACTUAL prompt sent to AI - # For section content generation: only write one prompt file (first iteration) + # For section content generation: write prompt for first iteration and continuation iterations # For document generation: write prompt for each iteration isSectionContent = "_section_" in debugPrefix - if iteration == 1 or not isSectionContent: - if iteration == 1: - self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt") - elif not isSectionContent: - self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}") + if iteration == 1: + self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt") + elif isSectionContent: + # Save continuation prompts for section_content debugging + self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}") + else: + # Document generation - save all iteration prompts + self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}") response = await self.aiService.callAi(request) result = response.content @@ -183,13 +186,16 @@ class AiCallLooper: self.services.chat.progressLogUpdate(iterationOperationId, 0.6, f"AI response received ({bytesDisplay})") # Write raw AI response to debug file - # For section content generation: only write one response file (first iteration) + # For section content generation: write response for first iteration and continuation iterations # For document generation: write response for each iteration - if iteration == 1 or not isSectionContent: - if iteration == 1: - self.services.utils.writeDebugFile(result, f"{debugPrefix}_response") - elif not isSectionContent: - self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}") + if iteration == 1: + self.services.utils.writeDebugFile(result, f"{debugPrefix}_response") + elif isSectionContent: + # Save continuation responses for section_content debugging + self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}") + else: + # Document generation - save all iteration responses + self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}") # Emit stats for this iteration (only if workflow exists and has id) if self.services.workflow and hasattr(self.services.workflow, 'id') and self.services.workflow.id: @@ -279,23 +285,57 @@ class AiCallLooper: # JSON is complete - merge accumulated responses if any if accumulatedDirectJson: logger.info(f"Iteration {iteration}: Merging {len(accumulatedDirectJson) + 1} accumulated responses") - # Merge accumulated JSON strings with current response - mergedJsonString = accumulatedDirectJson[0] if accumulatedDirectJson else result - for prevJson in accumulatedDirectJson[1:]: - mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, prevJson) - # Finally merge with current response - mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, result) - # Re-parse merged JSON + # Use generic data-based merging for all use cases try: - extractedMerged = extractJsonString(mergedJsonString) - parsedMerged, parseError, _ = tryParseJson(extractedMerged) - if parseError is None and parsedMerged: - parsedJsonForUseCase = parsedMerged + # Parse all accumulated JSON strings and current response + allParsed = [] + for jsonStr in accumulatedDirectJson + [result]: + extracted = extractJsonString(jsonStr) + parsed, parseErr, _ = tryParseJson(extracted) + if parseErr is None and parsed: + # Normalize structure: ensure consistent format + normalized = self._normalizeJsonStructure(parsed, useCaseId) + allParsed.append(normalized) + + if allParsed and len(allParsed) > 1: + # Generic recursive merge of parsed JSON objects + mergedJsonObj = self._mergeJsonObjectsRecursively(allParsed) + + # Reconstruct merged JSON string + mergedJsonString = json.dumps(mergedJsonObj, indent=2, ensure_ascii=False) + parsedJsonForUseCase = mergedJsonObj + result = mergedJsonString + logger.info(f"Successfully merged {len(accumulatedDirectJson) + 1} JSON fragments using generic recursive merge") + elif allParsed: + # Only one parsed JSON, use it directly + parsedJsonForUseCase = allParsed[0] + result = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) + else: + # Fallback to string merging if parsing fails + logger.warning("Failed to parse all JSON fragments for data-based merge, falling back to string merging") + mergedJsonString = accumulatedDirectJson[0] if accumulatedDirectJson else result + for prevJson in accumulatedDirectJson[1:]: + mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, prevJson) + mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, result) result = mergedJsonString - logger.info(f"Successfully merged and parsed {len(accumulatedDirectJson) + 1} JSON fragments") except Exception as e: - logger.warning(f"Failed to parse merged JSON, using last response: {e}") + logger.warning(f"Failed data-based merge, falling back to string merging: {e}") + # Fallback to string merging + mergedJsonString = accumulatedDirectJson[0] if accumulatedDirectJson else result + for prevJson in accumulatedDirectJson[1:]: + mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, prevJson) + mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, result) + result = mergedJsonString + + # Try to parse the string-merged result + try: + extractedMerged = extractJsonString(result) + parsedMerged, parseError, _ = tryParseJson(extractedMerged) + if parseError is None and parsedMerged: + parsedJsonForUseCase = parsedMerged + except Exception: + pass logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - JSON is complete") @@ -307,10 +347,12 @@ class AiCallLooper: # The merging logic in subStructureFilling.py will handle extraction and merging if useCaseId == "section_content": final_json = result # Return raw response to preserve all JSON blocks + # Write final merged result for section_content (overwrites iteration 1 response with complete merged result) + self.services.utils.writeDebugFile(final_json, f"{debugPrefix}_response") else: final_json = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) if parsedJsonForUseCase else (extractedJsonForUseCase or result) - # Write final result for chapter structure and code structure (section_content skips it) + # Write final result for chapter structure and code structure if useCaseId in ["chapter_structure", "code_structure"]: self.services.utils.writeDebugFile(final_json, f"{debugPrefix}_final_result") @@ -567,6 +609,166 @@ class AiCallLooper: return final_result + def _normalizeJsonStructure(self, parsed: Any, useCaseId: str) -> Any: + """ + Normalize JSON structure to ensure consistent format before merging. + Handles different response formats and converts them to expected structure. + + Args: + parsed: Parsed JSON object (can be dict, list, or primitive) + useCaseId: Use case ID to determine expected structure + + Returns: + Normalized JSON structure + """ + # For section_content, expect {"elements": [...]} structure + if useCaseId == "section_content": + if isinstance(parsed, list): + # Check if list contains strings (invalid format) or element objects + if parsed and isinstance(parsed[0], str): + # Invalid format - list of strings instead of elements + # This shouldn't happen, but we'll log a warning and return empty structure + logger.warning(f"Invalid response format: received list of strings instead of elements array. Expected {{'elements': [...]}} structure.") + return {"elements": []} + else: + # Convert plain list of elements to elements structure + return {"elements": parsed} + elif isinstance(parsed, dict): + # If it already has "elements", return as-is + if "elements" in parsed: + return parsed + # If it has "type" and looks like an element, wrap in elements array + elif parsed.get("type"): + return {"elements": [parsed]} + # Otherwise, assume it's already in correct format + else: + return parsed + + # For other use cases, return as-is (they have their own structures) + return parsed + + def _mergeJsonObjectsRecursively(self, jsonObjects: List[Any]) -> Any: + """ + GENERIC recursive merge function for JSON objects. + Works for ANY JSON structure - handles lists, dicts, and primitives intelligently. + + Merge strategy: + - Lists/Arrays: Merge by removing duplicates based on content (works for rows, items, elements, etc.) + - Dicts/Objects: Merge properties, recursively merging nested structures + - Primitives: Use the latest value + + Args: + jsonObjects: List of parsed JSON objects to merge + + Returns: + Merged JSON object + """ + if not jsonObjects: + return None + if len(jsonObjects) == 1: + return jsonObjects[0] + + # Start with first object and merge others into it + merged = jsonObjects[0] + + for obj in jsonObjects[1:]: + merged = self._mergeTwoObjects(merged, obj) + + return merged + + def _mergeTwoObjects(self, obj1: Any, obj2: Any) -> Any: + """ + Merge two JSON objects recursively. + + Args: + obj1: First object (base) + obj2: Second object (to merge into obj1) + + Returns: + Merged object + """ + # Handle None values + if obj1 is None: + return obj2 + if obj2 is None: + return obj1 + + # Handle different types + if isinstance(obj1, dict) and isinstance(obj2, dict): + # Merge dictionaries + merged = dict(obj1) # Start with copy of obj1 + + for key, value2 in obj2.items(): + if key in merged: + # Key exists in both - recursively merge + merged[key] = self._mergeTwoObjects(merged[key], value2) + else: + # New key - add it + merged[key] = value2 + + return merged + + elif isinstance(obj1, list) and isinstance(obj2, list): + # Merge lists by removing duplicates based on content + merged = list(obj1) # Start with copy of obj1 + seenItems = set() # Track seen items to avoid duplicates + + # Add all items from obj1 with their keys + for item in merged: + itemKey = self._createItemKey(item) + seenItems.add(itemKey) + + # Add items from obj2 that aren't duplicates + for item in obj2: + itemKey = self._createItemKey(item) + if itemKey not in seenItems: + seenItems.add(itemKey) + merged.append(item) + + return merged + + else: + # Different types or primitives - use obj2 (latest value) + return obj2 + + def _createItemKey(self, item: Any) -> Any: + """ + Create a key for an item to detect duplicates. + Works generically for any JSON structure. + + Args: + item: Item to create key for + + Returns: + Key that can be used for duplicate detection + """ + if isinstance(item, dict): + # For dicts, create key from all values (or specific identifying fields) + # Try to find common identifying fields first + if "id" in item: + return ("id", item["id"]) + elif "type" in item and "content" in item: + # For elements with type and content, use type + content hash + content = item.get("content", {}) + if isinstance(content, dict): + # For tables/lists, use type + first few rows/items for key + if "rows" in content: + rows = content.get("rows", []) + return ("type", item["type"], "rows", tuple(rows[:3]) if rows else ()) + elif "items" in content: + items = content.get("items", []) + return ("type", item["type"], "items", tuple(items[:3]) if items else ()) + return ("type", item["type"], tuple(sorted(item.items()))) + else: + # Generic: use sorted items tuple + return tuple(sorted(item.items())) + elif isinstance(item, (list, tuple)): + # For lists/tuples, use the tuple itself as key + return tuple(item) if isinstance(item, list) else item + else: + # For primitives, use the value itself + return item + async def _defineKpisFromPrompt( self, userPrompt: str, diff --git a/modules/services/serviceGeneration/paths/codePath.py b/modules/services/serviceGeneration/paths/codePath.py index 5beb1867..4db57fc3 100644 --- a/modules/services/serviceGeneration/paths/codePath.py +++ b/modules/services/serviceGeneration/paths/codePath.py @@ -69,24 +69,59 @@ class CodeGenerationPath: codeFiles = await self._generateCodeContent(codeStructure, codeOperationId) # Phase 3: Code formatting & validation - self.services.chat.progressLogUpdate(codeOperationId, 0.9, "Formatting code files") + self.services.chat.progressLogUpdate(codeOperationId, 0.8, "Formatting code files") formattedFiles = await self._formatAndValidateCode(codeFiles) - # Convert to unified document format - documents = [] + # Phase 4: Code Rendering (Renderer-Based) + self.services.chat.progressLogUpdate(codeOperationId, 0.9, "Rendering code files") + + # Group files by format + filesByFormat = {} for file in formattedFiles: - mimeType = self._getMimeType(file.get("fileType", outputFormat or "txt")) - content = file.get("content", "") - if isinstance(content, str): - contentBytes = content.encode('utf-8') - else: - contentBytes = content + fileType = file.get("fileType", outputFormat or "txt") + if fileType not in filesByFormat: + filesByFormat[fileType] = [] + filesByFormat[fileType].append(file) + + # Render each format group using appropriate renderer + allRenderedDocuments = [] + for fileType, files in filesByFormat.items(): + # Get renderer for this format + renderer = self._getCodeRenderer(fileType) + if renderer: + # Use code renderer + renderedDocs = await renderer.renderCodeFiles( + codeFiles=files, + metadata=codeStructure.get("metadata", {}), + userPrompt=userPrompt + ) + allRenderedDocuments.extend(renderedDocs) + else: + # Fallback: output directly (for formats without renderers) + for file in files: + mimeType = self._getMimeType(file.get("fileType", "txt")) + content = file.get("content", "") + contentBytes = content.encode('utf-8') if isinstance(content, str) else content + + from modules.datamodels.datamodelDocument import RenderedDocument + allRenderedDocuments.append( + RenderedDocument( + documentData=contentBytes, + mimeType=mimeType, + filename=file.get("filename", "generated.txt"), + metadata=codeStructure.get("metadata", {}) + ) + ) + + # Convert RenderedDocument to DocumentData + documents = [] + for renderedDoc in allRenderedDocuments: documents.append(DocumentData( - documentName=file.get("filename", "generated.txt"), - documentData=contentBytes, - mimeType=mimeType, - sourceJson=file + documentName=renderedDoc.filename, + documentData=renderedDoc.documentData, + mimeType=renderedDoc.mimeType, + sourceJson=renderedDoc.metadata if hasattr(renderedDoc, 'metadata') else None )) metadata = AiResponseMetadata( @@ -579,6 +614,28 @@ Return ONLY valid JSON in this format: "md": "text/markdown", "java": "text/x-java-source", "cpp": "text/x-c++src", - "c": "text/x-csrc" + "c": "text/x-csrc", + "csv": "text/csv", + "xml": "application/xml" } return mimeTypes.get(fileType.lower(), "text/plain") + + def _getCodeRenderer(self, fileType: str): + """Get code renderer for file type.""" + from modules.services.serviceGeneration.renderers.registry import getRenderer + + # Map file types to renderer formats + formatMap = { + 'json': 'json', + 'csv': 'csv', + 'xml': 'xml' + } + + rendererFormat = formatMap.get(fileType.lower()) + if rendererFormat: + renderer = getRenderer(rendererFormat, self.services) + # Check if renderer supports code rendering + if renderer and hasattr(renderer, 'renderCodeFiles'): + return renderer + + return None diff --git a/modules/services/serviceGeneration/renderers/codeRendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/codeRendererBaseTemplate.py new file mode 100644 index 00000000..0c461b5c --- /dev/null +++ b/modules/services/serviceGeneration/renderers/codeRendererBaseTemplate.py @@ -0,0 +1,45 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +Base renderer class for code format renderers. +""" + +from abc import abstractmethod +from .rendererBaseTemplate import BaseRenderer +from modules.datamodels.datamodelDocument import RenderedDocument +from typing import Dict, Any, List, Optional +import logging + +logger = logging.getLogger(__name__) + +class BaseCodeRenderer(BaseRenderer): + """Base class for code format renderers.""" + + @abstractmethod + async def renderCodeFiles( + self, + codeFiles: List[Dict[str, Any]], + metadata: Dict[str, Any], + userPrompt: str = None + ) -> List[RenderedDocument]: + """ + Render code files to format-specific output. + + Args: + codeFiles: List of file dictionaries with: + - filename: str + - fileType: str (json, csv, xml, etc.) + - content: str (generated code) + - id: str (optional) + metadata: Project metadata (language, projectType, etc.) + userPrompt: Original user prompt + + Returns: + List of RenderedDocument objects (can be 1..n files) + """ + pass + + def _validateCodeFile(self, codeFile: Dict[str, Any]) -> bool: + """Validate code file structure.""" + required = ['filename', 'fileType', 'content'] + return all(key in codeFile for key in required) diff --git a/modules/services/serviceGeneration/renderers/registry.py b/modules/services/serviceGeneration/renderers/registry.py index fdaba913..8795b56e 100644 --- a/modules/services/serviceGeneration/renderers/registry.py +++ b/modules/services/serviceGeneration/renderers/registry.py @@ -76,9 +76,26 @@ class RendererRegistry: # Get supported formats from the renderer class supportedFormats = rendererClass.getSupportedFormats() + # Get priority (default to 0 if not specified) + priority = rendererClass.getPriority() if hasattr(rendererClass, 'getPriority') else 0 + for formatName in supportedFormats: - # Register primary format - self._renderers[formatName.lower()] = rendererClass + formatKey = formatName.lower() + + # Check if format already registered - use priority to decide + if formatKey in self._renderers: + existingRenderer = self._renderers[formatKey] + existingPriority = existingRenderer.getPriority() if hasattr(existingRenderer, 'getPriority') else 0 + + # Only replace if new renderer has higher priority + if priority > existingPriority: + logger.debug(f"Replacing {existingRenderer.__name__} with {rendererClass.__name__} for format '{formatName}' (priority {priority} > {existingPriority})") + self._renderers[formatKey] = rendererClass + else: + logger.debug(f"Keeping {existingRenderer.__name__} for format '{formatName}' (priority {existingPriority} >= {priority})") + else: + # Register primary format + self._renderers[formatKey] = rendererClass # Register aliases if any if hasattr(rendererClass, 'getFormatAliases'): @@ -86,7 +103,7 @@ class RendererRegistry: for alias in aliases: self._format_mappings[alias.lower()] = formatName.lower() - logger.debug(f"Registered {rendererClass.__name__} for formats: {supportedFormats}") + logger.debug(f"Registered {rendererClass.__name__} for formats: {supportedFormats} (priority: {priority})") except Exception as e: logger.error(f"Error registering renderer {rendererClass.__name__}: {str(e)}") diff --git a/modules/services/serviceGeneration/renderers/rendererCodeCsv.py b/modules/services/serviceGeneration/renderers/rendererCodeCsv.py new file mode 100644 index 00000000..3cfc6a52 --- /dev/null +++ b/modules/services/serviceGeneration/renderers/rendererCodeCsv.py @@ -0,0 +1,125 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +CSV code renderer for code generation. +""" + +from .codeRendererBaseTemplate import BaseCodeRenderer +from modules.datamodels.datamodelDocument import RenderedDocument +from typing import Dict, Any, List, Optional +import csv +import io + +class RendererCodeCsv(BaseCodeRenderer): + """Renders CSV code files.""" + + @classmethod + def getSupportedFormats(cls) -> List[str]: + """Return supported CSV formats.""" + return ['csv'] + + @classmethod + def getFormatAliases(cls) -> List[str]: + """Return format aliases.""" + return [] + + @classmethod + def getPriority(cls) -> int: + """Return priority for CSV code renderer.""" + return 75 # Higher than document renderer (70) for code generation + + @classmethod + def getOutputStyle(cls, formatName: Optional[str] = None) -> str: + """Return output style classification: CSV requires specific structure.""" + return 'code' + + async def renderCodeFiles( + self, + codeFiles: List[Dict[str, Any]], + metadata: Dict[str, Any], + userPrompt: str = None + ) -> List[RenderedDocument]: + """ + Render CSV code files. + For single file: output as-is (validate structure) + For multiple files: output separately (each is independent CSV) + """ + renderedDocs = [] + + for codeFile in codeFiles: + if not self._validateCodeFile(codeFile): + self.logger.warning(f"Invalid code file: {codeFile.get('filename', 'unknown')}") + continue + + filename = codeFile['filename'] + content = codeFile['content'] + + # Validate CSV structure (header row, consistent columns) + validatedContent = self._validateAndFixCsv(content) + + renderedDocs.append( + RenderedDocument( + documentData=validatedContent.encode('utf-8'), + mimeType="text/csv", + filename=filename, + metadata=metadata + ) + ) + + return renderedDocs + + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + """ + Render method for document generation compatibility. + Delegates to document renderer if needed, or handles code files directly. + """ + # Check if this is code generation (has files array) or document generation (has documents array) + if "files" in extractedContent: + # Code generation path - use renderCodeFiles + files = extractedContent.get("files", []) + metadata = extractedContent.get("metadata", {}) + return await self.renderCodeFiles(files, metadata, userPrompt) + else: + # Document generation path - delegate to document renderer + from .rendererCsv import RendererCsv + documentRenderer = RendererCsv(self.services) + return await documentRenderer.render(extractedContent, title, userPrompt, aiService) + + def _validateAndFixCsv(self, content: str) -> str: + """Validate CSV structure and fix common issues.""" + try: + # Parse CSV to validate structure + reader = csv.reader(io.StringIO(content)) + rows = list(reader) + + if not rows: + return content # Empty CSV + + # Check header row exists + headerRow = rows[0] + headerCount = len(headerRow) + + # Validate all rows have same column count + fixedRows = [headerRow] # Start with header + + for i, row in enumerate(rows[1:], 1): + if len(row) != headerCount: + self.logger.warning(f"Row {i} has {len(row)} columns, expected {headerCount}. Fixing...") + # Pad or truncate to match header + if len(row) < headerCount: + row.extend([''] * (headerCount - len(row))) + else: + row = row[:headerCount] + fixedRows.append(row) + + # Convert back to CSV string + output = io.StringIO() + writer = csv.writer(output) + for row in fixedRows: + writer.writerow(row) + + return output.getvalue() + + except Exception as e: + self.logger.warning(f"CSV validation failed: {e}, returning original content") + return content diff --git a/modules/services/serviceGeneration/renderers/rendererCodeJson.py b/modules/services/serviceGeneration/renderers/rendererCodeJson.py new file mode 100644 index 00000000..e4e4a207 --- /dev/null +++ b/modules/services/serviceGeneration/renderers/rendererCodeJson.py @@ -0,0 +1,97 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +JSON code renderer for code generation. +""" + +from .codeRendererBaseTemplate import BaseCodeRenderer +from modules.datamodels.datamodelDocument import RenderedDocument +from typing import Dict, Any, List, Optional +import json + +class RendererCodeJson(BaseCodeRenderer): + """Renders JSON code files.""" + + @classmethod + def getSupportedFormats(cls) -> List[str]: + """Return supported JSON formats.""" + return ['json'] + + @classmethod + def getFormatAliases(cls) -> List[str]: + """Return format aliases.""" + return [] + + @classmethod + def getPriority(cls) -> int: + """Return priority for JSON code renderer.""" + return 85 # Higher than document renderer (80) for code generation + + @classmethod + def getOutputStyle(cls, formatName: Optional[str] = None) -> str: + """Return output style classification: JSON is structured data format.""" + return 'code' + + async def renderCodeFiles( + self, + codeFiles: List[Dict[str, Any]], + metadata: Dict[str, Any], + userPrompt: str = None + ) -> List[RenderedDocument]: + """ + Render JSON code files. + For single file: output as-is + For multiple files: output separately (each file is independent JSON) + """ + renderedDocs = [] + + for codeFile in codeFiles: + if not self._validateCodeFile(codeFile): + self.logger.warning(f"Invalid code file: {codeFile.get('filename', 'unknown')}") + continue + + filename = codeFile['filename'] + content = codeFile['content'] + + # Validate JSON syntax + try: + json.loads(content) # Validate JSON + except json.JSONDecodeError as e: + self.logger.warning(f"Invalid JSON in {filename}: {e}") + # Could fix/format JSON here if needed + + # Format JSON (pretty print) + try: + parsed = json.loads(content) + formattedContent = json.dumps(parsed, indent=2, ensure_ascii=False) + except Exception: + formattedContent = content # Use original if formatting fails + + renderedDocs.append( + RenderedDocument( + documentData=formattedContent.encode('utf-8'), + mimeType="application/json", + filename=filename, + metadata=metadata + ) + ) + + return renderedDocs + + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + """ + Render method for document generation compatibility. + Delegates to document renderer if needed, or handles code files directly. + """ + # Check if this is code generation (has files array) or document generation (has documents array) + if "files" in extractedContent: + # Code generation path - use renderCodeFiles + files = extractedContent.get("files", []) + metadata = extractedContent.get("metadata", {}) + return await self.renderCodeFiles(files, metadata, userPrompt) + else: + # Document generation path - delegate to document renderer + # Import here to avoid circular dependency + from .rendererJson import RendererJson + documentRenderer = RendererJson(self.services) + return await documentRenderer.render(extractedContent, title, userPrompt, aiService) diff --git a/modules/services/serviceGeneration/renderers/rendererCodeXml.py b/modules/services/serviceGeneration/renderers/rendererCodeXml.py new file mode 100644 index 00000000..18bf8ab1 --- /dev/null +++ b/modules/services/serviceGeneration/renderers/rendererCodeXml.py @@ -0,0 +1,113 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +XML code renderer for code generation. +""" + +from .codeRendererBaseTemplate import BaseCodeRenderer +from modules.datamodels.datamodelDocument import RenderedDocument +from typing import Dict, Any, List, Optional +import xml.etree.ElementTree as ET +from xml.dom import minidom + +class RendererCodeXml(BaseCodeRenderer): + """Renders XML code files.""" + + @classmethod + def getSupportedFormats(cls) -> List[str]: + """Return supported XML formats.""" + return ['xml'] + + @classmethod + def getFormatAliases(cls) -> List[str]: + """Return format aliases.""" + return [] + + @classmethod + def getPriority(cls) -> int: + """Return priority for XML code renderer.""" + return 80 + + @classmethod + def getOutputStyle(cls, formatName: Optional[str] = None) -> str: + """Return output style classification: XML is structured data format.""" + return 'code' + + async def renderCodeFiles( + self, + codeFiles: List[Dict[str, Any]], + metadata: Dict[str, Any], + userPrompt: str = None + ) -> List[RenderedDocument]: + """ + Render XML code files. + Validates XML syntax and formats (pretty print). + """ + renderedDocs = [] + + for codeFile in codeFiles: + if not self._validateCodeFile(codeFile): + self.logger.warning(f"Invalid code file: {codeFile.get('filename', 'unknown')}") + continue + + filename = codeFile['filename'] + content = codeFile['content'] + + # Validate and format XML + formattedContent = self._validateAndFormatXml(content) + + renderedDocs.append( + RenderedDocument( + documentData=formattedContent.encode('utf-8'), + mimeType="application/xml", + filename=filename, + metadata=metadata + ) + ) + + return renderedDocs + + async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]: + """ + Render method for document generation compatibility. + For XML, we only support code generation (no document renderer exists yet). + """ + # Check if this is code generation (has files array) + if "files" in extractedContent: + # Code generation path - use renderCodeFiles + files = extractedContent.get("files", []) + metadata = extractedContent.get("metadata", {}) + return await self.renderCodeFiles(files, metadata, userPrompt) + else: + # Document generation path - not supported yet, return error + self.logger.warning("XML document generation not supported, only code generation") + return [ + RenderedDocument( + documentData=f"XML document generation not yet supported".encode('utf-8'), + mimeType="text/plain", + filename="error.txt", + metadata={} + ) + ] + + def _validateAndFormatXml(self, content: str) -> str: + """Validate XML syntax and format (pretty print).""" + try: + # Parse XML to validate + root = ET.fromstring(content) + + # Format XML (pretty print) + rough_string = ET.tostring(root, encoding='unicode') + reparsed = minidom.parseString(rough_string) + formatted = reparsed.toprettyxml(indent=" ") + + # Remove extra blank lines + lines = [line for line in formatted.split('\n') if line.strip()] + return '\n'.join(lines) + + except ET.ParseError as e: + self.logger.warning(f"Invalid XML: {e}, returning original content") + return content + except Exception as e: + self.logger.warning(f"XML formatting failed: {e}, returning original content") + return content