Fixed AI call iteration for document sections

This commit is contained in:
ValueOn AG 2026-01-03 01:55:25 +01:00
parent 9f433743a6
commit e4b633cac6
7 changed files with 699 additions and 43 deletions

View file

@ -155,14 +155,17 @@ class AiCallLooper:
)
# Write the ACTUAL prompt sent to AI
# For section content generation: only write one prompt file (first iteration)
# For section content generation: write prompt for first iteration and continuation iterations
# For document generation: write prompt for each iteration
isSectionContent = "_section_" in debugPrefix
if iteration == 1 or not isSectionContent:
if iteration == 1:
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
elif not isSectionContent:
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
if iteration == 1:
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
elif isSectionContent:
# Save continuation prompts for section_content debugging
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
else:
# Document generation - save all iteration prompts
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
response = await self.aiService.callAi(request)
result = response.content
@ -183,13 +186,16 @@ class AiCallLooper:
self.services.chat.progressLogUpdate(iterationOperationId, 0.6, f"AI response received ({bytesDisplay})")
# Write raw AI response to debug file
# For section content generation: only write one response file (first iteration)
# For section content generation: write response for first iteration and continuation iterations
# For document generation: write response for each iteration
if iteration == 1 or not isSectionContent:
if iteration == 1:
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
elif not isSectionContent:
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
if iteration == 1:
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
elif isSectionContent:
# Save continuation responses for section_content debugging
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
else:
# Document generation - save all iteration responses
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
# Emit stats for this iteration (only if workflow exists and has id)
if self.services.workflow and hasattr(self.services.workflow, 'id') and self.services.workflow.id:
@ -279,23 +285,57 @@ class AiCallLooper:
# JSON is complete - merge accumulated responses if any
if accumulatedDirectJson:
logger.info(f"Iteration {iteration}: Merging {len(accumulatedDirectJson) + 1} accumulated responses")
# Merge accumulated JSON strings with current response
mergedJsonString = accumulatedDirectJson[0] if accumulatedDirectJson else result
for prevJson in accumulatedDirectJson[1:]:
mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, prevJson)
# Finally merge with current response
mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, result)
# Re-parse merged JSON
# Use generic data-based merging for all use cases
try:
extractedMerged = extractJsonString(mergedJsonString)
parsedMerged, parseError, _ = tryParseJson(extractedMerged)
if parseError is None and parsedMerged:
parsedJsonForUseCase = parsedMerged
# Parse all accumulated JSON strings and current response
allParsed = []
for jsonStr in accumulatedDirectJson + [result]:
extracted = extractJsonString(jsonStr)
parsed, parseErr, _ = tryParseJson(extracted)
if parseErr is None and parsed:
# Normalize structure: ensure consistent format
normalized = self._normalizeJsonStructure(parsed, useCaseId)
allParsed.append(normalized)
if allParsed and len(allParsed) > 1:
# Generic recursive merge of parsed JSON objects
mergedJsonObj = self._mergeJsonObjectsRecursively(allParsed)
# Reconstruct merged JSON string
mergedJsonString = json.dumps(mergedJsonObj, indent=2, ensure_ascii=False)
parsedJsonForUseCase = mergedJsonObj
result = mergedJsonString
logger.info(f"Successfully merged {len(accumulatedDirectJson) + 1} JSON fragments using generic recursive merge")
elif allParsed:
# Only one parsed JSON, use it directly
parsedJsonForUseCase = allParsed[0]
result = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False)
else:
# Fallback to string merging if parsing fails
logger.warning("Failed to parse all JSON fragments for data-based merge, falling back to string merging")
mergedJsonString = accumulatedDirectJson[0] if accumulatedDirectJson else result
for prevJson in accumulatedDirectJson[1:]:
mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, prevJson)
mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, result)
result = mergedJsonString
logger.info(f"Successfully merged and parsed {len(accumulatedDirectJson) + 1} JSON fragments")
except Exception as e:
logger.warning(f"Failed to parse merged JSON, using last response: {e}")
logger.warning(f"Failed data-based merge, falling back to string merging: {e}")
# Fallback to string merging
mergedJsonString = accumulatedDirectJson[0] if accumulatedDirectJson else result
for prevJson in accumulatedDirectJson[1:]:
mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, prevJson)
mergedJsonString = JsonResponseHandler.mergeJsonStringsWithOverlap(mergedJsonString, result)
result = mergedJsonString
# Try to parse the string-merged result
try:
extractedMerged = extractJsonString(result)
parsedMerged, parseError, _ = tryParseJson(extractedMerged)
if parseError is None and parsedMerged:
parsedJsonForUseCase = parsedMerged
except Exception:
pass
logger.info(f"Iteration {iteration}: Use case '{useCaseId}' - JSON is complete")
@ -307,10 +347,12 @@ class AiCallLooper:
# The merging logic in subStructureFilling.py will handle extraction and merging
if useCaseId == "section_content":
final_json = result # Return raw response to preserve all JSON blocks
# Write final merged result for section_content (overwrites iteration 1 response with complete merged result)
self.services.utils.writeDebugFile(final_json, f"{debugPrefix}_response")
else:
final_json = json.dumps(parsedJsonForUseCase, indent=2, ensure_ascii=False) if parsedJsonForUseCase else (extractedJsonForUseCase or result)
# Write final result for chapter structure and code structure (section_content skips it)
# Write final result for chapter structure and code structure
if useCaseId in ["chapter_structure", "code_structure"]:
self.services.utils.writeDebugFile(final_json, f"{debugPrefix}_final_result")
@ -567,6 +609,166 @@ class AiCallLooper:
return final_result
def _normalizeJsonStructure(self, parsed: Any, useCaseId: str) -> Any:
"""
Normalize JSON structure to ensure consistent format before merging.
Handles different response formats and converts them to expected structure.
Args:
parsed: Parsed JSON object (can be dict, list, or primitive)
useCaseId: Use case ID to determine expected structure
Returns:
Normalized JSON structure
"""
# For section_content, expect {"elements": [...]} structure
if useCaseId == "section_content":
if isinstance(parsed, list):
# Check if list contains strings (invalid format) or element objects
if parsed and isinstance(parsed[0], str):
# Invalid format - list of strings instead of elements
# This shouldn't happen, but we'll log a warning and return empty structure
logger.warning(f"Invalid response format: received list of strings instead of elements array. Expected {{'elements': [...]}} structure.")
return {"elements": []}
else:
# Convert plain list of elements to elements structure
return {"elements": parsed}
elif isinstance(parsed, dict):
# If it already has "elements", return as-is
if "elements" in parsed:
return parsed
# If it has "type" and looks like an element, wrap in elements array
elif parsed.get("type"):
return {"elements": [parsed]}
# Otherwise, assume it's already in correct format
else:
return parsed
# For other use cases, return as-is (they have their own structures)
return parsed
def _mergeJsonObjectsRecursively(self, jsonObjects: List[Any]) -> Any:
    """
    GENERIC recursive merge for a sequence of parsed JSON objects.

    Works for ANY JSON structure. Strategy (delegated to _mergeTwoObjects):
    lists merge with content-based de-duplication, dicts merge key-by-key
    recursively, primitives keep the latest value.

    Args:
        jsonObjects: Parsed JSON objects to merge, in order.

    Returns:
        The merged JSON object, or None when the input is empty.
    """
    if not jsonObjects:
        return None
    # Fold the sequence left-to-right; a single-element input simply
    # returns that element (the loop body never runs).
    objectIter = iter(jsonObjects)
    accumulated = next(objectIter)
    for nextObject in objectIter:
        accumulated = self._mergeTwoObjects(accumulated, nextObject)
    return accumulated
def _mergeTwoObjects(self, obj1: Any, obj2: Any) -> Any:
    """
    Merge two JSON values recursively.

    Args:
        obj1: Base value.
        obj2: Value merged on top of obj1.

    Returns:
        Merged value: dicts merge per key (recursively), lists concatenate
        with content-based de-duplication, mismatched types/primitives
        resolve to obj2 (the latest value).
    """
    # None on either side yields the other side unchanged.
    if obj1 is None:
        return obj2
    if obj2 is None:
        return obj1
    if isinstance(obj1, dict) and isinstance(obj2, dict):
        # Key-wise merge: shared keys recurse, new keys are added.
        combined = dict(obj1)
        for key, incoming in obj2.items():
            if key in combined:
                combined[key] = self._mergeTwoObjects(combined[key], incoming)
            else:
                combined[key] = incoming
        return combined
    if isinstance(obj1, list) and isinstance(obj2, list):
        # Append obj2 entries whose content key has not been seen yet.
        combined = list(obj1)
        seenKeys = {self._createItemKey(entry) for entry in combined}
        for entry in obj2:
            entryKey = self._createItemKey(entry)
            if entryKey not in seenKeys:
                seenKeys.add(entryKey)
                combined.append(entry)
        return combined
    # Type mismatch or primitives: the newer value wins.
    return obj2
def _createItemKey(self, item: Any) -> Any:
"""
Create a key for an item to detect duplicates.
Works generically for any JSON structure.
Args:
item: Item to create key for
Returns:
Key that can be used for duplicate detection
"""
if isinstance(item, dict):
# For dicts, create key from all values (or specific identifying fields)
# Try to find common identifying fields first
if "id" in item:
return ("id", item["id"])
elif "type" in item and "content" in item:
# For elements with type and content, use type + content hash
content = item.get("content", {})
if isinstance(content, dict):
# For tables/lists, use type + first few rows/items for key
if "rows" in content:
rows = content.get("rows", [])
return ("type", item["type"], "rows", tuple(rows[:3]) if rows else ())
elif "items" in content:
items = content.get("items", [])
return ("type", item["type"], "items", tuple(items[:3]) if items else ())
return ("type", item["type"], tuple(sorted(item.items())))
else:
# Generic: use sorted items tuple
return tuple(sorted(item.items()))
elif isinstance(item, (list, tuple)):
# For lists/tuples, use the tuple itself as key
return tuple(item) if isinstance(item, list) else item
else:
# For primitives, use the value itself
return item
async def _defineKpisFromPrompt(
self,
userPrompt: str,

View file

@ -69,24 +69,59 @@ class CodeGenerationPath:
codeFiles = await self._generateCodeContent(codeStructure, codeOperationId)
# Phase 3: Code formatting & validation
self.services.chat.progressLogUpdate(codeOperationId, 0.9, "Formatting code files")
self.services.chat.progressLogUpdate(codeOperationId, 0.8, "Formatting code files")
formattedFiles = await self._formatAndValidateCode(codeFiles)
# Convert to unified document format
documents = []
# Phase 4: Code Rendering (Renderer-Based)
self.services.chat.progressLogUpdate(codeOperationId, 0.9, "Rendering code files")
# Group files by format
filesByFormat = {}
for file in formattedFiles:
mimeType = self._getMimeType(file.get("fileType", outputFormat or "txt"))
content = file.get("content", "")
if isinstance(content, str):
contentBytes = content.encode('utf-8')
else:
contentBytes = content
fileType = file.get("fileType", outputFormat or "txt")
if fileType not in filesByFormat:
filesByFormat[fileType] = []
filesByFormat[fileType].append(file)
# Render each format group using appropriate renderer
allRenderedDocuments = []
for fileType, files in filesByFormat.items():
# Get renderer for this format
renderer = self._getCodeRenderer(fileType)
if renderer:
# Use code renderer
renderedDocs = await renderer.renderCodeFiles(
codeFiles=files,
metadata=codeStructure.get("metadata", {}),
userPrompt=userPrompt
)
allRenderedDocuments.extend(renderedDocs)
else:
# Fallback: output directly (for formats without renderers)
for file in files:
mimeType = self._getMimeType(file.get("fileType", "txt"))
content = file.get("content", "")
contentBytes = content.encode('utf-8') if isinstance(content, str) else content
from modules.datamodels.datamodelDocument import RenderedDocument
allRenderedDocuments.append(
RenderedDocument(
documentData=contentBytes,
mimeType=mimeType,
filename=file.get("filename", "generated.txt"),
metadata=codeStructure.get("metadata", {})
)
)
# Convert RenderedDocument to DocumentData
documents = []
for renderedDoc in allRenderedDocuments:
documents.append(DocumentData(
documentName=file.get("filename", "generated.txt"),
documentData=contentBytes,
mimeType=mimeType,
sourceJson=file
documentName=renderedDoc.filename,
documentData=renderedDoc.documentData,
mimeType=renderedDoc.mimeType,
sourceJson=renderedDoc.metadata if hasattr(renderedDoc, 'metadata') else None
))
metadata = AiResponseMetadata(
@ -579,6 +614,28 @@ Return ONLY valid JSON in this format:
"md": "text/markdown",
"java": "text/x-java-source",
"cpp": "text/x-c++src",
"c": "text/x-csrc"
"c": "text/x-csrc",
"csv": "text/csv",
"xml": "application/xml"
}
return mimeTypes.get(fileType.lower(), "text/plain")
def _getCodeRenderer(self, fileType: str):
    """
    Return the code renderer for a file type, or None when no dedicated
    code renderer exists for that format.
    """
    from modules.services.serviceGeneration.renderers.registry import getRenderer
    # Only these formats currently have dedicated code renderers.
    codeRendererFormats = {
        'json': 'json',
        'csv': 'csv',
        'xml': 'xml'
    }
    targetFormat = codeRendererFormats.get(fileType.lower())
    if not targetFormat:
        return None
    candidate = getRenderer(targetFormat, self.services)
    # The renderer must explicitly advertise code-rendering support.
    if candidate and hasattr(candidate, 'renderCodeFiles'):
        return candidate
    return None

View file

@ -0,0 +1,45 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Base renderer class for code format renderers.
"""
from abc import abstractmethod
from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import logging
logger = logging.getLogger(__name__)
class BaseCodeRenderer(BaseRenderer):
    """Abstract base for renderers that turn generated code files into documents."""

    @abstractmethod
    async def renderCodeFiles(
        self,
        codeFiles: List[Dict[str, Any]],
        metadata: Dict[str, Any],
        userPrompt: str = None
    ) -> List[RenderedDocument]:
        """
        Render code files to format-specific output.

        Args:
            codeFiles: File dictionaries, each carrying:
                - filename: str
                - fileType: str (json, csv, xml, etc.)
                - content: str (generated code)
                - id: str (optional)
            metadata: Project metadata (language, projectType, etc.)
            userPrompt: Original user prompt

        Returns:
            List of RenderedDocument objects (can be 1..n files)
        """
        pass

    def _validateCodeFile(self, codeFile: Dict[str, Any]) -> bool:
        """Check that a code file dict carries all mandatory keys."""
        return {'filename', 'fileType', 'content'} <= codeFile.keys()

View file

@ -76,9 +76,26 @@ class RendererRegistry:
# Get supported formats from the renderer class
supportedFormats = rendererClass.getSupportedFormats()
# Get priority (default to 0 if not specified)
priority = rendererClass.getPriority() if hasattr(rendererClass, 'getPriority') else 0
for formatName in supportedFormats:
# Register primary format
self._renderers[formatName.lower()] = rendererClass
formatKey = formatName.lower()
# Check if format already registered - use priority to decide
if formatKey in self._renderers:
existingRenderer = self._renderers[formatKey]
existingPriority = existingRenderer.getPriority() if hasattr(existingRenderer, 'getPriority') else 0
# Only replace if new renderer has higher priority
if priority > existingPriority:
logger.debug(f"Replacing {existingRenderer.__name__} with {rendererClass.__name__} for format '{formatName}' (priority {priority} > {existingPriority})")
self._renderers[formatKey] = rendererClass
else:
logger.debug(f"Keeping {existingRenderer.__name__} for format '{formatName}' (priority {existingPriority} >= {priority})")
else:
# Register primary format
self._renderers[formatKey] = rendererClass
# Register aliases if any
if hasattr(rendererClass, 'getFormatAliases'):
@ -86,7 +103,7 @@ class RendererRegistry:
for alias in aliases:
self._format_mappings[alias.lower()] = formatName.lower()
logger.debug(f"Registered {rendererClass.__name__} for formats: {supportedFormats}")
logger.debug(f"Registered {rendererClass.__name__} for formats: {supportedFormats} (priority: {priority})")
except Exception as e:
logger.error(f"Error registering renderer {rendererClass.__name__}: {str(e)}")

View file

@ -0,0 +1,125 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
CSV code renderer for code generation.
"""
from .codeRendererBaseTemplate import BaseCodeRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import csv
import io
class RendererCodeCsv(BaseCodeRenderer):
    """Renders CSV code files."""

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Formats this renderer handles."""
        return ['csv']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """No aliases for CSV."""
        return []

    @classmethod
    def getPriority(cls) -> int:
        """Registry priority used to resolve format conflicts."""
        return 75  # Higher than document renderer (70) for code generation

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """CSV requires a specific structure, so classify output as 'code'."""
        return 'code'

    async def renderCodeFiles(
        self,
        codeFiles: List[Dict[str, Any]],
        metadata: Dict[str, Any],
        userPrompt: str = None
    ) -> List[RenderedDocument]:
        """
        Render CSV code files.

        Every file is validated, normalized (consistent column counts) and
        emitted as an independent CSV document.
        """
        documents = []
        for codeFile in codeFiles:
            if not self._validateCodeFile(codeFile):
                self.logger.warning(f"Invalid code file: {codeFile.get('filename', 'unknown')}")
                continue
            # Normalize the CSV body before emitting it.
            normalized = self._validateAndFixCsv(codeFile['content'])
            documents.append(
                RenderedDocument(
                    documentData=normalized.encode('utf-8'),
                    mimeType="text/csv",
                    filename=codeFile['filename'],
                    metadata=metadata
                )
            )
        return documents

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """
        Compatibility entry point for the document-generation pipeline.

        Code payloads (a "files" array) are routed to renderCodeFiles;
        anything else is delegated to the document CSV renderer.
        """
        if "files" in extractedContent:
            # Code generation path.
            return await self.renderCodeFiles(
                extractedContent.get("files", []),
                extractedContent.get("metadata", {}),
                userPrompt
            )
        # Document generation path - delegate to the document renderer.
        from .rendererCsv import RendererCsv
        return await RendererCsv(self.services).render(extractedContent, title, userPrompt, aiService)

    def _validateAndFixCsv(self, content: str) -> str:
        """
        Validate CSV structure and fix rows whose column count differs from
        the header; on any parse failure the original content is returned.
        """
        try:
            parsedRows = list(csv.reader(io.StringIO(content)))
            if not parsedRows:
                return content  # Nothing to validate in an empty CSV
            header = parsedRows[0]
            expectedWidth = len(header)
            normalizedRows = [header]
            for rowIndex, row in enumerate(parsedRows[1:], 1):
                if len(row) != expectedWidth:
                    self.logger.warning(f"Row {rowIndex} has {len(row)} columns, expected {expectedWidth}. Fixing...")
                    # Pad short rows with blanks, truncate long rows.
                    row = (row + [''] * expectedWidth)[:expectedWidth]
                normalizedRows.append(row)
            buffer = io.StringIO()
            csv.writer(buffer).writerows(normalizedRows)
            return buffer.getvalue()
        except Exception as e:
            self.logger.warning(f"CSV validation failed: {e}, returning original content")
            return content

View file

@ -0,0 +1,97 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
JSON code renderer for code generation.
"""
from .codeRendererBaseTemplate import BaseCodeRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import json
class RendererCodeJson(BaseCodeRenderer):
    """Renders JSON code files."""

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Return supported JSON formats."""
        return ['json']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """Return format aliases."""
        return []

    @classmethod
    def getPriority(cls) -> int:
        """Return priority for JSON code renderer."""
        return 85  # Higher than document renderer (80) for code generation

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """Return output style classification: JSON is structured data format."""
        return 'code'

    async def renderCodeFiles(
        self,
        codeFiles: List[Dict[str, Any]],
        metadata: Dict[str, Any],
        userPrompt: str = None
    ) -> List[RenderedDocument]:
        """
        Render JSON code files.

        Each file is parsed once: a successful parse is pretty-printed,
        an invalid file is logged (with its filename) and emitted as-is.
        """
        renderedDocs = []
        for codeFile in codeFiles:
            if not self._validateCodeFile(codeFile):
                self.logger.warning(f"Invalid code file: {codeFile.get('filename', 'unknown')}")
                continue
            filename = codeFile['filename']
            content = codeFile['content']
            # Parse once: this both validates syntax and yields the object
            # to pretty-print (previously the content was parsed twice and
            # the warning logged a literal "(unknown)" placeholder).
            try:
                parsed = json.loads(content)
                formattedContent = json.dumps(parsed, indent=2, ensure_ascii=False)
            except json.JSONDecodeError as e:
                self.logger.warning(f"Invalid JSON in {filename}: {e}")
                formattedContent = content  # Emit the original content unchanged
            renderedDocs.append(
                RenderedDocument(
                    documentData=formattedContent.encode('utf-8'),
                    mimeType="application/json",
                    filename=filename,
                    metadata=metadata
                )
            )
        return renderedDocs

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """
        Render method for document generation compatibility.

        Code payloads (a "files" array) go through renderCodeFiles; anything
        else is delegated to the document JSON renderer.
        """
        if "files" in extractedContent:
            # Code generation path - use renderCodeFiles
            files = extractedContent.get("files", [])
            metadata = extractedContent.get("metadata", {})
            return await self.renderCodeFiles(files, metadata, userPrompt)
        else:
            # Document generation path - delegate to document renderer
            # Import here to avoid circular dependency
            from .rendererJson import RendererJson
            documentRenderer = RendererJson(self.services)
            return await documentRenderer.render(extractedContent, title, userPrompt, aiService)

View file

@ -0,0 +1,113 @@
# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
XML code renderer for code generation.
"""
from .codeRendererBaseTemplate import BaseCodeRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List, Optional
import xml.etree.ElementTree as ET
from xml.dom import minidom
class RendererCodeXml(BaseCodeRenderer):
    """Renders XML code files."""

    @classmethod
    def getSupportedFormats(cls) -> List[str]:
        """Formats this renderer handles."""
        return ['xml']

    @classmethod
    def getFormatAliases(cls) -> List[str]:
        """No aliases for XML."""
        return []

    @classmethod
    def getPriority(cls) -> int:
        """Registry priority for the XML code renderer."""
        return 80

    @classmethod
    def getOutputStyle(cls, formatName: Optional[str] = None) -> str:
        """XML is a structured data format, so classify output as 'code'."""
        return 'code'

    async def renderCodeFiles(
        self,
        codeFiles: List[Dict[str, Any]],
        metadata: Dict[str, Any],
        userPrompt: str = None
    ) -> List[RenderedDocument]:
        """
        Render XML code files: each file is syntax-checked, pretty-printed,
        and emitted as its own document.
        """
        documents = []
        for codeFile in codeFiles:
            if not self._validateCodeFile(codeFile):
                self.logger.warning(f"Invalid code file: {codeFile.get('filename', 'unknown')}")
                continue
            prettyXml = self._validateAndFormatXml(codeFile['content'])
            documents.append(
                RenderedDocument(
                    documentData=prettyXml.encode('utf-8'),
                    mimeType="application/xml",
                    filename=codeFile['filename'],
                    metadata=metadata
                )
            )
        return documents

    async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
        """
        Compatibility entry point; XML currently supports only code
        generation (no XML document renderer exists yet).
        """
        if "files" in extractedContent:
            # Code generation path.
            return await self.renderCodeFiles(
                extractedContent.get("files", []),
                extractedContent.get("metadata", {}),
                userPrompt
            )
        # No document renderer for XML - emit a placeholder error file.
        self.logger.warning("XML document generation not supported, only code generation")
        return [
            RenderedDocument(
                documentData="XML document generation not yet supported".encode('utf-8'),
                mimeType="text/plain",
                filename="error.txt",
                metadata={}
            )
        ]

    def _validateAndFormatXml(self, content: str) -> str:
        """
        Validate XML syntax and return a pretty-printed version; on any
        failure the original content is returned unchanged.
        """
        try:
            root = ET.fromstring(content)
            serialized = ET.tostring(root, encoding='unicode')
            pretty = minidom.parseString(serialized).toprettyxml(indent=" ")
            # minidom inserts blank lines between elements - strip them.
            return '\n'.join(line for line in pretty.split('\n') if line.strip())
        except ET.ParseError as e:
            self.logger.warning(f"Invalid XML: {e}, returning original content")
            return content
        except Exception as e:
            self.logger.warning(f"XML formatting failed: {e}, returning original content")
            return content