fixing renderers

This commit is contained in:
ValueOn AG 2025-12-23 00:50:00 +01:00
parent 4d4db7bb85
commit 262f3296bf
3 changed files with 81 additions and 25 deletions

View file

@ -12,8 +12,8 @@ logger = logging.getLogger(__name__)
class RendererPptx(BaseRenderer):
"""Renderer for PowerPoint (.pptx) files using python-pptx library."""
def __init__(self):
super().__init__()
def __init__(self, services=None):
super().__init__(services=services)
self.supportedFormats = ["pptx", "ppt"]
self.outputMimeType = "application/vnd.openxmlformats-officedocument.presentationml.presentation"

View file

@ -475,11 +475,34 @@ class ContentGenerator:
except (json.JSONDecodeError, ValueError) as recoveryError:
logger.error(f"JSON recovery failed: {str(recoveryError)}")
logger.error(f"Recovered JSON (first 500 chars): {recoveredJson[:500] if 'recoveredJson' in locals() else 'N/A'}")
# Check if raw response might be truncated
if len(rawContent) <= len(extractedJson) + 100: # Raw content is similar length to extracted
logger.warning(f"Raw AI response may be truncated (length: {len(rawContent)} chars)")
logger.warning(f"Consider increasing max_tokens for AI calls or checking token limits")
raise ValueError(f"Invalid JSON in AI response (truncated?): {str(e)}")
logger.error(f"Recovered JSON (last 200 chars): {recoveredJson[-200:] if 'recoveredJson' in locals() else 'N/A'}")
# Last resort: try to extract partial content and create minimal valid JSON
try:
# Try to extract text content before the truncation point
import re
# Look for text field that might be partially complete
textMatch = re.search(r'"text"\s*:\s*"([^"]*)', extractedJson)
if textMatch:
partialText = textMatch.group(1)
# Create minimal valid JSON with truncated text marked
elementsData = {
"elements": [{
"text": partialText + "... [Content truncated due to token limit]"
}]
}
logger.warning(f"Created minimal JSON structure with truncated text for section {section.get('id')}")
else:
# If no text found, create empty structure
elementsData = {"elements": []}
logger.warning(f"Created empty JSON structure for section {section.get('id')} due to recovery failure")
except Exception as fallbackError:
logger.error(f"Fallback recovery also failed: {str(fallbackError)}")
# Check if raw response might be truncated
if len(rawContent) <= len(extractedJson) + 100: # Raw content is similar length to extracted
logger.warning(f"Raw AI response may be truncated (length: {len(rawContent)} chars)")
logger.warning(f"Consider increasing max_tokens for AI calls or checking token limits")
raise ValueError(f"Invalid JSON in AI response (truncated?): {str(e)}")
else:
raise ValueError(f"Invalid JSON in AI response: {str(e)}")
else:

View file

@ -201,6 +201,7 @@ def closeJsonStructures(text: str) -> str:
# Look for patterns like: "value" or "value\n (unterminated)
# Check if we're in the middle of a string value when text ends
if result.strip():
import re
# Count quotes - if odd number, we have an unterminated string
quoteCount = result.count('"')
if quoteCount % 2 == 1:
@ -220,30 +221,62 @@ def closeJsonStructures(text: str) -> str:
result += '"'
else:
# Even number of quotes, but might still be in middle of string if cut off
# Check if text ends with a colon followed by a quote (start of string value)
# or ends with text that looks like it's inside a string (no closing quote after last quote)
import re
# Pattern: ends with "text" where text doesn't end with quote
# Look for pattern like: "text": "incomplete
# More robust detection: check if text ends with alphanumeric/text chars after a quote
# This handles cases like: "text": "value cut off mid-word
# Pattern 1: ends with colon + quote + text (no closing quote)
if re.search(r':\s*"[^"]*$', result):
# We're in the middle of a string value, close it
result += '"'
# Also check if we end with text after a quote (like "key": "value but cut off)
elif re.search(r'"\s*:\s*"[^"]*[^",}\]]$', result):
# Check if last quote is followed by non-quote, non-structural chars
else:
# Pattern 2: find last quote and check what comes after
lastQuotePos = result.rfind('"')
if lastQuotePos >= 0:
afterQuote = result[lastQuotePos + 1:]
# If after quote we have text but no closing quote, comma, or brace, we're in a string
if afterQuote and not re.match(r'^\s*[,}\]\]]', afterQuote):
# Check if it's escaped
escapeCount = 0
i = lastQuotePos - 1
while i >= 0 and result[i] == '\\':
escapeCount += 1
i -= 1
if escapeCount % 2 == 0:
result += '"'
# If after quote we have text (alphanumeric/whitespace) but no closing quote/comma/brace
# and the text doesn't end with structural characters, we're likely in a string
if afterQuote:
# Check if it looks like we're in a string value (has text, no closing quote)
# Pattern: ends with letters/numbers/spaces, not ending with quote, comma, }, or ]
if re.search(r'[a-zA-Z0-9\s]$', result) and not re.match(r'^\s*[,}\]\]]', afterQuote):
# Check if it's escaped
escapeCount = 0
i = lastQuotePos - 1
while i >= 0 and result[i] == '\\':
escapeCount += 1
i -= 1
if escapeCount % 2 == 0:
# Verify we're actually in a string context (not in a key name)
# Look backwards to see if we have ": " before the quote (value context)
beforeQuote = result[:lastQuotePos]
# Check if we're in a value context (has ": " before quote) or in an array (has "[ before quote)
if re.search(r':\s*"', beforeQuote[-50:]) or re.search(r'\[\s*"', beforeQuote[-50:]):
result += '"'
# Also check if text ends with alphanumeric (likely cut off mid-word)
elif re.search(r'[a-zA-Z]$', result):
# If we end with a letter and have a quote before it, likely in a string
result += '"'
# Final fallback: if text ends with alphanumeric and we have quotes, try to close the last string
# This handles edge cases where patterns above didn't match
if result.strip() and re.search(r'[a-zA-Z0-9]$', result):
# Count quotes - if we have quotes and end with text, might be in a string
if quoteCount > 0:
lastQuotePos = result.rfind('"')
if lastQuotePos >= 0:
afterQuote = result[lastQuotePos + 1:]
# If after quote is text (not empty, not structural), close it
if afterQuote and re.search(r'^[a-zA-Z0-9\s]+$', afterQuote[:50]): # Check first 50 chars after quote
# Make sure we're not already closed (check if next char would be quote/comma/brace)
if not result.endswith('"') and not result.endswith(',') and not result.endswith('}') and not result.endswith(']'):
# Check if escaped
escapeCount = 0
i = lastQuotePos - 1
while i >= 0 and result[i] == '\\':
escapeCount += 1
i -= 1
if escapeCount % 2 == 0:
result += '"'
# Count open/close brackets and braces
openBraces = result.count('{')