fixing renderers
This commit is contained in:
parent
4d4db7bb85
commit
262f3296bf
3 changed files with 81 additions and 25 deletions
|
|
@ -12,8 +12,8 @@ logger = logging.getLogger(__name__)
|
|||
class RendererPptx(BaseRenderer):
|
||||
"""Renderer for PowerPoint (.pptx) files using python-pptx library."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
def __init__(self, services=None):
|
||||
super().__init__(services=services)
|
||||
self.supportedFormats = ["pptx", "ppt"]
|
||||
self.outputMimeType = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||
|
||||
|
|
|
|||
|
|
@ -475,11 +475,34 @@ class ContentGenerator:
|
|||
except (json.JSONDecodeError, ValueError) as recoveryError:
|
||||
logger.error(f"JSON recovery failed: {str(recoveryError)}")
|
||||
logger.error(f"Recovered JSON (first 500 chars): {recoveredJson[:500] if 'recoveredJson' in locals() else 'N/A'}")
|
||||
# Check if raw response might be truncated
|
||||
if len(rawContent) <= len(extractedJson) + 100: # Raw content is similar length to extracted
|
||||
logger.warning(f"Raw AI response may be truncated (length: {len(rawContent)} chars)")
|
||||
logger.warning(f"Consider increasing max_tokens for AI calls or checking token limits")
|
||||
raise ValueError(f"Invalid JSON in AI response (truncated?): {str(e)}")
|
||||
logger.error(f"Recovered JSON (last 200 chars): {recoveredJson[-200:] if 'recoveredJson' in locals() else 'N/A'}")
|
||||
|
||||
# Last resort: try to extract partial content and create minimal valid JSON
|
||||
try:
|
||||
# Try to extract text content before the truncation point
|
||||
import re
|
||||
# Look for text field that might be partially complete
|
||||
textMatch = re.search(r'"text"\s*:\s*"([^"]*)', extractedJson)
|
||||
if textMatch:
|
||||
partialText = textMatch.group(1)
|
||||
# Create minimal valid JSON with truncated text marked
|
||||
elementsData = {
|
||||
"elements": [{
|
||||
"text": partialText + "... [Content truncated due to token limit]"
|
||||
}]
|
||||
}
|
||||
logger.warning(f"Created minimal JSON structure with truncated text for section {section.get('id')}")
|
||||
else:
|
||||
# If no text found, create empty structure
|
||||
elementsData = {"elements": []}
|
||||
logger.warning(f"Created empty JSON structure for section {section.get('id')} due to recovery failure")
|
||||
except Exception as fallbackError:
|
||||
logger.error(f"Fallback recovery also failed: {str(fallbackError)}")
|
||||
# Check if raw response might be truncated
|
||||
if len(rawContent) <= len(extractedJson) + 100: # Raw content is similar length to extracted
|
||||
logger.warning(f"Raw AI response may be truncated (length: {len(rawContent)} chars)")
|
||||
logger.warning(f"Consider increasing max_tokens for AI calls or checking token limits")
|
||||
raise ValueError(f"Invalid JSON in AI response (truncated?): {str(e)}")
|
||||
else:
|
||||
raise ValueError(f"Invalid JSON in AI response: {str(e)}")
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -201,6 +201,7 @@ def closeJsonStructures(text: str) -> str:
|
|||
# Look for patterns like: "value" or "value\n (unterminated)
|
||||
# Check if we're in the middle of a string value when text ends
|
||||
if result.strip():
|
||||
import re
|
||||
# Count quotes - if odd number, we have an unterminated string
|
||||
quoteCount = result.count('"')
|
||||
if quoteCount % 2 == 1:
|
||||
|
|
@ -220,30 +221,62 @@ def closeJsonStructures(text: str) -> str:
|
|||
result += '"'
|
||||
else:
|
||||
# Even number of quotes, but might still be in middle of string if cut off
|
||||
# Check if text ends with a colon followed by a quote (start of string value)
|
||||
# or ends with text that looks like it's inside a string (no closing quote after last quote)
|
||||
import re
|
||||
# Pattern: ends with "text" where text doesn't end with quote
|
||||
# Look for pattern like: "text": "incomplete
|
||||
# More robust detection: check if text ends with alphanumeric/text chars after a quote
|
||||
# This handles cases like: "text": "value cut off mid-word
|
||||
|
||||
# Pattern 1: ends with colon + quote + text (no closing quote)
|
||||
if re.search(r':\s*"[^"]*$', result):
|
||||
# We're in the middle of a string value, close it
|
||||
result += '"'
|
||||
# Also check if we end with text after a quote (like "key": "value but cut off)
|
||||
elif re.search(r'"\s*:\s*"[^"]*[^",}\]]$', result):
|
||||
# Check if last quote is followed by non-quote, non-structural chars
|
||||
else:
|
||||
# Pattern 2: find last quote and check what comes after
|
||||
lastQuotePos = result.rfind('"')
|
||||
if lastQuotePos >= 0:
|
||||
afterQuote = result[lastQuotePos + 1:]
|
||||
# If after quote we have text but no closing quote, comma, or brace, we're in a string
|
||||
if afterQuote and not re.match(r'^\s*[,}\]\]]', afterQuote):
|
||||
# Check if it's escaped
|
||||
escapeCount = 0
|
||||
i = lastQuotePos - 1
|
||||
while i >= 0 and result[i] == '\\':
|
||||
escapeCount += 1
|
||||
i -= 1
|
||||
if escapeCount % 2 == 0:
|
||||
result += '"'
|
||||
# If after quote we have text (alphanumeric/whitespace) but no closing quote/comma/brace
|
||||
# and the text doesn't end with structural characters, we're likely in a string
|
||||
if afterQuote:
|
||||
# Check if it looks like we're in a string value (has text, no closing quote)
|
||||
# Pattern: ends with letters/numbers/spaces, not ending with quote, comma, }, or ]
|
||||
if re.search(r'[a-zA-Z0-9\s]$', result) and not re.match(r'^\s*[,}\]\]]', afterQuote):
|
||||
# Check if it's escaped
|
||||
escapeCount = 0
|
||||
i = lastQuotePos - 1
|
||||
while i >= 0 and result[i] == '\\':
|
||||
escapeCount += 1
|
||||
i -= 1
|
||||
if escapeCount % 2 == 0:
|
||||
# Verify we're actually in a string context (not in a key name)
|
||||
# Look backwards to see if we have ": " before the quote (value context)
|
||||
beforeQuote = result[:lastQuotePos]
|
||||
# Check if we're in a value context (has ": " before quote) or in an array (has "[ before quote)
|
||||
if re.search(r':\s*"', beforeQuote[-50:]) or re.search(r'\[\s*"', beforeQuote[-50:]):
|
||||
result += '"'
|
||||
# Also check if text ends with alphanumeric (likely cut off mid-word)
|
||||
elif re.search(r'[a-zA-Z]$', result):
|
||||
# If we end with a letter and have a quote before it, likely in a string
|
||||
result += '"'
|
||||
|
||||
# Final fallback: if text ends with alphanumeric and we have quotes, try to close the last string
|
||||
# This handles edge cases where patterns above didn't match
|
||||
if result.strip() and re.search(r'[a-zA-Z0-9]$', result):
|
||||
# Count quotes - if we have quotes and end with text, might be in a string
|
||||
if quoteCount > 0:
|
||||
lastQuotePos = result.rfind('"')
|
||||
if lastQuotePos >= 0:
|
||||
afterQuote = result[lastQuotePos + 1:]
|
||||
# If after quote is text (not empty, not structural), close it
|
||||
if afterQuote and re.search(r'^[a-zA-Z0-9\s]+$', afterQuote[:50]): # Check first 50 chars after quote
|
||||
# Make sure we're not already closed (check if next char would be quote/comma/brace)
|
||||
if not result.endswith('"') and not result.endswith(',') and not result.endswith('}') and not result.endswith(']'):
|
||||
# Check if escaped
|
||||
escapeCount = 0
|
||||
i = lastQuotePos - 1
|
||||
while i >= 0 and result[i] == '\\':
|
||||
escapeCount += 1
|
||||
i -= 1
|
||||
if escapeCount % 2 == 0:
|
||||
result += '"'
|
||||
|
||||
# Count open/close brackets and braces
|
||||
openBraces = result.count('{')
|
||||
|
|
|
|||
Loading…
Reference in a new issue