From 262f3296bf83408f08e5fbdc539a8ca37de77991 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 23 Dec 2025 00:50:00 +0100 Subject: [PATCH] fixing renderers --- .../renderers/rendererPptx.py | 4 +- .../serviceGeneration/subContentGenerator.py | 33 +++++++-- modules/shared/jsonUtils.py | 69 ++++++++++++++----- 3 files changed, 81 insertions(+), 25 deletions(-) diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index f7b65eb1..6b1b9e18 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -12,8 +12,8 @@ logger = logging.getLogger(__name__) class RendererPptx(BaseRenderer): """Renderer for PowerPoint (.pptx) files using python-pptx library.""" - def __init__(self): - super().__init__() + def __init__(self, services=None): + super().__init__(services=services) self.supportedFormats = ["pptx", "ppt"] self.outputMimeType = "application/vnd.openxmlformats-officedocument.presentationml.presentation" diff --git a/modules/services/serviceGeneration/subContentGenerator.py b/modules/services/serviceGeneration/subContentGenerator.py index 1b1f64a9..0f75f595 100644 --- a/modules/services/serviceGeneration/subContentGenerator.py +++ b/modules/services/serviceGeneration/subContentGenerator.py @@ -475,11 +475,34 @@ class ContentGenerator: except (json.JSONDecodeError, ValueError) as recoveryError: logger.error(f"JSON recovery failed: {str(recoveryError)}") logger.error(f"Recovered JSON (first 500 chars): {recoveredJson[:500] if 'recoveredJson' in locals() else 'N/A'}") - # Check if raw response might be truncated - if len(rawContent) <= len(extractedJson) + 100: # Raw content is similar length to extracted - logger.warning(f"Raw AI response may be truncated (length: {len(rawContent)} chars)") - logger.warning(f"Consider increasing max_tokens for AI calls or checking token limits") - raise ValueError(f"Invalid JSON in AI response (truncated?): {str(e)}") + logger.error(f"Recovered JSON (last 200 chars): {recoveredJson[-200:] if 'recoveredJson' in locals() else 'N/A'}") + + # Last resort: try to extract partial content and create minimal valid JSON + try: + # Try to extract text content before the truncation point + import re + # Look for text field that might be partially complete + textMatch = re.search(r'"text"\s*:\s*"([^"]*)', extractedJson) + if textMatch: + partialText = textMatch.group(1) + # Create minimal valid JSON with truncated text marked + elementsData = { + "elements": [{ + "text": partialText + "... [Content truncated due to token limit]" + }] + } + logger.warning(f"Created minimal JSON structure with truncated text for section {section.get('id')}") + else: + # If no text found, create empty structure + elementsData = {"elements": []} + logger.warning(f"Created empty JSON structure for section {section.get('id')} due to recovery failure") + except Exception as fallbackError: + logger.error(f"Fallback recovery also failed: {str(fallbackError)}") + # Check if raw response might be truncated + if len(rawContent) <= len(extractedJson) + 100: # Raw content is similar length to extracted + logger.warning(f"Raw AI response may be truncated (length: {len(rawContent)} chars)") + logger.warning(f"Consider increasing max_tokens for AI calls or checking token limits") + raise ValueError(f"Invalid JSON in AI response (truncated?): {str(e)}") else: raise ValueError(f"Invalid JSON in AI response: {str(e)}") else: diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py index 907e84a6..f2678b63 100644 --- a/modules/shared/jsonUtils.py +++ b/modules/shared/jsonUtils.py @@ -201,6 +201,7 @@ def closeJsonStructures(text: str) -> str: # Look for patterns like: "value" or "value\n (unterminated) # Check if we're in the middle of a string value when text ends if result.strip(): + import re # Count quotes - if odd number, we have an unterminated string quoteCount = result.count('"') if quoteCount % 2 == 1: @@ -220,30 +221,62 @@ def closeJsonStructures(text: str) -> str: result += '"' else: # Even number of quotes, but might still be in middle of string if cut off - # Check if text ends with a colon followed by a quote (start of string value) - # or ends with text that looks like it's inside a string (no closing quote after last quote) - import re - # Pattern: ends with "text" where text doesn't end with quote - # Look for pattern like: "text": "incomplete + # More robust detection: check if text ends with alphanumeric/text chars after a quote + # This handles cases like: "text": "value cut off mid-word + + # Pattern 1: ends with colon + quote + text (no closing quote) if re.search(r':\s*"[^"]*$', result): # We're in the middle of a string value, close it result += '"' - # Also check if we end with text after a quote (like "key": "value but cut off) - elif re.search(r'"\s*:\s*"[^"]*[^",}\]]$', result): - # Check if last quote is followed by non-quote, non-structural chars + else: + # Pattern 2: find last quote and check what comes after lastQuotePos = result.rfind('"') if lastQuotePos >= 0: afterQuote = result[lastQuotePos + 1:] - # If after quote we have text but no closing quote, comma, or brace, we're in a string - if afterQuote and not re.match(r'^\s*[,}\]\]]', afterQuote): - # Check if it's escaped - escapeCount = 0 - i = lastQuotePos - 1 - while i >= 0 and result[i] == '\\': - escapeCount += 1 - i -= 1 - if escapeCount % 2 == 0: - result += '"' + # If after quote we have text (alphanumeric/whitespace) but no closing quote/comma/brace + # and the text doesn't end with structural characters, we're likely in a string + if afterQuote: + # Check if it looks like we're in a string value (has text, no closing quote) + # Pattern: ends with letters/numbers/spaces, not ending with quote, comma, }, or ] + if re.search(r'[a-zA-Z0-9\s]$', result) and not re.match(r'^\s*[,}\]\]]', afterQuote): + # Check if it's escaped + escapeCount = 0 + i = lastQuotePos - 1 + while i >= 0 and result[i] == '\\': + escapeCount += 1 + i -= 1 + if escapeCount % 2 == 0: + # Verify we're actually in a string context (not in a key name) + # Look backwards to see if we have ": " before the quote (value context) + beforeQuote = result[:lastQuotePos] + # Check if we're in a value context (has ": " before quote) or in an array (has "[ before quote) + if re.search(r':\s*"', beforeQuote[-50:]) or re.search(r'\[\s*"', beforeQuote[-50:]): + result += '"' + # Also check if text ends with alphanumeric (likely cut off mid-word) + elif re.search(r'[a-zA-Z]$', result): + # If we end with a letter and have a quote before it, likely in a string + result += '"' + + # Final fallback: if text ends with alphanumeric and we have quotes, try to close the last string + # This handles edge cases where patterns above didn't match + if result.strip() and re.search(r'[a-zA-Z0-9]$', result): + # Count quotes - if we have quotes and end with text, might be in a string + if quoteCount > 0: + lastQuotePos = result.rfind('"') + if lastQuotePos >= 0: + afterQuote = result[lastQuotePos + 1:] + # If after quote is text (not empty, not structural), close it + if afterQuote and re.search(r'^[a-zA-Z0-9\s]+$', afterQuote[:50]): # Check first 50 chars after quote + # Make sure we're not already closed (check if next char would be quote/comma/brace) + if not result.endswith('"') and not result.endswith(',') and not result.endswith('}') and not result.endswith(']'): + # Check if escaped + escapeCount = 0 + i = lastQuotePos - 1 + while i >= 0 and result[i] == '\\': + escapeCount += 1 + i -= 1 + if escapeCount % 2 == 0: + result += '"' # Count open/close brackets and braces openBraces = result.count('{')