From 262f3296bf83408f08e5fbdc539a8ca37de77991 Mon Sep 17 00:00:00 2001
From: ValueOn AG <p.motsch@valueon.ch>
Date: Tue, 23 Dec 2025 00:50:00 +0100
Subject: [PATCH] Fix PPTX renderer init and harden truncated-JSON recovery

---
 .../renderers/rendererPptx.py                 |  4 +-
 .../serviceGeneration/subContentGenerator.py  | 33 +++++++--
 modules/shared/jsonUtils.py                   | 69 ++++++++++++++-----
 3 files changed, 81 insertions(+), 25 deletions(-)

diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py
index f7b65eb1..6b1b9e18 100644
--- a/modules/services/serviceGeneration/renderers/rendererPptx.py
+++ b/modules/services/serviceGeneration/renderers/rendererPptx.py
@@ -12,8 +12,8 @@ logger = logging.getLogger(__name__)
 class RendererPptx(BaseRenderer):
     """Renderer for PowerPoint (.pptx) files using python-pptx library."""
     
-    def __init__(self):
-        super().__init__()
+    def __init__(self, services=None):
+        super().__init__(services=services)
         self.supportedFormats = ["pptx", "ppt"]
         self.outputMimeType = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
     
diff --git a/modules/services/serviceGeneration/subContentGenerator.py b/modules/services/serviceGeneration/subContentGenerator.py
index 1b1f64a9..0f75f595 100644
--- a/modules/services/serviceGeneration/subContentGenerator.py
+++ b/modules/services/serviceGeneration/subContentGenerator.py
@@ -475,11 +475,34 @@ class ContentGenerator:
                         except (json.JSONDecodeError, ValueError) as recoveryError:
                             logger.error(f"JSON recovery failed: {str(recoveryError)}")
                             logger.error(f"Recovered JSON (first 500 chars): {recoveredJson[:500] if 'recoveredJson' in locals() else 'N/A'}")
-                            # Check if raw response might be truncated
-                            if len(rawContent) <= len(extractedJson) + 100:  # Raw content is similar length to extracted
-                                logger.warning(f"Raw AI response may be truncated (length: {len(rawContent)} chars)")
-                                logger.warning(f"Consider increasing max_tokens for AI calls or checking token limits")
-                            raise ValueError(f"Invalid JSON in AI response (truncated?): {str(e)}")
+                            logger.error(f"Recovered JSON (last 200 chars): {recoveredJson[-200:] if 'recoveredJson' in locals() else 'N/A'}")
+                            
+                            # Last resort: try to extract partial content and create minimal valid JSON
+                            try:
+                                # Try to extract text content before the truncation point
+                                import re
+                                # Look for text field that might be partially complete
+                                textMatch = re.search(r'"text"\s*:\s*"([^"]*)', extractedJson)
+                                if textMatch:
+                                    partialText = textMatch.group(1)
+                                    # Create minimal valid JSON with truncated text marked
+                                    elementsData = {
+                                        "elements": [{
+                                            "text": partialText + "... [Content truncated due to token limit]"
+                                        }]
+                                    }
+                                    logger.warning(f"Created minimal JSON structure with truncated text for section {section.get('id')}")
+                                else:
+                                    # If no text found, create empty structure
+                                    elementsData = {"elements": []}
+                                    logger.warning(f"Created empty JSON structure for section {section.get('id')} due to recovery failure")
+                            except Exception as fallbackError:
+                                logger.error(f"Fallback recovery also failed: {str(fallbackError)}")
+                                # Check if raw response might be truncated
+                                if len(rawContent) <= len(extractedJson) + 100:  # Raw content is similar length to extracted
+                                    logger.warning(f"Raw AI response may be truncated (length: {len(rawContent)} chars)")
+                                    logger.warning(f"Consider increasing max_tokens for AI calls or checking token limits")
+                                raise ValueError(f"Invalid JSON in AI response (truncated?): {str(e)}")
                     else:
                         raise ValueError(f"Invalid JSON in AI response: {str(e)}")
                 else:
diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py
index 907e84a6..f2678b63 100644
--- a/modules/shared/jsonUtils.py
+++ b/modules/shared/jsonUtils.py
@@ -201,6 +201,7 @@ def closeJsonStructures(text: str) -> str:
     # Look for patterns like: "value" or "value\n (unterminated)
     # Check if we're in the middle of a string value when text ends
     if result.strip():
+        import re
         # Count quotes - if odd number, we have an unterminated string
         quoteCount = result.count('"')
         if quoteCount % 2 == 1:
@@ -220,30 +221,62 @@ def closeJsonStructures(text: str) -> str:
                     result += '"'
         else:
             # Even number of quotes, but might still be in middle of string if cut off
-            # Check if text ends with a colon followed by a quote (start of string value)
-            # or ends with text that looks like it's inside a string (no closing quote after last quote)
-            import re
-            # Pattern: ends with "text" where text doesn't end with quote
-            # Look for pattern like: "text": "incomplete
+            # More robust detection: check if text ends with alphanumeric/text chars after a quote
+            # This handles cases like: "text": "value cut off mid-word
+            
+            # Pattern 1: ends with colon + quote + text (no closing quote)
             if re.search(r':\s*"[^"]*$', result):
                 # We're in the middle of a string value, close it
                 result += '"'
-            # Also check if we end with text after a quote (like "key": "value but cut off)
-            elif re.search(r'"\s*:\s*"[^"]*[^",}\]]$', result):
-                # Check if last quote is followed by non-quote, non-structural chars
+            else:
+                # Pattern 2: find last quote and check what comes after
                 lastQuotePos = result.rfind('"')
                 if lastQuotePos >= 0:
                     afterQuote = result[lastQuotePos + 1:]
-                    # If after quote we have text but no closing quote, comma, or brace, we're in a string
-                    if afterQuote and not re.match(r'^\s*[,}\]\]]', afterQuote):
-                        # Check if it's escaped
-                        escapeCount = 0
-                        i = lastQuotePos - 1
-                        while i >= 0 and result[i] == '\\':
-                            escapeCount += 1
-                            i -= 1
-                        if escapeCount % 2 == 0:
-                            result += '"'
+                    # If after quote we have text (alphanumeric/whitespace) but no closing quote/comma/brace
+                    # and the text doesn't end with structural characters, we're likely in a string
+                    if afterQuote:
+                        # Check if it looks like we're in a string value (has text, no closing quote)
+                        # Pattern: ends with letters/numbers/spaces, not ending with quote, comma, }, or ]
+                        if re.search(r'[a-zA-Z0-9\s]$', result) and not re.match(r'^\s*[,}\]\]]', afterQuote):
+                            # Check if it's escaped
+                            escapeCount = 0
+                            i = lastQuotePos - 1
+                            while i >= 0 and result[i] == '\\':
+                                escapeCount += 1
+                                i -= 1
+                            if escapeCount % 2 == 0:
+                                # Verify we're actually in a string context (not in a key name)
+                                # Look backwards to see if we have ": " before the quote (value context)
+                                beforeQuote = result[:lastQuotePos]
+                                # Check whether the 50 chars before the last quote contain a value opener (colon then quote) or an array-element opener ("[" then quote)
+                                if re.search(r':\s*"', beforeQuote[-50:]) or re.search(r'\[\s*"', beforeQuote[-50:]):
+                                    result += '"'
+                                # Also check if text ends with alphanumeric (likely cut off mid-word)
+                                elif re.search(r'[a-zA-Z]$', result):
+                                    # If we end with a letter and have a quote before it, likely in a string
+                                    result += '"'
+            
+            # Final fallback: if text ends with alphanumeric and we have quotes, try to close the last string
+            # This handles edge cases where patterns above didn't match
+            if result.strip() and re.search(r'[a-zA-Z0-9]$', result):
+                # Count quotes - if we have quotes and end with text, might be in a string
+                if quoteCount > 0:
+                    lastQuotePos = result.rfind('"')
+                    if lastQuotePos >= 0:
+                        afterQuote = result[lastQuotePos + 1:]
+                        # If after quote is text (not empty, not structural), close it
+                        if afterQuote and re.search(r'^[a-zA-Z0-9\s]+$', afterQuote[:50]):  # Check first 50 chars after quote
+                            # Make sure the text is not already closed (it must not end with a quote, comma, brace, or bracket)
+                            if not result.endswith('"') and not result.endswith(',') and not result.endswith('}') and not result.endswith(']'):
+                                # Check if escaped
+                                escapeCount = 0
+                                i = lastQuotePos - 1
+                                while i >= 0 and result[i] == '\\':
+                                    escapeCount += 1
+                                    i -= 1
+                                if escapeCount % 2 == 0:
+                                    result += '"'
     
     # Count open/close brackets and braces
     openBraces = result.count('{')