From 52adedab4affcd4b576fa78f29972f8d2b989c7e Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Tue, 21 Oct 2025 17:12:13 +0200
Subject: [PATCH] Architecture for looping AI calls: tested and fixed
---
modules/connectors/connectorAiAnthropic.py | 25 +++++++++--
modules/connectors/connectorAiOpenai.py | 6 ++-
modules/connectors/connectorAiPerplexity.py | 10 ++---
modules/interfaces/interfaceAiObjects.py | 42 ++++++++++++-------
modules/services/serviceAi/subCoreAi.py | 29 ++++++++++---
.../mainServiceGeneration.py | 31 +++++++++++---
.../serviceGeneration/subPromptBuilder.py | 10 ++---
test_ai_behavior.py | 8 +---
8 files changed, 113 insertions(+), 48 deletions(-)
diff --git a/modules/connectors/connectorAiAnthropic.py b/modules/connectors/connectorAiAnthropic.py
index 85cf62f2..16b235e0 100644
--- a/modules/connectors/connectorAiAnthropic.py
+++ b/modules/connectors/connectorAiAnthropic.py
@@ -39,6 +39,26 @@ class AiAnthropic:
logger.info(f"Anthropic Connector initialized with model: {self.modelName}")
+ def _getMaxTokensForModel(self, maxTokens: int = None) -> int:
+ """Get appropriate max_tokens for the current model."""
+ if maxTokens is not None:
+ return maxTokens
+
+ # NOTE(review): 200000 is the model *context window*, not the output cap - Anthropic's max_tokens limits are far lower (e.g. 4096-8192) and the API rejects larger values; verify per model against the Messages API docs
+ model_name = self.modelName.lower()
+ if "claude-3-5-sonnet" in model_name:
+ return 200000 # Claude 3.5 Sonnet max
+ elif "claude-3-5-haiku" in model_name:
+ return 200000 # Claude 3.5 Haiku max
+ elif "claude-3-opus" in model_name:
+ return 200000 # Claude 3 Opus max
+ elif "claude-3-sonnet" in model_name:
+ return 200000 # Claude 3 Sonnet max
+ elif "claude-3-haiku" in model_name:
+ return 200000 # Claude 3 Haiku max
+ else:
+ return 200000 # Default to maximum for unknown models
+
async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> Dict[str, Any]:
"""
Calls the Anthropic API with the given messages.
@@ -106,9 +126,8 @@ class AiAnthropic:
"temperature": temperature,
}
- # Only add max_tokens if it's explicitly set
- if maxTokens is not None:
- payload["max_tokens"] = maxTokens
+ # Anthropic requires max_tokens - use model-appropriate value
+ payload["max_tokens"] = self._getMaxTokensForModel(maxTokens)
if system_prompt:
payload["system"] = system_prompt
diff --git a/modules/connectors/connectorAiOpenai.py b/modules/connectors/connectorAiOpenai.py
index c768888e..8aac34cd 100644
--- a/modules/connectors/connectorAiOpenai.py
+++ b/modules/connectors/connectorAiOpenai.py
@@ -80,7 +80,8 @@ class AiOpenai:
)
if response.status_code != 200:
- logger.error(f"OpenAI API error: {response.status_code} - {response.text}")
+ error_message = f"OpenAI API error: {response.status_code} - {response.text}"
+ logger.error(error_message)
# Check for context length exceeded error
if response.status_code == 400:
@@ -95,7 +96,8 @@ class AiOpenai:
except (ValueError, KeyError):
pass # If we can't parse the error, fall through to generic error
- raise HTTPException(status_code=500, detail="Error communicating with OpenAI API")
+ # Include the actual error details in the exception
+ raise HTTPException(status_code=500, detail=error_message)
responseJson = response.json()
content = responseJson["choices"][0]["message"]["content"]
diff --git a/modules/connectors/connectorAiPerplexity.py b/modules/connectors/connectorAiPerplexity.py
index b075a84d..4634cf1d 100644
--- a/modules/connectors/connectorAiPerplexity.py
+++ b/modules/connectors/connectorAiPerplexity.py
@@ -68,9 +68,8 @@ class AiPerplexity:
"temperature": temperature
}
- # Only add max_tokens if it's explicitly set
- if maxTokens is not None:
- payload["max_tokens"] = maxTokens
+ # Add max_tokens - default to 128000; NOTE(review): 128000 is the context window, confirm Perplexity accepts it as an output cap
+ payload["max_tokens"] = maxTokens if maxTokens is not None else 128000
response = await self.httpClient.post(
self.apiUrl,
@@ -135,9 +134,8 @@ class AiPerplexity:
"temperature": temperature
}
- # Only add max_tokens if it's explicitly set
- if maxTokens is not None:
- payload["max_tokens"] = maxTokens
+ # Add max_tokens - default to 128000; NOTE(review): 128000 is the context window, confirm Perplexity accepts it as an output cap
+ payload["max_tokens"] = maxTokens if maxTokens is not None else 128000
response = await self.httpClient.post(
self.apiUrl,
diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py
index 2e9b6b38..337a2878 100644
--- a/modules/interfaces/interfaceAiObjects.py
+++ b/modules/interfaces/interfaceAiObjects.py
@@ -525,25 +525,24 @@ class AiObjects:
try:
logger.info(f"Attempting AI call with model: {modelName} (attempt {attempt + 1}/{len(fallbackModels)})")
- # Store the selected model for token limit resolution
- self._lastSelectedModel = modelName
-
- # Replace placeholder in prompt and context if present
+ # Replace placeholder in prompt for this specific model
context_length = aiModels[modelName].get("contextLength", 0)
if context_length > 0:
token_limit = str(context_length)
else:
- token_limit = "4000" # Default for text generation
+ token_limit = "16000" # Default for text generation
- if "<TOKEN_LIMIT>" in prompt:
- prompt = prompt.replace("<TOKEN_LIMIT>", token_limit)
+ # Create a copy of the prompt for this model call
+ modelPrompt = prompt
+ if "<TOKEN_LIMIT>" in modelPrompt:
+ modelPrompt = modelPrompt.replace("<TOKEN_LIMIT>", token_limit)
logger.debug(f"Replaced <TOKEN_LIMIT> with {token_limit} for model {modelName}")
# Update messages array with replaced content
messages = []
if context:
messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
- messages.append({"role": "user", "content": prompt})
+ messages.append({"role": "user", "content": modelPrompt})
# Start timing
startTime = time.time()
@@ -562,19 +561,19 @@ class AiObjects:
content = response["choices"][0]["message"]["content"]
elif functionName == "callAiWithWebSearch":
# Perplexity web search function
- query = prompt
+ query = modelPrompt
if context:
- query = f"Context: {context}\n\nQuery: {prompt}"
+ query = f"Context: {context}\n\nQuery: {modelPrompt}"
content = await connector.callAiWithWebSearch(query)
elif functionName == "researchTopic":
# Perplexity research function
- content = await connector.researchTopic(prompt)
+ content = await connector.researchTopic(modelPrompt)
elif functionName == "answerQuestion":
# Perplexity question answering function
- content = await connector.answerQuestion(prompt, context)
+ content = await connector.answerQuestion(modelPrompt, context)
elif functionName == "getCurrentNews":
# Perplexity news function
- content = await connector.getCurrentNews(prompt)
+ content = await connector.getCurrentNews(modelPrompt)
else:
raise ValueError(f"Function {functionName} not supported for text generation")
@@ -599,7 +598,14 @@ class AiObjects:
except Exception as e:
lastError = e
- logger.warning(f"❌ AI call failed with model {modelName}: {str(e)}")
+ # Enhanced error logging with more details
+ error_details = str(e)
+ if hasattr(e, 'detail'):
+ error_details = f"{error_details} (detail: {e.detail})"
+ if hasattr(e, 'status_code'):
+ error_details = f"{error_details} (status: {e.status_code})"
+
+ logger.warning(f"❌ AI call failed with model {modelName}: {error_details}")
# If this is not the last model, try the next one
if attempt < len(fallbackModels) - 1:
@@ -611,7 +617,13 @@ class AiObjects:
break
# All fallback attempts failed - return error response
- errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {str(lastError)}"
+ last_error_details = str(lastError)
+ if hasattr(lastError, 'detail'):
+ last_error_details = f"{last_error_details} (detail: {lastError.detail})"
+ if hasattr(lastError, 'status_code'):
+ last_error_details = f"{last_error_details} (status: {lastError.status_code})"
+
+ errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {last_error_details}"
logger.error(errorMsg)
return AiCallResponse(
content=errorMsg,
diff --git a/modules/services/serviceAi/subCoreAi.py b/modules/services/serviceAi/subCoreAi.py
index 6afaafce..07caa972 100644
--- a/modules/services/serviceAi/subCoreAi.py
+++ b/modules/services/serviceAi/subCoreAi.py
@@ -15,14 +15,14 @@ CRITICAL LIMITS: <TOKEN_LIMIT> tokens total (reserve 20% for JSON structure)
MANDATORY RULES:
1. STOP at approximately 80% of limit to ensure valid JSON completion
2. Return ONLY raw JSON (no ```json blocks, no text before/after)
-3. ALWAYS include "continuation" field - this is MANDATORY
CONTINUATION REQUIREMENTS:
+Refer to the JSON object below for where to set the "continuation" information:
- If you can complete the full request: {"continuation": null}
- If you must stop early: {
"continuation": {
- "last_data_items": "exact last items you generated (copy them exactly)",
- "next_instruction": "Continue from [exact last item] - generate next items"
+ "last_data_items": "delivered last data for context (copy them)",
+ "next_instruction": "instruction for next data to deliver"
}
}
@@ -101,7 +101,7 @@ class SubCoreAi:
elif loopInstruction and iteration > 1:
continuationContent = self._buildContinuationContent(accumulatedContent, iteration)
if "LOOP_INSTRUCTION" in prompt:
- iterationPrompt = prompt.replace("LOOP_INSTRUCTION", f"{loopInstruction}\n\n{continuationContent}")
+ iterationPrompt = prompt.replace("LOOP_INSTRUCTION", f"{continuationContent}\n\n{loopInstruction}")
else:
iterationPrompt = prompt
else:
@@ -208,10 +208,29 @@ class SubCoreAi:
except (json.JSONDecodeError, KeyError, ValueError):
pass
+ # Extract specific attributes from continuation object
+ last_data_items = ""
+ next_instruction = ""
+
+ if continuation_description:
+ try:
+ if isinstance(continuation_description, str):
+ continuation_obj = json.loads(continuation_description)
+ else:
+ continuation_obj = continuation_description
+
+ if isinstance(continuation_obj, dict):
+ last_data_items = continuation_obj.get("last_data_items", "")
+ next_instruction = continuation_obj.get("next_instruction", "")
+ except (json.JSONDecodeError, TypeError):
+ pass
+
continuation_content = f"""CONTINUATION REQUEST (Iteration {iteration}):
You are continuing a previous response. DO NOT repeat any previous content.
-{f"CONTINUATION INSTRUCTIONS: {continuation_description}" if continuation_description else "No specific continuation instructions provided."}
+{f"Already delivered data: {last_data_items}" if last_data_items else "No previous data specified"}
+
+{f"Your task to deliver: {next_instruction}" if next_instruction else "No specific task provided"}
CRITICAL REQUIREMENTS:
- Start from the exact point specified in continuation instructions
diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py
index cbb5cb0f..5d4489ac 100644
--- a/modules/services/serviceGeneration/mainServiceGeneration.py
+++ b/modules/services/serviceGeneration/mainServiceGeneration.py
@@ -315,10 +315,31 @@ class GenerationService:
if not isinstance(extractedContent, dict):
raise ValueError("extractedContent must be a JSON dictionary")
- if "sections" not in extractedContent:
- raise ValueError("extractedContent must contain 'sections' field")
-
- # Remove extra debug file writes for render inputs per simplification
+ # Check if this is a multi-document structure
+ if "documents" in extractedContent and len(extractedContent["documents"]) > 1:
+ # Multiple documents - use multi-file renderer
+ generated_documents = await self._renderMultiFileReport(extractedContent, outputFormat, title, userPrompt, aiService)
+ # For multi-document, return the first document's content and mime type
+ if generated_documents:
+ return generated_documents[0]["content"], generated_documents[0]["mime_type"]
+ else:
+ raise ValueError("No documents could be rendered")
+ elif "documents" in extractedContent and len(extractedContent["documents"]) == 1:
+ # Single document in documents array - extract sections
+ single_doc = extractedContent["documents"][0]
+ if "sections" not in single_doc:
+ raise ValueError("Document must contain 'sections' field")
+ # Create content for single document renderer
+ contentToRender = {
+ "sections": single_doc["sections"],
+ "metadata": extractedContent.get("metadata", {}),
+ "continuation": extractedContent.get("continuation", None)
+ }
+ elif "sections" in extractedContent:
+ # Direct sections format
+ contentToRender = extractedContent
+ else:
+ raise ValueError("extractedContent must contain 'sections' field or 'documents' array")
# Get the appropriate renderer for the format
renderer = self._getFormatRenderer(outputFormat)
@@ -326,7 +347,7 @@ class GenerationService:
raise ValueError(f"Unsupported output format: {outputFormat}")
# Render the JSON content directly (AI generation handled by main service)
- renderedContent, mimeType = await renderer.render(extractedContent, title, userPrompt, aiService)
+ renderedContent, mimeType = await renderer.render(contentToRender, title, userPrompt, aiService)
# Remove extra debug output file writes
logger.info(f"Successfully rendered JSON report to {outputFormat} format: {len(renderedContent)} characters")
diff --git a/modules/services/serviceGeneration/subPromptBuilder.py b/modules/services/serviceGeneration/subPromptBuilder.py
index 8f4afdb4..d326772c 100644
--- a/modules/services/serviceGeneration/subPromptBuilder.py
+++ b/modules/services/serviceGeneration/subPromptBuilder.py
@@ -19,7 +19,6 @@ logger = logging.getLogger(__name__)
# Centralized JSON structure template for document generation
JSON_STRUCTURE_TEMPLATE = """{
- "continuation": null,
"metadata": {
"title": "{{DOCUMENT_TITLE}}",
"splitStrategy": "single_document",
@@ -44,7 +43,8 @@ JSON_STRUCTURE_TEMPLATE = """{
"order": 1
}
]
- }]
+ }],
+ "continuation": null
}"""
async def buildAdaptiveExtractionPrompt(
@@ -185,17 +185,17 @@ async def buildGenerationPrompt(
# Always use the proper generation prompt template with LOOP_INSTRUCTION
result = f"""Generate structured JSON content for document creation.
-USER REQUEST: "{userPrompt}"
+USER CONTEXT: "{userPrompt}"
DOCUMENT TITLE: "{title}"
TARGET FORMAT: {outputFormat}
+LOOP_INSTRUCTION
+
RULES:
- Follow the template structure below exactly; emit only one JSON object in the response
- Fill sections with content based on the user request
- Use appropriate content_type
-LOOP_INSTRUCTION
-
Return ONLY valid JSON matching this structure (template below). Do not include any prose before/after. Use this as the single template reference for your output:
{json_template}
"""
diff --git a/test_ai_behavior.py b/test_ai_behavior.py
index cd6d1b32..b5951d06 100644
--- a/test_ai_behavior.py
+++ b/test_ai_behavior.py
@@ -334,13 +334,7 @@ class AIBehaviorTester:
PROMPT_SCENARIOS = {
"Prime Numbers Test": """Generate the first 5000 prime numbers in a table with 10 columns per row.""",
- "Fibonacci Sequence": """Generate the first 1000 Fibonacci numbers in a table with 5 columns per row.""",
-
- "Multiplication Table": """Generate multiplication tables from 1 to 50, each table with 10 columns per row.""",
-
- "Random Data": """Generate 2000 random numbers between 1 and 10000 in a table with 8 columns per row.""",
-
- "Text Content": """Generate a comprehensive guide about machine learning with 50 sections, each containing detailed explanations and examples."""
+ "Text Content": """Generate a comprehensive guide about how to bring a new product to market in 10 sections, each containing detailed explanations and examples."""
}
async def main():