architecture for looping ai calls tested and fixed

2025-10-21 17:12:13 +02:00 · 2025-10-21 17:12:13 +02:00 · 52adedab4a
commit 52adedab4a
parent 1badb8468a
8 changed files with 113 additions and 48 deletions
--- a/modules/connectors/connectorAiAnthropic.py
+++ b/modules/connectors/connectorAiAnthropic.py
@ -39,6 +39,26 @@ class AiAnthropic:
        
        logger.info(f"Anthropic Connector initialized with model: {self.modelName}")
    
+    def _getMaxTokensForModel(self, maxTokens: int = None) -> int:
+        """Get appropriate max_tokens (output-token cap) for the current model.
+
+        An explicit maxTokens wins. Defaults are the model's maximum *output*
+        tokens, not its 200k context window; the API rejects larger values.
+        """
+        if maxTokens is not None:
+            return maxTokens
+        
+        # Model-specific output limits based on Anthropic's documented maximums
+        model_name = self.modelName.lower()
+        if "claude-3-5-sonnet" in model_name or "claude-3-5-haiku" in model_name:
+            return 8192  # Claude 3.5 Sonnet / Haiku output maximum
+        elif ("claude-3-opus" in model_name or "claude-3-sonnet" in model_name
+                or "claude-3-haiku" in model_name):
+            return 4096  # Claude 3 Opus / Sonnet / Haiku output maximum
+        else:
+            # Unknown model: stay conservative so the request is not rejected
+            return 4096
+    
    async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> Dict[str, Any]:
        """
        Calls the Anthropic API with the given messages.
@ -106,9 +126,8 @@ class AiAnthropic:
                "temperature": temperature,
            }
            
-            # Only add max_tokens if it's explicitly set
-            if maxTokens is not None:
-                payload["max_tokens"] = maxTokens
+            # Anthropic requires max_tokens - use model-appropriate value
+            payload["max_tokens"] = self._getMaxTokensForModel(maxTokens)
            if system_prompt:
                payload["system"] = system_prompt

--- a/modules/connectors/connectorAiOpenai.py
+++ b/modules/connectors/connectorAiOpenai.py
@ -80,7 +80,8 @@ class AiOpenai:
            )
            
            if response.status_code != 200:
-                logger.error(f"OpenAI API error: {response.status_code} - {response.text}")
+                error_message = f"OpenAI API error: {response.status_code} - {response.text}"
+                logger.error(error_message)
                
                # Check for context length exceeded error
                if response.status_code == 400:
@ -95,7 +96,8 @@ class AiOpenai:
                    except (ValueError, KeyError):
                        pass  # If we can't parse the error, fall through to generic error
                
-                raise HTTPException(status_code=500, detail="Error communicating with OpenAI API")
+                # Include the actual error details in the exception
+                raise HTTPException(status_code=500, detail=error_message)
            
            responseJson = response.json()
            content = responseJson["choices"][0]["message"]["content"]
--- a/modules/connectors/connectorAiPerplexity.py
+++ b/modules/connectors/connectorAiPerplexity.py
@ -68,9 +68,8 @@ class AiPerplexity:
                "temperature": temperature
            }
            
-            # Only add max_tokens if it's explicitly set
-            if maxTokens is not None:
-                payload["max_tokens"] = maxTokens
+            # Add max_tokens - use provided value or default to 128000 (Perplexity's typical limit)
+            payload["max_tokens"] = maxTokens if maxTokens is not None else 128000
            
            response = await self.httpClient.post(
                self.apiUrl,
@ -135,9 +134,8 @@ class AiPerplexity:
                "temperature": temperature
            }
            
-            # Only add max_tokens if it's explicitly set
-            if maxTokens is not None:
-                payload["max_tokens"] = maxTokens
+            # Add max_tokens - use provided value or default to 128000 (Perplexity's typical limit)
+            payload["max_tokens"] = maxTokens if maxTokens is not None else 128000
            
            response = await self.httpClient.post(
                self.apiUrl,
--- a/modules/interfaces/interfaceAiObjects.py
+++ b/modules/interfaces/interfaceAiObjects.py
@ -525,25 +525,24 @@ class AiObjects:
            try:
                logger.info(f"Attempting AI call with model: {modelName} (attempt {attempt + 1}/{len(fallbackModels)})")
                
-                # Store the selected model for token limit resolution
-                self._lastSelectedModel = modelName
-                
-                # Replace <TOKEN_LIMIT> placeholder in prompt and context if present
+                # Replace <TOKEN_LIMIT> placeholder in prompt for this specific model
                context_length = aiModels[modelName].get("contextLength", 0)
                if context_length > 0:
                    token_limit = str(context_length)
                else:
-                    token_limit = "4000"  # Default for text generation
+                    token_limit = "16000"  # Default for text generation
                
-                if "<TOKEN_LIMIT>" in prompt:
-                    prompt = prompt.replace("<TOKEN_LIMIT>", token_limit)
+                # Create a copy of the prompt for this model call
+                modelPrompt = prompt
+                if "<TOKEN_LIMIT>" in modelPrompt:
+                    modelPrompt = modelPrompt.replace("<TOKEN_LIMIT>", token_limit)
                    logger.debug(f"Replaced <TOKEN_LIMIT> with {token_limit} for model {modelName}")
                
                # Update messages array with replaced content
                messages = []
                if context:
                    messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
-                messages.append({"role": "user", "content": prompt})
+                messages.append({"role": "user", "content": modelPrompt})
                
                # Start timing
                startTime = time.time()
@ -562,19 +561,19 @@ class AiObjects:
                        content = response["choices"][0]["message"]["content"]
                elif functionName == "callAiWithWebSearch":
                    # Perplexity web search function
-                    query = prompt
+                    query = modelPrompt
                    if context:
-                        query = f"Context: {context}\n\nQuery: {prompt}"
+                        query = f"Context: {context}\n\nQuery: {modelPrompt}"
                    content = await connector.callAiWithWebSearch(query)
                elif functionName == "researchTopic":
                    # Perplexity research function
-                    content = await connector.researchTopic(prompt)
+                    content = await connector.researchTopic(modelPrompt)
                elif functionName == "answerQuestion":
                    # Perplexity question answering function
-                    content = await connector.answerQuestion(prompt, context)
+                    content = await connector.answerQuestion(modelPrompt, context)
                elif functionName == "getCurrentNews":
                    # Perplexity news function
-                    content = await connector.getCurrentNews(prompt)
+                    content = await connector.getCurrentNews(modelPrompt)
                else:
                    raise ValueError(f"Function {functionName} not supported for text generation")

@ -599,7 +598,14 @@ class AiObjects:
                
            except Exception as e:
                lastError = e
-                logger.warning(f"❌ AI call failed with model {modelName}: {str(e)}")
+                # Enhanced error logging with more details
+                error_details = str(e)
+                if hasattr(e, 'detail'):
+                    error_details = f"{error_details} (detail: {e.detail})"
+                if hasattr(e, 'status_code'):
+                    error_details = f"{error_details} (status: {e.status_code})"
+                
+                logger.warning(f"❌ AI call failed with model {modelName}: {error_details}")
                
                # If this is not the last model, try the next one
                if attempt < len(fallbackModels) - 1:
@ -611,7 +617,13 @@ class AiObjects:
                    break

        # All fallback attempts failed - return error response
-        errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {str(lastError)}"
+        last_error_details = str(lastError)
+        if hasattr(lastError, 'detail'):
+            last_error_details = f"{last_error_details} (detail: {lastError.detail})"
+        if hasattr(lastError, 'status_code'):
+            last_error_details = f"{last_error_details} (status: {lastError.status_code})"
+            
+        errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {last_error_details}"
        logger.error(errorMsg)
        return AiCallResponse(
            content=errorMsg,
--- a/modules/services/serviceAi/subCoreAi.py
+++ b/modules/services/serviceAi/subCoreAi.py
@ -15,14 +15,14 @@ CRITICAL LIMITS: <TOKEN_LIMIT> tokens total (reserve 20% for JSON structure)
 MANDATORY RULES:
 1. STOP at approximately 80% of limit to ensure valid JSON completion
 2. Return ONLY raw JSON (no ```json blocks, no text before/after)
-3. ALWAYS include "continuation" field - this is MANDATORY

 CONTINUATION REQUIREMENTS:
+Refer to the json object below where to set the "continuation" information:
 - If you can complete the full request: {"continuation": null}
 - If you must stop early: {
  "continuation": {
-    "last_data_items": "exact last items you generated (copy them exactly)",
-    "next_instruction": "Continue from [exact last item] - generate next items"
+    "last_data_items": "delivered last data for context (copy them)",
+    "next_instruction": "instruction for next data to deliver"
  }
 }

@ -101,7 +101,7 @@ class SubCoreAi:
            elif loopInstruction and iteration > 1:
                continuationContent = self._buildContinuationContent(accumulatedContent, iteration)
                if "LOOP_INSTRUCTION" in prompt:
-                    iterationPrompt = prompt.replace("LOOP_INSTRUCTION", f"{loopInstruction}\n\n{continuationContent}")
+                    iterationPrompt = prompt.replace("LOOP_INSTRUCTION", f"{continuationContent}\n\n{loopInstruction}")
                else:
                    iterationPrompt = prompt
            else:
@ -208,10 +208,29 @@ class SubCoreAi:
            except (json.JSONDecodeError, KeyError, ValueError):
                pass
        
+        # Extract specific attributes from continuation object
+        last_data_items = ""
+        next_instruction = ""
+        
+        if continuation_description:
+            try:
+                if isinstance(continuation_description, str):
+                    continuation_obj = json.loads(continuation_description)
+                else:
+                    continuation_obj = continuation_description
+                
+                if isinstance(continuation_obj, dict):
+                    last_data_items = continuation_obj.get("last_data_items", "")
+                    next_instruction = continuation_obj.get("next_instruction", "")
+            except (json.JSONDecodeError, TypeError):
+                pass
+        
        continuation_content = f"""CONTINUATION REQUEST (Iteration {iteration}):
 You are continuing a previous response. DO NOT repeat any previous content.

-{f"CONTINUATION INSTRUCTIONS: {continuation_description}" if continuation_description else "No specific continuation instructions provided."}
+{f"Already delivered data: {last_data_items}" if last_data_items else "No previous data specified"}
+
+{f"Your task to deliver: {next_instruction}" if next_instruction else "No specific task provided"}

 CRITICAL REQUIREMENTS:
 - Start from the exact point specified in continuation instructions
--- a/modules/services/serviceGeneration/mainServiceGeneration.py
+++ b/modules/services/serviceGeneration/mainServiceGeneration.py
@ -315,10 +315,31 @@ class GenerationService:
            if not isinstance(extractedContent, dict):
                raise ValueError("extractedContent must be a JSON dictionary")
            
-            if "sections" not in extractedContent:
-                raise ValueError("extractedContent must contain 'sections' field")
-
-            # Remove extra debug file writes for render inputs per simplification
+            # Check if this is a multi-document structure
+            if "documents" in extractedContent and len(extractedContent["documents"]) > 1:
+                # Multiple documents - use multi-file renderer
+                generated_documents = await self._renderMultiFileReport(extractedContent, outputFormat, title, userPrompt, aiService)
+                # For multi-document, return the first document's content and mime type
+                if generated_documents:
+                    return generated_documents[0]["content"], generated_documents[0]["mime_type"]
+                else:
+                    raise ValueError("No documents could be rendered")
+            elif "documents" in extractedContent and len(extractedContent["documents"]) == 1:
+                # Single document in documents array - extract sections
+                single_doc = extractedContent["documents"][0]
+                if "sections" not in single_doc:
+                    raise ValueError("Document must contain 'sections' field")
+                # Create content for single document renderer
+                contentToRender = {
+                    "sections": single_doc["sections"],
+                    "metadata": extractedContent.get("metadata", {}),
+                    "continuation": extractedContent.get("continuation", None)
+                }
+            elif "sections" in extractedContent:
+                # Direct sections format
+                contentToRender = extractedContent
+            else:
+                raise ValueError("extractedContent must contain 'sections' field or 'documents' array")

            # Get the appropriate renderer for the format
            renderer = self._getFormatRenderer(outputFormat)
@ -326,7 +347,7 @@ class GenerationService:
                raise ValueError(f"Unsupported output format: {outputFormat}")
            
            # Render the JSON content directly (AI generation handled by main service)
-            renderedContent, mimeType = await renderer.render(extractedContent, title, userPrompt, aiService)
+            renderedContent, mimeType = await renderer.render(contentToRender, title, userPrompt, aiService)
            # Remove extra debug output file writes
            
            logger.info(f"Successfully rendered JSON report to {outputFormat} format: {len(renderedContent)} characters")
--- a/modules/services/serviceGeneration/subPromptBuilder.py
+++ b/modules/services/serviceGeneration/subPromptBuilder.py
@ -19,7 +19,6 @@ logger = logging.getLogger(__name__)

 # Centralized JSON structure template for document generation
 JSON_STRUCTURE_TEMPLATE = """{
-  "continuation": null,
  "metadata": {
    "title": "{{DOCUMENT_TITLE}}",
    "splitStrategy": "single_document",
@ -44,7 +43,8 @@ JSON_STRUCTURE_TEMPLATE = """{
        "order": 1
      }
    ]
-  }]
+  }],
+  "continuation": null
 }"""

 async def buildAdaptiveExtractionPrompt(
@ -185,17 +185,17 @@ async def buildGenerationPrompt(
    # Always use the proper generation prompt template with LOOP_INSTRUCTION
    result = f"""Generate structured JSON content for document creation.

-USER REQUEST: "{userPrompt}"
+USER CONTEXT: "{userPrompt}"
 DOCUMENT TITLE: "{title}"
 TARGET FORMAT: {outputFormat}

+LOOP_INSTRUCTION
+
 RULES:
 - Follow the template structure below exactly; emit only one JSON object in the response
 - Fill sections with content based on the user request
 - Use appropriate content_type

-LOOP_INSTRUCTION
-
 Return ONLY valid JSON matching this structure (template below). Do not include any prose before/after. Use this as the single template reference for your output:
 {json_template}
 """
--- a/test_ai_behavior.py
+++ b/test_ai_behavior.py
@ -334,13 +334,7 @@ class AIBehaviorTester:
 PROMPT_SCENARIOS = {
    "Prime Numbers Test": """Generate the first 5000 prime numbers in a table with 10 columns per row.""",

-    "Fibonacci Sequence": """Generate the first 1000 Fibonacci numbers in a table with 5 columns per row.""",
-
-    "Multiplication Table": """Generate multiplication tables from 1 to 50, each table with 10 columns per row.""",
-
-    "Random Data": """Generate 2000 random numbers between 1 and 10000 in a table with 8 columns per row.""",
-
-    "Text Content": """Generate a comprehensive guide about machine learning with 50 sections, each containing detailed explanations and examples."""
+    "Text Content": """Generate a comprehensive guide about how to bring a new product to market in 10 sections, each containing detailed explanations and examples."""
 }

 async def main():