architecture for looping ai calls tested and fixed

This commit is contained in:
ValueOn AG 2025-10-21 17:12:13 +02:00
parent 1badb8468a
commit 52adedab4a
8 changed files with 113 additions and 48 deletions

View file

@ -39,6 +39,26 @@ class AiAnthropic:
logger.info(f"Anthropic Connector initialized with model: {self.modelName}") logger.info(f"Anthropic Connector initialized with model: {self.modelName}")
def _getMaxTokensForModel(self, maxTokens: int = None) -> int:
    """Return the ``max_tokens`` value to send for the current model.

    Anthropic's Messages API requires ``max_tokens`` and rejects values
    above the model's *output* token limit. That limit is much smaller
    than the 200k-token context window, so the context size must not be
    used here.

    Args:
        maxTokens: Explicit caller-provided limit. When not ``None`` it is
            returned unchanged.

    Returns:
        The caller-provided limit, or the output-token cap for the model
        family matched against ``self.modelName`` (case-insensitive
        substring match), or a conservative default for unknown models.
    """
    if maxTokens is not None:
        return maxTokens

    # Output-token caps per model family (not context-window sizes).
    # More specific names come first so "claude-3-5-*" is matched before
    # the shorter "claude-3-*" families.
    outputLimits = (
        ("claude-3-5-sonnet", 8192),
        ("claude-3-5-haiku", 8192),
        ("claude-3-opus", 4096),
        ("claude-3-sonnet", 4096),
        ("claude-3-haiku", 4096),
    )
    # Smallest cap among the known families; chosen so that an unknown
    # model is not sent a value above its limit.
    defaultLimit = 4096

    # Tolerate a missing model name instead of failing on None.lower().
    modelName = (self.modelName or "").lower()
    for family, limit in outputLimits:
        if family in modelName:
            return limit
    return defaultLimit
async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> Dict[str, Any]: async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> Dict[str, Any]:
""" """
Calls the Anthropic API with the given messages. Calls the Anthropic API with the given messages.
@ -106,9 +126,8 @@ class AiAnthropic:
"temperature": temperature, "temperature": temperature,
} }
# Only add max_tokens if it's explicitly set # Anthropic requires max_tokens - use model-appropriate value
if maxTokens is not None: payload["max_tokens"] = self._getMaxTokensForModel(maxTokens)
payload["max_tokens"] = maxTokens
if system_prompt: if system_prompt:
payload["system"] = system_prompt payload["system"] = system_prompt

View file

@ -80,7 +80,8 @@ class AiOpenai:
) )
if response.status_code != 200: if response.status_code != 200:
logger.error(f"OpenAI API error: {response.status_code} - {response.text}") error_message = f"OpenAI API error: {response.status_code} - {response.text}"
logger.error(error_message)
# Check for context length exceeded error # Check for context length exceeded error
if response.status_code == 400: if response.status_code == 400:
@ -95,7 +96,8 @@ class AiOpenai:
except (ValueError, KeyError): except (ValueError, KeyError):
pass # If we can't parse the error, fall through to generic error pass # If we can't parse the error, fall through to generic error
raise HTTPException(status_code=500, detail="Error communicating with OpenAI API") # Include the actual error details in the exception
raise HTTPException(status_code=500, detail=error_message)
responseJson = response.json() responseJson = response.json()
content = responseJson["choices"][0]["message"]["content"] content = responseJson["choices"][0]["message"]["content"]

View file

@ -68,9 +68,8 @@ class AiPerplexity:
"temperature": temperature "temperature": temperature
} }
# Only add max_tokens if it's explicitly set # Add max_tokens - use provided value or default to 128000 (Perplexity's typical limit)
if maxTokens is not None: payload["max_tokens"] = maxTokens if maxTokens is not None else 128000
payload["max_tokens"] = maxTokens
response = await self.httpClient.post( response = await self.httpClient.post(
self.apiUrl, self.apiUrl,
@ -135,9 +134,8 @@ class AiPerplexity:
"temperature": temperature "temperature": temperature
} }
# Only add max_tokens if it's explicitly set # Add max_tokens - use provided value or default to 128000 (Perplexity's typical limit)
if maxTokens is not None: payload["max_tokens"] = maxTokens if maxTokens is not None else 128000
payload["max_tokens"] = maxTokens
response = await self.httpClient.post( response = await self.httpClient.post(
self.apiUrl, self.apiUrl,

View file

@ -525,25 +525,24 @@ class AiObjects:
try: try:
logger.info(f"Attempting AI call with model: {modelName} (attempt {attempt + 1}/{len(fallbackModels)})") logger.info(f"Attempting AI call with model: {modelName} (attempt {attempt + 1}/{len(fallbackModels)})")
# Store the selected model for token limit resolution # Replace <TOKEN_LIMIT> placeholder in prompt for this specific model
self._lastSelectedModel = modelName
# Replace <TOKEN_LIMIT> placeholder in prompt and context if present
context_length = aiModels[modelName].get("contextLength", 0) context_length = aiModels[modelName].get("contextLength", 0)
if context_length > 0: if context_length > 0:
token_limit = str(context_length) token_limit = str(context_length)
else: else:
token_limit = "4000" # Default for text generation token_limit = "16000" # Default for text generation
if "<TOKEN_LIMIT>" in prompt: # Create a copy of the prompt for this model call
prompt = prompt.replace("<TOKEN_LIMIT>", token_limit) modelPrompt = prompt
if "<TOKEN_LIMIT>" in modelPrompt:
modelPrompt = modelPrompt.replace("<TOKEN_LIMIT>", token_limit)
logger.debug(f"Replaced <TOKEN_LIMIT> with {token_limit} for model {modelName}") logger.debug(f"Replaced <TOKEN_LIMIT> with {token_limit} for model {modelName}")
# Update messages array with replaced content # Update messages array with replaced content
messages = [] messages = []
if context: if context:
messages.append({"role": "system", "content": f"Context from documents:\n{context}"}) messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
messages.append({"role": "user", "content": prompt}) messages.append({"role": "user", "content": modelPrompt})
# Start timing # Start timing
startTime = time.time() startTime = time.time()
@ -562,19 +561,19 @@ class AiObjects:
content = response["choices"][0]["message"]["content"] content = response["choices"][0]["message"]["content"]
elif functionName == "callAiWithWebSearch": elif functionName == "callAiWithWebSearch":
# Perplexity web search function # Perplexity web search function
query = prompt query = modelPrompt
if context: if context:
query = f"Context: {context}\n\nQuery: {prompt}" query = f"Context: {context}\n\nQuery: {modelPrompt}"
content = await connector.callAiWithWebSearch(query) content = await connector.callAiWithWebSearch(query)
elif functionName == "researchTopic": elif functionName == "researchTopic":
# Perplexity research function # Perplexity research function
content = await connector.researchTopic(prompt) content = await connector.researchTopic(modelPrompt)
elif functionName == "answerQuestion": elif functionName == "answerQuestion":
# Perplexity question answering function # Perplexity question answering function
content = await connector.answerQuestion(prompt, context) content = await connector.answerQuestion(modelPrompt, context)
elif functionName == "getCurrentNews": elif functionName == "getCurrentNews":
# Perplexity news function # Perplexity news function
content = await connector.getCurrentNews(prompt) content = await connector.getCurrentNews(modelPrompt)
else: else:
raise ValueError(f"Function {functionName} not supported for text generation") raise ValueError(f"Function {functionName} not supported for text generation")
@ -599,7 +598,14 @@ class AiObjects:
except Exception as e: except Exception as e:
lastError = e lastError = e
logger.warning(f"❌ AI call failed with model {modelName}: {str(e)}") # Enhanced error logging with more details
error_details = str(e)
if hasattr(e, 'detail'):
error_details = f"{error_details} (detail: {e.detail})"
if hasattr(e, 'status_code'):
error_details = f"{error_details} (status: {e.status_code})"
logger.warning(f"❌ AI call failed with model {modelName}: {error_details}")
# If this is not the last model, try the next one # If this is not the last model, try the next one
if attempt < len(fallbackModels) - 1: if attempt < len(fallbackModels) - 1:
@ -611,7 +617,13 @@ class AiObjects:
break break
# All fallback attempts failed - return error response # All fallback attempts failed - return error response
errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {str(lastError)}" last_error_details = str(lastError)
if hasattr(lastError, 'detail'):
last_error_details = f"{last_error_details} (detail: {lastError.detail})"
if hasattr(lastError, 'status_code'):
last_error_details = f"{last_error_details} (status: {lastError.status_code})"
errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {last_error_details}"
logger.error(errorMsg) logger.error(errorMsg)
return AiCallResponse( return AiCallResponse(
content=errorMsg, content=errorMsg,

View file

@ -15,14 +15,14 @@ CRITICAL LIMITS: <TOKEN_LIMIT> tokens total (reserve 20% for JSON structure)
MANDATORY RULES: MANDATORY RULES:
1. STOP at approximately 80% of limit to ensure valid JSON completion 1. STOP at approximately 80% of limit to ensure valid JSON completion
2. Return ONLY raw JSON (no ```json blocks, no text before/after) 2. Return ONLY raw JSON (no ```json blocks, no text before/after)
3. ALWAYS include "continuation" field - this is MANDATORY
CONTINUATION REQUIREMENTS: CONTINUATION REQUIREMENTS:
Refer to the json object below where to set the "continuation" information:
- If you can complete the full request: {"continuation": null} - If you can complete the full request: {"continuation": null}
- If you must stop early: { - If you must stop early: {
"continuation": { "continuation": {
"last_data_items": "exact last items you generated (copy them exactly)", "last_data_items": "delivered last data for context (copy them)",
"next_instruction": "Continue from [exact last item] - generate next items" "next_instruction": "instruction for next data to deliver"
} }
} }
@ -101,7 +101,7 @@ class SubCoreAi:
elif loopInstruction and iteration > 1: elif loopInstruction and iteration > 1:
continuationContent = self._buildContinuationContent(accumulatedContent, iteration) continuationContent = self._buildContinuationContent(accumulatedContent, iteration)
if "LOOP_INSTRUCTION" in prompt: if "LOOP_INSTRUCTION" in prompt:
iterationPrompt = prompt.replace("LOOP_INSTRUCTION", f"{loopInstruction}\n\n{continuationContent}") iterationPrompt = prompt.replace("LOOP_INSTRUCTION", f"{continuationContent}\n\n{loopInstruction}")
else: else:
iterationPrompt = prompt iterationPrompt = prompt
else: else:
@ -208,10 +208,29 @@ class SubCoreAi:
except (json.JSONDecodeError, KeyError, ValueError): except (json.JSONDecodeError, KeyError, ValueError):
pass pass
# Extract specific attributes from continuation object
last_data_items = ""
next_instruction = ""
if continuation_description:
try:
if isinstance(continuation_description, str):
continuation_obj = json.loads(continuation_description)
else:
continuation_obj = continuation_description
if isinstance(continuation_obj, dict):
last_data_items = continuation_obj.get("last_data_items", "")
next_instruction = continuation_obj.get("next_instruction", "")
except (json.JSONDecodeError, TypeError):
pass
continuation_content = f"""CONTINUATION REQUEST (Iteration {iteration}): continuation_content = f"""CONTINUATION REQUEST (Iteration {iteration}):
You are continuing a previous response. DO NOT repeat any previous content. You are continuing a previous response. DO NOT repeat any previous content.
{f"CONTINUATION INSTRUCTIONS: {continuation_description}" if continuation_description else "No specific continuation instructions provided."} {f"Already delivered data: {last_data_items}" if last_data_items else "No previous data specified"}
{f"Your task to deliver: {next_instruction}" if next_instruction else "No specific task provided"}
CRITICAL REQUIREMENTS: CRITICAL REQUIREMENTS:
- Start from the exact point specified in continuation instructions - Start from the exact point specified in continuation instructions

View file

@ -315,10 +315,31 @@ class GenerationService:
if not isinstance(extractedContent, dict): if not isinstance(extractedContent, dict):
raise ValueError("extractedContent must be a JSON dictionary") raise ValueError("extractedContent must be a JSON dictionary")
if "sections" not in extractedContent: # Check if this is a multi-document structure
raise ValueError("extractedContent must contain 'sections' field") if "documents" in extractedContent and len(extractedContent["documents"]) > 1:
# Multiple documents - use multi-file renderer
# Remove extra debug file writes for render inputs per simplification generated_documents = await self._renderMultiFileReport(extractedContent, outputFormat, title, userPrompt, aiService)
# For multi-document, return the first document's content and mime type
if generated_documents:
return generated_documents[0]["content"], generated_documents[0]["mime_type"]
else:
raise ValueError("No documents could be rendered")
elif "documents" in extractedContent and len(extractedContent["documents"]) == 1:
# Single document in documents array - extract sections
single_doc = extractedContent["documents"][0]
if "sections" not in single_doc:
raise ValueError("Document must contain 'sections' field")
# Create content for single document renderer
contentToRender = {
"sections": single_doc["sections"],
"metadata": extractedContent.get("metadata", {}),
"continuation": extractedContent.get("continuation", None)
}
elif "sections" in extractedContent:
# Direct sections format
contentToRender = extractedContent
else:
raise ValueError("extractedContent must contain 'sections' field or 'documents' array")
# Get the appropriate renderer for the format # Get the appropriate renderer for the format
renderer = self._getFormatRenderer(outputFormat) renderer = self._getFormatRenderer(outputFormat)
@ -326,7 +347,7 @@ class GenerationService:
raise ValueError(f"Unsupported output format: {outputFormat}") raise ValueError(f"Unsupported output format: {outputFormat}")
# Render the JSON content directly (AI generation handled by main service) # Render the JSON content directly (AI generation handled by main service)
renderedContent, mimeType = await renderer.render(extractedContent, title, userPrompt, aiService) renderedContent, mimeType = await renderer.render(contentToRender, title, userPrompt, aiService)
# Remove extra debug output file writes # Remove extra debug output file writes
logger.info(f"Successfully rendered JSON report to {outputFormat} format: {len(renderedContent)} characters") logger.info(f"Successfully rendered JSON report to {outputFormat} format: {len(renderedContent)} characters")

View file

@ -19,7 +19,6 @@ logger = logging.getLogger(__name__)
# Centralized JSON structure template for document generation # Centralized JSON structure template for document generation
JSON_STRUCTURE_TEMPLATE = """{ JSON_STRUCTURE_TEMPLATE = """{
"continuation": null,
"metadata": { "metadata": {
"title": "{{DOCUMENT_TITLE}}", "title": "{{DOCUMENT_TITLE}}",
"splitStrategy": "single_document", "splitStrategy": "single_document",
@ -44,7 +43,8 @@ JSON_STRUCTURE_TEMPLATE = """{
"order": 1 "order": 1
} }
] ]
}] }],
"continuation": null,
}""" }"""
async def buildAdaptiveExtractionPrompt( async def buildAdaptiveExtractionPrompt(
@ -185,17 +185,17 @@ async def buildGenerationPrompt(
# Always use the proper generation prompt template with LOOP_INSTRUCTION # Always use the proper generation prompt template with LOOP_INSTRUCTION
result = f"""Generate structured JSON content for document creation. result = f"""Generate structured JSON content for document creation.
USER REQUEST: "{userPrompt}" USER CONTEXT: "{userPrompt}"
DOCUMENT TITLE: "{title}" DOCUMENT TITLE: "{title}"
TARGET FORMAT: {outputFormat} TARGET FORMAT: {outputFormat}
LOOP_INSTRUCTION
RULES: RULES:
- Follow the template structure below exactly; emit only one JSON object in the response - Follow the template structure below exactly; emit only one JSON object in the response
- Fill sections with content based on the user request - Fill sections with content based on the user request
- Use appropriate content_type - Use appropriate content_type
LOOP_INSTRUCTION
Return ONLY valid JSON matching this structure (template below). Do not include any prose before/after. Use this as the single template reference for your output: Return ONLY valid JSON matching this structure (template below). Do not include any prose before/after. Use this as the single template reference for your output:
{json_template} {json_template}
""" """

View file

@ -334,13 +334,7 @@ class AIBehaviorTester:
PROMPT_SCENARIOS = { PROMPT_SCENARIOS = {
"Prime Numbers Test": """Generate the first 5000 prime numbers in a table with 10 columns per row.""", "Prime Numbers Test": """Generate the first 5000 prime numbers in a table with 10 columns per row.""",
"Fibonacci Sequence": """Generate the first 1000 Fibonacci numbers in a table with 5 columns per row.""", "Text Content": """Generate a comprehensive guide about how to bring a new product to market in 10 sections, each containing detailed explanations and examples."""
"Multiplication Table": """Generate multiplication tables from 1 to 50, each table with 10 columns per row.""",
"Random Data": """Generate 2000 random numbers between 1 and 10000 in a table with 8 columns per row.""",
"Text Content": """Generate a comprehensive guide about machine learning with 50 sections, each containing detailed explanations and examples."""
} }
async def main(): async def main():