architecture for looping ai calls tested and fixed
This commit is contained in:
parent
1badb8468a
commit
52adedab4a
8 changed files with 113 additions and 48 deletions
|
|
@ -39,6 +39,26 @@ class AiAnthropic:
|
|||
|
||||
logger.info(f"Anthropic Connector initialized with model: {self.modelName}")
|
||||
|
||||
def _getMaxTokensForModel(self, maxTokens: int = None) -> int:
    """Resolve the ``max_tokens`` value to send to the Anthropic API.

    Anthropic requires ``max_tokens`` on every request, and the value is
    the cap on *generated output* tokens -- not the context window size.
    Requesting more than the model's output limit is rejected by the API,
    so the defaults below are per-model output limits rather than the
    200k context length.

    Args:
        maxTokens: Explicit caller-supplied limit. When not ``None`` it is
            returned unchanged (the caller is trusted to stay within the
            model's limit).

    Returns:
        ``maxTokens`` if given, otherwise the default output-token limit
        for ``self.modelName``, falling back to a conservative value for
        models that are not recognised.
    """
    if maxTokens is not None:
        return maxTokens

    # Output-token ceilings per model family, as published in Anthropic's
    # model overview at the time of writing -- verify when adding models.
    outputLimits = (
        ("claude-3-5-sonnet", 8192),
        ("claude-3-5-haiku", 8192),
        ("claude-3-opus", 4096),
        ("claude-3-sonnet", 4096),
        ("claude-3-haiku", 4096),
    )
    # Lowest limit among the known models, so an unknown model never gets
    # a request that exceeds what any of them would accept.
    fallbackLimit = 4096

    # Tolerate a missing model name instead of raising AttributeError.
    model_name = (self.modelName or "").lower()
    for marker, limit in outputLimits:
        if marker in model_name:
            return limit
    return fallbackLimit
|
||||
|
||||
async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Calls the Anthropic API with the given messages.
|
||||
|
|
@ -106,9 +126,8 @@ class AiAnthropic:
|
|||
"temperature": temperature,
|
||||
}
|
||||
|
||||
# Only add max_tokens if it's explicitly set
|
||||
if maxTokens is not None:
|
||||
payload["max_tokens"] = maxTokens
|
||||
# Anthropic requires max_tokens - use model-appropriate value
|
||||
payload["max_tokens"] = self._getMaxTokensForModel(maxTokens)
|
||||
if system_prompt:
|
||||
payload["system"] = system_prompt
|
||||
|
||||
|
|
|
|||
|
|
@ -80,7 +80,8 @@ class AiOpenai:
|
|||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
logger.error(f"OpenAI API error: {response.status_code} - {response.text}")
|
||||
error_message = f"OpenAI API error: {response.status_code} - {response.text}"
|
||||
logger.error(error_message)
|
||||
|
||||
# Check for context length exceeded error
|
||||
if response.status_code == 400:
|
||||
|
|
@ -95,7 +96,8 @@ class AiOpenai:
|
|||
except (ValueError, KeyError):
|
||||
pass # If we can't parse the error, fall through to generic error
|
||||
|
||||
raise HTTPException(status_code=500, detail="Error communicating with OpenAI API")
|
||||
# Include the actual error details in the exception
|
||||
raise HTTPException(status_code=500, detail=error_message)
|
||||
|
||||
responseJson = response.json()
|
||||
content = responseJson["choices"][0]["message"]["content"]
|
||||
|
|
|
|||
|
|
@ -68,9 +68,8 @@ class AiPerplexity:
|
|||
"temperature": temperature
|
||||
}
|
||||
|
||||
# Only add max_tokens if it's explicitly set
|
||||
if maxTokens is not None:
|
||||
payload["max_tokens"] = maxTokens
|
||||
# Add max_tokens - use provided value or default to 128000 (Perplexity's typical limit)
|
||||
payload["max_tokens"] = maxTokens if maxTokens is not None else 128000
|
||||
|
||||
response = await self.httpClient.post(
|
||||
self.apiUrl,
|
||||
|
|
@ -135,9 +134,8 @@ class AiPerplexity:
|
|||
"temperature": temperature
|
||||
}
|
||||
|
||||
# Only add max_tokens if it's explicitly set
|
||||
if maxTokens is not None:
|
||||
payload["max_tokens"] = maxTokens
|
||||
# Add max_tokens - use provided value or default to 128000 (Perplexity's typical limit)
|
||||
payload["max_tokens"] = maxTokens if maxTokens is not None else 128000
|
||||
|
||||
response = await self.httpClient.post(
|
||||
self.apiUrl,
|
||||
|
|
|
|||
|
|
@ -525,25 +525,24 @@ class AiObjects:
|
|||
try:
|
||||
logger.info(f"Attempting AI call with model: {modelName} (attempt {attempt + 1}/{len(fallbackModels)})")
|
||||
|
||||
# Store the selected model for token limit resolution
|
||||
self._lastSelectedModel = modelName
|
||||
|
||||
# Replace <TOKEN_LIMIT> placeholder in prompt and context if present
|
||||
# Replace <TOKEN_LIMIT> placeholder in prompt for this specific model
|
||||
context_length = aiModels[modelName].get("contextLength", 0)
|
||||
if context_length > 0:
|
||||
token_limit = str(context_length)
|
||||
else:
|
||||
token_limit = "4000" # Default for text generation
|
||||
token_limit = "16000" # Default for text generation
|
||||
|
||||
if "<TOKEN_LIMIT>" in prompt:
|
||||
prompt = prompt.replace("<TOKEN_LIMIT>", token_limit)
|
||||
# Create a copy of the prompt for this model call
|
||||
modelPrompt = prompt
|
||||
if "<TOKEN_LIMIT>" in modelPrompt:
|
||||
modelPrompt = modelPrompt.replace("<TOKEN_LIMIT>", token_limit)
|
||||
logger.debug(f"Replaced <TOKEN_LIMIT> with {token_limit} for model {modelName}")
|
||||
|
||||
# Update messages array with replaced content
|
||||
messages = []
|
||||
if context:
|
||||
messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
|
||||
messages.append({"role": "user", "content": prompt})
|
||||
messages.append({"role": "user", "content": modelPrompt})
|
||||
|
||||
# Start timing
|
||||
startTime = time.time()
|
||||
|
|
@ -562,19 +561,19 @@ class AiObjects:
|
|||
content = response["choices"][0]["message"]["content"]
|
||||
elif functionName == "callAiWithWebSearch":
|
||||
# Perplexity web search function
|
||||
query = prompt
|
||||
query = modelPrompt
|
||||
if context:
|
||||
query = f"Context: {context}\n\nQuery: {prompt}"
|
||||
query = f"Context: {context}\n\nQuery: {modelPrompt}"
|
||||
content = await connector.callAiWithWebSearch(query)
|
||||
elif functionName == "researchTopic":
|
||||
# Perplexity research function
|
||||
content = await connector.researchTopic(prompt)
|
||||
content = await connector.researchTopic(modelPrompt)
|
||||
elif functionName == "answerQuestion":
|
||||
# Perplexity question answering function
|
||||
content = await connector.answerQuestion(prompt, context)
|
||||
content = await connector.answerQuestion(modelPrompt, context)
|
||||
elif functionName == "getCurrentNews":
|
||||
# Perplexity news function
|
||||
content = await connector.getCurrentNews(prompt)
|
||||
content = await connector.getCurrentNews(modelPrompt)
|
||||
else:
|
||||
raise ValueError(f"Function {functionName} not supported for text generation")
|
||||
|
||||
|
|
@ -599,7 +598,14 @@ class AiObjects:
|
|||
|
||||
except Exception as e:
|
||||
lastError = e
|
||||
logger.warning(f"❌ AI call failed with model {modelName}: {str(e)}")
|
||||
# Enhanced error logging with more details
|
||||
error_details = str(e)
|
||||
if hasattr(e, 'detail'):
|
||||
error_details = f"{error_details} (detail: {e.detail})"
|
||||
if hasattr(e, 'status_code'):
|
||||
error_details = f"{error_details} (status: {e.status_code})"
|
||||
|
||||
logger.warning(f"❌ AI call failed with model {modelName}: {error_details}")
|
||||
|
||||
# If this is not the last model, try the next one
|
||||
if attempt < len(fallbackModels) - 1:
|
||||
|
|
@ -611,7 +617,13 @@ class AiObjects:
|
|||
break
|
||||
|
||||
# All fallback attempts failed - return error response
|
||||
errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {str(lastError)}"
|
||||
last_error_details = str(lastError)
|
||||
if hasattr(lastError, 'detail'):
|
||||
last_error_details = f"{last_error_details} (detail: {lastError.detail})"
|
||||
if hasattr(lastError, 'status_code'):
|
||||
last_error_details = f"{last_error_details} (status: {lastError.status_code})"
|
||||
|
||||
errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {last_error_details}"
|
||||
logger.error(errorMsg)
|
||||
return AiCallResponse(
|
||||
content=errorMsg,
|
||||
|
|
|
|||
|
|
@ -15,14 +15,14 @@ CRITICAL LIMITS: <TOKEN_LIMIT> tokens total (reserve 20% for JSON structure)
|
|||
MANDATORY RULES:
|
||||
1. STOP at approximately 80% of limit to ensure valid JSON completion
|
||||
2. Return ONLY raw JSON (no ```json blocks, no text before/after)
|
||||
3. ALWAYS include "continuation" field - this is MANDATORY
|
||||
|
||||
CONTINUATION REQUIREMENTS:
|
||||
Refer to the json object below where to set the "continuation" information:
|
||||
- If you can complete the full request: {"continuation": null}
|
||||
- If you must stop early: {
|
||||
"continuation": {
|
||||
"last_data_items": "exact last items you generated (copy them exactly)",
|
||||
"next_instruction": "Continue from [exact last item] - generate next items"
|
||||
"last_data_items": "delivered last data for context (copy them)",
|
||||
"next_instruction": "instruction for next data to deliver"
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -101,7 +101,7 @@ class SubCoreAi:
|
|||
elif loopInstruction and iteration > 1:
|
||||
continuationContent = self._buildContinuationContent(accumulatedContent, iteration)
|
||||
if "LOOP_INSTRUCTION" in prompt:
|
||||
iterationPrompt = prompt.replace("LOOP_INSTRUCTION", f"{loopInstruction}\n\n{continuationContent}")
|
||||
iterationPrompt = prompt.replace("LOOP_INSTRUCTION", f"{continuationContent}\n\n{loopInstruction}")
|
||||
else:
|
||||
iterationPrompt = prompt
|
||||
else:
|
||||
|
|
@ -208,10 +208,29 @@ class SubCoreAi:
|
|||
except (json.JSONDecodeError, KeyError, ValueError):
|
||||
pass
|
||||
|
||||
# Extract specific attributes from continuation object
|
||||
last_data_items = ""
|
||||
next_instruction = ""
|
||||
|
||||
if continuation_description:
|
||||
try:
|
||||
if isinstance(continuation_description, str):
|
||||
continuation_obj = json.loads(continuation_description)
|
||||
else:
|
||||
continuation_obj = continuation_description
|
||||
|
||||
if isinstance(continuation_obj, dict):
|
||||
last_data_items = continuation_obj.get("last_data_items", "")
|
||||
next_instruction = continuation_obj.get("next_instruction", "")
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
continuation_content = f"""CONTINUATION REQUEST (Iteration {iteration}):
|
||||
You are continuing a previous response. DO NOT repeat any previous content.
|
||||
|
||||
{f"CONTINUATION INSTRUCTIONS: {continuation_description}" if continuation_description else "No specific continuation instructions provided."}
|
||||
{f"Already delivered data: {last_data_items}" if last_data_items else "No previous data specified"}
|
||||
|
||||
{f"Your task to deliver: {next_instruction}" if next_instruction else "No specific task provided"}
|
||||
|
||||
CRITICAL REQUIREMENTS:
|
||||
- Start from the exact point specified in continuation instructions
|
||||
|
|
|
|||
|
|
@ -315,10 +315,31 @@ class GenerationService:
|
|||
if not isinstance(extractedContent, dict):
|
||||
raise ValueError("extractedContent must be a JSON dictionary")
|
||||
|
||||
if "sections" not in extractedContent:
|
||||
raise ValueError("extractedContent must contain 'sections' field")
|
||||
|
||||
# Remove extra debug file writes for render inputs per simplification
|
||||
# Check if this is a multi-document structure
|
||||
if "documents" in extractedContent and len(extractedContent["documents"]) > 1:
|
||||
# Multiple documents - use multi-file renderer
|
||||
generated_documents = await self._renderMultiFileReport(extractedContent, outputFormat, title, userPrompt, aiService)
|
||||
# For multi-document, return the first document's content and mime type
|
||||
if generated_documents:
|
||||
return generated_documents[0]["content"], generated_documents[0]["mime_type"]
|
||||
else:
|
||||
raise ValueError("No documents could be rendered")
|
||||
elif "documents" in extractedContent and len(extractedContent["documents"]) == 1:
|
||||
# Single document in documents array - extract sections
|
||||
single_doc = extractedContent["documents"][0]
|
||||
if "sections" not in single_doc:
|
||||
raise ValueError("Document must contain 'sections' field")
|
||||
# Create content for single document renderer
|
||||
contentToRender = {
|
||||
"sections": single_doc["sections"],
|
||||
"metadata": extractedContent.get("metadata", {}),
|
||||
"continuation": extractedContent.get("continuation", None)
|
||||
}
|
||||
elif "sections" in extractedContent:
|
||||
# Direct sections format
|
||||
contentToRender = extractedContent
|
||||
else:
|
||||
raise ValueError("extractedContent must contain 'sections' field or 'documents' array")
|
||||
|
||||
# Get the appropriate renderer for the format
|
||||
renderer = self._getFormatRenderer(outputFormat)
|
||||
|
|
@ -326,7 +347,7 @@ class GenerationService:
|
|||
raise ValueError(f"Unsupported output format: {outputFormat}")
|
||||
|
||||
# Render the JSON content directly (AI generation handled by main service)
|
||||
renderedContent, mimeType = await renderer.render(extractedContent, title, userPrompt, aiService)
|
||||
renderedContent, mimeType = await renderer.render(contentToRender, title, userPrompt, aiService)
|
||||
# Remove extra debug output file writes
|
||||
|
||||
logger.info(f"Successfully rendered JSON report to {outputFormat} format: {len(renderedContent)} characters")
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
# Centralized JSON structure template for document generation
|
||||
JSON_STRUCTURE_TEMPLATE = """{
|
||||
"continuation": null,
|
||||
"metadata": {
|
||||
"title": "{{DOCUMENT_TITLE}}",
|
||||
"splitStrategy": "single_document",
|
||||
|
|
@ -44,7 +43,8 @@ JSON_STRUCTURE_TEMPLATE = """{
|
|||
"order": 1
|
||||
}
|
||||
]
|
||||
}]
|
||||
}],
|
||||
"continuation": null,
|
||||
}"""
|
||||
|
||||
async def buildAdaptiveExtractionPrompt(
|
||||
|
|
@ -185,17 +185,17 @@ async def buildGenerationPrompt(
|
|||
# Always use the proper generation prompt template with LOOP_INSTRUCTION
|
||||
result = f"""Generate structured JSON content for document creation.
|
||||
|
||||
USER REQUEST: "{userPrompt}"
|
||||
USER CONTEXT: "{userPrompt}"
|
||||
DOCUMENT TITLE: "{title}"
|
||||
TARGET FORMAT: {outputFormat}
|
||||
|
||||
LOOP_INSTRUCTION
|
||||
|
||||
RULES:
|
||||
- Follow the template structure below exactly; emit only one JSON object in the response
|
||||
- Fill sections with content based on the user request
|
||||
- Use appropriate content_type
|
||||
|
||||
LOOP_INSTRUCTION
|
||||
|
||||
Return ONLY valid JSON matching this structure (template below). Do not include any prose before/after. Use this as the single template reference for your output:
|
||||
{json_template}
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -334,13 +334,7 @@ class AIBehaviorTester:
|
|||
PROMPT_SCENARIOS = {
|
||||
"Prime Numbers Test": """Generate the first 5000 prime numbers in a table with 10 columns per row.""",
|
||||
|
||||
"Fibonacci Sequence": """Generate the first 1000 Fibonacci numbers in a table with 5 columns per row.""",
|
||||
|
||||
"Multiplication Table": """Generate multiplication tables from 1 to 50, each table with 10 columns per row.""",
|
||||
|
||||
"Random Data": """Generate 2000 random numbers between 1 and 10000 in a table with 8 columns per row.""",
|
||||
|
||||
"Text Content": """Generate a comprehensive guide about machine learning with 50 sections, each containing detailed explanations and examples."""
|
||||
"Text Content": """Generate a comprehensive guide about how to bring a new product to market in 10 sections, each containing detailed explanations and examples."""
|
||||
}
|
||||
|
||||
async def main():
|
||||
|
|
|
|||
Loading…
Reference in a new issue