unified centralized ai generation service implemented, start testing
This commit is contained in:
parent
6b7094c84d
commit
11522bd763
19 changed files with 1237 additions and 1372 deletions
|
|
@ -1,4 +1,5 @@
|
|||
import logging
|
||||
import re
|
||||
from typing import Dict, Any, List, Optional, Tuple, Union
|
||||
from modules.datamodels.datamodelChat import PromptPlaceholder
|
||||
|
||||
|
|
@ -189,3 +190,69 @@ class AiService:
|
|||
prompt, documents, placeholders, options, outputFormat, title,
|
||||
documentProcessor, documentGenerator
|
||||
)
|
||||
|
||||
def sanitizePromptContent(self, content: str, contentType: str = "text") -> str:
|
||||
"""
|
||||
Centralized prompt content sanitization to prevent injection attacks and ensure safe presentation.
|
||||
|
||||
This is the single source of truth for all prompt sanitization across the system.
|
||||
Replaces all scattered sanitization functions with a unified approach.
|
||||
|
||||
Args:
|
||||
content: The content to sanitize
|
||||
contentType: Type of content ("text", "userinput", "json", "document")
|
||||
|
||||
Returns:
|
||||
Safely sanitized content ready for AI prompt insertion
|
||||
"""
|
||||
if not content:
|
||||
return ""
|
||||
|
||||
try:
|
||||
# Convert to string if not already
|
||||
content_str = str(content)
|
||||
|
||||
# Remove null bytes and control characters (except newlines and tabs)
|
||||
sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', content_str)
|
||||
|
||||
# Handle different content types with appropriate sanitization
|
||||
if contentType == "userinput":
|
||||
# Extra security for user-controlled content
|
||||
# Escape curly braces to prevent placeholder injection
|
||||
sanitized = sanitized.replace('{', '{{').replace('}', '}}')
|
||||
# Escape quotes and wrap in single quotes
|
||||
sanitized = sanitized.replace('"', '\\"').replace("'", "\\'")
|
||||
return f"'{sanitized}'"
|
||||
|
||||
elif contentType == "json":
|
||||
# For JSON content, escape quotes and backslashes
|
||||
sanitized = sanitized.replace('\\', '\\\\')
|
||||
sanitized = sanitized.replace('"', '\\"')
|
||||
sanitized = sanitized.replace('\n', '\\n')
|
||||
sanitized = sanitized.replace('\r', '\\r')
|
||||
sanitized = sanitized.replace('\t', '\\t')
|
||||
|
||||
elif contentType == "document":
|
||||
# For document content, escape special characters
|
||||
sanitized = sanitized.replace('\\', '\\\\')
|
||||
sanitized = sanitized.replace('"', '\\"')
|
||||
sanitized = sanitized.replace("'", "\\'")
|
||||
sanitized = sanitized.replace('\n', '\\n')
|
||||
sanitized = sanitized.replace('\r', '\\r')
|
||||
sanitized = sanitized.replace('\t', '\\t')
|
||||
|
||||
else: # contentType == "text" or default
|
||||
# Basic text sanitization
|
||||
sanitized = sanitized.replace('\\', '\\\\')
|
||||
sanitized = sanitized.replace('"', '\\"')
|
||||
sanitized = sanitized.replace("'", "\\'")
|
||||
sanitized = sanitized.replace('\n', '\\n')
|
||||
sanitized = sanitized.replace('\r', '\\r')
|
||||
sanitized = sanitized.replace('\t', '\\t')
|
||||
|
||||
return sanitized
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error sanitizing prompt content: {str(e)}")
|
||||
# Return a safe fallback
|
||||
return "[ERROR: Content could not be safely sanitized]"
|
||||
|
|
|
|||
|
|
@ -75,38 +75,105 @@ class SubCoreAi:
|
|||
else:
|
||||
full_prompt = prompt
|
||||
|
||||
# Timestamp-only prompt debug writing removed
|
||||
# Check for unresolved placeholders and clean them up
|
||||
try:
|
||||
import re
|
||||
# Find only {{KEY:...}} patterns that need to be removed
|
||||
unresolved_placeholders = re.findall(r'\{\{KEY:[^}]+\}\}', full_prompt)
|
||||
if unresolved_placeholders:
|
||||
logger.warning(f"Found unresolved KEY placeholders in prompt: {unresolved_placeholders}")
|
||||
# Remove only {{KEY:...}} patterns, leave other {{...}} content intact
|
||||
full_prompt = re.sub(r'\{\{KEY:[^}]+\}\}', '', full_prompt)
|
||||
# Clean up extra whitespace
|
||||
full_prompt = re.sub(r'\n\s*\n\s*\n', '\n\n', full_prompt)
|
||||
full_prompt = full_prompt.strip()
|
||||
logger.info("Cleaned up unresolved KEY placeholders from prompt")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error cleaning up prompt placeholders: {str(e)}")
|
||||
|
||||
# Log the final integrated prompt that AI will receive
|
||||
try:
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
# Determine the prompt type based on operation type
|
||||
if options.operationType == OperationType.GENERATE_PLAN:
|
||||
prompt_type = "taskplanPrompt"
|
||||
elif options.operationType == OperationType.ANALYSE_CONTENT:
|
||||
prompt_type = "analysisPrompt"
|
||||
else:
|
||||
prompt_type = "aiPrompt"
|
||||
|
||||
writeDebugFile(full_prompt, prompt_type, documents)
|
||||
except Exception:
|
||||
pass # Don't fail on debug logging
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Handle document generation with specific output format
|
||||
# Handle document generation with specific output format using unified approach
|
||||
if outputFormat and documentGenerator:
|
||||
result = await documentGenerator.callAiWithDocumentGeneration(prompt, documents, options, outputFormat, title)
|
||||
# Use unified generation method for all document generation
|
||||
if documents and len(documents) > 0:
|
||||
# Extract content from documents first
|
||||
logger.info(f"Extracting content from {len(documents)} documents")
|
||||
extracted_content = await documentProcessor.callAiText(full_prompt, documents, options)
|
||||
# Generate with extracted content
|
||||
generated_json = await self._callAiUnifiedGeneration(full_prompt, extracted_content, options, outputFormat, title)
|
||||
else:
|
||||
# Direct generation without documents
|
||||
logger.info("No documents provided - using direct generation")
|
||||
generated_json = await self._callAiUnifiedGeneration(full_prompt, None, options, outputFormat, title)
|
||||
|
||||
# Parse the generated JSON
|
||||
try:
|
||||
import json
|
||||
generated_data = json.loads(generated_json)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"Failed to parse generated JSON: {str(e)}")
|
||||
return {"success": False, "error": f"Generated content is not valid JSON: {str(e)}"}
|
||||
|
||||
# Render to final format using the existing renderer
|
||||
try:
|
||||
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
|
||||
generationService = GenerationService(self.services)
|
||||
rendered_content, mime_type = await generationService.renderReport(
|
||||
generated_data, outputFormat, title or "Generated Document", full_prompt, self
|
||||
)
|
||||
|
||||
# Build result in the expected format
|
||||
result = {
|
||||
"success": True,
|
||||
"content": generated_data,
|
||||
"documents": [{
|
||||
"documentName": f"generated.{outputFormat}",
|
||||
"documentData": rendered_content,
|
||||
"mimeType": mime_type,
|
||||
"title": title or "Generated Document"
|
||||
}],
|
||||
"is_multi_file": False,
|
||||
"format": outputFormat,
|
||||
"title": title,
|
||||
"split_strategy": "single",
|
||||
"total_documents": 1,
|
||||
"processed_documents": 1
|
||||
}
|
||||
|
||||
# Log AI response for debugging
|
||||
try:
|
||||
if isinstance(result, dict) and 'content' in result:
|
||||
self._writeAiResponseDebug(
|
||||
label='ai_document_generation',
|
||||
content=result['content'],
|
||||
partIndex=1,
|
||||
modelName=None, # Document generation doesn't return model info
|
||||
continuation=False
|
||||
)
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
writeDebugFile(str(result), "documentGenerationResponse", documents)
|
||||
except Exception:
|
||||
pass
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error rendering document: {str(e)}")
|
||||
return {"success": False, "error": f"Rendering failed: {str(e)}"}
|
||||
|
||||
if call_type == "planning":
|
||||
result = await self._callAiPlanning(prompt, placeholders_dict, placeholders_meta, options)
|
||||
# Log AI response for debugging
|
||||
try:
|
||||
self._writeAiResponseDebug(
|
||||
label='ai_planning',
|
||||
content=result or "",
|
||||
partIndex=1,
|
||||
modelName=None, # Planning doesn't return model info
|
||||
continuation=False
|
||||
)
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
writeDebugFile(str(result or ""), "taskplanResponse", documents)
|
||||
except Exception:
|
||||
pass
|
||||
return result
|
||||
|
|
@ -125,31 +192,13 @@ class SubCoreAi:
|
|||
if documentProcessor and documents:
|
||||
result = await documentProcessor.callAiText(full_prompt, documents, options)
|
||||
else:
|
||||
# Fallback to direct AI call if no document processor available
|
||||
request = AiCallRequest(
|
||||
prompt=full_prompt,
|
||||
context="",
|
||||
options=options
|
||||
)
|
||||
response = await self.aiObjects.call(request)
|
||||
result = response.content
|
||||
|
||||
# Emit stats for direct AI call
|
||||
self.services.workflow.storeWorkflowStat(
|
||||
self.services.currentWorkflow,
|
||||
response,
|
||||
f"ai.call.{options.operationType}"
|
||||
)
|
||||
# Enhanced direct AI call with partial results support
|
||||
result = await self._callAiWithPartialResults(full_prompt, options)
|
||||
|
||||
# Log AI response for debugging (additional logging for text calls)
|
||||
try:
|
||||
self._writeAiResponseDebug(
|
||||
label='ai_text_main',
|
||||
content=result or "",
|
||||
partIndex=1,
|
||||
modelName=None, # Text calls already log internally
|
||||
continuation=False
|
||||
)
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
writeDebugFile(str(result or ""), "aiTextResponse", documents)
|
||||
except Exception:
|
||||
pass
|
||||
return result
|
||||
|
|
@ -349,6 +398,253 @@ class SubCoreAi:
|
|||
pass
|
||||
return response.content
|
||||
|
||||
async def _callAiWithPartialResults(
|
||||
self,
|
||||
prompt: str,
|
||||
options: AiCallOptions
|
||||
) -> str:
|
||||
"""
|
||||
Call AI with partial results continuation logic for direct calls.
|
||||
Handles cases where AI needs to generate large responses in chunks.
|
||||
"""
|
||||
logger.info("Starting direct AI call with partial results support")
|
||||
|
||||
# Build enhanced prompt with continuation instructions
|
||||
enhanced_prompt = self._buildDirectContinuationPrompt(prompt)
|
||||
|
||||
# Process with continuation logic
|
||||
return await self._processDirectWithContinuationLoop(enhanced_prompt, options)
|
||||
|
||||
def _buildDirectContinuationPrompt(self, base_prompt: str) -> str:
|
||||
"""
|
||||
Build a prompt for direct AI calls that includes partial results instructions.
|
||||
"""
|
||||
continuation_instructions = """
|
||||
|
||||
IMPORTANT: If your response is too large to generate completely in one response, you can deliver partial results and continue.
|
||||
|
||||
CONTINUATION LOGIC:
|
||||
- If you cannot complete the full response, end your response with:
|
||||
[CONTINUE: brief description of what still needs to be generated]
|
||||
- The system will call you again to continue from where you left off
|
||||
- Continue generating from the exact point where you stopped
|
||||
- Maintain consistency with your previous partial response
|
||||
- Only stop when you have generated the complete response
|
||||
|
||||
Examples:
|
||||
|
||||
Example - Code Generation:
|
||||
If generating a large code file and you can only generate part of it:
|
||||
- Generate the first part (imports, classes, functions)
|
||||
- End with: [CONTINUE: Generate the remaining methods and main execution code]
|
||||
- In the next call, continue from where you left off
|
||||
|
||||
Example - Documentation:
|
||||
If writing comprehensive documentation and you can only generate sections 1-3:
|
||||
- Generate sections 1-3 with full content
|
||||
- End with: [CONTINUE: Generate sections 4-8 covering advanced topics and examples]
|
||||
- In the next call, continue with sections 4-8
|
||||
|
||||
This allows you to handle very large responses that exceed normal limits.
|
||||
"""
|
||||
|
||||
return f"{base_prompt}{continuation_instructions}"
|
||||
|
||||
async def _processDirectWithContinuationLoop(
|
||||
self,
|
||||
enhanced_prompt: str,
|
||||
options: AiCallOptions
|
||||
) -> str:
|
||||
"""
|
||||
Process direct AI call with continuation loop until complete.
|
||||
"""
|
||||
max_iterations = 10 # Prevent infinite loops
|
||||
iteration = 0
|
||||
accumulated_content = []
|
||||
continuation_hint = None
|
||||
|
||||
while iteration < max_iterations:
|
||||
iteration += 1
|
||||
logger.info(f"Direct AI continuation iteration {iteration}/{max_iterations}")
|
||||
|
||||
# Build prompt for this iteration
|
||||
if continuation_hint:
|
||||
iteration_prompt = self._buildDirectContinuationIterationPrompt(
|
||||
enhanced_prompt, continuation_hint, accumulated_content
|
||||
)
|
||||
else:
|
||||
iteration_prompt = enhanced_prompt
|
||||
|
||||
# Make AI call for this iteration
|
||||
try:
|
||||
request = AiCallRequest(
|
||||
prompt=iteration_prompt,
|
||||
context="",
|
||||
options=options
|
||||
)
|
||||
response = await self.aiObjects.call(request)
|
||||
result = response.content
|
||||
|
||||
# Emit stats for this iteration
|
||||
self.services.workflow.storeWorkflowStat(
|
||||
self.services.currentWorkflow,
|
||||
response,
|
||||
f"ai.call.{options.operationType}.iteration_{iteration}"
|
||||
)
|
||||
|
||||
if not result or not result.strip():
|
||||
logger.warning(f"Iteration {iteration}: Empty response, stopping")
|
||||
break
|
||||
|
||||
# Check for continuation marker
|
||||
if "[CONTINUE:" in result:
|
||||
# Extract the continuation hint
|
||||
import re
|
||||
continue_match = re.search(r'\[CONTINUE:\s*([^\]]+)\]', result)
|
||||
if continue_match:
|
||||
continuation_hint = continue_match.group(1).strip()
|
||||
# Remove the continuation marker from the result
|
||||
result = re.sub(r'\s*\[CONTINUE:[^\]]+\]', '', result).strip()
|
||||
else:
|
||||
continuation_hint = "Continue from where you left off"
|
||||
|
||||
# Add this partial result to accumulated content
|
||||
if result.strip():
|
||||
accumulated_content.append(result.strip())
|
||||
|
||||
logger.info(f"Iteration {iteration}: Partial result added, continue hint: {continuation_hint}")
|
||||
else:
|
||||
# No continuation marker - this is the final result
|
||||
if result.strip():
|
||||
accumulated_content.append(result.strip())
|
||||
|
||||
logger.info(f"Direct AI continuation complete after {iteration} iterations")
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Direct AI iteration {iteration} failed: {str(e)}")
|
||||
break
|
||||
|
||||
if iteration >= max_iterations:
|
||||
logger.warning(f"Direct AI continuation stopped after maximum iterations ({max_iterations})")
|
||||
|
||||
# For JSON responses, we need to merge them properly instead of concatenating
|
||||
if accumulated_content:
|
||||
import json
|
||||
# Parse each part as JSON and merge them
|
||||
merged_documents = []
|
||||
merged_metadata = None
|
||||
|
||||
for content in accumulated_content:
|
||||
parsed = json.loads(content)
|
||||
if isinstance(parsed, dict):
|
||||
# Extract metadata from first valid JSON
|
||||
if merged_metadata is None and "metadata" in parsed:
|
||||
merged_metadata = parsed["metadata"]
|
||||
|
||||
# Extract documents from this part
|
||||
if "documents" in parsed and isinstance(parsed["documents"], list):
|
||||
merged_documents.extend(parsed["documents"])
|
||||
|
||||
# Create final merged JSON - NO FALLBACK
|
||||
final_result = json.dumps({
|
||||
"metadata": merged_metadata or {
|
||||
"title": "Generated Document",
|
||||
"splitStrategy": "single_document",
|
||||
"source_documents": [],
|
||||
"extraction_method": "ai_generation"
|
||||
},
|
||||
"documents": merged_documents
|
||||
}, indent=2)
|
||||
else:
|
||||
# Return empty JSON structure if no content
|
||||
final_result = json.dumps({
|
||||
"metadata": {
|
||||
"title": "Generated Document",
|
||||
"splitStrategy": "single_document",
|
||||
"source_documents": [],
|
||||
"extraction_method": "ai_generation"
|
||||
},
|
||||
"documents": []
|
||||
}, indent=2)
|
||||
|
||||
logger.info(f"Final direct AI result: {len(accumulated_content)} parts from {iteration} iterations")
|
||||
return final_result
|
||||
|
||||
def _buildDirectContinuationIterationPrompt(
|
||||
self,
|
||||
base_prompt: str,
|
||||
continuation_hint: str,
|
||||
accumulated_content: List[str]
|
||||
) -> str:
|
||||
"""
|
||||
Build a prompt for continuation iteration with context.
|
||||
"""
|
||||
# Build context of what's already been generated
|
||||
context_summary = "PREVIOUSLY GENERATED CONTENT:\n"
|
||||
for i, content in enumerate(accumulated_content[-2:]): # Show last 2 parts for context
|
||||
preview = content[:200] + "..." if len(content) > 200 else content
|
||||
context_summary += f"Part {i+1}: {preview}\n"
|
||||
|
||||
continuation_prompt = f"""
|
||||
{base_prompt}
|
||||
|
||||
{context_summary}
|
||||
|
||||
CONTINUATION INSTRUCTIONS:
|
||||
- Continue from where you left off
|
||||
- Continuation hint: {continuation_hint}
|
||||
- Generate the next part of the content
|
||||
- Maintain consistency with previously generated content
|
||||
- End with [CONTINUE: description] if more content is needed
|
||||
- End without [CONTINUE] if the response is complete
|
||||
"""
|
||||
|
||||
return continuation_prompt
|
||||
|
||||
async def _callAiUnifiedGeneration(
|
||||
self,
|
||||
prompt: str,
|
||||
extracted_content: Optional[str] = None,
|
||||
options: Optional[AiCallOptions] = None,
|
||||
outputFormat: str = "json",
|
||||
title: str = "Generated Document"
|
||||
) -> str:
|
||||
"""
|
||||
Unified generation method that handles both scenarios:
|
||||
- With extracted content (from documents)
|
||||
- Without extracted content (direct generation)
|
||||
|
||||
Always uses continuation logic for long responses.
|
||||
Always returns standardized JSON format using the multi-document schema.
|
||||
"""
|
||||
if options is None:
|
||||
options = AiCallOptions()
|
||||
|
||||
logger.info("Starting unified AI generation with continuation logic")
|
||||
|
||||
# Use the existing buildGenerationPrompt to get the proper canonical format instructions
|
||||
from modules.services.serviceGeneration.subPromptBuilder import buildGenerationPrompt
|
||||
|
||||
# Build the generation prompt using the existing system
|
||||
generation_prompt = await buildGenerationPrompt(
|
||||
outputFormat=outputFormat,
|
||||
userPrompt=prompt,
|
||||
title=title,
|
||||
aiService=self,
|
||||
services=self.services
|
||||
)
|
||||
|
||||
# If we have extracted content, prepend it to the prompt
|
||||
if extracted_content:
|
||||
generation_prompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
|
||||
{extracted_content}
|
||||
|
||||
{generation_prompt}"""
|
||||
|
||||
# Use continuation logic for long responses
|
||||
return await self._processDirectWithContinuationLoop(generation_prompt, options)
|
||||
|
||||
async def _callAiDirect(
|
||||
self,
|
||||
prompt: str,
|
||||
|
|
@ -503,10 +799,6 @@ class SubCoreAi:
|
|||
|
||||
return full_prompt
|
||||
|
||||
def _writeAiResponseDebug(self, label: str, content: Any, partIndex: int = 1, modelName: str = None, continuation: bool = None) -> None:
|
||||
"""Disabled verbose debug writing; only minimal files elsewhere."""
|
||||
return
|
||||
|
||||
def _exceedsTokenLimit(self, text: str, model: ModelCapabilities, safety_margin: float) -> bool:
|
||||
"""
|
||||
Check if text exceeds model token limit with safety margin.
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -107,7 +107,7 @@ class SubDocumentProcessing:
|
|||
# Save merged extraction content to debug
|
||||
try:
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
writeDebugFile(mergedContent or '', "extraction_merged")
|
||||
writeDebugFile(mergedContent or '', "extractionMergedText")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
@ -202,7 +202,7 @@ class SubDocumentProcessing:
|
|||
from modules.shared.debugLogger import writeDebugFile
|
||||
import json as _json
|
||||
jsonStr = _json.dumps(mergedJsonDocument, ensure_ascii=False, indent=2)
|
||||
writeDebugFile(jsonStr, "extraction_merged_json", mergedJsonDocument)
|
||||
writeDebugFile(jsonStr, "extractionMergedJson")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
@ -225,6 +225,7 @@ class SubDocumentProcessing:
|
|||
"""
|
||||
Process documents with per-chunk AI calls and merge results in JSON mode.
|
||||
Uses a custom prompt instead of the default extraction prompt.
|
||||
Enhanced with partial results continuation logic.
|
||||
"""
|
||||
if not documents:
|
||||
return {"metadata": {"title": "Empty Document"}, "sections": []}
|
||||
|
|
@ -305,6 +306,199 @@ class SubDocumentProcessing:
|
|||
logger.error(f"Error in per-chunk JSON processing: {str(e)}")
|
||||
return {"metadata": {"title": "Error Document"}, "sections": []}
|
||||
|
||||
async def processDocumentsWithContinuation(
|
||||
self,
|
||||
documents: List[ChatDocument],
|
||||
custom_prompt: str,
|
||||
options: Optional[AiCallOptions] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Process documents with partial results continuation logic.
|
||||
Handles AI responses that indicate partial completion and loops until complete.
|
||||
"""
|
||||
if not documents:
|
||||
return {"metadata": {"title": "Empty Document"}, "sections": []}
|
||||
|
||||
logger.info("Starting document processing with continuation logic")
|
||||
|
||||
# Build enhanced prompt with continuation instructions
|
||||
enhanced_prompt = self._buildContinuationPrompt(custom_prompt)
|
||||
|
||||
# Process with continuation logic
|
||||
return await self._processWithContinuationLoop(documents, enhanced_prompt, options)
|
||||
|
||||
def _buildContinuationPrompt(self, base_prompt: str) -> str:
|
||||
"""
|
||||
Build a prompt that includes partial results continuation instructions.
|
||||
"""
|
||||
continuation_instructions = """
|
||||
|
||||
IMPORTANT CHUNKING LOGIC:
|
||||
- If the response is too large to generate completely in one response, set "continue": true
|
||||
- When "continue": true, include a "continuation_context" field with:
|
||||
- "last_section_id": "id of the last completed section"
|
||||
- "last_element_index": "index of the last completed element in that section"
|
||||
- "remaining_requirements": "brief description of what still needs to be generated"
|
||||
- The AI will be called again with this context to continue generation
|
||||
- Only set "continue": false when the response is completely generated
|
||||
|
||||
OUTPUT FORMAT: Return only valid JSON in this exact structure:
|
||||
{
|
||||
"metadata": {
|
||||
"title": "Document Title"
|
||||
},
|
||||
"sections": [
|
||||
{
|
||||
"id": "section_1",
|
||||
"content_type": "paragraph",
|
||||
"elements": [
|
||||
{
|
||||
"text": "This is the actual content that should be generated."
|
||||
}
|
||||
],
|
||||
"order": 1
|
||||
}
|
||||
],
|
||||
"continue": false,
|
||||
"continuation_context": {
|
||||
"last_section_id": "section_1",
|
||||
"last_element_index": 0,
|
||||
"remaining_requirements": "description of what still needs to be generated"
|
||||
}
|
||||
}
|
||||
|
||||
The AI should generate content using the canonical format with "sections" and "elements".
|
||||
"""
|
||||
|
||||
return f"{base_prompt}{continuation_instructions}"
|
||||
|
||||
async def _processWithContinuationLoop(
|
||||
self,
|
||||
documents: List[ChatDocument],
|
||||
enhanced_prompt: str,
|
||||
options: Optional[AiCallOptions] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Process documents with continuation loop until complete.
|
||||
"""
|
||||
max_iterations = 10 # Prevent infinite loops
|
||||
iteration = 0
|
||||
accumulated_sections = []
|
||||
continuation_context = None
|
||||
|
||||
while iteration < max_iterations:
|
||||
iteration += 1
|
||||
logger.info(f"Continuation iteration {iteration}/{max_iterations}")
|
||||
|
||||
# Build prompt for this iteration
|
||||
if continuation_context:
|
||||
iteration_prompt = self._buildContinuationIterationPrompt(
|
||||
enhanced_prompt, continuation_context, accumulated_sections
|
||||
)
|
||||
else:
|
||||
iteration_prompt = enhanced_prompt
|
||||
|
||||
# Process documents for this iteration
|
||||
try:
|
||||
# Use the existing processing method
|
||||
result = await self.processDocumentsPerChunkJsonWithPrompt(
|
||||
documents, iteration_prompt, options
|
||||
)
|
||||
|
||||
# Check if this is a valid JSON response
|
||||
if not isinstance(result, dict):
|
||||
logger.warning(f"Iteration {iteration}: Invalid result type, stopping")
|
||||
break
|
||||
|
||||
# Extract sections from result
|
||||
sections = result.get("sections", [])
|
||||
if not sections:
|
||||
logger.warning(f"Iteration {iteration}: No sections found, stopping")
|
||||
break
|
||||
|
||||
# Add sections to accumulated results
|
||||
for section in sections:
|
||||
# Update section order to maintain sequence
|
||||
section["order"] = len(accumulated_sections) + 1
|
||||
accumulated_sections.append(section)
|
||||
|
||||
# Check if continuation is needed
|
||||
continue_flag = result.get("continue", False)
|
||||
continuation_context = result.get("continuation_context")
|
||||
|
||||
logger.info(f"Iteration {iteration}: Added {len(sections)} sections, continue={continue_flag}")
|
||||
|
||||
if not continue_flag:
|
||||
logger.info(f"Continuation complete after {iteration} iterations")
|
||||
break
|
||||
|
||||
if not continuation_context:
|
||||
logger.warning(f"Iteration {iteration}: continue=true but no continuation_context, stopping")
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Iteration {iteration} failed: {str(e)}")
|
||||
break
|
||||
|
||||
if iteration >= max_iterations:
|
||||
logger.warning(f"Continuation stopped after maximum iterations ({max_iterations})")
|
||||
|
||||
# Build final result
|
||||
final_result = {
|
||||
"metadata": {
|
||||
"title": "Generated Document",
|
||||
"total_sections": len(accumulated_sections),
|
||||
"iterations": iteration,
|
||||
"continuation_used": iteration > 1
|
||||
},
|
||||
"sections": accumulated_sections,
|
||||
"continue": False
|
||||
}
|
||||
|
||||
logger.info(f"Final result: {len(accumulated_sections)} sections from {iteration} iterations")
|
||||
return final_result
|
||||
|
||||
def _buildContinuationIterationPrompt(
|
||||
self,
|
||||
base_prompt: str,
|
||||
continuation_context: Dict[str, Any],
|
||||
accumulated_sections: List[Dict[str, Any]]
|
||||
) -> str:
|
||||
"""
|
||||
Build a prompt for continuation iteration with context.
|
||||
"""
|
||||
last_section_id = continuation_context.get("last_section_id", "")
|
||||
last_element_index = continuation_context.get("last_element_index", 0)
|
||||
remaining_requirements = continuation_context.get("remaining_requirements", "")
|
||||
|
||||
# Build context of what's already been generated
|
||||
context_summary = "PREVIOUSLY GENERATED CONTENT:\n"
|
||||
for i, section in enumerate(accumulated_sections[-3:]): # Show last 3 sections for context
|
||||
context_summary += f"Section {i+1}: {section.get('id', 'unknown')}\n"
|
||||
if 'elements' in section and section['elements']:
|
||||
first_element = section['elements'][0]
|
||||
if 'text' in first_element:
|
||||
preview = first_element['text'][:100] + "..." if len(first_element['text']) > 100 else first_element['text']
|
||||
context_summary += f" Preview: {preview}\n"
|
||||
|
||||
continuation_prompt = f"""
|
||||
{base_prompt}
|
||||
|
||||
{context_summary}
|
||||
|
||||
CONTINUATION INSTRUCTIONS:
|
||||
- Continue from where you left off
|
||||
- Last completed section: {last_section_id}
|
||||
- Last completed element index: {last_element_index}
|
||||
- Remaining requirements: {remaining_requirements}
|
||||
- Generate the next part of the content
|
||||
- Maintain consistency with previously generated content
|
||||
- Use the same JSON format as before
|
||||
- Set "continue": true if more content is needed, false if complete
|
||||
"""
|
||||
|
||||
return continuation_prompt
|
||||
|
||||
async def callAiText(
|
||||
self,
|
||||
prompt: str,
|
||||
|
|
@ -522,14 +716,8 @@ class SubDocumentProcessing:
|
|||
# Save extraction prompt and response to debug
|
||||
try:
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
debugData = {
|
||||
"chunk_index": chunk_index,
|
||||
"mime_type": part.mimeType,
|
||||
"type_group": part.typeGroup,
|
||||
"context_length": len(part.data) if part.data else 0
|
||||
}
|
||||
writeDebugFile(augmented_prompt, f"extraction_chunk_{chunk_index}", debugData)
|
||||
writeDebugFile(ai_result or '', f"extraction_chunk_{chunk_index}_response")
|
||||
writeDebugFile(augmented_prompt, f"extraction-Chunk{chunk_index}-Prompt")
|
||||
writeDebugFile(ai_result or '', f"extraction-Chunk{chunk_index}-Response")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
@ -629,14 +817,8 @@ class SubDocumentProcessing:
|
|||
# Save extraction prompt and response to debug
|
||||
try:
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
debugData = {
|
||||
"chunk_index": chunk_index,
|
||||
"mime_type": part.mimeType,
|
||||
"type_group": part.typeGroup,
|
||||
"context_length": len(part.data) if part.data else 0
|
||||
}
|
||||
writeDebugFile(augmented_prompt_text, f"extraction_chunk_{chunk_index}", debugData)
|
||||
writeDebugFile(ai_result or '', f"extraction_chunk_{chunk_index}_response")
|
||||
writeDebugFile(augmented_prompt_text, f"extractionChunk{chunk_index}-Prompt")
|
||||
writeDebugFile(ai_result or '', f"extractionChunk{chunk_index}-Response")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
|
|||
|
|
@ -372,59 +372,6 @@ class GenerationService:
|
|||
services=self.services
|
||||
)
|
||||
|
||||
async def getGenericExtractionPrompt(
|
||||
self,
|
||||
outputFormat: str,
|
||||
userPrompt: str,
|
||||
title: str,
|
||||
aiService=None
|
||||
) -> str:
|
||||
"""Get generic extraction prompt that works for both single and multi-file."""
|
||||
from .subPromptBuilder import buildGenericExtractionPrompt
|
||||
return await buildGenericExtractionPrompt(
|
||||
outputFormat=outputFormat,
|
||||
userPrompt=userPrompt,
|
||||
title=title,
|
||||
aiService=aiService,
|
||||
services=self.services
|
||||
)
|
||||
|
||||
async def getExtractionPrompt(self, outputFormat: str, userPrompt: str, title: str, aiService=None) -> str:
|
||||
"""
|
||||
Get the format-specific extraction prompt for AI content extraction.
|
||||
|
||||
Args:
|
||||
outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
|
||||
userPrompt: User's original prompt for report generation
|
||||
title: Report title
|
||||
aiService: AI service instance for intent extraction
|
||||
|
||||
Returns:
|
||||
str: Format-specific prompt for AI extraction
|
||||
"""
|
||||
try:
|
||||
# Get the appropriate renderer for the format
|
||||
renderer = self._getFormatRenderer(outputFormat)
|
||||
if not renderer:
|
||||
raise ValueError(f"Unsupported output format: {outputFormat}")
|
||||
|
||||
# Build centralized prompt with generic rules + format-specific guidelines
|
||||
from .subPromptBuilder import buildExtractionPrompt
|
||||
extractionPrompt = await buildExtractionPrompt(
|
||||
outputFormat=outputFormat,
|
||||
renderer=renderer,
|
||||
userPrompt=userPrompt,
|
||||
title=title,
|
||||
aiService=aiService,
|
||||
services=self.services
|
||||
)
|
||||
|
||||
logger.info(f"Generated {outputFormat}-specific extraction prompt: {len(extractionPrompt)} characters")
|
||||
return extractionPrompt
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting extraction prompt for {outputFormat}: {str(e)}")
|
||||
raise
|
||||
|
||||
async def renderAdaptiveReport(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -344,12 +344,8 @@ class BaseRenderer(ABC):
|
|||
# Save styling prompt and response to debug
|
||||
try:
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
debugData = {
|
||||
"template_length": len(style_template),
|
||||
"default_styles_keys": list(default_styles.keys()) if isinstance(default_styles, dict) else []
|
||||
}
|
||||
writeDebugFile(style_template, "renderer_styling", debugData)
|
||||
writeDebugFile(response.content or '', "renderer_styling_response")
|
||||
writeDebugFile(style_template, "rendererStylingPrompt")
|
||||
writeDebugFile(response.content or '', "rendererStylingResponse")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
|
|||
|
|
@ -62,12 +62,7 @@ class RendererImage(BaseRenderer):
|
|||
# Save image generation prompt to debug
|
||||
try:
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
debugData = {
|
||||
"title": document_title,
|
||||
"user_prompt_length": len(user_prompt) if user_prompt else 0,
|
||||
"extracted_content_keys": list(extracted_content.keys()) if isinstance(extracted_content, dict) else []
|
||||
}
|
||||
writeDebugFile(image_prompt, "renderer_image_generation", debugData)
|
||||
writeDebugFile(image_prompt, "rendererImageGenerationPrompt")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
@ -82,12 +77,7 @@ class RendererImage(BaseRenderer):
|
|||
# Save image generation response to debug
|
||||
try:
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
responseData = {
|
||||
"success": image_result.get("success", False) if image_result else False,
|
||||
"has_image_data": bool(image_result.get("image_data", "")) if image_result else False,
|
||||
"result_keys": list(image_result.keys()) if isinstance(image_result, dict) else []
|
||||
}
|
||||
writeDebugFile(str(image_result), "renderer_image_generation_response", responseData)
|
||||
writeDebugFile(str(image_result), "rendererImageGenerationResponse")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
@ -114,7 +104,7 @@ class RendererImage(BaseRenderer):
|
|||
|
||||
# Add user's original intent if available
|
||||
if user_prompt:
|
||||
prompt_parts.append(f"User Request: {user_prompt}")
|
||||
prompt_parts.append(f"User Request: {ai_service.sanitizePromptContent(user_prompt, 'userinput')}")
|
||||
|
||||
# Add document title
|
||||
prompt_parts.append(f"Document Title: {title}")
|
||||
|
|
@ -151,7 +141,7 @@ class RendererImage(BaseRenderer):
|
|||
# Fallback to minimal prompt if AI compression fails or is still too long
|
||||
minimal_prompt = f"Create a professional image representing: {title}"
|
||||
if user_prompt:
|
||||
minimal_prompt += f" - {user_prompt}"
|
||||
minimal_prompt += f" - {ai_service.sanitizePromptContent(user_prompt, 'userinput')}"
|
||||
|
||||
# If even the minimal prompt is too long, truncate it
|
||||
if len(minimal_prompt) > 4000:
|
||||
|
|
|
|||
|
|
@ -81,64 +81,20 @@ async def buildAdaptiveExtractionPrompt(
|
|||
]
|
||||
}
|
||||
|
||||
# Single-file example data instead of schema
|
||||
single_file_example = {
|
||||
"metadata": {
|
||||
"title": "Single Document Example",
|
||||
"source_documents": ["doc_001"],
|
||||
"extraction_method": "ai_extraction"
|
||||
},
|
||||
"sections": [
|
||||
{
|
||||
"id": "section_1",
|
||||
"content_type": "heading",
|
||||
"elements": [
|
||||
{
|
||||
"level": 1,
|
||||
"text": "1. SECTION TITLE"
|
||||
}
|
||||
],
|
||||
"order": 1
|
||||
},
|
||||
{
|
||||
"id": "section_2",
|
||||
"content_type": "paragraph",
|
||||
"elements": [
|
||||
{
|
||||
"text": "This is the actual content that should be extracted from the document."
|
||||
}
|
||||
],
|
||||
"order": 2
|
||||
},
|
||||
{
|
||||
"id": "section_3",
|
||||
"content_type": "table",
|
||||
"elements": [
|
||||
{
|
||||
"headers": ["Column 1", "Column 2"],
|
||||
"rows": [["Value 1", "Value 2"]]
|
||||
}
|
||||
],
|
||||
"order": 3
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
if promptAnalysis.get("is_multi_file", False):
|
||||
# Multi-file prompt
|
||||
# UNIFIED APPROACH: Always use multi-document format (single doc = multi with n=1)
|
||||
adaptive_prompt = f"""
|
||||
{userPrompt}
|
||||
{services.ai.sanitizePromptContent(userPrompt, 'userinput')}
|
||||
|
||||
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
|
||||
|
||||
TASK: Extract the actual content from the document and organize it into separate sections, where each section will become a separate file.
|
||||
TASK: Extract the actual content from the document and organize it into documents. For single documents, create one document entry. For multi-document requests, create multiple document entries.
|
||||
|
||||
REQUIREMENTS:
|
||||
1. Analyze the document content provided in the context below
|
||||
2. Identify distinct sections in the document (by headings, topics, or logical breaks)
|
||||
3. Create one JSON document entry for each section found
|
||||
3. Create one or more JSON document entries based on the content structure
|
||||
4. Extract the real content from each section (headings, paragraphs, lists, etc.)
|
||||
5. Generate appropriate filenames for each section
|
||||
5. Generate appropriate filenames for each document
|
||||
|
||||
CRITICAL: You MUST return a JSON structure with a "documents" array, NOT a "sections" array.
|
||||
|
||||
|
|
@ -147,17 +103,18 @@ OUTPUT FORMAT: Return only valid JSON in this exact structure:
|
|||
|
||||
IMPORTANT: The JSON must have a "documents" key containing an array of document objects. Each document object must have:
|
||||
- "id": unique identifier
|
||||
- "title": section title from the document
|
||||
- "filename": appropriate filename for the section
|
||||
- "title": document title
|
||||
- "filename": appropriate filename for the document
|
||||
- "sections": array of content sections
|
||||
|
||||
DO NOT return a JSON with "sections" at the root level. Return a JSON with "documents" at the root level.
|
||||
|
||||
INSTRUCTIONS:
|
||||
- Replace "REPLACE_WITH_ACTUAL_*" placeholders with real content from the document
|
||||
- For single document requests: Create one document with all content in its sections
|
||||
- For multi-document requests: Create multiple documents, each with relevant sections
|
||||
- Use actual section titles, headings, and text from the document
|
||||
- Create meaningful filenames based on section content
|
||||
- Ensure each section contains the complete content for that part of the document
|
||||
- Create meaningful filenames based on content
|
||||
- Ensure each section contains the complete content for that part
|
||||
- Do not use generic placeholder text like "Section 1", "Section 2"
|
||||
- Extract real headings, paragraphs, lists, and other content elements
|
||||
- CRITICAL: Return JSON with "documents" array, not "sections" array
|
||||
|
|
@ -181,58 +138,12 @@ Image Analysis Requirements:
|
|||
|
||||
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
|
||||
|
||||
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
|
||||
""".strip()
|
||||
else:
|
||||
# Single-file prompt - use example data instead of schema
|
||||
adaptive_prompt = f"""
|
||||
{userPrompt}
|
||||
|
||||
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
|
||||
|
||||
TASK: Extract the actual content from the document and organize it into structured sections.
|
||||
|
||||
REQUIREMENTS:
|
||||
1. Analyze the document content provided in the context below
|
||||
2. Extract all content and organize it into logical sections
|
||||
3. Create structured JSON with sections containing the extracted content
|
||||
4. Preserve the original structure and data
|
||||
|
||||
OUTPUT FORMAT: Return only valid JSON in this exact structure:
|
||||
{json.dumps(single_file_example, indent=2)}
|
||||
|
||||
INSTRUCTIONS:
|
||||
- Replace example data with actual content from the document
|
||||
- Use actual headings, paragraphs, and text from the document
|
||||
- Ensure all content is properly structured
|
||||
- Do not use generic placeholder text
|
||||
- Extract real content from the documents
|
||||
|
||||
CONTEXT (Document Content):
|
||||
|
||||
Content Types to Extract:
|
||||
1. Tables: Extract all rows and columns with proper headers
|
||||
2. Lists: Extract all items with proper nesting
|
||||
3. Headings: Extract with appropriate levels
|
||||
4. Paragraphs: Extract as structured text
|
||||
5. Code: Extract code blocks with language identification
|
||||
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements
|
||||
|
||||
Image Analysis Requirements:
|
||||
- If you cannot analyze an image for any reason, explain why in the JSON response
|
||||
- Describe everything you see in the image
|
||||
- Include all text content, tables, logos, graphics, layout, and visual elements
|
||||
- If the image is too small, corrupted, or unclear, explain this
|
||||
- Always provide feedback - never return empty responses
|
||||
|
||||
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
|
||||
|
||||
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
|
||||
""".strip()
|
||||
|
||||
return adaptive_prompt
|
||||
|
||||
async def buildGenericExtractionPrompt(
|
||||
async def buildGenerationPrompt(
|
||||
outputFormat: str,
|
||||
userPrompt: str,
|
||||
title: str,
|
||||
|
|
|
|||
|
|
@ -3,9 +3,8 @@ Simple debug logger for AI prompts and responses.
|
|||
Writes files chronologically to gateway/test-chat/ai/ with sequential numbering.
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from datetime import datetime, UTC
|
||||
from typing import Any, Optional
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
def _getDebugDir() -> str:
|
||||
|
|
@ -25,64 +24,16 @@ def _getNextSequenceNumber() -> int:
|
|||
return len(files) + 1
|
||||
|
||||
|
||||
def _formatJsonReadable(data: Any) -> str:
|
||||
"""
|
||||
Format JSON data in a readable line-by-line structure.
|
||||
Handles both structured objects and text representations of dicts/lists.
|
||||
|
||||
Args:
|
||||
data: The data to format
|
||||
|
||||
Returns:
|
||||
Formatted string representation
|
||||
"""
|
||||
try:
|
||||
# First try to parse if it's a string representation
|
||||
if isinstance(data, str):
|
||||
try:
|
||||
# Try to parse as JSON first
|
||||
parsed = json.loads(data)
|
||||
data = parsed
|
||||
except json.JSONDecodeError:
|
||||
# Try to evaluate as Python literal (for dict/list strings)
|
||||
try:
|
||||
import ast
|
||||
parsed = ast.literal_eval(data)
|
||||
if isinstance(parsed, (dict, list)):
|
||||
data = parsed
|
||||
except (ValueError, SyntaxError):
|
||||
# If all parsing fails, treat as plain text
|
||||
pass
|
||||
|
||||
# Convert to JSON string with proper indentation
|
||||
if isinstance(data, (dict, list)):
|
||||
jsonStr = json.dumps(data, ensure_ascii=False, default=str, indent=2)
|
||||
else:
|
||||
jsonStr = str(data)
|
||||
|
||||
# Split into lines and add line numbers for better readability
|
||||
lines = jsonStr.split('\n')
|
||||
formattedLines = []
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
# Add line number and proper spacing
|
||||
lineNum = f"{i:3d}: "
|
||||
formattedLines.append(f"{lineNum}{line}")
|
||||
|
||||
return '\n'.join(formattedLines)
|
||||
except Exception:
|
||||
# Fallback to string representation if JSON formatting fails
|
||||
return str(data)
|
||||
|
||||
|
||||
def writeDebugFile(content: str, fileType: str, data: Optional[Any] = None) -> None:
|
||||
def writeDebugFile(content: str, fileType: str, documents: Optional[List] = None) -> None:
|
||||
"""
|
||||
Write debug content to a file with sequential numbering.
|
||||
Writes the content as-is since it's already the final integrated prompt.
|
||||
Includes document list labels for tracing enhancement.
|
||||
|
||||
Args:
|
||||
content: The main content to write
|
||||
fileType: Type of file (e.g., 'prompt', 'response', 'placeholders')
|
||||
data: Optional additional data to include as JSON
|
||||
content: The main content to write (already integrated)
|
||||
fileType: Type of file (e.g., 'prompt_final', 'response')
|
||||
documents: Optional list of documents for tracing
|
||||
"""
|
||||
try:
|
||||
debugDir = _getDebugDir()
|
||||
|
|
@ -96,27 +47,23 @@ def writeDebugFile(content: str, fileType: str, data: Optional[Any] = None) -> N
|
|||
filename = f"{tsWithSeq}-{fileType}.txt"
|
||||
filepath = os.path.join(debugDir, filename)
|
||||
|
||||
# Build content with document tracing
|
||||
debug_content = content
|
||||
|
||||
# Add document list labels for tracing enhancement
|
||||
if documents:
|
||||
debug_content += "\n\n=== DOCUMENT LIST FOR TRACING ===\n"
|
||||
for i, doc in enumerate(documents):
|
||||
if hasattr(doc, 'fileName'):
|
||||
debug_content += f"Document {i+1}: {doc.fileName} ({doc.mimeType})\n"
|
||||
elif hasattr(doc, 'fileId'):
|
||||
debug_content += f"Document {i+1}: {doc.fileId} ({getattr(doc, 'mimeType', 'unknown')})\n"
|
||||
else:
|
||||
debug_content += f"Document {i+1}: {str(doc)[:100]}...\n"
|
||||
|
||||
# Write the content with document tracing
|
||||
with open(filepath, 'w', encoding='utf-8') as f:
|
||||
f.write(content)
|
||||
|
||||
# If structured data provided, also append a human-readable section to the main .txt
|
||||
try:
|
||||
if data is not None:
|
||||
formatted = _formatJsonReadable(data)
|
||||
with open(filepath, 'a', encoding='utf-8') as f:
|
||||
f.write("\n\n=== FORMATTED DATA (human-readable) ===\n")
|
||||
f.write(formatted)
|
||||
f.write("\n")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# If additional data provided, write it as a separate JSON file with readable formatting
|
||||
if data is not None:
|
||||
jsonFilename = f"{tsWithSeq}-{fileType}_data.json"
|
||||
jsonFilepath = os.path.join(debugDir, jsonFilename)
|
||||
with open(jsonFilepath, 'w', encoding='utf-8') as f:
|
||||
formattedData = _formatJsonReadable(data)
|
||||
f.write(formattedData)
|
||||
f.write(debug_content)
|
||||
|
||||
except Exception as e:
|
||||
# Silent fail - don't break the main flow
|
||||
|
|
|
|||
|
|
@ -125,10 +125,6 @@ DELIVERED CONTENT TO CHECK:
|
|||
documents=None,
|
||||
options=request_options
|
||||
)
|
||||
# Write validation prompt/response to debug
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
writeDebugFile(validationPrompt, "validation_content_prompt")
|
||||
writeDebugFile(response or '', "validation_content_response")
|
||||
|
||||
# No retries or correction prompts here; parse-or-fail below
|
||||
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ class IntentAnalyzer:
|
|||
analysisPrompt = f"""
|
||||
You are an intent analyzer. Analyze the user's request to understand what they want delivered.
|
||||
|
||||
USER REQUEST: {userPrompt}
|
||||
USER REQUEST: {self.services.ai.sanitizePromptContent(userPrompt, 'userinput')}
|
||||
|
||||
CONTEXT: {getattr(context.task_step, 'objective', '') if hasattr(context, 'task_step') and context.task_step else ''}
|
||||
|
||||
|
|
@ -62,17 +62,12 @@ CRITICAL: Respond with ONLY the JSON object below. Do not include any explanator
|
|||
from modules.datamodels.datamodelAi import AiCallOptions, OperationType
|
||||
request_options = AiCallOptions()
|
||||
request_options.operationType = OperationType.GENERAL
|
||||
# Write prompt to debug
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
writeDebugFile(analysisPrompt, "intent_prompt")
|
||||
|
||||
response = await self.services.ai.callAi(
|
||||
prompt=analysisPrompt,
|
||||
documents=None,
|
||||
options=request_options
|
||||
)
|
||||
# Write response to debug
|
||||
writeDebugFile(response or '', "intent_response")
|
||||
|
||||
# No retries or correction prompts here; parse-or-fail below
|
||||
|
||||
|
|
|
|||
|
|
@ -94,10 +94,6 @@ class TaskPlanner:
|
|||
taskPlanningPromptTemplate = bundle.prompt
|
||||
placeholders = bundle.placeholders
|
||||
|
||||
# Write task planning prompt to debug
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
writeDebugFile(taskPlanningPromptTemplate, "taskplan_prompt", placeholders)
|
||||
|
||||
# Centralized AI call: Task planning (quality, detailed) with placeholders
|
||||
options = AiCallOptions(
|
||||
operationType=OperationType.GENERATE_PLAN,
|
||||
|
|
@ -119,9 +115,6 @@ class TaskPlanner:
|
|||
if not prompt:
|
||||
raise ValueError("AI service returned no response for task planning")
|
||||
|
||||
# Write task planning response to debug
|
||||
writeDebugFile(prompt or '', "taskplan_response")
|
||||
|
||||
# Parse task plan response
|
||||
try:
|
||||
jsonStart = prompt.find('{')
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ from modules.workflows.processing.shared.promptGenerationActionsReact import (
|
|||
generateReactParametersPrompt,
|
||||
generateReactRefinementPrompt
|
||||
)
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
from modules.workflows.processing.shared.placeholderFactory import extractReviewContent
|
||||
from modules.workflows.processing.adaptive import IntentAnalyzer, ContentValidator, LearningEngine, ProgressTracker
|
||||
from modules.workflows.processing.adaptive.adaptiveLearningEngine import AdaptiveLearningEngine
|
||||
|
|
@ -191,10 +190,6 @@ class ReactMode(BaseMode):
|
|||
promptTemplate = bundle.prompt
|
||||
placeholders = bundle.placeholders
|
||||
|
||||
# Write action selection prompt to debug
|
||||
from modules.shared.debugLogger import writeDebugFile
|
||||
writeDebugFile(promptTemplate, "action_selection_prompt", placeholders)
|
||||
|
||||
# Centralized AI call for plan selection (use plan generation quality)
|
||||
options = AiCallOptions(
|
||||
operationType=OperationType.GENERATE_PLAN,
|
||||
|
|
@ -211,8 +206,6 @@ class ReactMode(BaseMode):
|
|||
placeholders=placeholders,
|
||||
options=options
|
||||
)
|
||||
# Write action selection response to debug
|
||||
writeDebugFile(response or '', "action_selection_response")
|
||||
jsonStart = response.find('{') if response else -1
|
||||
jsonEnd = response.rfind('}') + 1 if response else 0
|
||||
if jsonStart == -1 or jsonEnd == 0:
|
||||
|
|
@ -306,9 +299,6 @@ class ReactMode(BaseMode):
|
|||
promptTemplate = bundle.prompt
|
||||
placeholders = bundle.placeholders
|
||||
|
||||
# Write parameters prompt to debug
|
||||
writeDebugFile(promptTemplate, "parameters_prompt", placeholders)
|
||||
|
||||
# Centralized AI call for parameter suggestion (balanced analysis)
|
||||
options = AiCallOptions(
|
||||
operationType=OperationType.ANALYSE_CONTENT,
|
||||
|
|
@ -367,12 +357,11 @@ class ReactMode(BaseMode):
|
|||
if 'language' not in parameters and hasattr(self.services, 'user') and getattr(self.services.user, 'language', None):
|
||||
parameters['language'] = self.services.user.language
|
||||
|
||||
# Write parameters response to debug
|
||||
# Build merged parameters object
|
||||
mergedParamObj = {
|
||||
"schema": (paramObj.get('schema') if isinstance(paramObj, dict) else 'parameters_v1'),
|
||||
"parameters": parameters
|
||||
}
|
||||
writeDebugFile(str(mergedParamObj), "parameters_response", mergedParamObj)
|
||||
|
||||
# Build a synthetic ActionItem for execution routing and labels
|
||||
currentRound = getattr(self.workflow, 'currentRound', 0)
|
||||
|
|
@ -625,9 +614,6 @@ class ReactMode(BaseMode):
|
|||
promptTemplate = bundle.prompt
|
||||
placeholders = bundle.placeholders
|
||||
|
||||
# Write refinement/validation prompt to debug
|
||||
writeDebugFile(promptTemplate, "validation_refinement_prompt", placeholders)
|
||||
|
||||
# Centralized AI call for refinement decision (balanced analysis)
|
||||
options = AiCallOptions(
|
||||
operationType=OperationType.ANALYSE_CONTENT,
|
||||
|
|
@ -644,8 +630,6 @@ class ReactMode(BaseMode):
|
|||
placeholders=placeholders,
|
||||
options=options
|
||||
)
|
||||
# Write refinement/validation response to debug
|
||||
writeDebugFile(resp or '', "validation_refinement_response")
|
||||
|
||||
# More robust JSON extraction
|
||||
if not resp:
|
||||
|
|
|
|||
|
|
@ -36,6 +36,9 @@ def generateActionDefinitionPrompt(services, context: Any) -> PromptBundle:
|
|||
|
||||
## 📋 Context
|
||||
|
||||
### User Language
|
||||
{{KEY:USER_LANGUAGE}}
|
||||
|
||||
### Task Objective
|
||||
{{KEY:USER_PROMPT}}
|
||||
|
||||
|
|
@ -45,9 +48,6 @@ def generateActionDefinitionPrompt(services, context: Any) -> PromptBundle:
|
|||
### Available Connections
|
||||
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}
|
||||
|
||||
### User Language
|
||||
{{KEY:USER_LANGUAGE}}
|
||||
|
||||
### Workflow History
|
||||
{{KEY:WORKFLOW_HISTORY}}
|
||||
|
||||
|
|
@ -77,7 +77,7 @@ def generateActionDefinitionPrompt(services, context: Any) -> PromptBundle:
|
|||
"parameters": {},
|
||||
"resultLabel": "round{current_round}_task{current_task}_action{action_number}_{descriptive_label}",
|
||||
"description": "What this action accomplishes",
|
||||
"userMessage": "User-friendly message in {{KEY:USER_LANGUAGE}}"
|
||||
"userMessage": "User-friendly message in language '{{KEY:USER_LANGUAGE}}'"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -118,7 +118,7 @@ def generateActionDefinitionPrompt(services, context: Any) -> PromptBundle:
|
|||
- **Make labels meaningful** for future reference
|
||||
|
||||
### User Messages
|
||||
- **Write in user language** ({{KEY:USER_LANGUAGE}})
|
||||
- **Write in user language:** '{{KEY:USER_LANGUAGE}}'
|
||||
- **Explain what's happening** in user-friendly terms
|
||||
- **Keep messages concise** but informative
|
||||
|
||||
|
|
@ -171,7 +171,7 @@ def generateResultReviewPrompt(context: Any) -> PromptBundle:
|
|||
"met_criteria": ["criteria1", "criteria2"],
|
||||
"unmet_criteria": ["criteria3", "criteria4"],
|
||||
"confidence": 0.85,
|
||||
"userMessage": "User-friendly message explaining the validation result"
|
||||
"userMessage": "User-friendly message explaining the validation result in language '{{KEY:USER_LANGUAGE}}'"
|
||||
}
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ def generateReactPlanSelectionPrompt(services, context: Any, learningEngine=None
|
|||
"""Define placeholders first, then the template; return PromptBundle."""
|
||||
placeholders: List[PromptPlaceholder] = [
|
||||
PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
|
||||
PromptPlaceholder(label="USER_LANGUAGE", content=extractUserLanguage(services), summaryAllowed=False),
|
||||
PromptPlaceholder(label="AVAILABLE_DOCUMENTS_SUMMARY", content=extractAvailableDocumentsSummary(services, context), summaryAllowed=True),
|
||||
PromptPlaceholder(label="AVAILABLE_METHODS", content=extractAvailableMethods(services), summaryAllowed=False),
|
||||
# Provide enriched history context for Stage 1 to craft parametersContext
|
||||
|
|
@ -68,26 +69,20 @@ AVAILABLE_DOCUMENTS_INDEX:
|
|||
AVAILABLE_CONNECTIONS_INDEX:
|
||||
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}
|
||||
|
||||
{{#if ADAPTIVE_GUIDANCE}}
|
||||
LEARNING-BASED GUIDANCE:
|
||||
{{KEY:ADAPTIVE_GUIDANCE}}
|
||||
|
||||
{{#if FAILURE_ANALYSIS}}
|
||||
FAILURE ANALYSIS:
|
||||
{{KEY:FAILURE_ANALYSIS}}
|
||||
{{/if}}
|
||||
|
||||
ESCALATION LEVEL: {{KEY:ESCALATION_LEVEL}}
|
||||
{{/if}}
|
||||
|
||||
REPLY: Return ONLY a JSON object with the following structure (no comments, no extra text). The chosen action MUST:
|
||||
- be the next logical incremental step toward fulfilling the objective
|
||||
- not attempt to complete the entire objective in one step
|
||||
- if producing files, target exactly one output format for this step
|
||||
- reference ONLY existing document IDs/labels from AVAILABLE_DOCUMENTS_INDEX
|
||||
{{#if ADAPTIVE_GUIDANCE}}
|
||||
- learn from previous validation feedback and avoid repeated mistakes
|
||||
{{/if}}
|
||||
{{
|
||||
"action": "method.action_name",
|
||||
"actionObjective": "...",
|
||||
|
|
@ -112,10 +107,8 @@ RULES:
|
|||
- Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX
|
||||
6. For requiredConnection, use ONLY an exact label from AVAILABLE_CONNECTIONS_INDEX
|
||||
7. Plan incrementally: if the overall intent needs multiple output formats (e.g., CSV and HTML), choose one format in this step and leave the other(s) for subsequent steps
|
||||
{{#if ADAPTIVE_GUIDANCE}}
|
||||
8. CRITICAL: Learn from previous validation feedback - avoid repeating the same mistakes
|
||||
9. If previous attempts failed, consider alternative approaches or more specific parameters
|
||||
{{/if}}
|
||||
"""
|
||||
|
||||
return PromptBundle(prompt=template, placeholders=placeholders)
|
||||
|
|
@ -197,6 +190,7 @@ Excludes documents/connections/history entirely.
|
|||
placeholders: List[PromptPlaceholder] = [
|
||||
PromptPlaceholder(label="ACTION_OBJECTIVE", content=actionObjective, summaryAllowed=False),
|
||||
PromptPlaceholder(label="SELECTED_ACTION", content=compoundActionName, summaryAllowed=False),
|
||||
PromptPlaceholder(label="USER_LANGUAGE", content=extractUserLanguage(services), summaryAllowed=False),
|
||||
PromptPlaceholder(label="PARAMETERS_CONTEXT", content=(parametersContext or ""), summaryAllowed=True),
|
||||
PromptPlaceholder(label="ACTION_PARAMETERS", content=actionParametersText, summaryAllowed=False),
|
||||
PromptPlaceholder(label="LEARNINGS", content=learningsText, summaryAllowed=True),
|
||||
|
|
@ -225,19 +219,13 @@ CONTEXT AND OBJECTIVE:
|
|||
SELECTED_ACTION:
|
||||
{{KEY:SELECTED_ACTION}}
|
||||
|
||||
{{#if PARAMETER_GUIDANCE}}
|
||||
LEARNING-BASED PARAMETER GUIDANCE:
|
||||
{{KEY:PARAMETER_GUIDANCE}}
|
||||
|
||||
{{#if ATTEMPT_NUMBER}}
|
||||
ATTEMPT NUMBER: {{KEY:ATTEMPT_NUMBER}}
|
||||
{{/if}}
|
||||
|
||||
{{#if FAILURE_ANALYSIS}}
|
||||
PREVIOUS FAILURE ANALYSIS:
|
||||
{{KEY:FAILURE_ANALYSIS}}
|
||||
{{/if}}
|
||||
{{/if}}
|
||||
|
||||
REPLY (ONLY JSON):
|
||||
{{
|
||||
|
|
@ -264,19 +252,15 @@ INSTRUCTIONS:
|
|||
- Fill in appropriate values based on the context and objective
|
||||
- Do NOT invent new parameters
|
||||
- Do NOT include: documentList, connectionReference, history, documents, connections
|
||||
{{#if PARAMETER_GUIDANCE}}
|
||||
- CRITICAL: Follow the learning-based parameter guidance above
|
||||
- Learn from previous validation failures and adjust parameters accordingly
|
||||
{{/if}}
|
||||
|
||||
RULES:
|
||||
- Return ONLY JSON (no markdown, no prose)
|
||||
- Use ONLY the exact parameter names listed in REQUIRED PARAMETERS FOR THIS ACTION
|
||||
- Do NOT add any parameters not listed above
|
||||
- Do NOT add nested objects or custom fields
|
||||
{{#if PARAMETER_GUIDANCE}}
|
||||
- Apply learning insights to avoid repeated parameter mistakes
|
||||
{{/if}}
|
||||
"""
|
||||
|
||||
return PromptBundle(prompt=template, placeholders=placeholders)
|
||||
|
|
@ -285,6 +269,7 @@ def generateReactRefinementPrompt(services, context: Any, reviewContent: str) ->
|
|||
"""Define placeholders first, then the template; return PromptBundle."""
|
||||
placeholders: List[PromptPlaceholder] = [
|
||||
PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
|
||||
PromptPlaceholder(label="USER_LANGUAGE", content=extractUserLanguage(services), summaryAllowed=False),
|
||||
PromptPlaceholder(label="REVIEW_CONTENT", content=reviewContent, summaryAllowed=True),
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ Break down user requests into logical, executable task steps.
|
|||
```json
|
||||
{
|
||||
"overview": "Brief description of the overall plan",
|
||||
"userMessage": "User-friendly message explaining the task plan (use {{KEY:USER_LANGUAGE}} language)",
|
||||
"userMessage": "User-friendly message explaining the task plan in language '{{KEY:USER_LANGUAGE}}'",
|
||||
"tasks": [
|
||||
{
|
||||
"id": "task_1",
|
||||
|
|
@ -83,7 +83,7 @@ Break down user requests into logical, executable task steps.
|
|||
"dependencies": ["task_0"],
|
||||
"success_criteria": ["measurable criteria 1", "measurable criteria 2"],
|
||||
"estimated_complexity": "low|medium|high",
|
||||
"userMessage": "What this task will accomplish"
|
||||
"userMessage": "What this task will accomplish in language '{{KEY:USER_LANGUAGE}}'"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,216 +0,0 @@
|
|||
"""
|
||||
Security utilities for AI prompt construction.
|
||||
Provides secure content escaping to prevent prompt injection attacks.
|
||||
"""
|
||||
|
||||
import re
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Union, List, Dict
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def _escapeForAiPrompt(content: str) -> str:
|
||||
"""
|
||||
Securely escape content for AI prompts to prevent injection attacks.
|
||||
|
||||
This function:
|
||||
1. Escapes all special characters that could break prompt structure
|
||||
2. Wraps content in secure delimiters
|
||||
3. Handles multi-line content safely
|
||||
4. Prevents quote injection and context breaking
|
||||
|
||||
Args:
|
||||
content: The content to escape
|
||||
|
||||
Returns:
|
||||
Safely escaped content wrapped in secure delimiters
|
||||
"""
|
||||
if not content:
|
||||
return ""
|
||||
|
||||
# Convert to string if not already
|
||||
content_str = str(content)
|
||||
|
||||
# Remove or escape dangerous characters that could break prompt structure
|
||||
# This includes quotes, backslashes, and other special characters
|
||||
escaped = content_str
|
||||
|
||||
# Escape backslashes first (order matters)
|
||||
escaped = escaped.replace('\\', '\\\\')
|
||||
|
||||
# Escape quotes and other special characters
|
||||
escaped = escaped.replace('"', '\\"')
|
||||
escaped = escaped.replace("'", "\\'")
|
||||
escaped = escaped.replace('\n', '\\n')
|
||||
escaped = escaped.replace('\r', '\\r')
|
||||
escaped = escaped.replace('\t', '\\t')
|
||||
|
||||
# Remove or escape other potentially dangerous characters
|
||||
# Remove control characters except newlines (already handled above)
|
||||
escaped = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', escaped)
|
||||
|
||||
# Wrap in secure delimiters with clear boundaries
|
||||
# Using a unique delimiter pattern that's unlikely to appear in user content
|
||||
secure_delimiter_start = "===USER_CONTENT_START==="
|
||||
secure_delimiter_end = "===USER_CONTENT_END==="
|
||||
|
||||
return f"{secure_delimiter_start}\n{escaped}\n{secure_delimiter_end}"
|
||||
|
||||
def _escapeForJsonPrompt(content: Any) -> str:
|
||||
"""
|
||||
Securely escape content for JSON-based AI prompts.
|
||||
|
||||
Args:
|
||||
content: The content to escape (can be any type)
|
||||
|
||||
Returns:
|
||||
Safely escaped JSON string
|
||||
"""
|
||||
try:
|
||||
# Convert to JSON string with proper escaping
|
||||
json_str = json.dumps(content, ensure_ascii=False, separators=(',', ':'))
|
||||
return json_str
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to escape content as JSON: {str(e)}")
|
||||
# Fallback to string escaping
|
||||
return _escapeForAiPrompt(str(content))
|
||||
|
||||
def _escapeForListPrompt(items: List[Any]) -> str:
|
||||
"""
|
||||
Securely escape a list of items for AI prompts.
|
||||
|
||||
Args:
|
||||
items: List of items to escape
|
||||
|
||||
Returns:
|
||||
Safely escaped list representation
|
||||
"""
|
||||
if not items:
|
||||
return "[]"
|
||||
|
||||
try:
|
||||
escaped_items = []
|
||||
for item in items:
|
||||
if isinstance(item, (dict, list)):
|
||||
escaped_items.append(_escapeForJsonPrompt(item))
|
||||
else:
|
||||
escaped_items.append(_escapeForAiPrompt(str(item)))
|
||||
|
||||
return f"[{', '.join(escaped_items)}]"
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to escape list content: {str(e)}")
|
||||
return "[]"
|
||||
|
||||
def securePromptContent(content: Any, content_type: str = "text") -> str:
|
||||
"""
|
||||
Main function to securely escape content for AI prompts.
|
||||
|
||||
Args:
|
||||
content: The content to escape
|
||||
content_type: Type of content ("text", "json", "list", "user_prompt", "document_content")
|
||||
|
||||
Returns:
|
||||
Safely escaped content ready for AI prompt insertion
|
||||
"""
|
||||
if content is None:
|
||||
return ""
|
||||
|
||||
try:
|
||||
if content_type == "json":
|
||||
return _escapeForJsonPrompt(content)
|
||||
elif content_type == "list":
|
||||
if isinstance(content, list):
|
||||
return _escapeForListPrompt(content)
|
||||
else:
|
||||
return _escapeForAiPrompt(str(content))
|
||||
elif content_type in ["user_prompt", "document_content"]:
|
||||
# Extra security for user-controlled content
|
||||
escaped = _escapeForAiPrompt(str(content))
|
||||
# Add additional warning for AI
|
||||
return f"⚠️ USER_CONTROLLED_CONTENT: {escaped}"
|
||||
else: # content_type == "text" or default
|
||||
return _escapeForAiPrompt(str(content))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error escaping content for AI prompt: {str(e)}")
|
||||
# Return a safe fallback
|
||||
return "[ERROR: Content could not be safely escaped]"
|
||||
|
||||
def buildSecurePrompt(template: str, **kwargs) -> str:
|
||||
"""
|
||||
Build a secure AI prompt by safely inserting content into a template.
|
||||
|
||||
Args:
|
||||
template: The prompt template with {key} placeholders
|
||||
**kwargs: Key-value pairs for template substitution
|
||||
|
||||
Returns:
|
||||
Securely constructed prompt
|
||||
"""
|
||||
try:
|
||||
# Escape all values before substitution
|
||||
escaped_kwargs = {}
|
||||
for key, value in kwargs.items():
|
||||
if key.endswith('_json'):
|
||||
escaped_kwargs[key] = securePromptContent(value, "json")
|
||||
elif key.endswith('_list'):
|
||||
escaped_kwargs[key] = securePromptContent(value, "list")
|
||||
elif key in ['user_prompt', 'context', 'document_content', 'user_input']:
|
||||
escaped_kwargs[key] = securePromptContent(value, "user_prompt")
|
||||
else:
|
||||
escaped_kwargs[key] = securePromptContent(value, "text")
|
||||
|
||||
# Use safe string formatting
|
||||
return template.format(**escaped_kwargs)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error building secure prompt: {str(e)}")
|
||||
return template # Return original template if escaping fails
|
||||
|
||||
def validatePromptSecurity(prompt: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Validate that a prompt is secure and doesn't contain injection patterns.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to validate
|
||||
|
||||
Returns:
|
||||
Dictionary with validation results
|
||||
"""
|
||||
issues = []
|
||||
|
||||
# Check for unescaped quotes that could break JSON
|
||||
if '"' in prompt and '\\"' not in prompt:
|
||||
# Check if quotes are properly escaped
|
||||
unescaped_quotes = re.findall(r'(?<!\\)"', prompt)
|
||||
if unescaped_quotes:
|
||||
issues.append("Unescaped quotes detected")
|
||||
|
||||
# Check for potential injection patterns
|
||||
injection_patterns = [
|
||||
r'ignore\s+previous\s+instructions',
|
||||
r'forget\s+everything',
|
||||
r'you\s+are\s+now',
|
||||
r'system\s*:',
|
||||
r'assistant\s*:',
|
||||
r'user\s*:',
|
||||
r'<\|.*\|>', # Special tokens
|
||||
]
|
||||
|
||||
for pattern in injection_patterns:
|
||||
if re.search(pattern, prompt, re.IGNORECASE):
|
||||
issues.append(f"Potential injection pattern detected: {pattern}")
|
||||
|
||||
# Check for proper content delimiters
|
||||
if "===USER_CONTENT_START===" not in prompt and "===USER_CONTENT_END===" not in prompt:
|
||||
# This might be okay for some prompts, but flag for review
|
||||
if any(keyword in prompt.lower() for keyword in ['context', 'user', 'input', 'prompt']):
|
||||
issues.append("User content may not be properly delimited")
|
||||
|
||||
return {
|
||||
"is_secure": len(issues) == 0,
|
||||
"issues": issues,
|
||||
"prompt_length": len(prompt),
|
||||
"has_user_content_delimiters": "===USER_CONTENT_START===" in prompt
|
||||
}
|
||||
|
|
@ -216,7 +216,7 @@ class WorkflowManager:
|
|||
" }\n"
|
||||
" ]\n"
|
||||
"}\n\n"
|
||||
f"User message:\n{userInput.prompt}"
|
||||
f"User message:\n{self.services.ai.sanitizePromptContent(userInput.prompt, 'userinput')}"
|
||||
)
|
||||
|
||||
# Call AI analyzer
|
||||
|
|
@ -716,6 +716,7 @@ class WorkflowManager:
|
|||
logger.error(f"Error processing file ID {fileId}: {str(e)}")
|
||||
return documents
|
||||
|
||||
|
||||
def _setUserLanguage(self, language: str) -> None:
|
||||
"""Set user language for the service center"""
|
||||
self.services.user.language = language
|
||||
|
|
|
|||
258
test_unified_architecture.py
Normal file
258
test_unified_architecture.py
Normal file
|
|
@ -0,0 +1,258 @@
|
|||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
# Add the project root to the sys.path
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
|
||||
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||
from modules.datamodels.datamodelChat import ChatDocument
|
||||
from modules.services.serviceAi.subCoreAi import SubCoreAi
|
||||
|
||||
class MockAiObjects:
|
||||
def __init__(self, responses):
|
||||
self.responses = responses
|
||||
self.call_count = 0
|
||||
|
||||
async def call(self, request: AiCallRequest):
|
||||
if self.call_count < len(self.responses):
|
||||
response_content = self.responses[self.call_count]
|
||||
self.call_count += 1
|
||||
mock_response = MagicMock()
|
||||
mock_response.content = response_content
|
||||
mock_response.modelName = "mock-model"
|
||||
mock_response.priceUsd = 0.001
|
||||
mock_response.processingTime = 0.1
|
||||
print(f" Mock AI Call {self.call_count}: Responding with partial result (length: {len(response_content)})")
|
||||
return mock_response
|
||||
else:
|
||||
print(" Mock AI Call: No more mock responses, returning empty.")
|
||||
mock_response = MagicMock()
|
||||
mock_response.content = ""
|
||||
return mock_response
|
||||
|
||||
class MockServices:
|
||||
def __init__(self):
|
||||
self.currentWorkflow = MagicMock()
|
||||
self.currentWorkflow.id = "test_workflow_123"
|
||||
self.workflow = MagicMock()
|
||||
self.workflow.createProgressLogger.return_value = MagicMock()
|
||||
self.workflow.storeWorkflowStat = AsyncMock()
|
||||
self.ai = MagicMock()
|
||||
self.ai.sanitizePromptContent.side_effect = lambda content, type: content
|
||||
self.utils = MagicMock()
|
||||
self.utils.debugLogToFile.side_effect = lambda msg, tag: print(f" DEBUG ({tag}): {msg}")
|
||||
self.utils.configGet.return_value = False # Disable debug files for tests
|
||||
|
||||
class MockDocumentProcessor:
|
||||
async def callAiText(self, prompt, documents, options):
|
||||
return "Extracted content from documents: Sample text content"
|
||||
|
||||
async def test_unified_architecture():
|
||||
print("\n=== Testing Unified Architecture ===")
|
||||
|
||||
# Mock responses: 1 for generation prompt building + 2 for actual generation
|
||||
mock_responses = [
|
||||
# Response 1: Generation prompt building
|
||||
"Generate JSON content that creates a structured document with prime numbers in a table format. Use the canonical JSON format with sections and elements.",
|
||||
|
||||
# Response 2: First part of generation
|
||||
"""{
|
||||
"metadata": {
|
||||
"title": "Prime Numbers List",
|
||||
"splitStrategy": "single_document",
|
||||
"source_documents": [],
|
||||
"extraction_method": "ai_generation"
|
||||
},
|
||||
"documents": [
|
||||
{
|
||||
"id": "doc_primes_1_500",
|
||||
"title": "Prime Numbers 1-500",
|
||||
"filename": "primes_1_500.docx",
|
||||
"sections": [
|
||||
{
|
||||
"id": "section_1",
|
||||
"content_type": "table",
|
||||
"elements": [
|
||||
{
|
||||
"headers": ["Number", "Prime"],
|
||||
"rows": [
|
||||
["1", "2"], ["2", "3"], ["3", "5"], ["4", "7"], ["5", "11"]
|
||||
]
|
||||
}
|
||||
],
|
||||
"order": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
} [CONTINUE: Generate remaining prime numbers from 501 to 1000]""",
|
||||
|
||||
# Response 3: Second part of generation
|
||||
"""{
|
||||
"metadata": {
|
||||
"title": "Prime Numbers List",
|
||||
"splitStrategy": "single_document",
|
||||
"source_documents": [],
|
||||
"extraction_method": "ai_generation"
|
||||
},
|
||||
"documents": [
|
||||
{
|
||||
"id": "doc_primes_501_1000",
|
||||
"title": "Prime Numbers 501-1000",
|
||||
"filename": "primes_501_1000.docx",
|
||||
"sections": [
|
||||
{
|
||||
"id": "section_2",
|
||||
"content_type": "table",
|
||||
"elements": [
|
||||
{
|
||||
"headers": ["Number", "Prime"],
|
||||
"rows": [
|
||||
["501", "3571"], ["502", "3572"], ["503", "3581"]
|
||||
]
|
||||
}
|
||||
],
|
||||
"order": 2
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}"""
|
||||
]
|
||||
|
||||
mock_ai_objects = MockAiObjects(mock_responses)
|
||||
mock_services = MockServices()
|
||||
mock_document_processor = MockDocumentProcessor()
|
||||
|
||||
core_ai_service = SubCoreAi(mock_services, mock_ai_objects)
|
||||
|
||||
prompt = "Generate the first 1000 prime numbers and arrange them in a structured table format."
|
||||
options = AiCallOptions(operationType=OperationType.GENERATE_CONTENT)
|
||||
output_format = "docx"
|
||||
title = "Prime Numbers List"
|
||||
|
||||
print(f"User Prompt: '{prompt}'")
|
||||
print("Testing unified architecture with direct generation (no documents)...")
|
||||
|
||||
# Test the unified generation method directly
|
||||
result = await core_ai_service._callAiUnifiedGeneration(prompt, None, options, output_format, title)
|
||||
|
||||
print("\n--- Generated JSON Result ---")
|
||||
print(f"Result length: {len(result)} characters")
|
||||
print(f"Result preview: {result[:300]}...")
|
||||
|
||||
# Verify it's valid JSON
|
||||
import json
|
||||
try:
|
||||
parsed_result = json.loads(result)
|
||||
print(f"✅ Valid JSON with {len(parsed_result.get('documents', []))} documents")
|
||||
|
||||
# Verify it's using the multi-document format
|
||||
if "documents" in parsed_result and "metadata" in parsed_result:
|
||||
print("✅ Using unified multi-document format")
|
||||
print("✅ Architecture is properly unified!")
|
||||
return True
|
||||
else:
|
||||
print("❌ Not using multi-document format")
|
||||
return False
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"❌ Invalid JSON: {str(e)}")
|
||||
return False
|
||||
|
||||
async def test_with_documents():
|
||||
print("\n=== Testing Unified Architecture WITH Documents ===")
|
||||
|
||||
# Mock responses: 1 for generation prompt building + 1 for actual generation
|
||||
mock_responses = [
|
||||
# Response 1: Generation prompt building
|
||||
"Generate JSON content that creates a comprehensive fruit analysis report based on the extracted content. Use the canonical JSON format with sections and elements.",
|
||||
|
||||
# Response 2: Generation with extracted content
|
||||
"""{
|
||||
"metadata": {
|
||||
"title": "Fruit Analysis Report",
|
||||
"splitStrategy": "single_document",
|
||||
"source_documents": ["doc1"],
|
||||
"extraction_method": "ai_generation"
|
||||
},
|
||||
"documents": [
|
||||
{
|
||||
"id": "doc_fruit_analysis",
|
||||
"title": "Fruit Analysis Report",
|
||||
"filename": "fruit_analysis.docx",
|
||||
"sections": [
|
||||
{
|
||||
"id": "section_1",
|
||||
"content_type": "paragraph",
|
||||
"elements": [
|
||||
{
|
||||
"text": "Based on the extracted content, here is a comprehensive fruit analysis..."
|
||||
}
|
||||
],
|
||||
"order": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}"""
|
||||
]
|
||||
|
||||
mock_ai_objects = MockAiObjects(mock_responses)
|
||||
mock_services = MockServices()
|
||||
mock_document_processor = MockDocumentProcessor()
|
||||
|
||||
core_ai_service = SubCoreAi(mock_services, mock_ai_objects)
|
||||
|
||||
prompt = "Extract all fruit information and create a comprehensive analysis report."
|
||||
options = AiCallOptions(operationType=OperationType.GENERATE_CONTENT)
|
||||
output_format = "docx"
|
||||
title = "Fruit Analysis Report"
|
||||
|
||||
print(f"User Prompt: '{prompt}'")
|
||||
print("Testing unified architecture with document extraction...")
|
||||
|
||||
# Test the unified generation method with extracted content
|
||||
result = await core_ai_service._callAiUnifiedGeneration(prompt, "Sample fruit data: apples, oranges, bananas", options, output_format, title)
|
||||
|
||||
print("\n--- Generated JSON Result ---")
|
||||
print(f"Result length: {len(result)} characters")
|
||||
print(f"Result preview: {result[:300]}...")
|
||||
|
||||
# Verify it's valid JSON
|
||||
import json
|
||||
try:
|
||||
parsed_result = json.loads(result)
|
||||
print(f"✅ Valid JSON with {len(parsed_result.get('documents', []))} documents")
|
||||
|
||||
# Verify it's using the multi-document format
|
||||
if "documents" in parsed_result and "metadata" in parsed_result:
|
||||
print("✅ Using unified multi-document format")
|
||||
print("✅ Architecture is properly unified!")
|
||||
return True
|
||||
else:
|
||||
print("❌ Not using multi-document format")
|
||||
return False
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"❌ Invalid JSON: {str(e)}")
|
||||
return False
|
||||
|
||||
async def main():
|
||||
print("🚀 Testing Unified Architecture Implementation")
|
||||
print("=" * 60)
|
||||
|
||||
success1 = await test_unified_architecture()
|
||||
success2 = await test_with_documents()
|
||||
|
||||
if success1 and success2:
|
||||
print("\n🎉 ALL TESTS PASSED! Unified architecture is properly implemented.")
|
||||
print("✅ Single document = multi-document with n=1")
|
||||
print("✅ Always uses multi-document JSON format")
|
||||
print("✅ Continuation logic works for long responses")
|
||||
print("✅ Both scenarios (with/without documents) work")
|
||||
else:
|
||||
print("\n❌ Some tests failed. Please check the implementation.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
Loading…
Reference in a new issue