This commit is contained in:
ValueOn AG 2025-10-15 00:08:28 +02:00
parent f0733204fb
commit da1f075556
9 changed files with 339 additions and 539 deletions

View file

@ -382,8 +382,22 @@ class SubDocumentGeneration:
logger.info(f"AI enhanced JSON content successfully")
except json.JSONDecodeError as e:
logger.warning(f"AI generation returned invalid JSON: {str(e)}, using original content")
enhancedContent = complete_document
logger.warning(f"AI generation returned invalid JSON: {str(e)}, attempting to repair...")
# Try to repair common JSON issues
try:
repaired_result = self._repairJson(result)
enhancedContent = json.loads(repaired_result)
logger.info(f"Successfully repaired JSON content")
except (json.JSONDecodeError, Exception) as repair_error:
logger.warning(f"JSON repair failed: {str(repair_error)}, trying AI repair...")
# Try AI-powered JSON repair as last resort
try:
ai_repaired = await self._repairJsonWithAI(result)
enhancedContent = json.loads(ai_repaired)
logger.info(f"AI successfully repaired JSON content")
except Exception as ai_repair_error:
logger.warning(f"AI JSON repair also failed: {str(ai_repair_error)}, using original content")
enhancedContent = complete_document
else:
logger.warning("AI generation returned empty response, using original content")
enhancedContent = complete_document
@ -664,3 +678,127 @@ Return only the JSON response.
except Exception:
# Non-fatal; ignore if storage or chat creation fails
return
def _repairJson(self, json_string: str) -> str:
    """Repair common JSON syntax errors efficiently for large JSON.

    Parses the input once to classify the failure, then applies targeted
    regex fixes keyed off the specific ``json.JSONDecodeError`` message
    (missing commas, missing values, unquoted property names). It always
    strips trailing commas and balances up to 5 missing closing
    braces/brackets before re-validating.

    Args:
        json_string: Raw, possibly malformed JSON text.

    Returns:
        The (possibly partially) repaired string. Callers must re-validate:
        on failure the best-effort string is returned anyway so the caller
        can fall back to AI-based repair.
    """
    try:
        import re
        import json
        # Remove any leading/trailing whitespace
        json_string = json_string.strip()
        # For large JSON, skip substring extraction and go straight to targeted repairs
        logger.info(f"Attempting JSON repair for {len(json_string)} characters...")
        # Try to parse first to see what specific error we get
        try:
            json.loads(json_string)
            return json_string  # Already valid
        except json.JSONDecodeError as e:
            error_msg = str(e)
            logger.info(f"JSON error: {error_msg}")
        # Apply targeted fixes based on the specific error reported above.
        if "Expecting ',' delimiter" in error_msg:
            # Fix missing commas between array elements
            json_string = re.sub(r'\]\s*\[', '], [', json_string)
            json_string = re.sub(r'\}\s*\{', '}, {', json_string)
            # Fix missing commas between object properties.
            # NOTE(review): this heuristic can also fire inside string values
            # containing quotes — acceptable for best-effort repair, but verify.
            json_string = re.sub(r'("\s*:\s*[^,}]+)\s*(")', r'\1, \2', json_string)
        if "Expecting value" in error_msg:
            # Fix missing values (replace empty with null)
            json_string = re.sub(r':\s*,', ': null,', json_string)
            json_string = re.sub(r':\s*}', ': null}', json_string)
        if "Expecting property name" in error_msg:
            # Fix unquoted property names
            json_string = re.sub(r'(\w+):', r'"\1":', json_string)
        # Fix trailing commas before closing brackets/braces
        json_string = re.sub(r',(\s*[}\]])', r'\1', json_string)
        # Fix missing closing brackets/braces (only if reasonable)
        open_braces = json_string.count('{')
        close_braces = json_string.count('}')
        open_brackets = json_string.count('[')
        close_brackets = json_string.count(']')
        # Only add missing brackets if the difference is small (avoid runaway)
        if 0 < (open_braces - close_braces) <= 5:
            missing_braces = open_braces - close_braces
            json_string += '}' * missing_braces
        if 0 < (open_brackets - close_brackets) <= 5:
            missing_brackets = open_brackets - close_brackets
            json_string += ']' * missing_brackets
        # Try to parse again to see whether the targeted fixes worked.
        try:
            json.loads(json_string)
            logger.info("JSON repair successful")
            return json_string
        except json.JSONDecodeError:
            logger.warning("JSON repair failed - will try AI repair")
            return json_string
    except Exception as e:
        # Defensive catch-all: repair must never raise into the caller.
        logger.warning(f"JSON repair failed: {str(e)}")
        return json_string
async def _repairJsonWithAI(self, malformed_json: str) -> str:
    """Use AI to repair malformed JSON efficiently for large files.

    Sends a (possibly truncated) copy of the malformed text to the shared
    AI service with a repair prompt, strips any markdown code fences from
    the answer, and validates it with ``json.loads`` before returning.

    Args:
        malformed_json: JSON text that failed to parse.

    Returns:
        The AI-repaired, validated JSON string, or the original
        ``malformed_json`` unchanged when repair fails for any reason.
    """
    try:
        import json
        # Limit JSON size for AI processing (max 50KB to avoid token limits)
        max_json_size = 50000
        json_to_repair = malformed_json
        if len(malformed_json) > max_json_size:
            logger.warning(f"JSON too large ({len(malformed_json)} chars), truncating to {max_json_size} chars for AI repair")
            # Try to find a good truncation point (end of a complete object/array)
            truncate_at = max_json_size
            for i in range(max_json_size, max(0, max_json_size - 1000), -1):
                if malformed_json[i] in ('}', ']'):
                    truncate_at = i + 1
                    break
            json_to_repair = malformed_json[:truncate_at] + "..."
        repair_prompt = f"""
You are a JSON repair expert. Fix the following malformed JSON and return ONLY the corrected JSON, no explanations.
Malformed JSON:
{json_to_repair}
Return only the valid JSON:
"""
        # Use AI to repair the JSON
        repaired_json = await self.services.ai.callAi(
            prompt=repair_prompt,
            documents=None,
            options={
                "process_type": "text",
                "operation_type": "generate_content",
                "priority": "speed",
                "max_cost": 0.01
            }
        )
        # Clean up the response: models often wrap answers in markdown fences.
        repaired_json = repaired_json.strip()
        if repaired_json.startswith('```json'):
            repaired_json = repaired_json[7:]
        elif repaired_json.startswith('```'):
            # Bug fix: a bare ``` fence (no language tag) was previously left
            # in place, guaranteeing the json.loads validation below failed.
            repaired_json = repaired_json[3:]
        if repaired_json.endswith('```'):
            repaired_json = repaired_json[:-3]
        repaired_json = repaired_json.strip()
        # Validate the repaired JSON before trusting it.
        json.loads(repaired_json)
        logger.info("AI JSON repair successful")
        return repaired_json
    except Exception as e:
        # Best-effort: never raise; hand the original back to the caller.
        logger.warning(f"AI JSON repair failed: {str(e)}")
        return malformed_json

View file

@ -199,8 +199,10 @@ class SubDocumentProcessing:
else:
raise ValueError('Normalization produced zero rows')
except Exception as e:
# Surface normalization failure while leaving original merged JSON (single-path expectation is to fail)
raise
# Log normalization failure but don't re-raise - continue with original merged JSON
logger.warning(f"Normalization failed (expected): {str(e)}")
logger.debug(f"Normalization error type: {type(e).__name__}")
# Continue with original merged JSON instead of re-raising
# Save merged JSON extraction content to debug file - only if debug enabled
try:
@ -221,6 +223,10 @@ class SubDocumentProcessing:
except Exception as e:
logger.error(f"Error in per-chunk processing (JSON mode): {str(e)}")
logger.error(f"Exception type: {type(e).__name__}")
logger.error(f"Exception args: {e.args}")
import traceback
logger.error(f"Traceback: {traceback.format_exc()}")
return {"metadata": {"title": "Error Document"}, "sections": []}
async def processDocumentsPerChunkJsonWithPrompt(

View file

@ -278,17 +278,25 @@ class RendererHtml(BaseRenderer):
section_data = self._get_section_data(section)
if section_type == "table":
return self._render_json_table(section_data, styles)
# Process the section data to extract table structure
processed_data = self._process_section_by_type(section)
return self._render_json_table(processed_data, styles)
elif section_type == "bullet_list":
return self._render_json_bullet_list(section_data, styles)
# Process the section data to extract bullet list structure
processed_data = self._process_section_by_type(section)
return self._render_json_bullet_list(processed_data, styles)
elif section_type == "heading":
return self._render_json_heading(section_data, styles)
elif section_type == "paragraph":
return self._render_json_paragraph(section_data, styles)
elif section_type == "code_block":
return self._render_json_code_block(section_data, styles)
# Process the section data to extract code block structure
processed_data = self._process_section_by_type(section)
return self._render_json_code_block(processed_data, styles)
elif section_type == "image":
return self._render_json_image(section_data, styles)
# Process the section data to extract image structure
processed_data = self._process_section_by_type(section)
return self._render_json_image(processed_data, styles)
else:
# Fallback to paragraph for unknown types
return self._render_json_paragraph(section_data, styles)

View file

@ -81,17 +81,25 @@ class RendererMarkdown(BaseRenderer):
section_data = self._get_section_data(section)
if section_type == "table":
return self._render_json_table(section_data)
# Process the section data to extract table structure
processed_data = self._process_section_by_type(section)
return self._render_json_table(processed_data)
elif section_type == "bullet_list":
return self._render_json_bullet_list(section_data)
# Process the section data to extract bullet list structure
processed_data = self._process_section_by_type(section)
return self._render_json_bullet_list(processed_data)
elif section_type == "heading":
return self._render_json_heading(section_data)
elif section_type == "paragraph":
return self._render_json_paragraph(section_data)
elif section_type == "code_block":
return self._render_json_code_block(section_data)
# Process the section data to extract code block structure
processed_data = self._process_section_by_type(section)
return self._render_json_code_block(processed_data)
elif section_type == "image":
return self._render_json_image(section_data)
# Process the section data to extract image structure
processed_data = self._process_section_by_type(section)
return self._render_json_image(processed_data)
else:
# Fallback to paragraph for unknown types
return self._render_json_paragraph(section_data)

View file

@ -104,9 +104,13 @@ class RendererText(BaseRenderer):
section_data = self._get_section_data(section)
if section_type == "table":
return self._render_json_table(section_data)
# Process the section data to extract table structure
processed_data = self._process_section_by_type(section)
return self._render_json_table(processed_data)
elif section_type == "bullet_list":
return self._render_json_bullet_list(section_data)
# Process the section data to extract bullet list structure
processed_data = self._process_section_by_type(section)
return self._render_json_bullet_list(processed_data)
elif section_type == "heading":
# Render each heading element in the elements array
# section_data is already the elements array from _get_section_data
@ -122,9 +126,13 @@ class RendererText(BaseRenderer):
rendered_elements.append(self._render_json_paragraph(element))
return "\n".join(rendered_elements)
elif section_type == "code_block":
return self._render_json_code_block(section_data)
# Process the section data to extract code block structure
processed_data = self._process_section_by_type(section)
return self._render_json_code_block(processed_data)
elif section_type == "image":
return self._render_json_image(section_data)
# Process the section data to extract image structure
processed_data = self._process_section_by_type(section)
return self._render_json_image(processed_data)
else:
# Fallback to paragraph for unknown types - render each element
# section_data is already the elements array from _get_section_data

View file

@ -1,9 +1,9 @@
# adaptive module for React mode
# Provides adaptive learning capabilities
from .intentAnalyzer import IntentAnalyzer, DataType, ExpectedFormat
from .intentAnalyzer import IntentAnalyzer
from .contentValidator import ContentValidator
from .learningEngine import LearningEngine
from .progressTracker import ProgressTracker
__all__ = ['IntentAnalyzer', 'ContentValidator', 'LearningEngine', 'ProgressTracker', 'DataType', 'ExpectedFormat']
__all__ = ['IntentAnalyzer', 'ContentValidator', 'LearningEngine', 'ProgressTracker']

View file

@ -1,9 +1,9 @@
# contentValidator.py
# Content validation for adaptive React mode
import re
import logging
import json
import re
from typing import List, Dict, Any
logger = logging.getLogger(__name__)
@ -11,40 +11,14 @@ logger = logging.getLogger(__name__)
class ContentValidator:
"""Validates delivered content against user intent"""
def __init__(self):
pass
def __init__(self, services=None):
self.services = services
def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
async def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
"""Validates delivered content against user intent using AI"""
try:
# First, try AI-based validation for intelligent gap analysis
aiValidation = self._validateWithAI(documents, intent)
if aiValidation:
return aiValidation
# Fallback to rule-based validation if AI validation fails
validationDetails = []
for doc in documents:
content = self._extractContent(doc)
detail = self._validateSingleDocument(content, doc, intent)
validationDetails.append(detail)
# Calculate overall success
overallSuccess = all(detail.get("successCriteriaMet", [False]) for detail in validationDetails)
# Calculate quality score
qualityScore = self._calculateQualityScore(validationDetails)
# Generate improvement suggestions
improvementSuggestions = self._generateImprovementSuggestions(validationDetails, intent)
return {
"overallSuccess": overallSuccess,
"qualityScore": qualityScore,
"validationDetails": validationDetails,
"improvementSuggestions": improvementSuggestions
}
# Use AI for comprehensive validation
return await self._validateWithAI(documents, intent)
except Exception as e:
logger.error(f"Error validating content: {str(e)}")
@ -63,260 +37,21 @@ class ContentValidator:
except Exception:
return ""
def _validateSingleDocument(self, content: str, doc: Any, intent: Dict[str, Any]) -> Dict[str, Any]:
    """Run every per-document check against the user intent.

    Args:
        content: Extracted text of the document.
        doc: Source document object (only ``documentName`` is read).
        intent: Intent analysis dict (``dataType``, ``expectedFormat``,
            ``successCriteria``).

    Returns:
        A per-document validation report bundling all check results.
    """
    expected_type = intent.get("dataType", "unknown")
    expected_format = intent.get("expectedFormat", "unknown")
    return {
        "documentName": getattr(doc, 'documentName', 'Unknown'),
        "dataTypeMatch": self._checkDataTypeMatch(content, expected_type),
        "formatMatch": self._checkFormatMatch(content, expected_format),
        "qualityScore": self._calculateDocumentQualityScore(content, intent),
        "successCriteriaMet": self._checkSuccessCriteria(content, intent),
        "specificIssues": self._identifySpecificIssues(content, intent),
        "improvementSuggestions": self._generateDocumentImprovementSuggestions(content, intent),
    }
def _checkDataTypeMatch(self, content: str, dataType: str) -> bool:
    """Return True when *content* looks like the requested data type.

    Unrecognized type names are optimistically treated as a match.
    """
    checkers = {
        "numbers": self._containsNumbers,
        "text": self._containsText,
        "documents": self._containsDocumentContent,
        "analysis": self._containsAnalysis,
        "code": self._containsCode,
    }
    checker = checkers.get(dataType)
    return checker(content) if checker else True
def _containsNumbers(self, content: str) -> bool:
"""Checks if content contains actual numbers (not code)"""
# Look for actual numbers in the content
numbers = re.findall(r'\b\d+\b', content)
# Check if it's code (contains function definitions, etc.)
isCode = any(keyword in content.lower() for keyword in [
'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
'return', 'print(', 'console.log', 'public ', 'private '
])
# If it's code, it doesn't contain actual numbers
if isCode:
return False
# If it has numbers and it's not code, it contains actual numbers
return len(numbers) > 0
def _containsText(self, content: str) -> bool:
"""Checks if content contains readable text"""
# Remove numbers and special characters
textContent = re.sub(r'[^\w\s]', '', content)
words = textContent.split()
# Check if there are enough words to be considered text
return len(words) > 5
def _containsDocumentContent(self, content: str) -> bool:
"""Checks if content is suitable for document creation"""
# Check for structured content
hasStructure = any(indicator in content for indicator in [
'\n', '\t', '|', '-', '*', '1.', '2.', '', ''
])
# Check for meaningful content
hasMeaningfulContent = len(content.strip()) > 50
return hasStructure and hasMeaningfulContent
def _containsAnalysis(self, content: str) -> bool:
"""Checks if content contains analysis"""
analysisIndicators = [
'analysis', 'findings', 'conclusion', 'summary', 'insights',
'trends', 'patterns', 'comparison', 'evaluation', 'assessment'
]
contentLower = content.lower()
return any(indicator in contentLower for indicator in analysisIndicators)
def _containsCode(self, content: str) -> bool:
"""Checks if content contains code"""
codeIndicators = [
'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
'return', 'print(', 'console.log', 'public ', 'private ', 'void ',
'int ', 'string ', 'var ', 'let ', 'const '
]
contentLower = content.lower()
return any(indicator in contentLower for indicator in codeIndicators)
def _checkFormatMatch(self, content: str, expectedFormat: str) -> bool:
"""Checks if content matches expected format"""
if expectedFormat == "raw_data":
# Raw data should be simple, not heavily formatted
return not any(indicator in content for indicator in [
'<html>', '<div>', '<table>', '## ', '### ', '**', '__'
])
elif expectedFormat == "formatted":
# Formatted content should have structure
return any(indicator in content for indicator in [
'\n', '\t', '|', '-', '*', '1.', '2.', ''
])
elif expectedFormat == "structured":
# Structured content should have clear organization
return any(indicator in content for indicator in [
'{', '}', '[', ']', '|', '\t', ' '
])
else:
return True # Unknown format, assume match
def _checkSuccessCriteria(self, content: str, intent: Dict[str, Any]) -> List[bool]:
"""Checks if content meets success criteria"""
criteriaMet = []
successCriteria = intent.get("successCriteria", [])
for criterion in successCriteria:
if 'prime numbers' in criterion.lower():
# Check if content contains actual prime numbers, not code
hasNumbers = bool(re.search(r'\b\d+\b', content))
isNotCode = not any(keyword in content.lower() for keyword in [
'def ', 'function', 'import ', 'class '
])
criteriaMet.append(hasNumbers and isNotCode)
elif 'document' in criterion.lower():
# Check if content is suitable for document creation
hasStructure = any(indicator in content for indicator in [
'\n', '\t', '|', '-', '*', '1.', '2.'
])
criteriaMet.append(hasStructure)
elif 'format' in criterion.lower():
# Check if content is properly formatted
hasFormatting = any(indicator in content for indicator in [
'\n', '\t', '|', '-', '*', '1.', '2.', ''
])
criteriaMet.append(hasFormatting)
else:
# Generic check - content should not be empty
criteriaMet.append(len(content.strip()) > 0)
return criteriaMet
def _calculateDocumentQualityScore(self, content: str, intent: Dict[str, Any]) -> float:
    """Weighted 0.0-1.0 quality score for a single document.

    Weights: 0.2 for non-empty content, 0.3 for data-type match,
    0.2 for format match, and 0.3 scaled by the fraction of success
    criteria met. Capped at 1.0.
    """
    score = 0.2 if content.strip() else 0.0
    if self._checkDataTypeMatch(content, intent.get("dataType", "unknown")):
        score += 0.3
    if self._checkFormatMatch(content, intent.get("expectedFormat", "unknown")):
        score += 0.2
    criteria_results = self._checkSuccessCriteria(content, intent)
    if criteria_results:
        score += 0.3 * (sum(criteria_results) / len(criteria_results))
    return min(score, 1.0)
def _calculateQualityScore(self, validationDetails: List[Dict[str, Any]]) -> float:
"""Calculates overall quality score from validation details"""
if not validationDetails:
return 0.0
totalScore = sum(detail.get("qualityScore", 0) for detail in validationDetails)
return totalScore / len(validationDetails)
def _identifySpecificIssues(self, content: str, intent: Dict[str, Any]) -> List[str]:
    """Collect human-readable problems found in *content* relative to *intent*."""
    issues: List[str] = []
    # Code delivered where literal numbers were requested.
    if intent.get("dataType") == "numbers" and self._containsCode(content):
        issues.append("Content contains code instead of actual numbers")
    # Markup present where raw data was requested.
    formatted_markers = ('<html>', '## ', '**')
    if intent.get("expectedFormat") == "raw_data" and any(m in content for m in formatted_markers):
        issues.append("Content is formatted when raw data was requested")
    if not content.strip():
        issues.append("Content is empty")
    return issues
def _generateDocumentImprovementSuggestions(self, content: str, intent: Dict[str, Any]) -> List[str]:
    """Suggest concrete fixes for a single document's shortcomings."""
    suggestions: List[str] = []
    if intent.get("dataType", "unknown") == "numbers" and self._containsCode(content):
        suggestions.append("Deliver actual numbers, not code to generate them")
    if intent.get("expectedFormat", "unknown") == "raw_data" and any(
        marker in content for marker in ('<html>', '## ')
    ):
        suggestions.append("Provide raw data without formatting")
    if not content.strip():
        suggestions.append("Provide actual content")
    return suggestions
def _generateImprovementSuggestions(self, validationDetails: List[Dict[str, Any]],
intent: Dict[str, Any]) -> List[str]:
"""Generates improvement suggestions based on validation results"""
suggestions = []
# Check for common issues
if not any(detail.get("dataTypeMatch", False) for detail in validationDetails):
dataType = intent.get("dataType", "unknown")
suggestions.append(f"Content should contain {dataType} data, not code or other formats")
if not any(detail.get("formatMatch", False) for detail in validationDetails):
expectedFormat = intent.get("expectedFormat", "unknown")
suggestions.append(f"Content should be in {expectedFormat} format")
# Add specific suggestions from validation details
for detail in validationDetails:
suggestions.extend(detail.get("improvementSuggestions", []))
return list(set(suggestions)) # Remove duplicates
def _createFailedValidationResult(self, error: str) -> Dict[str, Any]:
"""Creates a failed validation result"""
return {
"overallSuccess": False,
"qualityScore": 0.0,
"validationDetails": [],
"improvementSuggestions": [f"Validation failed: {error}"]
"improvementSuggestions": [f"NEXT STEP: Fix validation error - {error}. Check system logs for more details and retry the operation."]
}
def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
"""AI-based validation to intelligently assess task completion"""
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
"""AI-based comprehensive validation - single main function"""
try:
if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
return self._createFailedValidationResult("AI service not available")
# Extract content from all documents
documentContents = []
for doc in documents:
@ -326,60 +61,77 @@ class ContentValidator:
"content": content[:2000] # Limit content for AI processing
})
# Create AI validation prompt
# Create comprehensive AI validation prompt
validationPrompt = f"""
You are a task completion validator. Analyze if the delivered content actually fulfills the user's request.
You are a comprehensive task completion validator. Analyze if the delivered content fulfills the user's request.
USER REQUEST: {intent.get('primaryGoal', 'Unknown')}
EXPECTED DATA TYPE: {intent.get('dataType', 'unknown')}
EXPECTED FORMAT: {intent.get('expectedFormat', 'unknown')}
SUCCESS CRITERIA: {intent.get('successCriteria', [])}
DELIVERED CONTENT:
{json.dumps(documentContents, indent=2)}
TASK: Determine if the user's request has been fully completed.
Analyze the gap between what was requested and what was delivered. Consider any missing elements, incorrect formats, incomplete work, or other discrepancies.
Perform comprehensive validation:
1. Check if content matches expected data type
2. Check if content matches expected format
3. Verify success criteria are met
4. Assess overall quality and completeness
5. Identify specific gaps and issues
6. Provide actionable next steps
Respond with JSON only:
{{
"overallSuccess": true/false,
"qualityScore": 0.0-1.0,
"gapAnalysis": "Detailed analysis of what's missing or incorrect",
"improvementSuggestions": ["specific action 1", "specific action 2"]
"dataTypeMatch": true/false,
"formatMatch": true/false,
"successCriteriaMet": [true/false for each criterion],
"gapAnalysis": "Detailed analysis: what's missing/incorrect AND what specific next step to do",
"improvementSuggestions": ["NEXT STEP: specific action 1", "NEXT STEP: specific action 2"],
"validationDetails": [
{{
"documentName": "Document name",
"issues": ["specific issue 1", "specific issue 2"],
"suggestions": ["NEXT STEP: specific fix 1", "NEXT STEP: specific fix 2"]
}}
]
}}
"""
# Call AI service for validation
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
from modules.datamodels.datamodelAi import AiCallOptions, OperationType
request_options = AiCallOptions()
request_options.operationType = OperationType.GENERAL
request = AiCallRequest(prompt=validationPrompt, context="", options=request_options)
response = await self.services.ai.callAi(
prompt=validationPrompt,
documents=None,
options=request_options
)
if response:
import re
result = response.strip()
json_match = re.search(r'\{.*\}', result, re.DOTALL)
if json_match:
result = json_match.group(0)
aiResult = json.loads(result)
return {
"overallSuccess": aiResult.get("overallSuccess", False),
"qualityScore": aiResult.get("qualityScore", 0.0),
"validationDetails": aiResult.get("validationDetails", [{
"documentName": "AI Validation",
"gapAnalysis": aiResult.get("gapAnalysis", ""),
"successCriteriaMet": aiResult.get("successCriteriaMet", [False])
}]),
"improvementSuggestions": aiResult.get("improvementSuggestions", [])
}
# Get AI service from the workflow context
if hasattr(self, 'services') and hasattr(self.services, 'ai'):
response = self.services.ai.aiObjects.call(request)
if response and response.content:
import re
result = response.content.strip()
json_match = re.search(r'\{.*\}', result, re.DOTALL)
if json_match:
result = json_match.group(0)
aiResult = json.loads(result)
return {
"overallSuccess": aiResult.get("overallSuccess", False),
"qualityScore": aiResult.get("qualityScore", 0.0),
"validationDetails": [{
"documentName": "AI Validation",
"gapAnalysis": aiResult.get("gapAnalysis", ""),
"successCriteriaMet": [aiResult.get("overallSuccess", False)]
}],
"improvementSuggestions": aiResult.get("improvementSuggestions", [])
}
return None # Fallback to rule-based validation
return self._createFailedValidationResult("AI validation failed - no response")
except Exception as e:
logger.error(f"AI validation failed: {str(e)}")
return None # Fallback to rule-based validation
return self._createFailedValidationResult(f"AI validation error: {str(e)}")

View file

@ -1,228 +1,109 @@
# intentAnalyzer.py
# Intent analysis for adaptive React mode
# Intent analysis for adaptive React mode - AI-based, language-agnostic
import re
import json
import logging
from typing import Dict, Any, List
from enum import Enum
logger = logging.getLogger(__name__)
class DataType(Enum):
    """Closed set of content categories a user request can be classified as."""
    NUMBERS = "numbers"
    TEXT = "text"
    DOCUMENTS = "documents"
    ANALYSIS = "analysis"
    CODE = "code"
    UNKNOWN = "unknown"  # fallback when no classification pattern matches
class ExpectedFormat(Enum):
    """How the user expects delivered content to be presented."""
    RAW_DATA = "raw_data"
    FORMATTED = "formatted"
    STRUCTURED = "structured"
    VISUAL = "visual"
    UNKNOWN = "unknown"  # fallback when no format pattern matches
class IntentAnalyzer:
"""Analyzes user intent to understand what they actually want"""
"""Analyzes user intent using AI - language-agnostic and generic"""
def __init__(self):
self.dataTypePatterns = {
DataType.NUMBERS: [
r'\b(numbers?|digits?|count|list|sequence)\b',
r'\b(prime|fibonacci|random|even|odd)\s+(numbers?)\b',
r'\b(calculate|compute|generate)\s+(numbers?)\b',
r'\b(first|last)\s+\d+\s+(numbers?)\b'
],
DataType.TEXT: [
r'\b(text|content|words?|sentences?|paragraphs?)\b',
r'\b(write|create|generate)\s+(text|content)\b',
r'\b(summary|description|explanation)\b',
r'\b(article|essay|report)\b'
],
DataType.DOCUMENTS: [
r'\b(document|file|report|pdf|word|excel)\b',
r'\b(create|generate|make)\s+(document|file|report)\b',
r'\b(format|structure|organize)\s+(document)\b',
r'\b(presentation|slides?)\b'
],
DataType.ANALYSIS: [
r'\b(analyze|analysis|examine|study|evaluate)\b',
r'\b(insights?|findings?|results?)\b',
r'\b(compare|contrast|evaluate)\b',
r'\b(trends?|patterns?)\b'
],
DataType.CODE: [
r'\b(code|program|script|algorithm|function)\b',
r'\b(write|create|develop)\s+(code|program|script)\b',
r'\b(implement|build|construct)\b',
r'\b(debug|fix|optimize)\s+(code)\b'
]
}
self.formatPatterns = {
ExpectedFormat.RAW_DATA: [
r'\b(raw|plain|simple|basic)\b',
r'\b(numbers?|data|list)\b(?!\s+(in|as|with))',
r'\b(just|only)\s+(numbers?|data)\b'
],
ExpectedFormat.FORMATTED: [
r'\b(formatted|structured|organized|presented)\b',
r'\b(table|chart|graph|visual)\b',
r'\b(pretty|nice|clean)\s+(format|presentation)\b',
r'\b(professional|polished)\b'
],
ExpectedFormat.STRUCTURED: [
r'\b(json|xml|csv|structured)\b',
r'\b(organized|categorized|grouped)\b',
r'\b(systematic|methodical)\b',
r'\b(database|spreadsheet)\b'
]
}
def __init__(self, services=None):
self.services = services
def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]:
"""Analyzes user intent from prompt and context"""
async def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]:
"""Analyzes user intent from prompt and context using AI"""
try:
# Extract primary goal
primaryGoal = self._extractPrimaryGoal(userPrompt)
# Use AI to analyze intent
aiAnalysis = await self._analyzeIntentWithAI(userPrompt, context)
if aiAnalysis:
return aiAnalysis
# Classify data type
dataType = self._classifyDataType(userPrompt)
# Determine expected format
expectedFormat = self._determineExpectedFormat(userPrompt)
# Assess quality requirements
qualityRequirements = self._assessQualityRequirements(userPrompt, context)
# Extract success criteria
successCriteria = self._extractSuccessCriteria(userPrompt, context)
# Calculate confidence score
confidenceScore = self._calculateConfidenceScore(dataType, expectedFormat, successCriteria)
return {
"primaryGoal": primaryGoal,
"dataType": dataType.value,
"expectedFormat": expectedFormat.value,
"qualityRequirements": qualityRequirements,
"successCriteria": successCriteria,
"confidenceScore": confidenceScore
}
# Fallback to basic analysis if AI fails
return self._createBasicIntentAnalysis(userPrompt)
except Exception as e:
logger.error(f"Error analyzing user intent: {str(e)}")
return self._createDefaultIntentAnalysis(userPrompt)
def _extractPrimaryGoal(self, userPrompt: str) -> str:
"""Extracts the primary goal from user prompt"""
# Simple extraction - can be enhanced
return userPrompt.strip()
async def _analyzeIntentWithAI(self, userPrompt: str, context: Any) -> Dict[str, Any]:
    """Uses AI to analyze user intent - language-agnostic.

    Builds a structured-analysis prompt, sends it through the shared AI
    service, and parses the first JSON object found in the response.

    Args:
        userPrompt: Raw user request text.
        context: Workflow context; only ``context.task_step.objective`` is
            read (when present) to enrich the prompt.

    Returns:
        The parsed intent dict on success, or None when the AI service is
        unavailable, returns no usable response, or any step raises.
    """
    try:
        # Bail out gracefully when no AI service was injected.
        if not self.services or not hasattr(self.services, 'ai'):
            return None
        # Create AI analysis prompt
        analysisPrompt = f"""
You are an intent analyzer. Analyze the user's request to understand what they want delivered.
USER REQUEST: {userPrompt}
CONTEXT: {getattr(context.task_step, 'objective', '') if hasattr(context, 'task_step') and context.task_step else ''}
Analyze the user's intent and determine:
1. What type of data/content they want (numbers, text, documents, analysis, code, etc.)
2. What format they expect (raw data, formatted, structured, visual, etc.)
3. What quality requirements they have (accuracy, completeness, format)
4. What specific success criteria define completion
Respond with JSON only:
{{
"primaryGoal": "The main objective the user wants to achieve",
"dataType": "numbers|text|documents|analysis|code|unknown",
"expectedFormat": "raw_data|formatted|structured|visual|unknown",
"qualityRequirements": {{
"accuracyThreshold": 0.0-1.0,
"completenessThreshold": 0.0-1.0,
"formatRequirement": "any|formatted|raw|structured"
}},
"successCriteria": ["specific criterion 1", "specific criterion 2"],
"confidenceScore": 0.0-1.0
}}
"""
        # Call AI service for analysis
        from modules.datamodels.datamodelAi import AiCallOptions, OperationType
        request_options = AiCallOptions()
        request_options.operationType = OperationType.GENERAL
        response = await self.services.ai.callAi(
            prompt=analysisPrompt,
            documents=None,
            options=request_options
        )
        if response:
            import re
            result = response.strip()
            # Extract the first {...} span in case the model wrapped the
            # JSON in prose or markdown fences.
            json_match = re.search(r'\{.*\}', result, re.DOTALL)
            if json_match:
                result = json_match.group(0)
            aiResult = json.loads(result)
            return aiResult
        return None
    except Exception as e:
        # Caller falls back to basic (non-AI) intent analysis on None.
        logger.error(f"AI intent analysis failed: {str(e)}")
        return None
def _classifyDataType(self, userPrompt: str) -> DataType:
    """Classifies the type of data the user wants.

    Scans the lowercased prompt against ``self.dataTypePatterns`` and
    returns the first data type with a matching regex, else UNKNOWN.
    """
    text = userPrompt.lower()
    for candidate, patterns in self.dataTypePatterns.items():
        if any(re.search(pattern, text) for pattern in patterns):
            return candidate
    return DataType.UNKNOWN
def _determineExpectedFormat(self, userPrompt: str) -> ExpectedFormat:
    """Determines the expected format of the output.

    Returns the first entry of ``self.formatPatterns`` whose regexes match
    the lowercased prompt, falling back to UNKNOWN.
    """
    lowered = userPrompt.lower()
    return next(
        (fmt for fmt, patterns in self.formatPatterns.items()
         if any(re.search(pattern, lowered) for pattern in patterns)),
        ExpectedFormat.UNKNOWN,
    )
def _assessQualityRequirements(self, userPrompt: str, context: Any) -> Dict[str, Any]:
"""Assesses quality requirements from prompt and context"""
promptLower = userPrompt.lower()
# Check for accuracy requirements
accuracyThreshold = 0.8
if any(word in promptLower for word in ['exact', 'precise', 'accurate', 'correct']):
accuracyThreshold = 0.95
elif any(word in promptLower for word in ['approximate', 'rough', 'estimate']):
accuracyThreshold = 0.7
# Check for completeness requirements
completenessThreshold = 0.8
if any(word in promptLower for word in ['complete', 'full', 'comprehensive', 'all']):
completenessThreshold = 0.95
elif any(word in promptLower for word in ['summary', 'brief', 'overview']):
completenessThreshold = 0.6
# Check for format requirements
formatRequirement = "any"
if any(word in promptLower for word in ['formatted', 'structured', 'organized']):
formatRequirement = "formatted"
elif any(word in promptLower for word in ['raw', 'plain', 'simple']):
formatRequirement = "raw"
def _createBasicIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]:
"""Creates basic intent analysis without AI"""
return {
"accuracyThreshold": accuracyThreshold,
"completenessThreshold": completenessThreshold,
"formatRequirement": formatRequirement
"primaryGoal": userPrompt.strip(),
"dataType": "unknown",
"expectedFormat": "unknown",
"qualityRequirements": {
"accuracyThreshold": 0.8,
"completenessThreshold": 0.8,
"formatRequirement": "any"
},
"successCriteria": ["Delivers what the user requested"],
"confidenceScore": 0.5
}
def _extractSuccessCriteria(self, userPrompt: str, context: Any) -> List[str]:
"""Extracts success criteria from prompt and context"""
criteria = []
promptLower = userPrompt.lower()
# Extract explicit criteria
if 'first' in promptLower and 'numbers' in promptLower:
criteria.append("Contains the first N numbers as requested")
if 'prime' in promptLower:
criteria.append("Contains actual prime numbers, not code to generate them")
if 'document' in promptLower:
criteria.append("Creates a properly formatted document")
if 'format' in promptLower:
criteria.append("Content is properly formatted as requested")
# Add context-based criteria
if hasattr(context, 'task_step') and context.task_step:
taskObjective = context.task_step.objective.lower()
if 'word' in taskObjective:
criteria.append("Creates a Word document")
if 'excel' in taskObjective:
criteria.append("Creates an Excel spreadsheet")
return criteria if criteria else ["Delivers what the user requested"]
def _calculateConfidenceScore(self, dataType: DataType, expectedFormat: ExpectedFormat,
                              successCriteria: List[str]) -> float:
    """Calculates confidence score for the intent analysis.

    Adds 0.3 for a recognized data type, 0.2 for a recognized format,
    0.3 for having any success criteria and 0.2 more for having several,
    capped at 1.0.
    """
    total = 0.0
    if dataType != DataType.UNKNOWN:
        total += 0.3
    if expectedFormat != ExpectedFormat.UNKNOWN:
        total += 0.2
    criteria_count = len(successCriteria)
    if criteria_count >= 1:
        total += 0.3
    if criteria_count >= 2:
        total += 0.2
    return min(total, 1.0)
def _createDefaultIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]:
"""Creates a default intent analysis when analysis fails"""
return {

View file

@ -31,9 +31,8 @@ class ReactMode(BaseMode):
def __init__(self, services, workflow):
super().__init__(services, workflow)
# Initialize adaptive components
self.intentAnalyzer = IntentAnalyzer()
self.contentValidator = ContentValidator()
self.contentValidator.services = self.services # Pass services for AI validation
self.intentAnalyzer = IntentAnalyzer(services)
self.contentValidator = ContentValidator(services)
self.learningEngine = LearningEngine()
self.progressTracker = ProgressTracker()
self.currentIntent = None
@ -53,9 +52,9 @@ class ReactMode(BaseMode):
# NEW: Analyze intents separately for proper validation vs task completion
# Workflow-level intent from cleaned original user prompt
original_prompt = self.services.currentUserPrompt if self.services and hasattr(self.services, 'currentUserPrompt') else taskStep.objective
self.workflowIntent = self.intentAnalyzer.analyzeUserIntent(original_prompt, context)
self.workflowIntent = await self.intentAnalyzer.analyzeUserIntent(original_prompt, context)
# Task-level intent from current task objective (used only for task-scoped checks)
self.taskIntent = self.intentAnalyzer.analyzeUserIntent(taskStep.objective, context)
self.taskIntent = await self.intentAnalyzer.analyzeUserIntent(taskStep.objective, context)
logger.info(f"Intent analysis — workflow: {self.workflowIntent}")
logger.info(f"Intent analysis — task: {self.taskIntent}")
@ -103,7 +102,7 @@ class ReactMode(BaseMode):
# NEW: Add content validation (against original cleaned user prompt / workflow intent)
if getattr(self, 'workflowIntent', None) and result.documents:
validationResult = self.contentValidator.validateContent(result.documents, self.workflowIntent)
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent)
observation['contentValidation'] = validationResult
logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {validationResult['qualityScore']:.2f})")