From da1f07555619cdb027d7641319007cb12f0b30a9 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Wed, 15 Oct 2025 00:08:28 +0200
Subject: [PATCH] Fixes 01
---
.../serviceAi/subDocumentGeneration.py | 142 ++++++-
.../serviceAi/subDocumentProcessing.py | 10 +-
.../renderers/rendererHtml.py | 16 +-
.../renderers/rendererMarkdown.py | 16 +-
.../renderers/rendererText.py | 16 +-
.../workflows/processing/adaptive/__init__.py | 4 +-
.../processing/adaptive/contentValidator.py | 374 +++---------------
.../processing/adaptive/intentAnalyzer.py | 289 ++++----------
.../workflows/processing/modes/modeReact.py | 11 +-
9 files changed, 339 insertions(+), 539 deletions(-)
diff --git a/modules/services/serviceAi/subDocumentGeneration.py b/modules/services/serviceAi/subDocumentGeneration.py
index 09fe524c..6d7ee4b7 100644
--- a/modules/services/serviceAi/subDocumentGeneration.py
+++ b/modules/services/serviceAi/subDocumentGeneration.py
@@ -382,8 +382,22 @@ class SubDocumentGeneration:
logger.info(f"AI enhanced JSON content successfully")
except json.JSONDecodeError as e:
- logger.warning(f"AI generation returned invalid JSON: {str(e)}, using original content")
- enhancedContent = complete_document
+ logger.warning(f"AI generation returned invalid JSON: {str(e)}, attempting to repair...")
+ # Try to repair common JSON issues
+ try:
+ repaired_result = self._repairJson(result)
+ enhancedContent = json.loads(repaired_result)
+ logger.info(f"Successfully repaired JSON content")
+ except (json.JSONDecodeError, Exception) as repair_error:
+ logger.warning(f"JSON repair failed: {str(repair_error)}, trying AI repair...")
+ # Try AI-powered JSON repair as last resort
+ try:
+ ai_repaired = await self._repairJsonWithAI(result)
+ enhancedContent = json.loads(ai_repaired)
+ logger.info(f"AI successfully repaired JSON content")
+ except Exception as ai_repair_error:
+ logger.warning(f"AI JSON repair also failed: {str(ai_repair_error)}, using original content")
+ enhancedContent = complete_document
else:
logger.warning("AI generation returned empty response, using original content")
enhancedContent = complete_document
@@ -664,3 +678,127 @@ Return only the JSON response.
except Exception:
# Non-fatal; ignore if storage or chat creation fails
return
+
+ def _repairJson(self, json_string: str) -> str:
+ """Repair common JSON syntax errors efficiently for large JSON."""
+ try:
+ import re
+ import json
+
+ # Remove any leading/trailing whitespace
+ json_string = json_string.strip()
+
+ # For large JSON, skip substring extraction and go straight to targeted repairs
+ logger.info(f"Attempting JSON repair for {len(json_string)} characters...")
+
+ # Try to parse first to see what specific error we get
+ try:
+ json.loads(json_string)
+ return json_string # Already valid
+ except json.JSONDecodeError as e:
+ error_msg = str(e)
+ logger.info(f"JSON error: {error_msg}")
+
+ # Apply targeted fixes based on the specific error
+ if "Expecting ',' delimiter" in error_msg:
+ # Fix missing commas between array elements
+ json_string = re.sub(r'\]\s*\[', '], [', json_string)
+ json_string = re.sub(r'\}\s*\{', '}, {', json_string)
+ # Fix missing commas between object properties
+ json_string = re.sub(r'("\s*:\s*[^,}]+)\s*(")', r'\1, \2', json_string)
+
+ if "Expecting value" in error_msg:
+ # Fix missing values (replace empty with null)
+ json_string = re.sub(r':\s*,', ': null,', json_string)
+ json_string = re.sub(r':\s*}', ': null}', json_string)
+
+ if "Expecting property name" in error_msg:
+ # Fix unquoted property names
+ json_string = re.sub(r'(\w+):', r'"\1":', json_string)
+
+ # Fix trailing commas before closing brackets/braces
+ json_string = re.sub(r',(\s*[}\]])', r'\1', json_string)
+
+ # Fix missing closing brackets/braces (only if reasonable)
+ open_braces = json_string.count('{')
+ close_braces = json_string.count('}')
+ open_brackets = json_string.count('[')
+ close_brackets = json_string.count(']')
+
+ # Only add missing brackets if the difference is small (avoid runaway)
+ if 0 < (open_braces - close_braces) <= 5:
+ missing_braces = open_braces - close_braces
+ json_string += '}' * missing_braces
+
+ if 0 < (open_brackets - close_brackets) <= 5:
+ missing_brackets = open_brackets - close_brackets
+ json_string += ']' * missing_brackets
+
+ # Try to parse again
+ try:
+ json.loads(json_string)
+ logger.info("JSON repair successful")
+ return json_string
+ except json.JSONDecodeError:
+ logger.warning("JSON repair failed - will try AI repair")
+ return json_string
+
+ except Exception as e:
+ logger.warning(f"JSON repair failed: {str(e)}")
+ return json_string
+
+ async def _repairJsonWithAI(self, malformed_json: str) -> str:
+ """Use AI to repair malformed JSON efficiently for large files."""
+ try:
+ # Limit JSON size for AI processing (max 50KB to avoid token limits)
+ max_json_size = 50000
+ json_to_repair = malformed_json
+
+ if len(malformed_json) > max_json_size:
+ logger.warning(f"JSON too large ({len(malformed_json)} chars), truncating to {max_json_size} chars for AI repair")
+ # Try to find a good truncation point (end of a complete object/array)
+ truncate_at = max_json_size
+ for i in range(max_json_size, max(0, max_json_size - 1000), -1):
+ if malformed_json[i] in ['}', ']']:
+ truncate_at = i + 1
+ break
+ json_to_repair = malformed_json[:truncate_at] + "..."
+
+ repair_prompt = f"""
+You are a JSON repair expert. Fix the following malformed JSON and return ONLY the corrected JSON, no explanations.
+
+Malformed JSON:
+{json_to_repair}
+
+Return only the valid JSON:
+"""
+
+ # Use AI to repair the JSON
+ repaired_json = await self.services.ai.callAi(
+ prompt=repair_prompt,
+ documents=None,
+ options={
+ "process_type": "text",
+ "operation_type": "generate_content",
+ "priority": "speed",
+ "max_cost": 0.01
+ }
+ )
+
+ # Clean up the response (remove any markdown formatting)
+ repaired_json = repaired_json.strip()
+ if repaired_json.startswith('```json'):
+ repaired_json = repaired_json[7:]
+ if repaired_json.endswith('```'):
+ repaired_json = repaired_json[:-3]
+ repaired_json = repaired_json.strip()
+
+ # Validate the repaired JSON
+ import json
+ json.loads(repaired_json)
+ logger.info("AI JSON repair successful")
+ return repaired_json
+
+ except Exception as e:
+ logger.warning(f"AI JSON repair failed: {str(e)}")
+ return malformed_json
diff --git a/modules/services/serviceAi/subDocumentProcessing.py b/modules/services/serviceAi/subDocumentProcessing.py
index e96a9394..d85a5341 100644
--- a/modules/services/serviceAi/subDocumentProcessing.py
+++ b/modules/services/serviceAi/subDocumentProcessing.py
@@ -199,8 +199,10 @@ class SubDocumentProcessing:
else:
raise ValueError('Normalization produced zero rows')
except Exception as e:
- # Surface normalization failure while leaving original merged JSON (single-path expectation is to fail)
- raise
+ # Log normalization failure but don't re-raise - continue with original merged JSON
+ logger.warning(f"Normalization failed (expected): {str(e)}")
+ logger.debug(f"Normalization error type: {type(e).__name__}")
+ # Continue with original merged JSON instead of re-raising
# Save merged JSON extraction content to debug file - only if debug enabled
try:
@@ -221,6 +223,10 @@ class SubDocumentProcessing:
except Exception as e:
logger.error(f"Error in per-chunk processing (JSON mode): {str(e)}")
+ logger.error(f"Exception type: {type(e).__name__}")
+ logger.error(f"Exception args: {e.args}")
+ import traceback
+ logger.error(f"Traceback: {traceback.format_exc()}")
return {"metadata": {"title": "Error Document"}, "sections": []}
async def processDocumentsPerChunkJsonWithPrompt(
diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py
index 5cd9b691..1dedaf46 100644
--- a/modules/services/serviceGeneration/renderers/rendererHtml.py
+++ b/modules/services/serviceGeneration/renderers/rendererHtml.py
@@ -278,17 +278,25 @@ class RendererHtml(BaseRenderer):
section_data = self._get_section_data(section)
if section_type == "table":
- return self._render_json_table(section_data, styles)
+ # Process the section data to extract table structure
+ processed_data = self._process_section_by_type(section)
+ return self._render_json_table(processed_data, styles)
elif section_type == "bullet_list":
- return self._render_json_bullet_list(section_data, styles)
+ # Process the section data to extract bullet list structure
+ processed_data = self._process_section_by_type(section)
+ return self._render_json_bullet_list(processed_data, styles)
elif section_type == "heading":
return self._render_json_heading(section_data, styles)
elif section_type == "paragraph":
return self._render_json_paragraph(section_data, styles)
elif section_type == "code_block":
- return self._render_json_code_block(section_data, styles)
+ # Process the section data to extract code block structure
+ processed_data = self._process_section_by_type(section)
+ return self._render_json_code_block(processed_data, styles)
elif section_type == "image":
- return self._render_json_image(section_data, styles)
+ # Process the section data to extract image structure
+ processed_data = self._process_section_by_type(section)
+ return self._render_json_image(processed_data, styles)
else:
# Fallback to paragraph for unknown types
return self._render_json_paragraph(section_data, styles)
diff --git a/modules/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/services/serviceGeneration/renderers/rendererMarkdown.py
index 61f0bebc..59806d4c 100644
--- a/modules/services/serviceGeneration/renderers/rendererMarkdown.py
+++ b/modules/services/serviceGeneration/renderers/rendererMarkdown.py
@@ -81,17 +81,25 @@ class RendererMarkdown(BaseRenderer):
section_data = self._get_section_data(section)
if section_type == "table":
- return self._render_json_table(section_data)
+ # Process the section data to extract table structure
+ processed_data = self._process_section_by_type(section)
+ return self._render_json_table(processed_data)
elif section_type == "bullet_list":
- return self._render_json_bullet_list(section_data)
+ # Process the section data to extract bullet list structure
+ processed_data = self._process_section_by_type(section)
+ return self._render_json_bullet_list(processed_data)
elif section_type == "heading":
return self._render_json_heading(section_data)
elif section_type == "paragraph":
return self._render_json_paragraph(section_data)
elif section_type == "code_block":
- return self._render_json_code_block(section_data)
+ # Process the section data to extract code block structure
+ processed_data = self._process_section_by_type(section)
+ return self._render_json_code_block(processed_data)
elif section_type == "image":
- return self._render_json_image(section_data)
+ # Process the section data to extract image structure
+ processed_data = self._process_section_by_type(section)
+ return self._render_json_image(processed_data)
else:
# Fallback to paragraph for unknown types
return self._render_json_paragraph(section_data)
diff --git a/modules/services/serviceGeneration/renderers/rendererText.py b/modules/services/serviceGeneration/renderers/rendererText.py
index 33d648e8..68ccfdbe 100644
--- a/modules/services/serviceGeneration/renderers/rendererText.py
+++ b/modules/services/serviceGeneration/renderers/rendererText.py
@@ -104,9 +104,13 @@ class RendererText(BaseRenderer):
section_data = self._get_section_data(section)
if section_type == "table":
- return self._render_json_table(section_data)
+ # Process the section data to extract table structure
+ processed_data = self._process_section_by_type(section)
+ return self._render_json_table(processed_data)
elif section_type == "bullet_list":
- return self._render_json_bullet_list(section_data)
+ # Process the section data to extract bullet list structure
+ processed_data = self._process_section_by_type(section)
+ return self._render_json_bullet_list(processed_data)
elif section_type == "heading":
# Render each heading element in the elements array
# section_data is already the elements array from _get_section_data
@@ -122,9 +126,13 @@ class RendererText(BaseRenderer):
rendered_elements.append(self._render_json_paragraph(element))
return "\n".join(rendered_elements)
elif section_type == "code_block":
- return self._render_json_code_block(section_data)
+ # Process the section data to extract code block structure
+ processed_data = self._process_section_by_type(section)
+ return self._render_json_code_block(processed_data)
elif section_type == "image":
- return self._render_json_image(section_data)
+ # Process the section data to extract image structure
+ processed_data = self._process_section_by_type(section)
+ return self._render_json_image(processed_data)
else:
# Fallback to paragraph for unknown types - render each element
# section_data is already the elements array from _get_section_data
diff --git a/modules/workflows/processing/adaptive/__init__.py b/modules/workflows/processing/adaptive/__init__.py
index fdff3698..afc0c963 100644
--- a/modules/workflows/processing/adaptive/__init__.py
+++ b/modules/workflows/processing/adaptive/__init__.py
@@ -1,9 +1,9 @@
# adaptive module for React mode
# Provides adaptive learning capabilities
-from .intentAnalyzer import IntentAnalyzer, DataType, ExpectedFormat
+from .intentAnalyzer import IntentAnalyzer
from .contentValidator import ContentValidator
from .learningEngine import LearningEngine
from .progressTracker import ProgressTracker
-__all__ = ['IntentAnalyzer', 'ContentValidator', 'LearningEngine', 'ProgressTracker', 'DataType', 'ExpectedFormat']
+__all__ = ['IntentAnalyzer', 'ContentValidator', 'LearningEngine', 'ProgressTracker']
diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py
index 5253ab5e..d211d1c3 100644
--- a/modules/workflows/processing/adaptive/contentValidator.py
+++ b/modules/workflows/processing/adaptive/contentValidator.py
@@ -1,9 +1,9 @@
# contentValidator.py
# Content validation for adaptive React mode
-import re
import logging
import json
+import re
from typing import List, Dict, Any
logger = logging.getLogger(__name__)
@@ -11,40 +11,14 @@ logger = logging.getLogger(__name__)
class ContentValidator:
"""Validates delivered content against user intent"""
- def __init__(self):
- pass
+ def __init__(self, services=None):
+ self.services = services
- def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
+ async def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
"""Validates delivered content against user intent using AI"""
try:
- # First, try AI-based validation for intelligent gap analysis
- aiValidation = self._validateWithAI(documents, intent)
- if aiValidation:
- return aiValidation
-
- # Fallback to rule-based validation if AI validation fails
- validationDetails = []
-
- for doc in documents:
- content = self._extractContent(doc)
- detail = self._validateSingleDocument(content, doc, intent)
- validationDetails.append(detail)
-
- # Calculate overall success
- overallSuccess = all(detail.get("successCriteriaMet", [False]) for detail in validationDetails)
-
- # Calculate quality score
- qualityScore = self._calculateQualityScore(validationDetails)
-
- # Generate improvement suggestions
- improvementSuggestions = self._generateImprovementSuggestions(validationDetails, intent)
-
- return {
- "overallSuccess": overallSuccess,
- "qualityScore": qualityScore,
- "validationDetails": validationDetails,
- "improvementSuggestions": improvementSuggestions
- }
+ # Use AI for comprehensive validation
+ return await self._validateWithAI(documents, intent)
except Exception as e:
logger.error(f"Error validating content: {str(e)}")
@@ -63,260 +37,21 @@ class ContentValidator:
except Exception:
return ""
- def _validateSingleDocument(self, content: str, doc: Any, intent: Dict[str, Any]) -> Dict[str, Any]:
- """Validates a single document against intent"""
- # Check data type match
- dataTypeMatch = self._checkDataTypeMatch(content, intent.get("dataType", "unknown"))
-
- # Check format match
- formatMatch = self._checkFormatMatch(content, intent.get("expectedFormat", "unknown"))
-
- # Calculate quality score
- qualityScore = self._calculateDocumentQualityScore(content, intent)
-
- # Check success criteria
- successCriteriaMet = self._checkSuccessCriteria(content, intent)
-
- # Identify specific issues
- specificIssues = self._identifySpecificIssues(content, intent)
-
- # Generate improvement suggestions
- improvementSuggestions = self._generateDocumentImprovementSuggestions(content, intent)
-
- return {
- "documentName": getattr(doc, 'documentName', 'Unknown'),
- "dataTypeMatch": dataTypeMatch,
- "formatMatch": formatMatch,
- "qualityScore": qualityScore,
- "successCriteriaMet": successCriteriaMet,
- "specificIssues": specificIssues,
- "improvementSuggestions": improvementSuggestions
- }
-
- def _checkDataTypeMatch(self, content: str, dataType: str) -> bool:
- """Checks if content matches the expected data type"""
- if dataType == "numbers":
- return self._containsNumbers(content)
- elif dataType == "text":
- return self._containsText(content)
- elif dataType == "documents":
- return self._containsDocumentContent(content)
- elif dataType == "analysis":
- return self._containsAnalysis(content)
- elif dataType == "code":
- return self._containsCode(content)
- else:
- return True # Unknown type, assume match
-
- def _containsNumbers(self, content: str) -> bool:
- """Checks if content contains actual numbers (not code)"""
- # Look for actual numbers in the content
- numbers = re.findall(r'\b\d+\b', content)
-
- # Check if it's code (contains function definitions, etc.)
- isCode = any(keyword in content.lower() for keyword in [
- 'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
- 'return', 'print(', 'console.log', 'public ', 'private '
- ])
-
- # If it's code, it doesn't contain actual numbers
- if isCode:
- return False
-
- # If it has numbers and it's not code, it contains actual numbers
- return len(numbers) > 0
-
- def _containsText(self, content: str) -> bool:
- """Checks if content contains readable text"""
- # Remove numbers and special characters
- textContent = re.sub(r'[^\w\s]', '', content)
- words = textContent.split()
-
- # Check if there are enough words to be considered text
- return len(words) > 5
-
- def _containsDocumentContent(self, content: str) -> bool:
- """Checks if content is suitable for document creation"""
- # Check for structured content
- hasStructure = any(indicator in content for indicator in [
- '\n', '\t', '|', '-', '*', '1.', '2.', '•', '◦'
- ])
-
- # Check for meaningful content
- hasMeaningfulContent = len(content.strip()) > 50
-
- return hasStructure and hasMeaningfulContent
-
- def _containsAnalysis(self, content: str) -> bool:
- """Checks if content contains analysis"""
- analysisIndicators = [
- 'analysis', 'findings', 'conclusion', 'summary', 'insights',
- 'trends', 'patterns', 'comparison', 'evaluation', 'assessment'
- ]
-
- contentLower = content.lower()
- return any(indicator in contentLower for indicator in analysisIndicators)
-
- def _containsCode(self, content: str) -> bool:
- """Checks if content contains code"""
- codeIndicators = [
- 'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
- 'return', 'print(', 'console.log', 'public ', 'private ', 'void ',
- 'int ', 'string ', 'var ', 'let ', 'const '
- ]
-
- contentLower = content.lower()
- return any(indicator in contentLower for indicator in codeIndicators)
-
- def _checkFormatMatch(self, content: str, expectedFormat: str) -> bool:
- """Checks if content matches expected format"""
- if expectedFormat == "raw_data":
- # Raw data should be simple, not heavily formatted
- return not any(indicator in content for indicator in [
- '', '', '
', '## ', '### ', '**', '__'
- ])
- elif expectedFormat == "formatted":
- # Formatted content should have structure
- return any(indicator in content for indicator in [
- '\n', '\t', '|', '-', '*', '1.', '2.', '•'
- ])
- elif expectedFormat == "structured":
- # Structured content should have clear organization
- return any(indicator in content for indicator in [
- '{', '}', '[', ']', '|', '\t', ' '
- ])
- else:
- return True # Unknown format, assume match
-
- def _checkSuccessCriteria(self, content: str, intent: Dict[str, Any]) -> List[bool]:
- """Checks if content meets success criteria"""
- criteriaMet = []
- successCriteria = intent.get("successCriteria", [])
-
- for criterion in successCriteria:
- if 'prime numbers' in criterion.lower():
- # Check if content contains actual prime numbers, not code
- hasNumbers = bool(re.search(r'\b\d+\b', content))
- isNotCode = not any(keyword in content.lower() for keyword in [
- 'def ', 'function', 'import ', 'class '
- ])
- criteriaMet.append(hasNumbers and isNotCode)
- elif 'document' in criterion.lower():
- # Check if content is suitable for document creation
- hasStructure = any(indicator in content for indicator in [
- '\n', '\t', '|', '-', '*', '1.', '2.'
- ])
- criteriaMet.append(hasStructure)
- elif 'format' in criterion.lower():
- # Check if content is properly formatted
- hasFormatting = any(indicator in content for indicator in [
- '\n', '\t', '|', '-', '*', '1.', '2.', '•'
- ])
- criteriaMet.append(hasFormatting)
- else:
- # Generic check - content should not be empty
- criteriaMet.append(len(content.strip()) > 0)
-
- return criteriaMet
-
- def _calculateDocumentQualityScore(self, content: str, intent: Dict[str, Any]) -> float:
- """Calculates quality score for a single document"""
- score = 0.0
-
- # Base score for having content
- if len(content.strip()) > 0:
- score += 0.2
-
- # Score for data type match
- if self._checkDataTypeMatch(content, intent.get("dataType", "unknown")):
- score += 0.3
-
- # Score for format match
- if self._checkFormatMatch(content, intent.get("expectedFormat", "unknown")):
- score += 0.2
-
- # Score for success criteria
- successCriteriaMet = self._checkSuccessCriteria(content, intent)
- if successCriteriaMet:
- successRate = sum(successCriteriaMet) / len(successCriteriaMet)
- score += 0.3 * successRate
-
- return min(score, 1.0)
-
- def _calculateQualityScore(self, validationDetails: List[Dict[str, Any]]) -> float:
- """Calculates overall quality score from validation details"""
- if not validationDetails:
- return 0.0
-
- totalScore = sum(detail.get("qualityScore", 0) for detail in validationDetails)
- return totalScore / len(validationDetails)
-
- def _identifySpecificIssues(self, content: str, intent: Dict[str, Any]) -> List[str]:
- """Identifies specific issues with the content"""
- issues = []
-
- # Check for common issues
- if intent.get("dataType") == "numbers" and self._containsCode(content):
- issues.append("Content contains code instead of actual numbers")
-
- if intent.get("expectedFormat") == "raw_data" and any(indicator in content for indicator in ['', '## ', '**']):
- issues.append("Content is formatted when raw data was requested")
-
- if len(content.strip()) == 0:
- issues.append("Content is empty")
-
- return issues
-
- def _generateDocumentImprovementSuggestions(self, content: str, intent: Dict[str, Any]) -> List[str]:
- """Generates improvement suggestions for a single document"""
- suggestions = []
-
- dataType = intent.get("dataType", "unknown")
- expectedFormat = intent.get("expectedFormat", "unknown")
-
- if dataType == "numbers" and self._containsCode(content):
- suggestions.append("Deliver actual numbers, not code to generate them")
-
- if expectedFormat == "raw_data" and any(indicator in content for indicator in ['', '## ']):
- suggestions.append("Provide raw data without formatting")
-
- if len(content.strip()) == 0:
- suggestions.append("Provide actual content")
-
- return suggestions
-
- def _generateImprovementSuggestions(self, validationDetails: List[Dict[str, Any]],
- intent: Dict[str, Any]) -> List[str]:
- """Generates improvement suggestions based on validation results"""
- suggestions = []
-
- # Check for common issues
- if not any(detail.get("dataTypeMatch", False) for detail in validationDetails):
- dataType = intent.get("dataType", "unknown")
- suggestions.append(f"Content should contain {dataType} data, not code or other formats")
-
- if not any(detail.get("formatMatch", False) for detail in validationDetails):
- expectedFormat = intent.get("expectedFormat", "unknown")
- suggestions.append(f"Content should be in {expectedFormat} format")
-
- # Add specific suggestions from validation details
- for detail in validationDetails:
- suggestions.extend(detail.get("improvementSuggestions", []))
-
- return list(set(suggestions)) # Remove duplicates
-
def _createFailedValidationResult(self, error: str) -> Dict[str, Any]:
"""Creates a failed validation result"""
return {
"overallSuccess": False,
"qualityScore": 0.0,
"validationDetails": [],
- "improvementSuggestions": [f"Validation failed: {error}"]
+ "improvementSuggestions": [f"NEXT STEP: Fix validation error - {error}. Check system logs for more details and retry the operation."]
}
- def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
- """AI-based validation to intelligently assess task completion"""
+ async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
+ """AI-based comprehensive validation - single main function"""
try:
+ if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
+ return self._createFailedValidationResult("AI service not available")
+
# Extract content from all documents
documentContents = []
for doc in documents:
@@ -326,60 +61,77 @@ class ContentValidator:
"content": content[:2000] # Limit content for AI processing
})
- # Create AI validation prompt
+ # Create comprehensive AI validation prompt
validationPrompt = f"""
-You are a task completion validator. Analyze if the delivered content actually fulfills the user's request.
+You are a comprehensive task completion validator. Analyze if the delivered content fulfills the user's request.
USER REQUEST: {intent.get('primaryGoal', 'Unknown')}
+EXPECTED DATA TYPE: {intent.get('dataType', 'unknown')}
+EXPECTED FORMAT: {intent.get('expectedFormat', 'unknown')}
+SUCCESS CRITERIA: {intent.get('successCriteria', [])}
DELIVERED CONTENT:
{json.dumps(documentContents, indent=2)}
-TASK: Determine if the user's request has been fully completed.
-
-Analyze the gap between what was requested and what was delivered. Consider any missing elements, incorrect formats, incomplete work, or other discrepancies.
+Perform comprehensive validation:
+1. Check if content matches expected data type
+2. Check if content matches expected format
+3. Verify success criteria are met
+4. Assess overall quality and completeness
+5. Identify specific gaps and issues
+6. Provide actionable next steps
Respond with JSON only:
{{
"overallSuccess": true/false,
"qualityScore": 0.0-1.0,
- "gapAnalysis": "Detailed analysis of what's missing or incorrect",
- "improvementSuggestions": ["specific action 1", "specific action 2"]
+ "dataTypeMatch": true/false,
+ "formatMatch": true/false,
+ "successCriteriaMet": [true/false for each criterion],
+ "gapAnalysis": "Detailed analysis: what's missing/incorrect AND what specific next step to do",
+ "improvementSuggestions": ["NEXT STEP: specific action 1", "NEXT STEP: specific action 2"],
+ "validationDetails": [
+ {{
+ "documentName": "Document name",
+ "issues": ["specific issue 1", "specific issue 2"],
+ "suggestions": ["NEXT STEP: specific fix 1", "NEXT STEP: specific fix 2"]
+ }}
+ ]
}}
"""
# Call AI service for validation
- from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
+ from modules.datamodels.datamodelAi import AiCallOptions, OperationType
request_options = AiCallOptions()
request_options.operationType = OperationType.GENERAL
- request = AiCallRequest(prompt=validationPrompt, context="", options=request_options)
+ response = await self.services.ai.callAi(
+ prompt=validationPrompt,
+ documents=None,
+ options=request_options
+ )
+ if response:
+ import re
+ result = response.strip()
+ json_match = re.search(r'\{.*\}', result, re.DOTALL)
+ if json_match:
+ result = json_match.group(0)
+
+ aiResult = json.loads(result)
+
+ return {
+ "overallSuccess": aiResult.get("overallSuccess", False),
+ "qualityScore": aiResult.get("qualityScore", 0.0),
+ "validationDetails": aiResult.get("validationDetails", [{
+ "documentName": "AI Validation",
+ "gapAnalysis": aiResult.get("gapAnalysis", ""),
+ "successCriteriaMet": aiResult.get("successCriteriaMet", [False])
+ }]),
+ "improvementSuggestions": aiResult.get("improvementSuggestions", [])
+ }
- # Get AI service from the workflow context
- if hasattr(self, 'services') and hasattr(self.services, 'ai'):
- response = self.services.ai.aiObjects.call(request)
- if response and response.content:
- import re
- result = response.content.strip()
- json_match = re.search(r'\{.*\}', result, re.DOTALL)
- if json_match:
- result = json_match.group(0)
-
- aiResult = json.loads(result)
-
- return {
- "overallSuccess": aiResult.get("overallSuccess", False),
- "qualityScore": aiResult.get("qualityScore", 0.0),
- "validationDetails": [{
- "documentName": "AI Validation",
- "gapAnalysis": aiResult.get("gapAnalysis", ""),
- "successCriteriaMet": [aiResult.get("overallSuccess", False)]
- }],
- "improvementSuggestions": aiResult.get("improvementSuggestions", [])
- }
-
- return None # Fallback to rule-based validation
+ return self._createFailedValidationResult("AI validation failed - no response")
except Exception as e:
logger.error(f"AI validation failed: {str(e)}")
- return None # Fallback to rule-based validation
\ No newline at end of file
+ return self._createFailedValidationResult(f"AI validation error: {str(e)}")
\ No newline at end of file
diff --git a/modules/workflows/processing/adaptive/intentAnalyzer.py b/modules/workflows/processing/adaptive/intentAnalyzer.py
index 374742f3..3e64e111 100644
--- a/modules/workflows/processing/adaptive/intentAnalyzer.py
+++ b/modules/workflows/processing/adaptive/intentAnalyzer.py
@@ -1,228 +1,109 @@
# intentAnalyzer.py
-# Intent analysis for adaptive React mode
+# Intent analysis for adaptive React mode - AI-based, language-agnostic
-import re
+import json
import logging
from typing import Dict, Any, List
-from enum import Enum
logger = logging.getLogger(__name__)
-class DataType(Enum):
- NUMBERS = "numbers"
- TEXT = "text"
- DOCUMENTS = "documents"
- ANALYSIS = "analysis"
- CODE = "code"
- UNKNOWN = "unknown"
-
-class ExpectedFormat(Enum):
- RAW_DATA = "raw_data"
- FORMATTED = "formatted"
- STRUCTURED = "structured"
- VISUAL = "visual"
- UNKNOWN = "unknown"
-
class IntentAnalyzer:
- """Analyzes user intent to understand what they actually want"""
+ """Analyzes user intent using AI - language-agnostic and generic"""
- def __init__(self):
- self.dataTypePatterns = {
- DataType.NUMBERS: [
- r'\b(numbers?|digits?|count|list|sequence)\b',
- r'\b(prime|fibonacci|random|even|odd)\s+(numbers?)\b',
- r'\b(calculate|compute|generate)\s+(numbers?)\b',
- r'\b(first|last)\s+\d+\s+(numbers?)\b'
- ],
- DataType.TEXT: [
- r'\b(text|content|words?|sentences?|paragraphs?)\b',
- r'\b(write|create|generate)\s+(text|content)\b',
- r'\b(summary|description|explanation)\b',
- r'\b(article|essay|report)\b'
- ],
- DataType.DOCUMENTS: [
- r'\b(document|file|report|pdf|word|excel)\b',
- r'\b(create|generate|make)\s+(document|file|report)\b',
- r'\b(format|structure|organize)\s+(document)\b',
- r'\b(presentation|slides?)\b'
- ],
- DataType.ANALYSIS: [
- r'\b(analyze|analysis|examine|study|evaluate)\b',
- r'\b(insights?|findings?|results?)\b',
- r'\b(compare|contrast|evaluate)\b',
- r'\b(trends?|patterns?)\b'
- ],
- DataType.CODE: [
- r'\b(code|program|script|algorithm|function)\b',
- r'\b(write|create|develop)\s+(code|program|script)\b',
- r'\b(implement|build|construct)\b',
- r'\b(debug|fix|optimize)\s+(code)\b'
- ]
- }
-
- self.formatPatterns = {
- ExpectedFormat.RAW_DATA: [
- r'\b(raw|plain|simple|basic)\b',
- r'\b(numbers?|data|list)\b(?!\s+(in|as|with))',
- r'\b(just|only)\s+(numbers?|data)\b'
- ],
- ExpectedFormat.FORMATTED: [
- r'\b(formatted|structured|organized|presented)\b',
- r'\b(table|chart|graph|visual)\b',
- r'\b(pretty|nice|clean)\s+(format|presentation)\b',
- r'\b(professional|polished)\b'
- ],
- ExpectedFormat.STRUCTURED: [
- r'\b(json|xml|csv|structured)\b',
- r'\b(organized|categorized|grouped)\b',
- r'\b(systematic|methodical)\b',
- r'\b(database|spreadsheet)\b'
- ]
- }
+ def __init__(self, services=None):
+ self.services = services
- def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]:
- """Analyzes user intent from prompt and context"""
+ async def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]:
+ """Analyzes user intent from prompt and context using AI"""
try:
- # Extract primary goal
- primaryGoal = self._extractPrimaryGoal(userPrompt)
+ # Use AI to analyze intent
+ aiAnalysis = await self._analyzeIntentWithAI(userPrompt, context)
+ if aiAnalysis:
+ return aiAnalysis
- # Classify data type
- dataType = self._classifyDataType(userPrompt)
-
- # Determine expected format
- expectedFormat = self._determineExpectedFormat(userPrompt)
-
- # Assess quality requirements
- qualityRequirements = self._assessQualityRequirements(userPrompt, context)
-
- # Extract success criteria
- successCriteria = self._extractSuccessCriteria(userPrompt, context)
-
- # Calculate confidence score
- confidenceScore = self._calculateConfidenceScore(dataType, expectedFormat, successCriteria)
-
- return {
- "primaryGoal": primaryGoal,
- "dataType": dataType.value,
- "expectedFormat": expectedFormat.value,
- "qualityRequirements": qualityRequirements,
- "successCriteria": successCriteria,
- "confidenceScore": confidenceScore
- }
+ # Fallback to basic analysis if AI fails
+ return self._createBasicIntentAnalysis(userPrompt)
except Exception as e:
logger.error(f"Error analyzing user intent: {str(e)}")
return self._createDefaultIntentAnalysis(userPrompt)
- def _extractPrimaryGoal(self, userPrompt: str) -> str:
- """Extracts the primary goal from user prompt"""
- # Simple extraction - can be enhanced
- return userPrompt.strip()
+ async def _analyzeIntentWithAI(self, userPrompt: str, context: Any) -> "Dict[str, Any] | None":
+ """Uses AI to analyze user intent - language-agnostic"""
+ try:
+ if not self.services or not getattr(self.services, 'ai', None):
+ return None
+
+ # Create AI analysis prompt
+ analysisPrompt = f"""
+You are an intent analyzer. Analyze the user's request to understand what they want delivered.
+
+USER REQUEST: {userPrompt}
+
+CONTEXT: {getattr(context.task_step, 'objective', '') if hasattr(context, 'task_step') and context.task_step else ''}
+
+Analyze the user's intent and determine:
+1. What type of data/content they want (numbers, text, documents, analysis, code, etc.)
+2. What format they expect (raw data, formatted, structured, visual, etc.)
+3. What quality requirements they have (accuracy, completeness, format)
+4. What specific success criteria define completion
+
+Respond with JSON only:
+{{
+ "primaryGoal": "The main objective the user wants to achieve",
+ "dataType": "numbers|text|documents|analysis|code|unknown",
+ "expectedFormat": "raw_data|formatted|structured|visual|unknown",
+ "qualityRequirements": {{
+ "accuracyThreshold": 0.0-1.0,
+ "completenessThreshold": 0.0-1.0,
+ "formatRequirement": "any|formatted|raw|structured"
+ }},
+ "successCriteria": ["specific criterion 1", "specific criterion 2"],
+ "confidenceScore": 0.0-1.0
+}}
+"""
+
+ # Call AI service for analysis
+ from modules.datamodels.datamodelAi import AiCallOptions, OperationType
+ request_options = AiCallOptions()
+ request_options.operationType = OperationType.GENERAL
+
+ response = await self.services.ai.callAi(
+ prompt=analysisPrompt,
+ documents=None,
+ options=request_options
+ )
+ if response:
+ import re
+ result = response.strip()
+ json_match = re.search(r'\{.*\}', result, re.DOTALL)
+ if json_match:
+ result = json_match.group(0)
+
+ aiResult = json.loads(result)
+ return aiResult
+
+ return None
+
+ except Exception as e:
+ logger.error(f"AI intent analysis failed: {str(e)}")
+ return None
- def _classifyDataType(self, userPrompt: str) -> DataType:
- """Classifies the type of data the user wants"""
- promptLower = userPrompt.lower()
-
- for dataType, patterns in self.dataTypePatterns.items():
- for pattern in patterns:
- if re.search(pattern, promptLower):
- return dataType
-
- return DataType.UNKNOWN
-
- def _determineExpectedFormat(self, userPrompt: str) -> ExpectedFormat:
- """Determines the expected format of the output"""
- promptLower = userPrompt.lower()
-
- for formatType, patterns in self.formatPatterns.items():
- for pattern in patterns:
- if re.search(pattern, promptLower):
- return formatType
-
- return ExpectedFormat.UNKNOWN
-
- def _assessQualityRequirements(self, userPrompt: str, context: Any) -> Dict[str, Any]:
- """Assesses quality requirements from prompt and context"""
- promptLower = userPrompt.lower()
-
- # Check for accuracy requirements
- accuracyThreshold = 0.8
- if any(word in promptLower for word in ['exact', 'precise', 'accurate', 'correct']):
- accuracyThreshold = 0.95
- elif any(word in promptLower for word in ['approximate', 'rough', 'estimate']):
- accuracyThreshold = 0.7
-
- # Check for completeness requirements
- completenessThreshold = 0.8
- if any(word in promptLower for word in ['complete', 'full', 'comprehensive', 'all']):
- completenessThreshold = 0.95
- elif any(word in promptLower for word in ['summary', 'brief', 'overview']):
- completenessThreshold = 0.6
-
- # Check for format requirements
- formatRequirement = "any"
- if any(word in promptLower for word in ['formatted', 'structured', 'organized']):
- formatRequirement = "formatted"
- elif any(word in promptLower for word in ['raw', 'plain', 'simple']):
- formatRequirement = "raw"
-
+ def _createBasicIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]:
+ """Creates basic intent analysis without AI"""
return {
- "accuracyThreshold": accuracyThreshold,
- "completenessThreshold": completenessThreshold,
- "formatRequirement": formatRequirement
+ "primaryGoal": userPrompt.strip(),
+ "dataType": "unknown",
+ "expectedFormat": "unknown",
+ "qualityRequirements": {
+ "accuracyThreshold": 0.8,
+ "completenessThreshold": 0.8,
+ "formatRequirement": "any"
+ },
+ "successCriteria": ["Delivers what the user requested"],
+ "confidenceScore": 0.5
}
- def _extractSuccessCriteria(self, userPrompt: str, context: Any) -> List[str]:
- """Extracts success criteria from prompt and context"""
- criteria = []
- promptLower = userPrompt.lower()
-
- # Extract explicit criteria
- if 'first' in promptLower and 'numbers' in promptLower:
- criteria.append("Contains the first N numbers as requested")
-
- if 'prime' in promptLower:
- criteria.append("Contains actual prime numbers, not code to generate them")
-
- if 'document' in promptLower:
- criteria.append("Creates a properly formatted document")
-
- if 'format' in promptLower:
- criteria.append("Content is properly formatted as requested")
-
- # Add context-based criteria
- if hasattr(context, 'task_step') and context.task_step:
- taskObjective = context.task_step.objective.lower()
- if 'word' in taskObjective:
- criteria.append("Creates a Word document")
- if 'excel' in taskObjective:
- criteria.append("Creates an Excel spreadsheet")
-
- return criteria if criteria else ["Delivers what the user requested"]
-
- def _calculateConfidenceScore(self, dataType: DataType, expectedFormat: ExpectedFormat,
- successCriteria: List[str]) -> float:
- """Calculates confidence score for the intent analysis"""
- score = 0.0
-
- # Data type confidence
- if dataType != DataType.UNKNOWN:
- score += 0.3
-
- # Format confidence
- if expectedFormat != ExpectedFormat.UNKNOWN:
- score += 0.2
-
- # Success criteria confidence
- if len(successCriteria) > 0:
- score += 0.3
-
- # Additional confidence for specific patterns
- if len(successCriteria) > 1:
- score += 0.2
-
- return min(score, 1.0)
-
def _createDefaultIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]:
"""Creates a default intent analysis when analysis fails"""
return {
diff --git a/modules/workflows/processing/modes/modeReact.py b/modules/workflows/processing/modes/modeReact.py
index 1bc893a5..9a79dcb3 100644
--- a/modules/workflows/processing/modes/modeReact.py
+++ b/modules/workflows/processing/modes/modeReact.py
@@ -31,9 +31,8 @@ class ReactMode(BaseMode):
def __init__(self, services, workflow):
super().__init__(services, workflow)
# Initialize adaptive components
- self.intentAnalyzer = IntentAnalyzer()
- self.contentValidator = ContentValidator()
- self.contentValidator.services = self.services # Pass services for AI validation
+ self.intentAnalyzer = IntentAnalyzer(services)
+ self.contentValidator = ContentValidator(services)
self.learningEngine = LearningEngine()
self.progressTracker = ProgressTracker()
self.currentIntent = None
@@ -53,9 +52,9 @@ class ReactMode(BaseMode):
# NEW: Analyze intents separately for proper validation vs task completion
# Workflow-level intent from cleaned original user prompt
original_prompt = self.services.currentUserPrompt if self.services and hasattr(self.services, 'currentUserPrompt') else taskStep.objective
- self.workflowIntent = self.intentAnalyzer.analyzeUserIntent(original_prompt, context)
+ self.workflowIntent = await self.intentAnalyzer.analyzeUserIntent(original_prompt, context)
# Task-level intent from current task objective (used only for task-scoped checks)
- self.taskIntent = self.intentAnalyzer.analyzeUserIntent(taskStep.objective, context)
+ self.taskIntent = await self.intentAnalyzer.analyzeUserIntent(taskStep.objective, context)
logger.info(f"Intent analysis — workflow: {self.workflowIntent}")
logger.info(f"Intent analysis — task: {self.taskIntent}")
@@ -103,7 +102,7 @@ class ReactMode(BaseMode):
# NEW: Add content validation (against original cleaned user prompt / workflow intent)
if getattr(self, 'workflowIntent', None) and result.documents:
- validationResult = self.contentValidator.validateContent(result.documents, self.workflowIntent)
+ validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent)
observation['contentValidation'] = validationResult
logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {validationResult['qualityScore']:.2f})")