diff --git a/modules/services/serviceAi/subDocumentGeneration.py b/modules/services/serviceAi/subDocumentGeneration.py index 09fe524c..6d7ee4b7 100644 --- a/modules/services/serviceAi/subDocumentGeneration.py +++ b/modules/services/serviceAi/subDocumentGeneration.py @@ -382,8 +382,22 @@ class SubDocumentGeneration: logger.info(f"AI enhanced JSON content successfully") except json.JSONDecodeError as e: - logger.warning(f"AI generation returned invalid JSON: {str(e)}, using original content") - enhancedContent = complete_document + logger.warning(f"AI generation returned invalid JSON: {str(e)}, attempting to repair...") + # Try to repair common JSON issues + try: + repaired_result = self._repairJson(result) + enhancedContent = json.loads(repaired_result) + logger.info(f"Successfully repaired JSON content") + except (json.JSONDecodeError, Exception) as repair_error: + logger.warning(f"JSON repair failed: {str(repair_error)}, trying AI repair...") + # Try AI-powered JSON repair as last resort + try: + ai_repaired = await self._repairJsonWithAI(result) + enhancedContent = json.loads(ai_repaired) + logger.info(f"AI successfully repaired JSON content") + except Exception as ai_repair_error: + logger.warning(f"AI JSON repair also failed: {str(ai_repair_error)}, using original content") + enhancedContent = complete_document else: logger.warning("AI generation returned empty response, using original content") enhancedContent = complete_document @@ -664,3 +678,127 @@ Return only the JSON response. except Exception: # Non-fatal; ignore if storage or chat creation fails return + + def _repairJson(self, json_string: str) -> str: + """Repair common JSON syntax errors efficiently for large JSON.""" + try: + import re + import json + + # Remove any leading/trailing whitespace + json_string = json_string.strip() + + # For large JSON, skip substring extraction and go straight to targeted repairs + logger.info(f"Attempting JSON repair for {len(json_string)} characters...") + + # Try to parse first to see what specific error we get + try: + json.loads(json_string) + return json_string # Already valid + except json.JSONDecodeError as e: + error_msg = str(e) + logger.info(f"JSON error: {error_msg}") + + # Apply targeted fixes based on the specific error + if "Expecting ',' delimiter" in error_msg: + # Fix missing commas between array elements + json_string = re.sub(r'\]\s*\[', '], [', json_string) + json_string = re.sub(r'\}\s*\{', '}, {', json_string) + # Fix missing commas between object properties + json_string = re.sub(r'("\s*:\s*[^,}]+)\s*(")', r'\1, \2', json_string) + + if "Expecting value" in error_msg: + # Fix missing values (replace empty with null) + json_string = re.sub(r':\s*,', ': null,', json_string) + json_string = re.sub(r':\s*}', ': null}', json_string) + + if "Expecting property name" in error_msg: + # Fix unquoted property names + json_string = re.sub(r'(\w+):', r'"\1":', json_string) + + # Fix trailing commas before closing brackets/braces + json_string = re.sub(r',(\s*[}\]])', r'\1', json_string) + + # Fix missing closing brackets/braces (only if reasonable) + open_braces = json_string.count('{') + close_braces = json_string.count('}') + open_brackets = json_string.count('[') + close_brackets = json_string.count(']') + + # Only add missing brackets if the difference is small (avoid runaway) + if 0 < (open_braces - close_braces) <= 5: + missing_braces = open_braces - close_braces + json_string += '}' * missing_braces + + if 0 < (open_brackets - close_brackets) <= 5: + missing_brackets = open_brackets - close_brackets + json_string += ']' * missing_brackets + + # Try to parse again + try: + json.loads(json_string) + logger.info("JSON repair successful") + return json_string + except json.JSONDecodeError: + logger.warning("JSON repair failed - will try AI repair") + return json_string + + except Exception as e: + logger.warning(f"JSON repair failed: {str(e)}") + return json_string + + async def _repairJsonWithAI(self, malformed_json: str) -> str: + """Use AI to repair malformed JSON efficiently for large files.""" + try: + # Limit JSON size for AI processing (max 50KB to avoid token limits) + max_json_size = 50000 + json_to_repair = malformed_json + + if len(malformed_json) > max_json_size: + logger.warning(f"JSON too large ({len(malformed_json)} chars), truncating to {max_json_size} chars for AI repair") + # Try to find a good truncation point (end of a complete object/array) + truncate_at = max_json_size + for i in range(max_json_size, max(0, max_json_size - 1000), -1): + if malformed_json[i] in ['}', ']']: + truncate_at = i + 1 + break + json_to_repair = malformed_json[:truncate_at] + "..." + + repair_prompt = f""" +You are a JSON repair expert. Fix the following malformed JSON and return ONLY the corrected JSON, no explanations. + +Malformed JSON: +{json_to_repair} + +Return only the valid JSON: +""" + + # Use AI to repair the JSON + repaired_json = await self.services.ai.callAi( + prompt=repair_prompt, + documents=None, + options={ + "process_type": "text", + "operation_type": "generate_content", + "priority": "speed", + "max_cost": 0.01 + } + ) + + # Clean up the response (remove any markdown formatting) + repaired_json = repaired_json.strip() + if repaired_json.startswith('```json'): + repaired_json = repaired_json[7:] + if repaired_json.endswith('```'): + repaired_json = repaired_json[:-3] + repaired_json = repaired_json.strip() + + # Validate the repaired JSON + import json + json.loads(repaired_json) + logger.info("AI JSON repair successful") + return repaired_json + + except Exception as e: + logger.warning(f"AI JSON repair failed: {str(e)}") + return malformed_json diff --git a/modules/services/serviceAi/subDocumentProcessing.py b/modules/services/serviceAi/subDocumentProcessing.py index e96a9394..d85a5341 100644 --- a/modules/services/serviceAi/subDocumentProcessing.py +++ b/modules/services/serviceAi/subDocumentProcessing.py @@ -199,8 +199,10 @@ class SubDocumentProcessing: else: raise ValueError('Normalization produced zero rows') except Exception as e: - # Surface normalization failure while leaving original merged JSON (single-path expectation is to fail) - raise + # Log normalization failure but don't re-raise - continue with original merged JSON + logger.warning(f"Normalization failed (expected): {str(e)}") + logger.debug(f"Normalization error type: {type(e).__name__}") + # Continue with original merged JSON instead of re-raising # Save merged JSON extraction content to debug file - only if debug enabled try: @@ -221,6 +223,10 @@ class SubDocumentProcessing: except Exception as e: logger.error(f"Error in per-chunk processing (JSON mode): {str(e)}") + logger.error(f"Exception type: {type(e).__name__}") + logger.error(f"Exception args: {e.args}") + import traceback + logger.error(f"Traceback: {traceback.format_exc()}") return {"metadata": {"title": "Error Document"}, "sections": []} async def processDocumentsPerChunkJsonWithPrompt( diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index 5cd9b691..1dedaf46 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -278,17 +278,25 @@ class RendererHtml(BaseRenderer): section_data = self._get_section_data(section) if section_type == "table": - return self._render_json_table(section_data, styles) + # Process the section data to extract table structure + processed_data = self._process_section_by_type(section) + return self._render_json_table(processed_data, styles) elif section_type == "bullet_list": - return self._render_json_bullet_list(section_data, styles) + # Process the section data to extract bullet list structure + processed_data = self._process_section_by_type(section) + return self._render_json_bullet_list(processed_data, styles) elif section_type == "heading": return self._render_json_heading(section_data, styles) elif section_type == "paragraph": return self._render_json_paragraph(section_data, styles) elif section_type == "code_block": - return self._render_json_code_block(section_data, styles) + # Process the section data to extract code block structure + processed_data = self._process_section_by_type(section) + return self._render_json_code_block(processed_data, styles) elif section_type == "image": - return self._render_json_image(section_data, styles) + # Process the section data to extract image structure + processed_data = self._process_section_by_type(section) + return self._render_json_image(processed_data, styles) else: # Fallback to paragraph for unknown types return self._render_json_paragraph(section_data, styles) diff --git a/modules/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/services/serviceGeneration/renderers/rendererMarkdown.py index 61f0bebc..59806d4c 100644 --- a/modules/services/serviceGeneration/renderers/rendererMarkdown.py +++ b/modules/services/serviceGeneration/renderers/rendererMarkdown.py @@ -81,17 +81,25 @@ class RendererMarkdown(BaseRenderer): section_data = self._get_section_data(section) if section_type == "table": - return self._render_json_table(section_data) + # Process the section data to extract table structure + processed_data = self._process_section_by_type(section) + return self._render_json_table(processed_data) elif section_type == "bullet_list": - return self._render_json_bullet_list(section_data) + # Process the section data to extract bullet list structure + processed_data = self._process_section_by_type(section) + return self._render_json_bullet_list(processed_data) elif section_type == "heading": return self._render_json_heading(section_data) elif section_type == "paragraph": return self._render_json_paragraph(section_data) elif section_type == "code_block": - return self._render_json_code_block(section_data) + # Process the section data to extract code block structure + processed_data = self._process_section_by_type(section) + return self._render_json_code_block(processed_data) elif section_type == "image": - return self._render_json_image(section_data) + # Process the section data to extract image structure + processed_data = self._process_section_by_type(section) + return self._render_json_image(processed_data) else: # Fallback to paragraph for unknown types return self._render_json_paragraph(section_data) diff --git a/modules/services/serviceGeneration/renderers/rendererText.py b/modules/services/serviceGeneration/renderers/rendererText.py index 33d648e8..68ccfdbe 100644 --- a/modules/services/serviceGeneration/renderers/rendererText.py +++ b/modules/services/serviceGeneration/renderers/rendererText.py @@ -104,9 +104,13 @@ class RendererText(BaseRenderer): section_data = self._get_section_data(section) if section_type == "table": - return self._render_json_table(section_data) + # Process the section data to extract table structure + processed_data = self._process_section_by_type(section) + return self._render_json_table(processed_data) elif section_type == "bullet_list": - return self._render_json_bullet_list(section_data) + # Process the section data to extract bullet list structure + processed_data = self._process_section_by_type(section) + return self._render_json_bullet_list(processed_data) elif section_type == "heading": # Render each heading element in the elements array # section_data is already the elements array from _get_section_data @@ -122,9 +126,13 @@ class RendererText(BaseRenderer): rendered_elements.append(self._render_json_paragraph(element)) return "\n".join(rendered_elements) elif section_type == "code_block": - return self._render_json_code_block(section_data) + # Process the section data to extract code block structure + processed_data = self._process_section_by_type(section) + return self._render_json_code_block(processed_data) elif section_type == "image": - return self._render_json_image(section_data) + # Process the section data to extract image structure + processed_data = self._process_section_by_type(section) + return self._render_json_image(processed_data) else: # Fallback to paragraph for unknown types - render each element # section_data is already the elements array from _get_section_data diff --git a/modules/workflows/processing/adaptive/__init__.py b/modules/workflows/processing/adaptive/__init__.py index fdff3698..afc0c963 100644 --- a/modules/workflows/processing/adaptive/__init__.py +++ b/modules/workflows/processing/adaptive/__init__.py @@ -1,9 +1,9 @@ # adaptive module for React mode # Provides adaptive learning capabilities -from .intentAnalyzer import IntentAnalyzer, DataType, ExpectedFormat +from .intentAnalyzer import IntentAnalyzer from .contentValidator import ContentValidator from .learningEngine import LearningEngine from .progressTracker import ProgressTracker -__all__ = ['IntentAnalyzer', 'ContentValidator', 'LearningEngine', 'ProgressTracker', 'DataType', 'ExpectedFormat'] +__all__ = ['IntentAnalyzer', 'ContentValidator', 'LearningEngine', 'ProgressTracker'] diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py index 5253ab5e..d211d1c3 100644 --- a/modules/workflows/processing/adaptive/contentValidator.py +++ b/modules/workflows/processing/adaptive/contentValidator.py @@ -1,9 +1,9 @@ # contentValidator.py # Content validation for adaptive React mode -import re import logging import json +import re from typing import List, Dict, Any logger = logging.getLogger(__name__) @@ -11,40 +11,14 @@ logger = logging.getLogger(__name__) class ContentValidator: """Validates delivered content against user intent""" - def __init__(self): - pass + def __init__(self, services=None): + self.services = services - def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]: + async def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]: """Validates delivered content against user intent using AI""" try: - # First, try AI-based validation for intelligent gap analysis - aiValidation = self._validateWithAI(documents, intent) - if aiValidation: - return aiValidation - - # Fallback to rule-based validation if AI validation fails - validationDetails = [] - - for doc in documents: - content = self._extractContent(doc) - detail = self._validateSingleDocument(content, doc, intent) - validationDetails.append(detail) - - # Calculate overall success - overallSuccess = all(detail.get("successCriteriaMet", [False]) for detail in validationDetails) - - # Calculate quality score - qualityScore = self._calculateQualityScore(validationDetails) - - # Generate improvement suggestions - improvementSuggestions = self._generateImprovementSuggestions(validationDetails, intent) - - return { - "overallSuccess": overallSuccess, - "qualityScore": qualityScore, - "validationDetails": validationDetails, - "improvementSuggestions": improvementSuggestions - } + # Use AI for comprehensive validation + return await self._validateWithAI(documents, intent) except Exception as e: logger.error(f"Error validating content: {str(e)}") @@ -63,260 +37,21 @@ class ContentValidator: except Exception: return "" - def _validateSingleDocument(self, content: str, doc: Any, intent: Dict[str, Any]) -> Dict[str, Any]: - """Validates a single document against intent""" - # Check data type match - dataTypeMatch = self._checkDataTypeMatch(content, intent.get("dataType", "unknown")) - - # Check format match - formatMatch = self._checkFormatMatch(content, intent.get("expectedFormat", "unknown")) - - # Calculate quality score - qualityScore = self._calculateDocumentQualityScore(content, intent) - - # Check success criteria - successCriteriaMet = self._checkSuccessCriteria(content, intent) - - # Identify specific issues - specificIssues = self._identifySpecificIssues(content, intent) - - # Generate improvement suggestions - improvementSuggestions = self._generateDocumentImprovementSuggestions(content, intent) - - return { - "documentName": getattr(doc, 'documentName', 'Unknown'), - "dataTypeMatch": dataTypeMatch, - "formatMatch": formatMatch, - "qualityScore": qualityScore, - "successCriteriaMet": successCriteriaMet, - "specificIssues": specificIssues, - "improvementSuggestions": improvementSuggestions - } - - def _checkDataTypeMatch(self, content: str, dataType: str) -> bool: - """Checks if content matches the expected data type""" - if dataType == "numbers": - return self._containsNumbers(content) - elif dataType == "text": - return self._containsText(content) - elif dataType == "documents": - return self._containsDocumentContent(content) - elif dataType == "analysis": - return self._containsAnalysis(content) - elif dataType == "code": - return self._containsCode(content) - else: - return True # Unknown type, assume match - - def _containsNumbers(self, content: str) -> bool: - """Checks if content contains actual numbers (not code)""" - # Look for actual numbers in the content - numbers = re.findall(r'\b\d+\b', content) - - # Check if it's code (contains function definitions, etc.) - isCode = any(keyword in content.lower() for keyword in [ - 'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ', - 'return', 'print(', 'console.log', 'public ', 'private ' - ]) - - # If it's code, it doesn't contain actual numbers - if isCode: - return False - - # If it has numbers and it's not code, it contains actual numbers - return len(numbers) > 0 - - def _containsText(self, content: str) -> bool: - """Checks if content contains readable text""" - # Remove numbers and special characters - textContent = re.sub(r'[^\w\s]', '', content) - words = textContent.split() - - # Check if there are enough words to be considered text - return len(words) > 5 - - def _containsDocumentContent(self, content: str) -> bool: - """Checks if content is suitable for document creation""" - # Check for structured content - hasStructure = any(indicator in content for indicator in [ - '\n', '\t', '|', '-', '*', '1.', '2.', '•', '◦' - ]) - - # Check for meaningful content - hasMeaningfulContent = len(content.strip()) > 50 - - return hasStructure and hasMeaningfulContent - - def _containsAnalysis(self, content: str) -> bool: - """Checks if content contains analysis""" - analysisIndicators = [ - 'analysis', 'findings', 'conclusion', 'summary', 'insights', - 'trends', 'patterns', 'comparison', 'evaluation', 'assessment' - ] - - contentLower = content.lower() - return any(indicator in contentLower for indicator in analysisIndicators) - - def _containsCode(self, content: str) -> bool: - """Checks if content contains code""" - codeIndicators = [ - 'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ', - 'return', 'print(', 'console.log', 'public ', 'private ', 'void ', - 'int ', 'string ', 'var ', 'let ', 'const ' - ] - - contentLower = content.lower() - return any(indicator in contentLower for indicator in codeIndicators) - - def _checkFormatMatch(self, content: str, expectedFormat: str) -> bool: - """Checks if content matches expected format""" - if expectedFormat == "raw_data": - # Raw data should be simple, not heavily formatted - return not any(indicator in content for indicator in [ - '', '
', '', '## ', '### ', '**', '__' - ]) - elif expectedFormat == "formatted": - # Formatted content should have structure - return any(indicator in content for indicator in [ - '\n', '\t', '|', '-', '*', '1.', '2.', '•' - ]) - elif expectedFormat == "structured": - # Structured content should have clear organization - return any(indicator in content for indicator in [ - '{', '}', '[', ']', '|', '\t', ' ' - ]) - else: - return True # Unknown format, assume match - - def _checkSuccessCriteria(self, content: str, intent: Dict[str, Any]) -> List[bool]: - """Checks if content meets success criteria""" - criteriaMet = [] - successCriteria = intent.get("successCriteria", []) - - for criterion in successCriteria: - if 'prime numbers' in criterion.lower(): - # Check if content contains actual prime numbers, not code - hasNumbers = bool(re.search(r'\b\d+\b', content)) - isNotCode = not any(keyword in content.lower() for keyword in [ - 'def ', 'function', 'import ', 'class ' - ]) - criteriaMet.append(hasNumbers and isNotCode) - elif 'document' in criterion.lower(): - # Check if content is suitable for document creation - hasStructure = any(indicator in content for indicator in [ - '\n', '\t', '|', '-', '*', '1.', '2.' - ]) - criteriaMet.append(hasStructure) - elif 'format' in criterion.lower(): - # Check if content is properly formatted - hasFormatting = any(indicator in content for indicator in [ - '\n', '\t', '|', '-', '*', '1.', '2.', '•' - ]) - criteriaMet.append(hasFormatting) - else: - # Generic check - content should not be empty - criteriaMet.append(len(content.strip()) > 0) - - return criteriaMet - - def _calculateDocumentQualityScore(self, content: str, intent: Dict[str, Any]) -> float: - """Calculates quality score for a single document""" - score = 0.0 - - # Base score for having content - if len(content.strip()) > 0: - score += 0.2 - - # Score for data type match - if self._checkDataTypeMatch(content, intent.get("dataType", "unknown")): - score += 0.3 - - # Score for format match - if self._checkFormatMatch(content, intent.get("expectedFormat", "unknown")): - score += 0.2 - - # Score for success criteria - successCriteriaMet = self._checkSuccessCriteria(content, intent) - if successCriteriaMet: - successRate = sum(successCriteriaMet) / len(successCriteriaMet) - score += 0.3 * successRate - - return min(score, 1.0) - - def _calculateQualityScore(self, validationDetails: List[Dict[str, Any]]) -> float: - """Calculates overall quality score from validation details""" - if not validationDetails: - return 0.0 - - totalScore = sum(detail.get("qualityScore", 0) for detail in validationDetails) - return totalScore / len(validationDetails) - - def _identifySpecificIssues(self, content: str, intent: Dict[str, Any]) -> List[str]: - """Identifies specific issues with the content""" - issues = [] - - # Check for common issues - if intent.get("dataType") == "numbers" and self._containsCode(content): - issues.append("Content contains code instead of actual numbers") - - if intent.get("expectedFormat") == "raw_data" and any(indicator in content for indicator in ['', '## ', '**']): - issues.append("Content is formatted when raw data was requested") - - if len(content.strip()) == 0: - issues.append("Content is empty") - - return issues - - def _generateDocumentImprovementSuggestions(self, content: str, intent: Dict[str, Any]) -> List[str]: - """Generates improvement suggestions for a single document""" - suggestions = [] - - dataType = intent.get("dataType", "unknown") - expectedFormat = intent.get("expectedFormat", "unknown") - - if dataType == "numbers" and self._containsCode(content): - suggestions.append("Deliver actual numbers, not code to generate them") - - if expectedFormat == "raw_data" and any(indicator in content for indicator in ['', '## ']): - suggestions.append("Provide raw data without formatting") - - if len(content.strip()) == 0: - suggestions.append("Provide actual content") - - return suggestions - - def _generateImprovementSuggestions(self, validationDetails: List[Dict[str, Any]], - intent: Dict[str, Any]) -> List[str]: - """Generates improvement suggestions based on validation results""" - suggestions = [] - - # Check for common issues - if not any(detail.get("dataTypeMatch", False) for detail in validationDetails): - dataType = intent.get("dataType", "unknown") - suggestions.append(f"Content should contain {dataType} data, not code or other formats") - - if not any(detail.get("formatMatch", False) for detail in validationDetails): - expectedFormat = intent.get("expectedFormat", "unknown") - suggestions.append(f"Content should be in {expectedFormat} format") - - # Add specific suggestions from validation details - for detail in validationDetails: - suggestions.extend(detail.get("improvementSuggestions", [])) - - return list(set(suggestions)) # Remove duplicates - def _createFailedValidationResult(self, error: str) -> Dict[str, Any]: """Creates a failed validation result""" return { "overallSuccess": False, "qualityScore": 0.0, "validationDetails": [], - "improvementSuggestions": [f"Validation failed: {error}"] + "improvementSuggestions": [f"NEXT STEP: Fix validation error - {error}. Check system logs for more details and retry the operation."] } - def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]: - """AI-based validation to intelligently assess task completion""" + async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]: + """AI-based comprehensive validation - single main function""" try: + if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'): + return self._createFailedValidationResult("AI service not available") + # Extract content from all documents documentContents = [] for doc in documents: @@ -326,60 +61,77 @@ class ContentValidator: "content": content[:2000] # Limit content for AI processing }) - # Create AI validation prompt + # Create comprehensive AI validation prompt validationPrompt = f""" -You are a task completion validator. Analyze if the delivered content actually fulfills the user's request. +You are a comprehensive task completion validator. Analyze if the delivered content fulfills the user's request. USER REQUEST: {intent.get('primaryGoal', 'Unknown')} +EXPECTED DATA TYPE: {intent.get('dataType', 'unknown')} +EXPECTED FORMAT: {intent.get('expectedFormat', 'unknown')} +SUCCESS CRITERIA: {intent.get('successCriteria', [])} DELIVERED CONTENT: {json.dumps(documentContents, indent=2)} -TASK: Determine if the user's request has been fully completed. - -Analyze the gap between what was requested and what was delivered. Consider any missing elements, incorrect formats, incomplete work, or other discrepancies. +Perform comprehensive validation: +1. Check if content matches expected data type +2. Check if content matches expected format +3. Verify success criteria are met +4. Assess overall quality and completeness +5. Identify specific gaps and issues +6. Provide actionable next steps Respond with JSON only: {{ "overallSuccess": true/false, "qualityScore": 0.0-1.0, - "gapAnalysis": "Detailed analysis of what's missing or incorrect", - "improvementSuggestions": ["specific action 1", "specific action 2"] + "dataTypeMatch": true/false, + "formatMatch": true/false, + "successCriteriaMet": [true/false for each criterion], + "gapAnalysis": "Detailed analysis: what's missing/incorrect AND what specific next step to do", + "improvementSuggestions": ["NEXT STEP: specific action 1", "NEXT STEP: specific action 2"], + "validationDetails": [ + {{ + "documentName": "Document name", + "issues": ["specific issue 1", "specific issue 2"], + "suggestions": ["NEXT STEP: specific fix 1", "NEXT STEP: specific fix 2"] + }} + ] }} """ # Call AI service for validation - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType + from modules.datamodels.datamodelAi import AiCallOptions, OperationType request_options = AiCallOptions() request_options.operationType = OperationType.GENERAL - request = AiCallRequest(prompt=validationPrompt, context="", options=request_options) + response = await self.services.ai.callAi( + prompt=validationPrompt, + documents=None, + options=request_options + ) + if response: + import re + result = response.strip() + json_match = re.search(r'\{.*\}', result, re.DOTALL) + if json_match: + result = json_match.group(0) + + aiResult = json.loads(result) + + return { + "overallSuccess": aiResult.get("overallSuccess", False), + "qualityScore": aiResult.get("qualityScore", 0.0), + "validationDetails": aiResult.get("validationDetails", [{ + "documentName": "AI Validation", + "gapAnalysis": aiResult.get("gapAnalysis", ""), + "successCriteriaMet": aiResult.get("successCriteriaMet", [False]) + }]), + "improvementSuggestions": aiResult.get("improvementSuggestions", []) + } - # Get AI service from the workflow context - if hasattr(self, 'services') and hasattr(self.services, 'ai'): - response = self.services.ai.aiObjects.call(request) - if response and response.content: - import re - result = response.content.strip() - json_match = re.search(r'\{.*\}', result, re.DOTALL) - if json_match: - result = json_match.group(0) - - aiResult = json.loads(result) - - return { - "overallSuccess": aiResult.get("overallSuccess", False), - "qualityScore": aiResult.get("qualityScore", 0.0), - "validationDetails": [{ - "documentName": "AI Validation", - "gapAnalysis": aiResult.get("gapAnalysis", ""), - "successCriteriaMet": [aiResult.get("overallSuccess", False)] - }], - "improvementSuggestions": aiResult.get("improvementSuggestions", []) - } - - return None # Fallback to rule-based validation + return self._createFailedValidationResult("AI validation failed - no response") except Exception as e: logger.error(f"AI validation failed: {str(e)}") - return None # Fallback to rule-based validation \ No newline at end of file + return self._createFailedValidationResult(f"AI validation error: {str(e)}") \ No newline at end of file diff --git a/modules/workflows/processing/adaptive/intentAnalyzer.py b/modules/workflows/processing/adaptive/intentAnalyzer.py index 374742f3..3e64e111 100644 --- a/modules/workflows/processing/adaptive/intentAnalyzer.py +++ b/modules/workflows/processing/adaptive/intentAnalyzer.py @@ -1,228 +1,109 @@ # intentAnalyzer.py -# Intent analysis for adaptive React mode +# Intent analysis for adaptive React mode - AI-based, language-agnostic -import re +import json import logging from typing import Dict, Any, List -from enum import Enum logger = logging.getLogger(__name__) -class DataType(Enum): - NUMBERS = "numbers" - TEXT = "text" - DOCUMENTS = "documents" - ANALYSIS = "analysis" - CODE = "code" - UNKNOWN = "unknown" - -class ExpectedFormat(Enum): - RAW_DATA = "raw_data" - FORMATTED = "formatted" - STRUCTURED = "structured" - VISUAL = "visual" - UNKNOWN = "unknown" - class IntentAnalyzer: - """Analyzes user intent to understand what they actually want""" + """Analyzes user intent using AI - language-agnostic and generic""" - def __init__(self): - self.dataTypePatterns = { - DataType.NUMBERS: [ - r'\b(numbers?|digits?|count|list|sequence)\b', - r'\b(prime|fibonacci|random|even|odd)\s+(numbers?)\b', - r'\b(calculate|compute|generate)\s+(numbers?)\b', - r'\b(first|last)\s+\d+\s+(numbers?)\b' - ], - DataType.TEXT: [ - r'\b(text|content|words?|sentences?|paragraphs?)\b', - r'\b(write|create|generate)\s+(text|content)\b', - r'\b(summary|description|explanation)\b', - r'\b(article|essay|report)\b' - ], - DataType.DOCUMENTS: [ - r'\b(document|file|report|pdf|word|excel)\b', - r'\b(create|generate|make)\s+(document|file|report)\b', - r'\b(format|structure|organize)\s+(document)\b', - r'\b(presentation|slides?)\b' - ], - DataType.ANALYSIS: [ - r'\b(analyze|analysis|examine|study|evaluate)\b', - r'\b(insights?|findings?|results?)\b', - r'\b(compare|contrast|evaluate)\b', - r'\b(trends?|patterns?)\b' - ], - DataType.CODE: [ - r'\b(code|program|script|algorithm|function)\b', - r'\b(write|create|develop)\s+(code|program|script)\b', - r'\b(implement|build|construct)\b', - r'\b(debug|fix|optimize)\s+(code)\b' - ] - } - - self.formatPatterns = { - ExpectedFormat.RAW_DATA: [ - r'\b(raw|plain|simple|basic)\b', - r'\b(numbers?|data|list)\b(?!\s+(in|as|with))', - r'\b(just|only)\s+(numbers?|data)\b' - ], - ExpectedFormat.FORMATTED: [ - r'\b(formatted|structured|organized|presented)\b', - r'\b(table|chart|graph|visual)\b', - r'\b(pretty|nice|clean)\s+(format|presentation)\b', - r'\b(professional|polished)\b' - ], - ExpectedFormat.STRUCTURED: [ - r'\b(json|xml|csv|structured)\b', - r'\b(organized|categorized|grouped)\b', - r'\b(systematic|methodical)\b', - r'\b(database|spreadsheet)\b' - ] - } + def __init__(self, services=None): + self.services = services - def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]: - """Analyzes user intent from prompt and context""" + async def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]: + """Analyzes user intent from prompt and context using AI""" try: - # Extract primary goal - primaryGoal = self._extractPrimaryGoal(userPrompt) + # Use AI to analyze intent + aiAnalysis = await self._analyzeIntentWithAI(userPrompt, context) + if aiAnalysis: + return aiAnalysis - # Classify data type - dataType = self._classifyDataType(userPrompt) - - # Determine expected format - expectedFormat = self._determineExpectedFormat(userPrompt) - - # Assess quality requirements - qualityRequirements = self._assessQualityRequirements(userPrompt, context) - - # Extract success criteria - successCriteria = self._extractSuccessCriteria(userPrompt, context) - - # Calculate confidence score - confidenceScore = self._calculateConfidenceScore(dataType, expectedFormat, successCriteria) - - return { - "primaryGoal": primaryGoal, - "dataType": dataType.value, - "expectedFormat": expectedFormat.value, - "qualityRequirements": qualityRequirements, - "successCriteria": successCriteria, - "confidenceScore": confidenceScore - } + # Fallback to basic analysis if AI fails + return self._createBasicIntentAnalysis(userPrompt) except Exception as e: logger.error(f"Error analyzing user intent: {str(e)}") return self._createDefaultIntentAnalysis(userPrompt) - def _extractPrimaryGoal(self, userPrompt: str) -> str: - """Extracts the primary goal from user prompt""" - # Simple extraction - can be enhanced - return userPrompt.strip() + async def _analyzeIntentWithAI(self, userPrompt: str, context: Any) -> Dict[str, Any]: + """Uses AI to analyze user intent - language-agnostic""" + try: + if not self.services or not hasattr(self.services, 'ai'): + return None + + # Create AI analysis prompt + analysisPrompt = f""" +You are an intent analyzer. Analyze the user's request to understand what they want delivered. + +USER REQUEST: {userPrompt} + +CONTEXT: {getattr(context.task_step, 'objective', '') if hasattr(context, 'task_step') and context.task_step else ''} + +Analyze the user's intent and determine: +1. What type of data/content they want (numbers, text, documents, analysis, code, etc.) +2. What format they expect (raw data, formatted, structured, visual, etc.) +3. What quality requirements they have (accuracy, completeness, format) +4. What specific success criteria define completion + +Respond with JSON only: +{{ + "primaryGoal": "The main objective the user wants to achieve", + "dataType": "numbers|text|documents|analysis|code|unknown", + "expectedFormat": "raw_data|formatted|structured|visual|unknown", + "qualityRequirements": {{ + "accuracyThreshold": 0.0-1.0, + "completenessThreshold": 0.0-1.0, + "formatRequirement": "any|formatted|raw|structured" + }}, + "successCriteria": ["specific criterion 1", "specific criterion 2"], + "confidenceScore": 0.0-1.0 +}} +""" + + # Call AI service for analysis + from modules.datamodels.datamodelAi import AiCallOptions, OperationType + request_options = AiCallOptions() + request_options.operationType = OperationType.GENERAL + + response = await self.services.ai.callAi( + prompt=analysisPrompt, + documents=None, + options=request_options + ) + if response: + import re + result = response.strip() + json_match = re.search(r'\{.*\}', result, re.DOTALL) + if json_match: + result = json_match.group(0) + + aiResult = json.loads(result) + return aiResult + + return None + + except Exception as e: + logger.error(f"AI intent analysis failed: {str(e)}") + return None - def _classifyDataType(self, userPrompt: str) -> DataType: - """Classifies the type of data the user wants""" - promptLower = userPrompt.lower() - - for dataType, patterns in self.dataTypePatterns.items(): - for pattern in patterns: - if re.search(pattern, promptLower): - return dataType - - return DataType.UNKNOWN - - def _determineExpectedFormat(self, userPrompt: str) -> ExpectedFormat: - """Determines the expected format of the output""" - promptLower = userPrompt.lower() - - for formatType, patterns in self.formatPatterns.items(): - for pattern in patterns: - if re.search(pattern, promptLower): - return formatType - - return ExpectedFormat.UNKNOWN - - def _assessQualityRequirements(self, userPrompt: str, context: Any) -> Dict[str, Any]: - """Assesses quality requirements from prompt and context""" - promptLower = userPrompt.lower() - - # Check for accuracy requirements - accuracyThreshold = 0.8 - if any(word in promptLower for word in ['exact', 'precise', 'accurate', 'correct']): - accuracyThreshold = 0.95 - elif any(word in promptLower for word in ['approximate', 'rough', 'estimate']): - accuracyThreshold = 0.7 - - # Check for completeness requirements - completenessThreshold = 0.8 - if any(word in promptLower for word in ['complete', 'full', 'comprehensive', 'all']): - completenessThreshold = 0.95 - elif any(word in promptLower for word in ['summary', 'brief', 'overview']): - completenessThreshold = 0.6 - - # Check for format requirements - formatRequirement = "any" - if any(word in promptLower for word in ['formatted', 'structured', 'organized']): - formatRequirement = "formatted" - elif any(word in promptLower for word in ['raw', 'plain', 'simple']): - formatRequirement = "raw" - + def _createBasicIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]: + """Creates basic intent analysis without AI""" return { - "accuracyThreshold": accuracyThreshold, - "completenessThreshold": completenessThreshold, - "formatRequirement": formatRequirement + "primaryGoal": userPrompt.strip(), + "dataType": "unknown", + "expectedFormat": "unknown", + "qualityRequirements": { + "accuracyThreshold": 0.8, + "completenessThreshold": 0.8, + "formatRequirement": "any" + }, + "successCriteria": ["Delivers what the user requested"], + "confidenceScore": 0.5 } - def _extractSuccessCriteria(self, userPrompt: str, context: Any) -> List[str]: - """Extracts success criteria from prompt and context""" - criteria = [] - promptLower = userPrompt.lower() - - # Extract explicit criteria - if 'first' in promptLower and 'numbers' in promptLower: - criteria.append("Contains the first N numbers as requested") - - if 'prime' in promptLower: - criteria.append("Contains actual prime numbers, not code to generate them") - - if 'document' in promptLower: - criteria.append("Creates a properly formatted document") - - if 'format' in promptLower: - criteria.append("Content is properly formatted as requested") - - # Add context-based criteria - if hasattr(context, 'task_step') and context.task_step: - taskObjective = context.task_step.objective.lower() - if 'word' in taskObjective: - criteria.append("Creates a Word document") - if 'excel' in taskObjective: - criteria.append("Creates an Excel spreadsheet") - - return criteria if criteria else ["Delivers what the user requested"] - - def _calculateConfidenceScore(self, dataType: DataType, expectedFormat: ExpectedFormat, - successCriteria: List[str]) -> float: - """Calculates confidence score for the intent analysis""" - score = 0.0 - - # Data type confidence - if dataType != DataType.UNKNOWN: - score += 0.3 - - # Format confidence - if expectedFormat != ExpectedFormat.UNKNOWN: - score += 0.2 - - # Success criteria confidence - if len(successCriteria) > 0: - score += 0.3 - - # Additional confidence for specific patterns - if len(successCriteria) > 1: - score += 0.2 - - return min(score, 1.0) - def _createDefaultIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]: """Creates a default intent analysis when analysis fails""" return { diff --git a/modules/workflows/processing/modes/modeReact.py b/modules/workflows/processing/modes/modeReact.py index 1bc893a5..9a79dcb3 100644 --- a/modules/workflows/processing/modes/modeReact.py +++ b/modules/workflows/processing/modes/modeReact.py @@ -31,9 +31,8 @@ class ReactMode(BaseMode): def __init__(self, services, workflow): super().__init__(services, workflow) # Initialize adaptive components - self.intentAnalyzer = IntentAnalyzer() - self.contentValidator = ContentValidator() - self.contentValidator.services = self.services # Pass services for AI validation + self.intentAnalyzer = IntentAnalyzer(services) + self.contentValidator = ContentValidator(services) self.learningEngine = LearningEngine() self.progressTracker = ProgressTracker() self.currentIntent = None @@ -53,9 +52,9 @@ class ReactMode(BaseMode): # NEW: Analyze intents separately for proper validation vs task completion # Workflow-level intent from cleaned original user prompt original_prompt = self.services.currentUserPrompt if self.services and hasattr(self.services, 'currentUserPrompt') else taskStep.objective - self.workflowIntent = self.intentAnalyzer.analyzeUserIntent(original_prompt, context) + self.workflowIntent = await self.intentAnalyzer.analyzeUserIntent(original_prompt, context) # Task-level intent from current task objective (used only for task-scoped checks) - self.taskIntent = self.intentAnalyzer.analyzeUserIntent(taskStep.objective, context) + self.taskIntent = await self.intentAnalyzer.analyzeUserIntent(taskStep.objective, context) logger.info(f"Intent analysis — workflow: {self.workflowIntent}") logger.info(f"Intent analysis — task: {self.taskIntent}") @@ -103,7 +102,7 @@ class ReactMode(BaseMode): # NEW: Add content validation (against original cleaned user prompt / workflow intent) if getattr(self, 'workflowIntent', None) and result.documents: - validationResult = self.contentValidator.validateContent(result.documents, self.workflowIntent) + validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent) observation['contentValidation'] = validationResult logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {validationResult['qualityScore']:.2f})")