Adapted AI chat validation

This commit is contained in:
ValueOn AG 2025-10-15 12:41:02 +02:00
parent e9756bbc17
commit 1d347eb15a
6 changed files with 195 additions and 58 deletions

View file

@ -323,7 +323,7 @@ class GenerationService:
try:
debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
if debug_enabled:
import os
import os, json
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
debug_root = "./test-chat/ai"
debug_dir = os.path.join(debug_root, f"render_input_{ts}")
@ -332,6 +332,12 @@ class GenerationService:
f.write(f"title: {title}\nformat: {outputFormat}\ncontent_type: {type(extractedContent).__name__}\n")
f.write(f"content_size: {len(str(extractedContent))} characters\n")
f.write(f"sections_count: {len(extractedContent.get('sections', []))}\n")
# Also write the extracted content JSON for inspection
try:
with open(os.path.join(debug_dir, "extracted_content.json"), "w", encoding="utf-8") as jf:
json.dump(extractedContent, jf, ensure_ascii=False, indent=2)
except Exception:
pass
except Exception:
pass

View file

@ -363,6 +363,15 @@ class RendererHtml(BaseRenderer):
def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON heading to HTML using AI-generated styles."""
try:
# Normalize non-dict inputs
if isinstance(heading_data, str):
heading_data = {"text": heading_data, "level": 2}
elif isinstance(heading_data, list):
# Render a list as bullet list under a default heading label
return self._render_json_bullet_list({"items": heading_data}, styles)
elif not isinstance(heading_data, dict):
return ""
level = heading_data.get("level", 1)
text = heading_data.get("text", "")
@ -379,6 +388,15 @@ class RendererHtml(BaseRenderer):
def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON paragraph to HTML using AI-generated styles."""
try:
# Normalize non-dict inputs
if isinstance(paragraph_data, str):
paragraph_data = {"text": paragraph_data}
elif isinstance(paragraph_data, list):
# Treat list as bullet list paragraph
return self._render_json_bullet_list({"items": paragraph_data}, styles)
elif not isinstance(paragraph_data, dict):
return ""
text = paragraph_data.get("text", "")
if text:

View file

@ -79,14 +79,7 @@ class WorkflowService:
"""Get ChatDocuments from a list of document references using all three formats."""
try:
workflow = self.services.currentWorkflow
# Reload workflow from database to ensure we have all messages
if hasattr(workflow, 'id'):
try:
workflow = self.getWorkflow(workflow.id)
logger.debug(f"Reloaded workflow {workflow.id} with {len(workflow.messages)} messages")
except Exception as e:
logger.warning(f"Could not reload workflow from database: {str(e)}")
logger.debug(f"getChatDocumentsFromDocumentList: currentWorkflow.id = {workflow.id if workflow and hasattr(workflow, 'id') else 'NO_ID'}")
all_documents = []
for doc_ref in documentList:
@ -497,15 +490,32 @@ class WorkflowService:
def getWorkflow(self, workflowId: str):
    """Get a workflow by ID by delegating to the chat interface.

    Args:
        workflowId: Identifier of the workflow to load.

    Returns:
        The workflow object returned by the chat interface, or None when
        no workflow matches the given ID.

    Raises:
        Exception: Re-raises any error from the chat interface after logging.
    """
    try:
        logger.debug(f"getWorkflow called with workflowId: {workflowId}")
        result = self.interfaceDbChat.getWorkflow(workflowId)
        if result:
            logger.debug(f"getWorkflow returned workflow with ID: {result.id}")
        else:
            logger.warning(f"getWorkflow returned None for workflowId: {workflowId}")
        return result
    except Exception as e:
        logger.error(f"Error getting workflow: {str(e)}")
        raise
def createMessage(self, messageData: Dict[str, Any]):
    """Create a new message by delegating to the chat interface and append to in-memory workflow.

    Args:
        messageData: Payload forwarded verbatim to the chat interface.

    Returns:
        The message object created by the chat interface.

    Raises:
        Exception: Re-raises any error from the chat interface after logging.
    """
    try:
        message = self.interfaceDbChat.createMessage(messageData)
        try:
            # Keep the in-memory workflow's message list in sync with the DB.
            workflow = getattr(self.services, 'currentWorkflow', None)
            if workflow and hasattr(workflow, 'messages') and message:
                # Deduplicate by id only when the new message actually has one;
                # otherwise two distinct id-less messages would wrongly collide
                # on None == None and the second append would be skipped.
                msg_id = getattr(message, 'id', None)
                already_present = msg_id is not None and any(
                    getattr(m, 'id', None) == msg_id for m in workflow.messages
                )
                if not already_present:
                    workflow.messages.append(message)
        except Exception:
            # Best-effort sync: never fail message creation on a local append issue.
            pass
        return message
    except Exception as e:
        logger.error(f"Error creating message: {str(e)}")
        raise
@ -519,9 +529,24 @@ class WorkflowService:
raise
def createLog(self, logData: Dict[str, Any]):
    """Create a new log entry by delegating to the chat interface and append to in-memory workflow logs.

    Args:
        logData: Payload forwarded verbatim to the chat interface.

    Returns:
        The log entry object created by the chat interface.

    Raises:
        Exception: Re-raises any error from the chat interface after logging.
    """
    try:
        log_entry = self.interfaceDbChat.createLog(logData)
        try:
            # Keep the in-memory workflow's log list in sync with the DB.
            workflow = getattr(self.services, 'currentWorkflow', None)
            if workflow and hasattr(workflow, 'logs') and log_entry:
                # Deduplicate by id when present; otherwise fall back to the
                # (message, publishedAt) pair as an identity key.
                entry_id = getattr(log_entry, 'id', None)
                if entry_id is not None:
                    duplicate = any(
                        getattr(existing, 'id', None) == entry_id
                        for existing in workflow.logs
                    )
                else:
                    key = (getattr(log_entry, 'message', None), getattr(log_entry, 'publishedAt', None))
                    duplicate = any(
                        (getattr(existing, 'message', None), getattr(existing, 'publishedAt', None)) == key
                        for existing in workflow.logs
                    )
                if not duplicate:
                    workflow.logs.append(log_entry)
        except Exception:
            # Best-effort sync: never fail log creation on a local append issue.
            pass
        return log_entry
    except Exception as e:
        logger.error(f"Error creating log: {str(e)}")
        raise
@ -611,6 +636,31 @@ class WorkflowService:
# Get document reference list using the exact same logic as old system
document_list = self._getDocumentReferenceList(workflow)
# Optional: dump a concise document index for debugging
try:
debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
if debug_enabled:
import os, json
from datetime import datetime, UTC
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
debug_root = "./test-chat/ai"
os.makedirs(debug_root, exist_ok=True)
doc_index = []
for bucket in ("chat", "history"):
for ex in document_list.get(bucket, []) or []:
doc_index.append({
"bucket": bucket,
"label": ex.get("documentsLabel"),
"documents": ex.get("documents", [])
})
with open(os.path.join(debug_root, f"{ts}_available_documents_index.json"), "w", encoding="utf-8") as f:
json.dump({
"workflowId": getattr(workflow, 'id', None),
"index": doc_index
}, f, ensure_ascii=False, indent=2)
except Exception:
pass
# Build index string for AI action planning
context = ""
@ -691,47 +741,50 @@ class WorkflowService:
if all_documents:
self._refreshDocumentFileAttributes(all_documents)
def _is_valid_document(doc) -> bool:
try:
size_ok = getattr(doc, 'fileSize', 0) and getattr(doc, 'fileSize', 0) > 0
id_ok = bool(getattr(doc, 'fileId', None))
mime_ok = bool(getattr(doc, 'mimeType', None))
return size_ok and id_ok and mime_ok
except Exception:
return False
chat_exchanges = []
history_exchanges = []
# Process messages in reverse order; "first" marks boundary
in_current_round = True
for message in reversed(workflow.messages):
is_first = message.status == "first" if hasattr(message, 'status') else False
# Build a DocumentExchange if message has documents and an explicit documentsLabel
doc_exchange = None
if message.documents:
existing_label = getattr(message, 'documentsLabel', None)
if existing_label:
# Validate and use the message's actual documentsLabel
validated_label = self._validateDocumentLabelConsistency(message)
doc_refs = []
for doc in message.documents:
if not _is_valid_document(doc):
# Skip empty/invalid docs
continue
doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
doc_refs.append(doc_ref)
doc_exchange = {
'documentsLabel': validated_label,
'documents': doc_refs
}
# IMPORTANT: Never synthesize new labels here. If a message lacks
# a documentsLabel, we skip adding an exchange for it.
if doc_refs:
doc_exchange = {
'documentsLabel': validated_label,
'documents': doc_refs
}
# Append to appropriate container based on boundary
if doc_exchange:
if in_current_round:
chat_exchanges.append(doc_exchange)
else:
history_exchanges.append(doc_exchange)
# Flip boundary after including the "first" message in chat
if in_current_round and is_first:
in_current_round = False
# Sort by recency: most recent first, then current round, then earlier rounds
# Sort chat exchanges by message sequence number (most recent first)
chat_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x, workflow), reverse=True)
# Sort history exchanges by message sequence number (most recent first)
history_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x, workflow), reverse=True)
return {
@ -743,11 +796,16 @@ class WorkflowService:
"""Update file attributes (fileName, fileSize, mimeType) for documents"""
for doc in documents:
try:
# Debug: Log original filename before refresh
original_filename = doc.fileName
logger.debug(f"Before refresh - Document {doc.id}: fileName='{original_filename}' (length: {len(original_filename)})")
# Use the proper WorkflowService method to get file info
# Skip invalid docs early if essential identifiers are missing
if not getattr(doc, 'fileId', None):
logger.debug(f"Skipping document {doc.id} due to missing fileId")
setattr(doc, 'fileSize', 0)
setattr(doc, 'mimeType', None)
continue
file_info = self.getFileInfo(doc.fileId)
if file_info:
db_filename = file_info.get("fileName", doc.fileName)
@ -757,10 +815,16 @@ class WorkflowService:
doc.fileSize = file_info.get("size", doc.fileSize)
doc.mimeType = file_info.get("mimeType", doc.mimeType)
# Debug: Log final filename after refresh
# Mark invalid if missing mimeType
if not doc.mimeType:
logger.debug(f"Document {doc.id} has missing mimeType; will be filtered from index")
setattr(doc, 'fileSize', 0)
logger.debug(f"After refresh - Document {doc.id}: fileName='{doc.fileName}' (length: {len(doc.fileName)})")
else:
logger.warning(f"File not found for document {doc.id}, fileId: {doc.fileId}")
setattr(doc, 'fileSize', 0)
setattr(doc, 'mimeType', None)
except Exception as e:
logger.error(f"Error refreshing file attributes for document {doc.id}: {e}")

View file

@ -38,12 +38,15 @@ class ContentValidator:
return ""
def _createFailedValidationResult(self, error: str) -> Dict[str, Any]:
"""Creates a failed validation result"""
"""Creates a failed validation result in a schema-stable shape"""
return {
"overallSuccess": False,
"qualityScore": 0.0,
"overallSuccess": None, # Unknown when validator itself failed
"qualityScore": None,
"validationDetails": [],
"improvementSuggestions": [f"NEXT STEP: Fix validation error - {error}. Check system logs for more details and retry the operation."]
"improvementSuggestions": [f"NEXT STEP: Fix validation error - {error}. Check system logs for more details and retry the operation."],
"schemaCompliant": False,
"originalType": "error",
"missingFields": ["overallSuccess", "qualityScore"],
}
def _isValidJsonResponse(self, response: str) -> bool:
@ -60,7 +63,7 @@ class ContentValidator:
return False
def _extractFallbackValidationResult(self, response: str) -> Dict[str, Any]:
"""Extracts validation result from malformed AI response"""
"""Extracts a minimal validation result from a malformed AI response (schema-stable)"""
try:
import re
@ -79,16 +82,23 @@ class ContentValidator:
else:
overall_success = False
return {
"overallSuccess": overall_success if isinstance(overall_success, bool) else (overall_success.group(1).lower() == 'true' if overall_success else False),
"qualityScore": float(quality_score.group(1)) if quality_score else 0.5,
parsed_overall = overall_success if isinstance(overall_success, bool) else (overall_success.group(1).lower() == 'true' if overall_success else None)
parsed_quality = float(quality_score.group(1)) if quality_score else None
result = {
"overallSuccess": parsed_overall,
"qualityScore": parsed_quality,
"validationDetails": [{
"documentName": "AI Validation (Fallback)",
"gapAnalysis": gap_analysis.group(1) if gap_analysis else "Unable to parse detailed analysis",
"successCriteriaMet": [False] # Conservative fallback
"successCriteriaMet": []
}],
"improvementSuggestions": ["NEXT STEP: AI response was malformed - retry the operation for better results"]
"improvementSuggestions": ["NEXT STEP: AI response was malformed - retry the operation for better results"],
"schemaCompliant": False,
"originalType": "text",
"missingFields": [k for k, v in {"overallSuccess": parsed_overall, "qualityScore": parsed_quality}.items() if v is None],
}
return result
except Exception as e:
logger.error(f"Fallback extraction failed: {str(e)}")
return None
@ -241,17 +251,38 @@ RESPOND WITH THIS EXACT JSON FORMAT - NO OTHER TEXT:
try:
aiResult = json.loads(result)
logger.info("AI validation JSON parsed successfully")
return {
"overallSuccess": aiResult.get("overallSuccess", False),
"qualityScore": aiResult.get("qualityScore", 0.0),
"validationDetails": aiResult.get("validationDetails", [{
overall = aiResult.get("overallSuccess")
quality = aiResult.get("qualityScore")
details = aiResult.get("validationDetails")
gap = aiResult.get("gapAnalysis", "")
criteria = aiResult.get("successCriteriaMet")
improvements = aiResult.get("improvementSuggestions", [])
# Normalize into schema-stable object without forcing failure defaults
normalized = {
"overallSuccess": overall if isinstance(overall, bool) else None,
"qualityScore": float(quality) if isinstance(quality, (int, float)) else None,
"validationDetails": details if isinstance(details, list) else [{
"documentName": "AI Validation",
"gapAnalysis": aiResult.get("gapAnalysis", ""),
"successCriteriaMet": aiResult.get("successCriteriaMet", [False])
}]),
"improvementSuggestions": aiResult.get("improvementSuggestions", [])
"gapAnalysis": gap,
"successCriteriaMet": criteria if isinstance(criteria, list) else []
}],
"improvementSuggestions": improvements,
"schemaCompliant": True,
"originalType": "json",
"missingFields": []
}
if normalized["overallSuccess"] is None:
normalized["missingFields"].append("overallSuccess")
if normalized["qualityScore"] is None:
normalized["missingFields"].append("qualityScore")
# If any critical field missing, mark as not fully compliant
if normalized["missingFields"]:
normalized["schemaCompliant"] = False
return normalized
except json.JSONDecodeError as json_error:
logger.warning(f"All AI validation attempts failed - invalid JSON: {str(json_error)}")

View file

@ -20,11 +20,22 @@ class ProgressTracker:
def updateProgress(self, result: Any, validation: Dict[str, Any], intent: Dict[str, Any]):
"""Updates progress tracking based on action result"""
try:
overallSuccess = validation.get('overallSuccess', False)
qualityScore = validation.get('qualityScore', 0)
schemaCompliant = validation.get('schemaCompliant', True)
overallSuccess = validation.get('overallSuccess', None)
qualityScore = validation.get('qualityScore', None)
improvementSuggestions = validation.get('improvementSuggestions', [])
if overallSuccess and qualityScore > 0.7:
# If validation is not schema compliant, treat as indeterminate (do not count as failure)
if not schemaCompliant or overallSuccess is None or qualityScore is None:
self.partialAchievements.append({
"objective": intent.get('primaryGoal', 'Unknown'),
"partialAchievement": "Validation indeterminate (schema non-compliant or missing fields)",
"missingFields": validation.get('missingFields', []),
"timestamp": datetime.now(timezone.utc).timestamp()
})
self.currentPhase = "partial"
logger.info(f"Indeterminate validation (no penalty): {intent.get('primaryGoal', 'Unknown')}")
elif overallSuccess and qualityScore > 0.7:
# Successful completion
self.completedObjectives.append({
"objective": intent.get('primaryGoal', 'Unknown'),
@ -89,9 +100,13 @@ class ProgressTracker:
return False
# If validation shows success, don't continue
if validation.get('overallSuccess', False):
if validation.get('schemaCompliant', True) and validation.get('overallSuccess', False):
return False
# If validation is not schema compliant, allow one refinement pass without counting as failure
if not validation.get('schemaCompliant', True):
return True
# Otherwise, continue
return True

View file

@ -240,12 +240,15 @@ class ReactMode(BaseMode):
if ref_match:
valid_refs.append(ref_match.group(1))
# Check if all provided references are valid
# Prefer non-empty documents: the available_docs index is already filtered to skip empty docs
preferred_refs = set(valid_refs)
# Check if all provided references are valid and prefer non-empty
for ref in document_refs:
if ref not in valid_refs:
logger.error(f"Invalid document reference: {ref}")
if ref not in preferred_refs:
logger.error(f"Invalid or empty document reference: {ref}")
logger.error(f"Available references: {valid_refs}")
raise ValueError(f"Document reference '{ref}' not found in available documents. Use only exact references from AVAILABLE_DOCUMENTS_INDEX.")
raise ValueError(f"Document reference '{ref}' not found or refers to empty document. Use only non-empty references from AVAILABLE_DOCUMENTS_INDEX.")
except Exception as e:
logger.error(f"Error validating document references: {str(e)}")