From 1d347eb15a99d1be3c998fbc044a2f410542463d Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Wed, 15 Oct 2025 12:41:02 +0200 Subject: [PATCH] adapted ai chat validation --- .../mainServiceGeneration.py | 8 +- .../renderers/rendererHtml.py | 18 +++ .../serviceWorkflow/mainServiceWorkflow.py | 124 +++++++++++++----- .../processing/adaptive/contentValidator.py | 69 +++++++--- .../processing/adaptive/progressTracker.py | 23 +++- .../workflows/processing/modes/modeReact.py | 11 +- 6 files changed, 195 insertions(+), 58 deletions(-) diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py index 8ed6423b..d38cea96 100644 --- a/modules/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/services/serviceGeneration/mainServiceGeneration.py @@ -323,7 +323,7 @@ class GenerationService: try: debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) if debug_enabled: - import os + import os, json ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") debug_root = "./test-chat/ai" debug_dir = os.path.join(debug_root, f"render_input_{ts}") @@ -332,6 +332,12 @@ class GenerationService: f.write(f"title: {title}\nformat: {outputFormat}\ncontent_type: {type(extractedContent).__name__}\n") f.write(f"content_size: {len(str(extractedContent))} characters\n") f.write(f"sections_count: {len(extractedContent.get('sections', []))}\n") + # Also write the extracted content JSON for inspection + try: + with open(os.path.join(debug_dir, "extracted_content.json"), "w", encoding="utf-8") as jf: + json.dump(extractedContent, jf, ensure_ascii=False, indent=2) + except Exception: + pass except Exception: pass diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index 1dedaf46..1b202886 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -363,6 +363,15 @@ class RendererHtml(BaseRenderer): def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON heading to HTML using AI-generated styles.""" try: + # Normalize non-dict inputs + if isinstance(heading_data, str): + heading_data = {"text": heading_data, "level": 2} + elif isinstance(heading_data, list): + # Render a list as bullet list under a default heading label + return self._render_json_bullet_list({"items": heading_data}, styles) + elif not isinstance(heading_data, dict): + return "" + level = heading_data.get("level", 1) text = heading_data.get("text", "") @@ -379,6 +388,15 @@ class RendererHtml(BaseRenderer): def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a JSON paragraph to HTML using AI-generated styles.""" try: + # Normalize non-dict inputs + if isinstance(paragraph_data, str): + paragraph_data = {"text": paragraph_data} + elif isinstance(paragraph_data, list): + # Treat list as bullet list paragraph + return self._render_json_bullet_list({"items": paragraph_data}, styles) + elif not isinstance(paragraph_data, dict): + return "" + text = paragraph_data.get("text", "") if text: diff --git a/modules/services/serviceWorkflow/mainServiceWorkflow.py b/modules/services/serviceWorkflow/mainServiceWorkflow.py index afc4e3b5..dba44e80 100644 --- a/modules/services/serviceWorkflow/mainServiceWorkflow.py +++ b/modules/services/serviceWorkflow/mainServiceWorkflow.py @@ -79,14 +79,7 @@ class WorkflowService: """Get ChatDocuments from a list of document references using all three formats.""" try: workflow = self.services.currentWorkflow - - # Reload workflow from database to ensure we have all messages - if hasattr(workflow, 'id'): - try: - workflow = self.getWorkflow(workflow.id) - logger.debug(f"Reloaded workflow {workflow.id} with {len(workflow.messages)} messages") - except Exception as e: - logger.warning(f"Could not reload workflow from database: {str(e)}") + logger.debug(f"getChatDocumentsFromDocumentList: currentWorkflow.id = {workflow.id if workflow and hasattr(workflow, 'id') else 'NO_ID'}") all_documents = [] for doc_ref in documentList: @@ -497,15 +490,32 @@ class WorkflowService: def getWorkflow(self, workflowId: str): """Get workflow by ID by delegating to the chat interface""" try: - return self.interfaceDbChat.getWorkflow(workflowId) + logger.debug(f"getWorkflow called with workflowId: {workflowId}") + result = self.interfaceDbChat.getWorkflow(workflowId) + if result: + logger.debug(f"getWorkflow returned workflow with ID: {result.id}") + else: + logger.warning(f"getWorkflow returned None for workflowId: {workflowId}") + return result except Exception as e: logger.error(f"Error getting workflow: {str(e)}") raise def createMessage(self, messageData: Dict[str, Any]): - """Create a new message by delegating to the chat interface""" + """Create a new message by delegating to the chat interface and append to in-memory workflow.""" try: - return self.interfaceDbChat.createMessage(messageData) + message = self.interfaceDbChat.createMessage(messageData) + try: + # Keep in-memory workflow messages in sync + workflow = getattr(self.services, 'currentWorkflow', None) + if workflow and hasattr(workflow, 'messages') and message: + # Avoid duplicates if same message was already appended + if not any(getattr(m, 'id', None) == getattr(message, 'id', None) for m in workflow.messages): + workflow.messages.append(message) + except Exception: + # Never fail if local append has issues + pass + return message except Exception as e: logger.error(f"Error creating message: {str(e)}") raise @@ -519,9 +529,24 @@ class WorkflowService: raise def createLog(self, logData: Dict[str, Any]): - """Create a new log entry by delegating to the chat interface""" + """Create a new log entry by delegating to the chat interface and append to in-memory workflow logs.""" try: - return self.interfaceDbChat.createLog(logData) + log_entry = self.interfaceDbChat.createLog(logData) + try: + workflow = getattr(self.services, 'currentWorkflow', None) + if workflow and hasattr(workflow, 'logs') and log_entry: + # Avoid duplicates by id if present, else compare message+timestamp tuple + get_id = getattr(log_entry, 'id', None) + if get_id is not None: + if not any(getattr(l, 'id', None) == get_id for l in workflow.logs): + workflow.logs.append(log_entry) + else: + key = (getattr(log_entry, 'message', None), getattr(log_entry, 'publishedAt', None)) + if not any((getattr(l, 'message', None), getattr(l, 'publishedAt', None)) == key for l in workflow.logs): + workflow.logs.append(log_entry) + except Exception: + pass + return log_entry except Exception as e: logger.error(f"Error creating log: {str(e)}") raise @@ -611,6 +636,31 @@ class WorkflowService: # Get document reference list using the exact same logic as old system document_list = self._getDocumentReferenceList(workflow) + # Optional: dump a concise document index for debugging + try: + debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if debug_enabled: + import os, json + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + os.makedirs(debug_root, exist_ok=True) + doc_index = [] + for bucket in ("chat", "history"): + for ex in document_list.get(bucket, []) or []: + doc_index.append({ + "bucket": bucket, + "label": ex.get("documentsLabel"), + "documents": ex.get("documents", []) + }) + with open(os.path.join(debug_root, f"{ts}_available_documents_index.json"), "w", encoding="utf-8") as f: + json.dump({ + "workflowId": getattr(workflow, 'id', None), + "index": doc_index + }, f, ensure_ascii=False, indent=2) + except Exception: + pass + # Build index string for AI action planning context = "" @@ -691,47 +741,50 @@ class WorkflowService: if all_documents: self._refreshDocumentFileAttributes(all_documents) + def _is_valid_document(doc) -> bool: + try: + size_ok = getattr(doc, 'fileSize', 0) and getattr(doc, 'fileSize', 0) > 0 + id_ok = bool(getattr(doc, 'fileId', None)) + mime_ok = bool(getattr(doc, 'mimeType', None)) + return size_ok and id_ok and mime_ok + except Exception: + return False + chat_exchanges = [] history_exchanges = [] - # Process messages in reverse order; "first" marks boundary in_current_round = True for message in reversed(workflow.messages): is_first = message.status == "first" if hasattr(message, 'status') else False - # Build a DocumentExchange if message has documents and an explicit documentsLabel doc_exchange = None if message.documents: existing_label = getattr(message, 'documentsLabel', None) if existing_label: - # Validate and use the message's actual documentsLabel validated_label = self._validateDocumentLabelConsistency(message) doc_refs = [] for doc in message.documents: + if not _is_valid_document(doc): + # Skip empty/invalid docs + continue doc_ref = self._getDocumentReferenceFromChatDocument(doc, message) doc_refs.append(doc_ref) - doc_exchange = { - 'documentsLabel': validated_label, - 'documents': doc_refs - } - # IMPORTANT: Never synthesize new labels here. If a message lacks - # a documentsLabel, we skip adding an exchange for it. + if doc_refs: + doc_exchange = { + 'documentsLabel': validated_label, + 'documents': doc_refs + } - # Append to appropriate container based on boundary if doc_exchange: if in_current_round: chat_exchanges.append(doc_exchange) else: history_exchanges.append(doc_exchange) - # Flip boundary after including the "first" message in chat if in_current_round and is_first: in_current_round = False - # Sort by recency: most recent first, then current round, then earlier rounds - # Sort chat exchanges by message sequence number (most recent first) chat_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x, workflow), reverse=True) - # Sort history exchanges by message sequence number (most recent first) history_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x, workflow), reverse=True) return { @@ -743,11 +796,16 @@ class WorkflowService: """Update file attributes (fileName, fileSize, mimeType) for documents""" for doc in documents: try: - # Debug: Log original filename before refresh original_filename = doc.fileName logger.debug(f"Before refresh - Document {doc.id}: fileName='{original_filename}' (length: {len(original_filename)})") - # Use the proper WorkflowService method to get file info + # Skip invalid docs early if essential identifiers are missing + if not getattr(doc, 'fileId', None): + logger.debug(f"Skipping document {doc.id} due to missing fileId") + setattr(doc, 'fileSize', 0) + setattr(doc, 'mimeType', None) + continue + file_info = self.getFileInfo(doc.fileId) if file_info: db_filename = file_info.get("fileName", doc.fileName) @@ -757,10 +815,16 @@ class WorkflowService: doc.fileSize = file_info.get("size", doc.fileSize) doc.mimeType = file_info.get("mimeType", doc.mimeType) - # Debug: Log final filename after refresh + # Mark invalid if missing mimeType + if not doc.mimeType: + logger.debug(f"Document {doc.id} has missing mimeType; will be filtered from index") + setattr(doc, 'fileSize', 0) + logger.debug(f"After refresh - Document {doc.id}: fileName='{doc.fileName}' (length: {len(doc.fileName)})") else: logger.warning(f"File not found for document {doc.id}, fileId: {doc.fileId}") + setattr(doc, 'fileSize', 0) + setattr(doc, 'mimeType', None) except Exception as e: logger.error(f"Error refreshing file attributes for document {doc.id}: {e}") diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py index 0279df90..5423dc8e 100644 --- a/modules/workflows/processing/adaptive/contentValidator.py +++ b/modules/workflows/processing/adaptive/contentValidator.py @@ -38,12 +38,15 @@ class ContentValidator: return "" def _createFailedValidationResult(self, error: str) -> Dict[str, Any]: - """Creates a failed validation result""" + """Creates a failed validation result in a schema-stable shape""" return { - "overallSuccess": False, - "qualityScore": 0.0, + "overallSuccess": None, # Unknown when validator itself failed + "qualityScore": None, "validationDetails": [], - "improvementSuggestions": [f"NEXT STEP: Fix validation error - {error}. Check system logs for more details and retry the operation."] + "improvementSuggestions": [f"NEXT STEP: Fix validation error - {error}. Check system logs for more details and retry the operation."], + "schemaCompliant": False, + "originalType": "error", + "missingFields": ["overallSuccess", "qualityScore"], } def _isValidJsonResponse(self, response: str) -> bool: @@ -60,7 +63,7 @@ class ContentValidator: return False def _extractFallbackValidationResult(self, response: str) -> Dict[str, Any]: - """Extracts validation result from malformed AI response""" + """Extracts a minimal validation result from a malformed AI response (schema-stable)""" try: import re @@ -79,16 +82,23 @@ class ContentValidator: else: overall_success = False - return { - "overallSuccess": overall_success if isinstance(overall_success, bool) else (overall_success.group(1).lower() == 'true' if overall_success else False), - "qualityScore": float(quality_score.group(1)) if quality_score else 0.5, + parsed_overall = overall_success if isinstance(overall_success, bool) else (overall_success.group(1).lower() == 'true' if overall_success else None) + parsed_quality = float(quality_score.group(1)) if quality_score else None + + result = { + "overallSuccess": parsed_overall, + "qualityScore": parsed_quality, "validationDetails": [{ "documentName": "AI Validation (Fallback)", "gapAnalysis": gap_analysis.group(1) if gap_analysis else "Unable to parse detailed analysis", - "successCriteriaMet": [False] # Conservative fallback + "successCriteriaMet": [] }], - "improvementSuggestions": ["NEXT STEP: AI response was malformed - retry the operation for better results"] + "improvementSuggestions": ["NEXT STEP: AI response was malformed - retry the operation for better results"], + "schemaCompliant": False, + "originalType": "text", + "missingFields": [k for k, v in {"overallSuccess": parsed_overall, "qualityScore": parsed_quality}.items() if v is None], } + return result except Exception as e: logger.error(f"Fallback extraction failed: {str(e)}") return None @@ -241,17 +251,38 @@ RESPOND WITH THIS EXACT JSON FORMAT - NO OTHER TEXT: try: aiResult = json.loads(result) logger.info("AI validation JSON parsed successfully") - - return { - "overallSuccess": aiResult.get("overallSuccess", False), - "qualityScore": aiResult.get("qualityScore", 0.0), - "validationDetails": aiResult.get("validationDetails", [{ + + overall = aiResult.get("overallSuccess") + quality = aiResult.get("qualityScore") + details = aiResult.get("validationDetails") + gap = aiResult.get("gapAnalysis", "") + criteria = aiResult.get("successCriteriaMet") + improvements = aiResult.get("improvementSuggestions", []) + + # Normalize into schema-stable object without forcing failure defaults + normalized = { + "overallSuccess": overall if isinstance(overall, bool) else None, + "qualityScore": float(quality) if isinstance(quality, (int, float)) else None, + "validationDetails": details if isinstance(details, list) else [{ "documentName": "AI Validation", - "gapAnalysis": aiResult.get("gapAnalysis", ""), - "successCriteriaMet": aiResult.get("successCriteriaMet", [False]) - }]), - "improvementSuggestions": aiResult.get("improvementSuggestions", []) + "gapAnalysis": gap, + "successCriteriaMet": criteria if isinstance(criteria, list) else [] + }], + "improvementSuggestions": improvements, + "schemaCompliant": True, + "originalType": "json", + "missingFields": [] } + + if normalized["overallSuccess"] is None: + normalized["missingFields"].append("overallSuccess") + if normalized["qualityScore"] is None: + normalized["missingFields"].append("qualityScore") + # If any critical field missing, mark as not fully compliant + if normalized["missingFields"]: + normalized["schemaCompliant"] = False + + return normalized except json.JSONDecodeError as json_error: logger.warning(f"All AI validation attempts failed - invalid JSON: {str(json_error)}") diff --git a/modules/workflows/processing/adaptive/progressTracker.py b/modules/workflows/processing/adaptive/progressTracker.py index 80dfcf63..b5a41533 100644 --- a/modules/workflows/processing/adaptive/progressTracker.py +++ b/modules/workflows/processing/adaptive/progressTracker.py @@ -20,11 +20,22 @@ class ProgressTracker: def updateProgress(self, result: Any, validation: Dict[str, Any], intent: Dict[str, Any]): """Updates progress tracking based on action result""" try: - overallSuccess = validation.get('overallSuccess', False) - qualityScore = validation.get('qualityScore', 0) + schemaCompliant = validation.get('schemaCompliant', True) + overallSuccess = validation.get('overallSuccess', None) + qualityScore = validation.get('qualityScore', None) improvementSuggestions = validation.get('improvementSuggestions', []) - if overallSuccess and qualityScore > 0.7: + # If validation is not schema compliant, treat as indeterminate (do not count as failure) + if not schemaCompliant or overallSuccess is None or qualityScore is None: + self.partialAchievements.append({ + "objective": intent.get('primaryGoal', 'Unknown'), + "partialAchievement": "Validation indeterminate (schema non-compliant or missing fields)", + "missingFields": validation.get('missingFields', []), + "timestamp": datetime.now(timezone.utc).timestamp() + }) + self.currentPhase = "partial" + logger.info(f"Indeterminate validation (no penalty): {intent.get('primaryGoal', 'Unknown')}") + elif overallSuccess and qualityScore > 0.7: # Successful completion self.completedObjectives.append({ "objective": intent.get('primaryGoal', 'Unknown'), @@ -89,9 +100,13 @@ class ProgressTracker: return False # If validation shows success, don't continue - if validation.get('overallSuccess', False): + if validation.get('schemaCompliant', True) and validation.get('overallSuccess', False): return False + # If validation is not schema compliant, allow one refinement pass without counting as failure + if not validation.get('schemaCompliant', True): + return True + # Otherwise, continue return True diff --git a/modules/workflows/processing/modes/modeReact.py b/modules/workflows/processing/modes/modeReact.py index 9a79dcb3..a2091ac0 100644 --- a/modules/workflows/processing/modes/modeReact.py +++ b/modules/workflows/processing/modes/modeReact.py @@ -240,12 +240,15 @@ class ReactMode(BaseMode): if ref_match: valid_refs.append(ref_match.group(1)) - # Check if all provided references are valid + # Prefer non-empty documents: the available_docs index is already filtered to skip empty docs + preferred_refs = set(valid_refs) + + # Check if all provided references are valid and prefer non-empty for ref in document_refs: - if ref not in valid_refs: - logger.error(f"Invalid document reference: {ref}") + if ref not in preferred_refs: + logger.error(f"Invalid or empty document reference: {ref}") logger.error(f"Available references: {valid_refs}") - raise ValueError(f"Document reference '{ref}' not found in available documents. Use only exact references from AVAILABLE_DOCUMENTS_INDEX.") + raise ValueError(f"Document reference '{ref}' not found or refers to empty document. Use only non-empty references from AVAILABLE_DOCUMENTS_INDEX.") except Exception as e: logger.error(f"Error validating document references: {str(e)}")