From 1d347eb15a99d1be3c998fbc044a2f410542463d Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Wed, 15 Oct 2025 12:41:02 +0200
Subject: [PATCH] Adapt AI chat validation
---
.../mainServiceGeneration.py | 8 +-
.../renderers/rendererHtml.py | 18 +++
.../serviceWorkflow/mainServiceWorkflow.py | 124 +++++++++++++-----
.../processing/adaptive/contentValidator.py | 69 +++++++---
.../processing/adaptive/progressTracker.py | 23 +++-
.../workflows/processing/modes/modeReact.py | 11 +-
6 files changed, 195 insertions(+), 58 deletions(-)
diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py
index 8ed6423b..d38cea96 100644
--- a/modules/services/serviceGeneration/mainServiceGeneration.py
+++ b/modules/services/serviceGeneration/mainServiceGeneration.py
@@ -323,7 +323,7 @@ class GenerationService:
try:
debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
if debug_enabled:
- import os
+ import os, json
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
debug_root = "./test-chat/ai"
debug_dir = os.path.join(debug_root, f"render_input_{ts}")
@@ -332,6 +332,12 @@ class GenerationService:
f.write(f"title: {title}\nformat: {outputFormat}\ncontent_type: {type(extractedContent).__name__}\n")
f.write(f"content_size: {len(str(extractedContent))} characters\n")
f.write(f"sections_count: {len(extractedContent.get('sections', []))}\n")
+ # Also write the extracted content JSON for inspection
+ try:
+ with open(os.path.join(debug_dir, "extracted_content.json"), "w", encoding="utf-8") as jf:
+ json.dump(extractedContent, jf, ensure_ascii=False, indent=2)
+ except Exception:
+ pass
except Exception:
pass
diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py
index 1dedaf46..1b202886 100644
--- a/modules/services/serviceGeneration/renderers/rendererHtml.py
+++ b/modules/services/serviceGeneration/renderers/rendererHtml.py
@@ -363,6 +363,15 @@ class RendererHtml(BaseRenderer):
def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON heading to HTML using AI-generated styles."""
try:
+ # Normalize non-dict inputs
+ if isinstance(heading_data, str):
+ heading_data = {"text": heading_data, "level": 2}
+ elif isinstance(heading_data, list):
+ # Render a list as bullet list under a default heading label
+ return self._render_json_bullet_list({"items": heading_data}, styles)
+ elif not isinstance(heading_data, dict):
+ return ""
+
level = heading_data.get("level", 1)
text = heading_data.get("text", "")
@@ -379,6 +388,15 @@ class RendererHtml(BaseRenderer):
def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON paragraph to HTML using AI-generated styles."""
try:
+ # Normalize non-dict inputs
+ if isinstance(paragraph_data, str):
+ paragraph_data = {"text": paragraph_data}
+ elif isinstance(paragraph_data, list):
+ # Treat list as bullet list paragraph
+ return self._render_json_bullet_list({"items": paragraph_data}, styles)
+ elif not isinstance(paragraph_data, dict):
+ return ""
+
text = paragraph_data.get("text", "")
if text:
diff --git a/modules/services/serviceWorkflow/mainServiceWorkflow.py b/modules/services/serviceWorkflow/mainServiceWorkflow.py
index afc4e3b5..dba44e80 100644
--- a/modules/services/serviceWorkflow/mainServiceWorkflow.py
+++ b/modules/services/serviceWorkflow/mainServiceWorkflow.py
@@ -79,14 +79,7 @@ class WorkflowService:
"""Get ChatDocuments from a list of document references using all three formats."""
try:
workflow = self.services.currentWorkflow
-
- # Reload workflow from database to ensure we have all messages
- if hasattr(workflow, 'id'):
- try:
- workflow = self.getWorkflow(workflow.id)
- logger.debug(f"Reloaded workflow {workflow.id} with {len(workflow.messages)} messages")
- except Exception as e:
- logger.warning(f"Could not reload workflow from database: {str(e)}")
+ logger.debug(f"getChatDocumentsFromDocumentList: currentWorkflow.id = {workflow.id if workflow and hasattr(workflow, 'id') else 'NO_ID'}")
all_documents = []
for doc_ref in documentList:
@@ -497,15 +490,32 @@ class WorkflowService:
def getWorkflow(self, workflowId: str):
"""Get workflow by ID by delegating to the chat interface"""
try:
- return self.interfaceDbChat.getWorkflow(workflowId)
+ logger.debug(f"getWorkflow called with workflowId: {workflowId}")
+ result = self.interfaceDbChat.getWorkflow(workflowId)
+ if result:
+ logger.debug(f"getWorkflow returned workflow with ID: {result.id}")
+ else:
+ logger.warning(f"getWorkflow returned None for workflowId: {workflowId}")
+ return result
except Exception as e:
logger.error(f"Error getting workflow: {str(e)}")
raise
def createMessage(self, messageData: Dict[str, Any]):
- """Create a new message by delegating to the chat interface"""
+ """Create a new message by delegating to the chat interface and append to in-memory workflow."""
try:
- return self.interfaceDbChat.createMessage(messageData)
+ message = self.interfaceDbChat.createMessage(messageData)
+ try:
+ # Keep in-memory workflow messages in sync
+ workflow = getattr(self.services, 'currentWorkflow', None)
+ if workflow and hasattr(workflow, 'messages') and message:
+ # Avoid duplicates if same message was already appended
+ if not any(getattr(m, 'id', None) == getattr(message, 'id', None) for m in workflow.messages):
+ workflow.messages.append(message)
+ except Exception:
+ # Never fail if local append has issues
+ pass
+ return message
except Exception as e:
logger.error(f"Error creating message: {str(e)}")
raise
@@ -519,9 +529,24 @@ class WorkflowService:
raise
def createLog(self, logData: Dict[str, Any]):
- """Create a new log entry by delegating to the chat interface"""
+ """Create a new log entry by delegating to the chat interface and append to in-memory workflow logs."""
try:
- return self.interfaceDbChat.createLog(logData)
+ log_entry = self.interfaceDbChat.createLog(logData)
+ try:
+ workflow = getattr(self.services, 'currentWorkflow', None)
+ if workflow and hasattr(workflow, 'logs') and log_entry:
+ # Avoid duplicates by id if present, else compare message+timestamp tuple
+ get_id = getattr(log_entry, 'id', None)
+ if get_id is not None:
+ if not any(getattr(l, 'id', None) == get_id for l in workflow.logs):
+ workflow.logs.append(log_entry)
+ else:
+ key = (getattr(log_entry, 'message', None), getattr(log_entry, 'publishedAt', None))
+ if not any((getattr(l, 'message', None), getattr(l, 'publishedAt', None)) == key for l in workflow.logs):
+ workflow.logs.append(log_entry)
+ except Exception:
+ pass
+ return log_entry
except Exception as e:
logger.error(f"Error creating log: {str(e)}")
raise
@@ -611,6 +636,31 @@ class WorkflowService:
# Get document reference list using the exact same logic as old system
document_list = self._getDocumentReferenceList(workflow)
+ # Optional: dump a concise document index for debugging
+ try:
+ debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
+ if debug_enabled:
+ import os, json
+ from datetime import datetime, UTC
+ ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
+ debug_root = "./test-chat/ai"
+ os.makedirs(debug_root, exist_ok=True)
+ doc_index = []
+ for bucket in ("chat", "history"):
+ for ex in document_list.get(bucket, []) or []:
+ doc_index.append({
+ "bucket": bucket,
+ "label": ex.get("documentsLabel"),
+ "documents": ex.get("documents", [])
+ })
+ with open(os.path.join(debug_root, f"{ts}_available_documents_index.json"), "w", encoding="utf-8") as f:
+ json.dump({
+ "workflowId": getattr(workflow, 'id', None),
+ "index": doc_index
+ }, f, ensure_ascii=False, indent=2)
+ except Exception:
+ pass
+
# Build index string for AI action planning
context = ""
@@ -691,47 +741,50 @@ class WorkflowService:
if all_documents:
self._refreshDocumentFileAttributes(all_documents)
+ def _is_valid_document(doc) -> bool:
+ try:
+ size_ok = getattr(doc, 'fileSize', 0) and getattr(doc, 'fileSize', 0) > 0
+ id_ok = bool(getattr(doc, 'fileId', None))
+ mime_ok = bool(getattr(doc, 'mimeType', None))
+ return size_ok and id_ok and mime_ok
+ except Exception:
+ return False
+
chat_exchanges = []
history_exchanges = []
- # Process messages in reverse order; "first" marks boundary
in_current_round = True
for message in reversed(workflow.messages):
is_first = message.status == "first" if hasattr(message, 'status') else False
- # Build a DocumentExchange if message has documents and an explicit documentsLabel
doc_exchange = None
if message.documents:
existing_label = getattr(message, 'documentsLabel', None)
if existing_label:
- # Validate and use the message's actual documentsLabel
validated_label = self._validateDocumentLabelConsistency(message)
doc_refs = []
for doc in message.documents:
+ if not _is_valid_document(doc):
+ # Skip empty/invalid docs
+ continue
doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
doc_refs.append(doc_ref)
- doc_exchange = {
- 'documentsLabel': validated_label,
- 'documents': doc_refs
- }
- # IMPORTANT: Never synthesize new labels here. If a message lacks
- # a documentsLabel, we skip adding an exchange for it.
+ if doc_refs:
+ doc_exchange = {
+ 'documentsLabel': validated_label,
+ 'documents': doc_refs
+ }
- # Append to appropriate container based on boundary
if doc_exchange:
if in_current_round:
chat_exchanges.append(doc_exchange)
else:
history_exchanges.append(doc_exchange)
- # Flip boundary after including the "first" message in chat
if in_current_round and is_first:
in_current_round = False
- # Sort by recency: most recent first, then current round, then earlier rounds
- # Sort chat exchanges by message sequence number (most recent first)
chat_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x, workflow), reverse=True)
- # Sort history exchanges by message sequence number (most recent first)
history_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x, workflow), reverse=True)
return {
@@ -743,11 +796,16 @@ class WorkflowService:
"""Update file attributes (fileName, fileSize, mimeType) for documents"""
for doc in documents:
try:
- # Debug: Log original filename before refresh
original_filename = doc.fileName
logger.debug(f"Before refresh - Document {doc.id}: fileName='{original_filename}' (length: {len(original_filename)})")
- # Use the proper WorkflowService method to get file info
+ # Skip invalid docs early if essential identifiers are missing
+ if not getattr(doc, 'fileId', None):
+ logger.debug(f"Skipping document {doc.id} due to missing fileId")
+ setattr(doc, 'fileSize', 0)
+ setattr(doc, 'mimeType', None)
+ continue
+
file_info = self.getFileInfo(doc.fileId)
if file_info:
db_filename = file_info.get("fileName", doc.fileName)
@@ -757,10 +815,16 @@ class WorkflowService:
doc.fileSize = file_info.get("size", doc.fileSize)
doc.mimeType = file_info.get("mimeType", doc.mimeType)
- # Debug: Log final filename after refresh
+ # Mark invalid if missing mimeType
+ if not doc.mimeType:
+ logger.debug(f"Document {doc.id} has missing mimeType; will be filtered from index")
+ setattr(doc, 'fileSize', 0)
+
logger.debug(f"After refresh - Document {doc.id}: fileName='{doc.fileName}' (length: {len(doc.fileName)})")
else:
logger.warning(f"File not found for document {doc.id}, fileId: {doc.fileId}")
+ setattr(doc, 'fileSize', 0)
+ setattr(doc, 'mimeType', None)
except Exception as e:
logger.error(f"Error refreshing file attributes for document {doc.id}: {e}")
diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py
index 0279df90..5423dc8e 100644
--- a/modules/workflows/processing/adaptive/contentValidator.py
+++ b/modules/workflows/processing/adaptive/contentValidator.py
@@ -38,12 +38,15 @@ class ContentValidator:
return ""
def _createFailedValidationResult(self, error: str) -> Dict[str, Any]:
- """Creates a failed validation result"""
+ """Creates a failed validation result in a schema-stable shape"""
return {
- "overallSuccess": False,
- "qualityScore": 0.0,
+ "overallSuccess": None, # Unknown when validator itself failed
+ "qualityScore": None,
"validationDetails": [],
- "improvementSuggestions": [f"NEXT STEP: Fix validation error - {error}. Check system logs for more details and retry the operation."]
+ "improvementSuggestions": [f"NEXT STEP: Fix validation error - {error}. Check system logs for more details and retry the operation."],
+ "schemaCompliant": False,
+ "originalType": "error",
+ "missingFields": ["overallSuccess", "qualityScore"],
}
def _isValidJsonResponse(self, response: str) -> bool:
@@ -60,7 +63,7 @@ class ContentValidator:
return False
def _extractFallbackValidationResult(self, response: str) -> Dict[str, Any]:
- """Extracts validation result from malformed AI response"""
+ """Extracts a minimal validation result from a malformed AI response (schema-stable)"""
try:
import re
@@ -79,16 +82,23 @@ class ContentValidator:
else:
overall_success = False
- return {
- "overallSuccess": overall_success if isinstance(overall_success, bool) else (overall_success.group(1).lower() == 'true' if overall_success else False),
- "qualityScore": float(quality_score.group(1)) if quality_score else 0.5,
+ parsed_overall = overall_success if isinstance(overall_success, bool) else (overall_success.group(1).lower() == 'true' if overall_success else None)
+ parsed_quality = float(quality_score.group(1)) if quality_score else None
+
+ result = {
+ "overallSuccess": parsed_overall,
+ "qualityScore": parsed_quality,
"validationDetails": [{
"documentName": "AI Validation (Fallback)",
"gapAnalysis": gap_analysis.group(1) if gap_analysis else "Unable to parse detailed analysis",
- "successCriteriaMet": [False] # Conservative fallback
+ "successCriteriaMet": []
}],
- "improvementSuggestions": ["NEXT STEP: AI response was malformed - retry the operation for better results"]
+ "improvementSuggestions": ["NEXT STEP: AI response was malformed - retry the operation for better results"],
+ "schemaCompliant": False,
+ "originalType": "text",
+ "missingFields": [k for k, v in {"overallSuccess": parsed_overall, "qualityScore": parsed_quality}.items() if v is None],
}
+ return result
except Exception as e:
logger.error(f"Fallback extraction failed: {str(e)}")
return None
@@ -241,17 +251,38 @@ RESPOND WITH THIS EXACT JSON FORMAT - NO OTHER TEXT:
try:
aiResult = json.loads(result)
logger.info("AI validation JSON parsed successfully")
-
- return {
- "overallSuccess": aiResult.get("overallSuccess", False),
- "qualityScore": aiResult.get("qualityScore", 0.0),
- "validationDetails": aiResult.get("validationDetails", [{
+
+ overall = aiResult.get("overallSuccess")
+ quality = aiResult.get("qualityScore")
+ details = aiResult.get("validationDetails")
+ gap = aiResult.get("gapAnalysis", "")
+ criteria = aiResult.get("successCriteriaMet")
+ improvements = aiResult.get("improvementSuggestions", [])
+
+ # Normalize into schema-stable object without forcing failure defaults
+ normalized = {
+ "overallSuccess": overall if isinstance(overall, bool) else None,
+ "qualityScore": float(quality) if isinstance(quality, (int, float)) else None,
+ "validationDetails": details if isinstance(details, list) else [{
"documentName": "AI Validation",
- "gapAnalysis": aiResult.get("gapAnalysis", ""),
- "successCriteriaMet": aiResult.get("successCriteriaMet", [False])
- }]),
- "improvementSuggestions": aiResult.get("improvementSuggestions", [])
+ "gapAnalysis": gap,
+ "successCriteriaMet": criteria if isinstance(criteria, list) else []
+ }],
+ "improvementSuggestions": improvements,
+ "schemaCompliant": True,
+ "originalType": "json",
+ "missingFields": []
}
+
+ if normalized["overallSuccess"] is None:
+ normalized["missingFields"].append("overallSuccess")
+ if normalized["qualityScore"] is None:
+ normalized["missingFields"].append("qualityScore")
+ # If any critical field missing, mark as not fully compliant
+ if normalized["missingFields"]:
+ normalized["schemaCompliant"] = False
+
+ return normalized
except json.JSONDecodeError as json_error:
logger.warning(f"All AI validation attempts failed - invalid JSON: {str(json_error)}")
diff --git a/modules/workflows/processing/adaptive/progressTracker.py b/modules/workflows/processing/adaptive/progressTracker.py
index 80dfcf63..b5a41533 100644
--- a/modules/workflows/processing/adaptive/progressTracker.py
+++ b/modules/workflows/processing/adaptive/progressTracker.py
@@ -20,11 +20,22 @@ class ProgressTracker:
def updateProgress(self, result: Any, validation: Dict[str, Any], intent: Dict[str, Any]):
"""Updates progress tracking based on action result"""
try:
- overallSuccess = validation.get('overallSuccess', False)
- qualityScore = validation.get('qualityScore', 0)
+ schemaCompliant = validation.get('schemaCompliant', True)
+ overallSuccess = validation.get('overallSuccess', None)
+ qualityScore = validation.get('qualityScore', None)
improvementSuggestions = validation.get('improvementSuggestions', [])
- if overallSuccess and qualityScore > 0.7:
+ # If validation is not schema compliant, treat as indeterminate (do not count as failure)
+ if not schemaCompliant or overallSuccess is None or qualityScore is None:
+ self.partialAchievements.append({
+ "objective": intent.get('primaryGoal', 'Unknown'),
+ "partialAchievement": "Validation indeterminate (schema non-compliant or missing fields)",
+ "missingFields": validation.get('missingFields', []),
+ "timestamp": datetime.now(timezone.utc).timestamp()
+ })
+ self.currentPhase = "partial"
+ logger.info(f"Indeterminate validation (no penalty): {intent.get('primaryGoal', 'Unknown')}")
+ elif overallSuccess and qualityScore > 0.7:
# Successful completion
self.completedObjectives.append({
"objective": intent.get('primaryGoal', 'Unknown'),
@@ -89,9 +100,13 @@ class ProgressTracker:
return False
# If validation shows success, don't continue
- if validation.get('overallSuccess', False):
+ if validation.get('schemaCompliant', True) and validation.get('overallSuccess', False):
return False
+ # If validation is not schema compliant, allow one refinement pass without counting as failure
+ if not validation.get('schemaCompliant', True):
+ return True
+
# Otherwise, continue
return True
diff --git a/modules/workflows/processing/modes/modeReact.py b/modules/workflows/processing/modes/modeReact.py
index 9a79dcb3..a2091ac0 100644
--- a/modules/workflows/processing/modes/modeReact.py
+++ b/modules/workflows/processing/modes/modeReact.py
@@ -240,12 +240,15 @@ class ReactMode(BaseMode):
if ref_match:
valid_refs.append(ref_match.group(1))
- # Check if all provided references are valid
+ # Prefer non-empty documents: the available_docs index is already filtered to skip empty docs
+ preferred_refs = set(valid_refs)
+
+ # Check if all provided references are valid and prefer non-empty
for ref in document_refs:
- if ref not in valid_refs:
- logger.error(f"Invalid document reference: {ref}")
+ if ref not in preferred_refs:
+ logger.error(f"Invalid or empty document reference: {ref}")
logger.error(f"Available references: {valid_refs}")
- raise ValueError(f"Document reference '{ref}' not found in available documents. Use only exact references from AVAILABLE_DOCUMENTS_INDEX.")
+ raise ValueError(f"Document reference '{ref}' not found or refers to empty document. Use only non-empty references from AVAILABLE_DOCUMENTS_INDEX.")
except Exception as e:
logger.error(f"Error validating document references: {str(e)}")