From 4f01a02b9faa3fbd670c263b26ac63707c506dc4 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Tue, 29 Jul 2025 18:36:34 +0200
Subject: [PATCH] Relax task-result validation and clean up logging

Make the result-review prompt use lenient "basic validation" rules so
minor imperfections no longer trigger retries; log validation outcomes
(status, quality score, met/unmet criteria) in reviewTaskCompletion;
skip binary-content fallback when AI processing of image/video/audio
chunks fails; strip emoji from log messages; filter 'HTTP Request'
debug noise from httpx logging.
---
app.py | 3 +-
modules/chat/documents/documentExtraction.py | 80 +++++++++++---------
modules/chat/handling/handlingTasks.py | 19 ++++-
modules/chat/handling/promptFactory.py | 52 ++++++++-----
4 files changed, 97 insertions(+), 57 deletions(-)
diff --git a/app.py b/app.py
index 81b1c9af..bfe82f8f 100644
--- a/app.py
+++ b/app.py
@@ -53,7 +53,8 @@ def initLogging():
'response_closed.started',
'_send_single_request',
'httpcore.http11',
- 'httpx._client'
+ 'httpx._client',
+ 'HTTP Request'
]
return not any(pattern in record.msg for pattern in http_debug_patterns)
return True
diff --git a/modules/chat/documents/documentExtraction.py b/modules/chat/documents/documentExtraction.py
index 41588a62..0ad6188f 100644
--- a/modules/chat/documents/documentExtraction.py
+++ b/modules/chat/documents/documentExtraction.py
@@ -88,7 +88,7 @@ class DocumentExtraction:
import PyPDF2
import fitz # PyMuPDF for more extensive PDF processing
pdfExtractorLoaded = True
- logger.debug("📄 PDF extraction libraries successfully loaded")
+ logger.debug("PDF extraction libraries successfully loaded")
except ImportError as e:
logger.warning(f"PDF extraction libraries could not be loaded: {e}")
@@ -101,7 +101,7 @@ class DocumentExtraction:
import docx # python-docx for Word documents
import openpyxl # for Excel files
officeExtractorLoaded = True
- logger.debug("📄 Office extraction libraries successfully loaded")
+ logger.debug("Office extraction libraries successfully loaded")
except ImportError as e:
logger.warning(f"Office extraction libraries could not be loaded: {e}")
@@ -113,7 +113,7 @@ class DocumentExtraction:
global PIL, Image
from PIL import Image
imageProcessorLoaded = True
- logger.debug("📄 Image processing libraries successfully loaded")
+ logger.debug("Image processing libraries successfully loaded")
except ImportError as e:
logger.warning(f"Image processing libraries could not be loaded: {e}")
@@ -157,7 +157,7 @@ class DocumentExtraction:
processedItems = await self._aiDataExtraction(contentItems, prompt)
contentItems = processedItems
except Exception as e:
- logger.error(f"❌ Error processing content with AI: {str(e)}")
+ logger.error(f"Error processing content with AI: {str(e)}")
return ExtractedContent(
id=documentId if documentId else str(uuid.uuid4()),
@@ -165,7 +165,7 @@ class DocumentExtraction:
)
except Exception as e:
- logger.error(f"❌ Error processing file data: {str(e)}")
+ logger.error(f"Error processing file data: {str(e)}")
raise FileProcessingError(f"Failed to process file data: {str(e)}")
@@ -187,7 +187,7 @@ class DocumentExtraction:
)
)]
except Exception as e:
- logger.error(f"❌ Error processing text document: {str(e)}")
+ logger.error(f"Error processing text document: {str(e)}")
raise FileProcessingError(f"Failed to process text document: {str(e)}")
async def _processCsv(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
@@ -206,7 +206,7 @@ class DocumentExtraction:
)
)]
except Exception as e:
- logger.error(f"❌ Error processing CSV document: {str(e)}")
+ logger.error(f"Error processing CSV document: {str(e)}")
raise FileProcessingError(f"Failed to process CSV document: {str(e)}")
async def _processJson(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
@@ -226,7 +226,7 @@ class DocumentExtraction:
)
)]
except Exception as e:
- logger.error(f"❌ Error processing JSON document: {str(e)}")
+ logger.error(f"Error processing JSON document: {str(e)}")
raise FileProcessingError(f"Failed to process JSON document: {str(e)}")
async def _processXml(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
@@ -245,7 +245,7 @@ class DocumentExtraction:
)
)]
except Exception as e:
- logger.error(f"❌ Error processing XML document: {str(e)}")
+ logger.error(f"Error processing XML document: {str(e)}")
raise FileProcessingError(f"Failed to process XML document: {str(e)}")
async def _processHtml(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
@@ -264,7 +264,7 @@ class DocumentExtraction:
)
)]
except Exception as e:
- logger.error(f"❌ Error processing HTML document: {str(e)}")
+ logger.error(f"Error processing HTML document: {str(e)}")
raise FileProcessingError(f"Failed to process HTML document: {str(e)}")
async def _processSvg(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
@@ -284,7 +284,7 @@ class DocumentExtraction:
)
)]
except Exception as e:
- logger.error(f"❌ Error processing SVG document: {str(e)}")
+ logger.error(f"Error processing SVG document: {str(e)}")
raise FileProcessingError(f"Failed to process SVG document: {str(e)}")
async def _processImage(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
@@ -315,7 +315,7 @@ class DocumentExtraction:
metadata=metadata
)]
except Exception as e:
- logger.error(f"❌ Error processing image document: {str(e)}")
+ logger.error(f"Error processing image document: {str(e)}")
raise FileProcessingError(f"Failed to process image document: {str(e)}")
async def _processPdf(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
@@ -378,13 +378,13 @@ class DocumentExtraction:
)
))
except Exception as imgE:
- logger.warning(f"⚠️ Error extracting image {imgIndex} on page {pageNum + 1}: {str(imgE)}")
+ logger.warning(f"Error extracting image {imgIndex} on page {pageNum + 1}: {str(imgE)}")
doc.close()
return contentItems
except Exception as e:
- logger.error(f"❌ Error processing PDF document: {str(e)}")
+ logger.error(f"Error processing PDF document: {str(e)}")
raise FileProcessingError(f"Failed to process PDF document: {str(e)}")
async def _processDocx(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
@@ -423,7 +423,7 @@ class DocumentExtraction:
)
)]
except Exception as e:
- logger.error(f"❌ Error processing Word document: {str(e)}")
+ logger.error(f"Error processing Word document: {str(e)}")
raise FileProcessingError(f"Failed to process Word document: {str(e)}")
async def _processXlsx(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
@@ -465,7 +465,7 @@ class DocumentExtraction:
return contentItems
except Exception as e:
- logger.error(f"❌ Error processing Excel document: {str(e)}")
+ logger.error(f"Error processing Excel document: {str(e)}")
raise FileProcessingError(f"Failed to process Excel document: {str(e)}")
async def _processBinary(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]:
@@ -482,7 +482,7 @@ class DocumentExtraction:
)
)]
except Exception as e:
- logger.error(f"❌ Error processing binary document: {str(e)}")
+ logger.error(f"Error processing binary document: {str(e)}")
raise FileProcessingError(f"Failed to process binary document: {str(e)}")
async def _aiDataExtraction(self, contentItems: List[ContentItem], prompt: str) -> List[ContentItem]:
@@ -502,7 +502,7 @@ class DocumentExtraction:
try:
# Get content type from metadata
mimeType = item.metadata.mimeType if hasattr(item.metadata, 'mimeType') else "text/plain"
- logger.debug(f"📄 Processing content item with MIME type: {mimeType}, label: {item.label}")
+ logger.debug(f"Processing content item with MIME type: {mimeType}, label: {item.label}")
# Chunk content based on type
if mimeType.startswith('text/'):
@@ -527,12 +527,12 @@ class DocumentExtraction:
for chunk in chunks:
# Process with AI based on content type
try:
- logger.debug(f"🤖 AI processing chunk with MIME type: {mimeType}")
+ logger.debug(f"AI processing chunk with MIME type: {mimeType}")
if mimeType.startswith('image/'):
# For images, use image AI service with base64 data
# chunk is already base64 encoded string from _processImage
# Use the original prompt directly for images (no content embedding)
- logger.debug(f"🤖 Calling image AI service for MIME type: {mimeType}")
+ logger.debug(f"Calling image AI service for MIME type: {mimeType}")
processedContent = await self._serviceCenter.callAiImageBasic(prompt, chunk, mimeType)
else:
# For text content, use text AI service
@@ -553,32 +553,40 @@ class DocumentExtraction:
Return ONLY the extracted information in a clear, concise format.
"""
- logger.debug(f"🤖 Calling text AI service for MIME type: {mimeType}")
+ logger.debug(f"Calling text AI service for MIME type: {mimeType}")
processedContent = await self._serviceCenter.callAiTextBasic(aiPrompt, contentToProcess)
chunkResults.append(processedContent)
except Exception as aiError:
- logger.error(f"❌ AI processing failed for chunk: {str(aiError)}")
- # Fallback to original content
- chunkResults.append(chunk)
+ logger.error(f"AI processing failed for chunk: {str(aiError)}")
+ # For non-text content, don't fallback to binary data
+ if mimeType.startswith('image/') or mimeType.startswith('video/') or mimeType.startswith('audio/'):
+ logger.warning(f"Skipping binary content fallback for {mimeType}")
+ continue # Skip this chunk entirely
+ else:
+ # Only fallback to original content for text-based formats
+ chunkResults.append(chunk)
# Combine chunk results
combinedResult = "\n".join(chunkResults)
- # Update content with AI processed data
- processedItems.append(ContentItem(
- label=item.label,
- data=combinedResult,
- metadata=ContentMetadata(
- size=len(combinedResult.encode('utf-8')),
- pages=1,
- mimeType="text/plain",
- base64Encoded=False
- )
- ))
+ # Only add processed item if we have results
+ if combinedResult.strip():
+ processedItems.append(ContentItem(
+ label=item.label,
+ data=combinedResult,
+ metadata=ContentMetadata(
+ size=len(combinedResult.encode('utf-8')),
+ pages=1,
+ mimeType="text/plain",
+ base64Encoded=False
+ )
+ ))
+ else:
+ logger.warning(f"No processed content available for {item.label}, skipping item")
except Exception as e:
- logger.error(f"❌ Error processing content chunk: {str(e)}")
+ logger.error(f"Error processing content chunk: {str(e)}")
# Add original content if processing fails
processedItems.append(item)
diff --git a/modules/chat/handling/handlingTasks.py b/modules/chat/handling/handlingTasks.py
index c78ce995..d8ef85a6 100644
--- a/modules/chat/handling/handlingTasks.py
+++ b/modules/chat/handling/handlingTasks.py
@@ -251,7 +251,7 @@ class HandlingTasks:
if not isinstance(unmet_criteria, list):
unmet_criteria = []
- return ReviewResult(
+ review_result = ReviewResult(
status=review.get('status', 'unknown'),
reason=review.get('reason', 'No reason provided'),
improvements=improvements,
@@ -261,6 +261,23 @@ class HandlingTasks:
unmet_criteria=unmet_criteria,
confidence=review.get('confidence', 0.5)
)
+
+ # Enhanced validation logging
+ logger.info(f"VALIDATION RESULT - Task: '{task_step.description}' - Status: {review_result.status.upper()}, Quality: {review_result.quality_score}/10")
+ if review_result.status == 'success':
+ logger.info(f"VALIDATION SUCCESS - Task completed successfully")
+ if review_result.met_criteria:
+ logger.info(f"Met criteria: {', '.join(review_result.met_criteria)}")
+ elif review_result.status == 'retry':
+ logger.warning(f"VALIDATION RETRY - Task requires retry: {review_result.improvements}")
+ if review_result.unmet_criteria:
+ logger.warning(f"Unmet criteria: {', '.join(review_result.unmet_criteria)}")
+ else:
+ logger.error(f"VALIDATION FAILED - Task failed: {review_result.reason}")
+ if review_result.missing_outputs:
+ logger.error(f"Missing outputs: {', '.join(review_result.missing_outputs)}")
+
+ return review_result
except Exception as e:
logger.error(f"Error in reviewTaskCompletion: {str(e)}")
return ReviewResult(
diff --git a/modules/chat/handling/promptFactory.py b/modules/chat/handling/promptFactory.py
index 81863074..ade811b7 100644
--- a/modules/chat/handling/promptFactory.py
+++ b/modules/chat/handling/promptFactory.py
@@ -369,7 +369,7 @@ async def createResultReviewPrompt(self, review_context) -> str:
step_result_json = json.dumps(step_result_serializable, indent=2, ensure_ascii=False)
expected_outputs_str = ', '.join(task_step.expected_outputs or [])
success_criteria_str = ', '.join(task_step.success_criteria or [])
- return f"""You are a result review AI that evaluates task step completion and decides on next actions.
+ return f"""You are a result review AI that evaluates task step completion with BASIC validation.
TASK STEP: {task_step.description}
EXPECTED OUTPUTS: {expected_outputs_str}
@@ -377,29 +377,43 @@ SUCCESS CRITERIA: {success_criteria_str}
STEP RESULT: {step_result_json}
-INSTRUCTIONS:
-1. Evaluate if the task step was completed successfully
-2. Check if all expected outputs were produced
-3. Verify if success criteria were met
-4. Decide on next action: continue, retry, or fail
-5. If retry, provide specific improvements needed
+BASIC VALIDATION RULES:
+1. SUCCESS if: Action completed AND (documents were produced OR meaningful text output exists)
+2. RETRY if: Action failed due to technical issues that can be fixed
+3. FAILED if: Action completely failed with no recoverable output
-IMPORTANT NOTES:
-- Actions can produce either text results OR documents (or both)
-- Empty result_summary is acceptable if documents were produced (documents_count > 0)
-- Focus on whether the action achieved its intended purpose, not just text output
-- Document-based actions (like file extractions) often have empty text results but successful document outputs
-- Check the 'success_indicator' field: 'documents' means success via document output, 'text_result' means success via text, 'none' means no output
+VALIDATION PRINCIPLES:
+- Be GENEROUS with success - if the action achieved its basic purpose, mark as success
+- Focus on FUNCTIONALITY, not perfection
+- Document outputs are PRIMARY indicators of success
+- Text outputs are SECONDARY indicators
+- Only retry for CLEAR technical issues, not minor imperfections
+- Don't be picky about formatting or minor details
+
+EXAMPLES OF SUCCESS:
+- Document extraction produced a file (even if imperfect)
+- Text analysis provided meaningful insights
+- Data processing completed with results
+
+EXAMPLES OF RETRY:
+- Technical errors (API failures, timeouts)
+- Missing required inputs
+- Clear implementation bugs
+
+EXAMPLES OF FAILED:
+- Complete system failures
+- No output whatsoever
+- Unrecoverable errors
REQUIRED JSON STRUCTURE:
{{
"status": "success|retry|failed",
- "reason": "Explanation of the decision",
- "improvements": "Specific improvements for retry (if status is retry)",
+ "reason": "Brief explanation",
+ "improvements": ["specific technical fixes only"],
"quality_score": 1-10,
- "missing_outputs": ["output1", "output2"],
- "met_criteria": ["criteria1", "criteria2"],
- "unmet_criteria": ["criteria3", "criteria4"]
+ "missing_outputs": [],
+ "met_criteria": ["basic functionality achieved"],
+ "unmet_criteria": []
}}
-NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
\ No newline at end of file
+NOTE: Respond with ONLY the JSON object. Be GENEROUS with success ratings."""
\ No newline at end of file