commit
4a02f4c9a7
4 changed files with 42 additions and 17 deletions
|
|
@ -44,9 +44,8 @@ class AiAnthropic(BaseConnectorAi):
|
|||
return "anthropic"
|
||||
|
||||
def getModels(self) -> List[AiModel]:
|
||||
return [] # TODO: DEBUG TO TURN ON AFTER TESTING
|
||||
|
||||
"""Get all available Anthropic models."""
|
||||
# return [] # TODO: DEBUG TO TURN ON AFTER TESTING
|
||||
# Get all available Anthropic models.
|
||||
return [
|
||||
AiModel(
|
||||
name="claude-sonnet-4-5-20250929",
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ class ContentValidator:
|
|||
self.services = services
|
||||
self.learningEngine = learningEngine
|
||||
|
||||
async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
||||
async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
|
||||
"""Validates delivered content against user intent using AI (single attempt; parse-or-fail)
|
||||
|
||||
Args:
|
||||
|
|
@ -31,8 +31,9 @@ class ContentValidator:
|
|||
taskStep: Optional TaskStep object (preferred source for objective)
|
||||
actionName: Optional action name (e.g., "ai.process", "ai.webResearch") that created the documents
|
||||
actionParameters: Optional action parameters used during execution (e.g., {"columnsPerRow": 10, "researchDepth": "deep"})
|
||||
actionHistory: Optional list of previously executed actions in the workflow (for multi-step workflow context)
|
||||
"""
|
||||
return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters)
|
||||
return await self._validateWithAI(documents, intent, taskStep, actionName, actionParameters, actionHistory)
|
||||
|
||||
def _analyzeDocuments(self, documents: List[Any]) -> List[Dict[str, Any]]:
|
||||
"""Generic document analysis - create simple summaries with metadata."""
|
||||
|
|
@ -391,7 +392,7 @@ class ContentValidator:
|
|||
|
||||
return False
|
||||
|
||||
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
||||
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None, actionParameters: Optional[Dict[str, Any]] = None, actionHistory: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
|
||||
"""AI-based comprehensive validation - generic approach"""
|
||||
try:
|
||||
if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
|
||||
|
|
@ -474,6 +475,26 @@ class ContentValidator:
|
|||
metadataJson = json.dumps(combinedMetadata, ensure_ascii=False, indent=2)
|
||||
validationMetadataContext = f"\nACTION VALIDATION METADATA: {metadataJson}"
|
||||
|
||||
# Build action history context (for multi-step workflow validation)
|
||||
actionHistoryContext = ""
|
||||
if actionHistory and isinstance(actionHistory, list) and len(actionHistory) > 0:
|
||||
historyEntries = []
|
||||
for entry in actionHistory:
|
||||
if isinstance(entry, dict):
|
||||
action = entry.get('action', 'unknown')
|
||||
params = entry.get('parameters', {}) or {}
|
||||
step = entry.get('step', 0)
|
||||
# Filter out documentList, connections, and None-valued parameters for clarity
|
||||
relevantParams = {k: v for k, v in params.items() if k not in ['documentList', 'connections'] and v is not None}
|
||||
paramsStr = json.dumps(relevantParams, ensure_ascii=False) if relevantParams else "{}"
|
||||
historyEntries.append(f"Step {step}: {action} {paramsStr}")
|
||||
elif isinstance(entry, str):
|
||||
historyEntries.append(entry)
|
||||
|
||||
if historyEntries:
|
||||
actionHistoryContext = f"\n\n=== ACTION HISTORY ===\n" + "\n".join(f"- {entry}" for entry in historyEntries)
|
||||
actionHistoryContext += "\n\nIMPORTANT: This shows the complete workflow that produced the documents. For process-oriented criteria (e.g., 'internet search performed'), check ACTION HISTORY first. Document metadata may only reflect the LAST action, not the entire workflow."
|
||||
|
||||
# Format success criteria for display with index numbers
|
||||
if successCriteria:
|
||||
criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)])
|
||||
|
|
@ -485,22 +506,25 @@ class ContentValidator:
|
|||
=== TASK INFORMATION ===
|
||||
{objectiveLabel}: '{objectiveText}'
|
||||
EXPECTED DATA TYPE: {dataType}
|
||||
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}
|
||||
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}{actionHistoryContext}
|
||||
|
||||
=== VALIDATION INSTRUCTIONS ===
|
||||
|
||||
IMPORTANT: Different formats can represent the same data structure. Do not reject a format just because it differs from expected - check the structure summary for actual content.
|
||||
|
||||
VALIDATION RULES:
|
||||
1. Use structure summary (sections, statistics, counts) as PRIMARY evidence. Trust structure over format claims.
|
||||
2. For each criterion in criteriaMapping: evaluate ONLY that criterion. Do not mention other criteria.
|
||||
3. Priority: Data completeness > Format compatibility. Missing data is more critical than format mismatch.
|
||||
4. Format understanding: Different formats can represent equivalent data structures. Focus on content, not format name.
|
||||
5. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help.
|
||||
1. Use structure summary (sections, statistics, counts) as PRIMARY evidence for DATA-ORIENTED criteria. Trust structure over format claims.
|
||||
2. Use ACTION HISTORY as PRIMARY evidence for PROCESS-ORIENTED criteria (e.g., "internet search performed", "sources cited"). Document metadata may only reflect the last action, not the entire workflow.
|
||||
3. For each criterion in criteriaMapping: evaluate ONLY that criterion. Do not mention other criteria.
|
||||
4. Priority: Data completeness > Format compatibility. Missing data is more critical than format mismatch.
|
||||
5. Format understanding: Different formats can represent equivalent data structures. Focus on content, not format name.
|
||||
6. Multi-step workflow awareness: If ACTION HISTORY is present, consider the workflow as a whole. Document metadata (e.g., extraction_method) describes how data was EXTRACTED in the last step, not necessarily how it was OBTAINED in the workflow.
|
||||
7. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help.
|
||||
|
||||
VALIDATION STEPS:
|
||||
- Check ACTION VALIDATION METADATA first (if present) - this contains action-specific context
|
||||
- Check structure summary for quantities, counts, statistics
|
||||
- Check ACTION HISTORY first (if present) for PROCESS-ORIENTED criteria (e.g., "search performed", "sources used", "verification done")
|
||||
- Check ACTION VALIDATION METADATA (if present) - this contains action-specific context for the LAST action only
|
||||
- Check structure summary for quantities, counts, statistics (for DATA-ORIENTED criteria)
|
||||
- Compare found values with required values from criteria
|
||||
- If structure unavailable, use metadata only (format, filename, size)
|
||||
- Classify gaps: missing_data (less than required), incomplete_data (partial), wrong_structure (wrong organization), wrong_format (format mismatch but data present)
|
||||
|
|
|
|||
|
|
@ -152,9 +152,11 @@ class DynamicMode(BaseMode):
|
|||
# Pass taskStep so validator can use task.objective and format fields
|
||||
# Pass action name so validator knows which action created the documents
|
||||
# Pass action parameters so validator can verify parameter-specific requirements
|
||||
# Pass action history so validator can validate process-oriented criteria in multi-step workflows
|
||||
actionName = selection.get('action', 'unknown')
|
||||
actionParameters = selection.get('parameters', {})
|
||||
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName, actionParameters)
|
||||
actionHistory = getattr(context, 'executedActions', None) if hasattr(context, 'executedActions') else None
|
||||
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName, actionParameters, actionHistory)
|
||||
observation.contentValidation = validationResult
|
||||
quality_score = validationResult.get('qualityScore', 0.0)
|
||||
if quality_score is None:
|
||||
|
|
|
|||
|
|
@ -357,8 +357,8 @@ class WorkflowProcessor:
|
|||
"- Need for previous workflow rounds/history (e.g., 'continue', 'retry', 'fix', 'improve', 'update', 'modify', 'based on previous', 'build on', references to earlier work)\n"
|
||||
"- Language: Detect the ISO 639-1 language code (e.g., de, en, fr, it) from the user's request\n\n"
|
||||
"Complexity levels:\n"
|
||||
"- 'simple': Single question, no documents or minimal documents, straightforward answer that can be provided in one AI response (5-15s)\n"
|
||||
"- 'moderate': Multiple steps, some documents, structured response requiring some processing (30-60s)\n"
|
||||
"- 'simple': Only if NO documents AND NO web search required. Single question, straightforward answer that can be provided in one AI response (5-15s)\n"
|
||||
"- 'moderate': Multiple steps, some documents, structured response requiring some processing, or web search needed (30-60s)\n"
|
||||
"- 'complex': Multi-task workflow, many documents, research needed, content generation required, multi-step planning (60-120s)\n\n"
|
||||
"Return ONLY a JSON object with this exact structure:\n"
|
||||
"{\n"
|
||||
|
|
|
|||
Loading…
Reference in a new issue