app chain validation - fixed issues

ValueOn AG 2025-11-03 23:51:20 +01:00
parent 310e6d3f8b
commit a53d8f8e33
19 changed files with 977 additions and 168 deletions

View file

@ -1,6 +1,12 @@
"""
Base connector interface for AI connectors.
All AI connectors should inherit from this class.
IMPORTANT: Model Registration Requirements
- Each model must have a unique displayName across all connectors
- The displayName is used as the unique identifier in the model registry
- The name field is used for API calls (can be duplicated across different model instances)
- If duplicate displayNames are detected during registration, an error will be raised
"""
from abc import ABC, abstractmethod
@ -9,7 +15,13 @@ from modules.datamodels.datamodelAi import AiModel
class BaseConnectorAi(ABC):
"""Base class for all AI connectors."""
"""
Base class for all AI connectors.
IMPORTANT: Models returned by getModels() must have unique displayName values.
The displayName serves as the unique identifier in the model registry.
Duplicate displayNames will cause registration to fail with an error.
"""
def __init__(self):
self._models_cache: Optional[List[AiModel]] = None
@ -21,6 +33,10 @@ class BaseConnectorAi(ABC):
"""
Get all available models for this connector.
Should be implemented by each connector.
IMPORTANT: Each model's displayName must be unique across all connectors.
If multiple models share the same API name (e.g., "gpt-4o"), they must have
different displayNames (e.g., "OpenAI GPT-4o" vs "OpenAI GPT-4o Instance Vision").
"""
pass
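For illustration, a connector satisfying this contract could look like the following minimal sketch. The connector name is hypothetical, and AiModel's other required fields (apiUrl, temperature, etc., as seen in the Anthropic connector later in this commit) are omitted for brevity:

# Hypothetical connector sketch; AiModel fields beyond the three shown are omitted
class ExampleConnector(BaseConnectorAi):
    def getConnectorType(self) -> str:
        return "example"

    def getModels(self) -> List[AiModel]:
        # The API name may repeat, but each displayName must be unique
        return [
            AiModel(name="gpt-4o", displayName="Example GPT-4o", connectorType="example"),
            AiModel(name="gpt-4o", displayName="Example GPT-4o Vision", connectorType="example"),
        ]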
@ -58,8 +74,16 @@ class BaseConnectorAi(ABC):
self._models_cache = None
self._last_cache_update = None
def getModelByDisplayName(self, displayName: str) -> Optional[AiModel]:
"""Get a specific model by displayName (displayName must be unique)."""
models = self.getCachedModels()
for model in models:
if model.displayName == displayName:
return model
return None
def getModelByName(self, name: str) -> Optional[AiModel]:
"""Get a specific model by name."""
"""Get a specific model by name (API name). Note: name can be duplicated, returns first match."""
models = self.getCachedModels()
for model in models:
if model.name == name:

View file

@ -25,16 +25,31 @@ class ModelRegistry:
def registerConnector(self, connector: BaseConnectorAi):
"""Register a connector and collect its models."""
connectorType = connector.getConnectorType()
# If connector already registered, skip re-registration to avoid duplicate models
if connectorType in self._connectors:
logger.debug(f"Connector {connectorType} already registered, skipping re-registration")
return
self._connectors[connectorType] = connector
# Collect models from this connector
try:
models = connector.getCachedModels()
for model in models:
self._models[model.name] = model
logger.debug(f"Registered model: {model.name} from {connectorType}")
# Validate displayName uniqueness
if model.displayName in self._models:
existingModel = self._models[model.displayName]
errorMsg = f"Duplicate displayName '{model.displayName}' detected! Existing model: displayName='{existingModel.displayName}', name='{existingModel.name}' (connector: {existingModel.connectorType}), New model: displayName='{model.displayName}', name='{model.name}' (connector: {connectorType}). displayName must be unique."
logger.error(errorMsg)
raise ValueError(errorMsg)
# Use displayName as the key (must be unique)
self._models[model.displayName] = model
logger.debug(f"Registered model: {model.displayName} (name: {model.name}) from {connectorType}")
except Exception as e:
logger.error(f"Failed to register models from {connectorType}: {e}")
raise
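A sketch of the resulting fail-fast behavior at registration time (the connector variables and the colliding displayName are hypothetical):

# Hypothetical usage: the second connector exposes a displayName already registered
registry = ModelRegistry()
registry.registerConnector(firstConnector)      # registers "OpenAI GPT-4o"
try:
    registry.registerConnector(secondConnector)  # also exposes "OpenAI GPT-4o"
except ValueError as e:
    logger.error(f"Startup aborted: {e}")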
def discoverConnectors(self) -> List[BaseConnectorAi]:
"""Auto-discover connectors by scanning aicorePlugin*.py files."""
@ -90,17 +105,26 @@ class ModelRegistry:
connector.clearCache() # Clear connector cache
models = connector.getCachedModels()
for model in models:
self._models[model.name] = model
# Validate displayName uniqueness
if model.displayName in self._models:
existingModel = self._models[model.displayName]
errorMsg = f"Duplicate displayName '{model.displayName}' detected! Existing model: displayName='{existingModel.displayName}', name='{existingModel.name}' (connector: {existingModel.connectorType}), New model: displayName='{model.displayName}', name='{model.name}' (connector: {connector.getConnectorType()}). displayName must be unique."
logger.error(errorMsg)
raise ValueError(errorMsg)
# Use displayName as the key (must be unique)
self._models[model.displayName] = model
except Exception as e:
logger.error(f"Failed to refresh models from {connector.getConnectorType()}: {e}")
raise
self._lastRefresh = currentTime
logger.info(f"Model registry refreshed: {len(self._models)} models available")
def getModel(self, name: str) -> Optional[AiModel]:
"""Get a specific model by name."""
def getModel(self, displayName: str) -> Optional[AiModel]:
"""Get a specific model by displayName (displayName must be unique)."""
self.refreshModels()
return self._models.get(name)
return self._models.get(displayName)
def getModels(self) -> List[AiModel]:
"""Get all available models."""
@ -121,11 +145,18 @@ class ModelRegistry:
def getAvailableModels(self) -> List[AiModel]:
"""Get only available models."""
self.refreshModels()
return [model for model in self._models.values() if model.isAvailable]
allModels = list(self._models.values())
availableModels = [model for model in allModels if model.isAvailable]
unavailableCount = len(allModels) - len(availableModels)
if unavailableCount > 0:
unavailableModels = [m.name for m in allModels if not m.isAvailable]
logger.debug(f"getAvailableModels: {len(availableModels)} available, {unavailableCount} unavailable. Unavailable: {unavailableModels}")
logger.debug(f"getAvailableModels: Returning {len(availableModels)} models: {[m.name for m in availableModels]}")
return availableModels
def getConnectorForModel(self, modelName: str) -> Optional[BaseConnectorAi]:
"""Get the connector instance for a specific model."""
model = self.getModel(modelName)
def getConnectorForModel(self, displayName: str) -> Optional[BaseConnectorAi]:
"""Get the connector instance for a specific model by displayName."""
model = self.getModel(displayName)
if model:
return self._connectors.get(model.connectorType)
return None
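With displayName as the registry key, resolution for a call proceeds roughly like this (the displayName mirrors the one registered by the Anthropic connector below):

# Usage sketch: resolve model and connector by the unique displayName
model = modelRegistry.getModel("Anthropic Claude Sonnet 4.5")
connector = modelRegistry.getConnectorForModel("Anthropic Claude Sonnet 4.5")
if model and connector:
    apiName = model.name  # the (possibly duplicated) API name used for the request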

View file

@ -45,8 +45,8 @@ class AiAnthropic(BaseConnectorAi):
"""Get all available Anthropic models."""
return [
AiModel(
name="claude-3-5-sonnet-20241022",
displayName="Anthropic Claude 3.5 Sonnet",
name="claude-sonnet-4-5-20250929",
displayName="Anthropic Claude Sonnet 4.5",
connectorType="anthropic",
apiUrl="https://api.anthropic.com/v1/messages",
temperature=0.2,
@ -66,12 +66,12 @@ class AiAnthropic(BaseConnectorAi):
(OperationTypeEnum.DATA_GENERATE, 9),
(OperationTypeEnum.DATA_EXTRACT, 8)
),
version="claude-3-5-sonnet-20241022",
version="claude-sonnet-4-5-20250929",
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.015 + (bytesReceived / 4 / 1000) * 0.075
),
AiModel(
name="claude-3-5-sonnet-20241022",
displayName="Anthropic Claude 3.5 Sonnet Instance Vision",
name="claude-sonnet-4-5-20250929",
displayName="Anthropic Claude Sonnet 4.5 Vision",
connectorType="anthropic",
apiUrl="https://api.anthropic.com/v1/messages",
temperature=0.2,
@ -87,7 +87,7 @@ class AiAnthropic(BaseConnectorAi):
operationTypes=createOperationTypeRatings(
(OperationTypeEnum.IMAGE_ANALYSE, 10)
),
version="claude-3-5-sonnet-20241022",
version="claude-sonnet-4-5-20250929",
calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.015 + (bytesReceived / 4 / 1000) * 0.075
)
]
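The pricing lambdas approximate tokens as bytes/4 and apply per-1K-token rates (0.015 USD input, 0.075 USD output). A worked example with illustrative byte counts:

# Worked example of the pricing formula above
bytesSent, bytesReceived = 40_000, 8_000           # ~10_000 input / ~2_000 output tokens
priceUsd = (bytesSent / 4 / 1000) * 0.015 + (bytesReceived / 4 / 1000) * 0.075
print(round(priceUsd, 2))                          # prints 0.3 (0.15 input + 0.15 output)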

View file

@ -55,7 +55,7 @@ class AiObjects:
return cls()
def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str:
"""Select the best model using dynamic model selection system."""
"""Select the best model using dynamic model selection system. Returns displayName (unique identifier)."""
# Get available models from the dynamic registry
availableModels = modelRegistry.getAvailableModels()
@ -71,7 +71,7 @@ class AiObjects:
raise ValueError("No suitable AI model found")
logger.info(f"Selected model: {selectedModel.name} ({selectedModel.displayName})")
return selectedModel.name
return selectedModel.displayName
# AI for Extraction, Processing, Generation
@ -258,8 +258,8 @@ class AiObjects:
logger.info(f"✅ Content part processed successfully with model: {model.name}")
return response
else:
# Part too large - chunk it
chunks = await self._chunkContentPart(contentPart, model, options)
# Part too large - chunk it (pass prompt to account for it in chunk size calculation)
chunks = await self._chunkContentPart(contentPart, model, options, prompt)
if not chunks:
raise ValueError(f"Failed to chunk content part for model {model.name}")
@ -302,19 +302,51 @@ class AiObjects:
# All models failed
return self._createErrorResponse(f"All models failed: {str(lastError)}", 0, 0)
async def _chunkContentPart(self, contentPart, model, options) -> List[Dict[str, Any]]:
"""Chunk a content part based on model capabilities."""
async def _chunkContentPart(self, contentPart, model, options, prompt: str = "") -> List[Dict[str, Any]]:
"""Chunk a content part based on model capabilities, accounting for prompt, system message overhead, and maxTokens output."""
# Calculate model-specific chunk sizes
modelContextBytes = model.contextLength * 4 # Convert tokens to bytes
maxContextBytes = int(modelContextBytes * 0.9) # 90% of context length
textChunkSize = int(maxContextBytes * 0.7) # 70% of max context for text chunks
imageChunkSize = int(maxContextBytes * 0.8) # 80% of max context for image chunks
modelContextTokens = model.contextLength # Total context in tokens
modelMaxOutputTokens = model.maxTokens # Maximum output tokens
# Reserve tokens for:
# 1. Prompt (user message)
promptTokens = len(prompt.encode('utf-8')) / 4 if prompt else 0
# 2. System message wrapper ("Context from documents:\n")
systemMessageTokens = 10 # ~40 bytes = 10 tokens
# 3. Max output tokens (model will reserve space for completion)
outputTokens = modelMaxOutputTokens
# 4. JSON structure and message overhead (~100 tokens)
messageOverheadTokens = 100
# Total reserved tokens = input overhead + output reservation
totalReservedTokens = promptTokens + systemMessageTokens + messageOverheadTokens + outputTokens
# Available tokens for content = context length - reserved tokens
# Use 80% of available for safety margin
availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8)
# Ensure we have at least some space
if availableContentTokens < 100:
logger.warning(f"Very limited space for content: {availableContentTokens} tokens available. Model: {model.name}, contextLength: {modelContextTokens}, maxTokens: {modelMaxOutputTokens}, prompt: {promptTokens:.0f} tokens")
availableContentTokens = max(100, int(modelContextTokens * 0.1)) # Fallback to 10% of context
# Convert tokens to bytes (1 token ≈ 4 bytes)
availableContentBytes = availableContentTokens * 4
logger.debug(f"Chunking calculation for {model.name}: contextLength={modelContextTokens} tokens, maxTokens={modelMaxOutputTokens} tokens, prompt={promptTokens:.0f} tokens, reserved={totalReservedTokens:.0f} tokens, available={availableContentTokens} tokens ({availableContentBytes} bytes)")
# Use 70% of available content bytes for text chunks (conservative)
textChunkSize = int(availableContentBytes * 0.7)
imageChunkSize = int(availableContentBytes * 0.8) # 80% for image chunks
# Build chunking options
chunkingOptions = {
"textChunkSize": textChunkSize,
"imageChunkSize": imageChunkSize,
"maxSize": maxContextBytes,
"maxSize": availableContentBytes,
"chunkAllowed": True
}
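Plugging in illustrative numbers shows how the budget shrinks before chunking starts:

# Worked example of the reservation math (all numbers are illustrative)
contextLength, maxTokens = 200_000, 8_192
promptTokens = 2_000 / 4                                 # 500 tokens for a 2 KB prompt
reserved = promptTokens + 10 + 100 + maxTokens           # 8_802 tokens reserved
availableTokens = int((contextLength - reserved) * 0.8)  # 152_958 tokens for content
availableBytes = availableTokens * 4                     # 611_832 bytes
textChunkSize = int(availableBytes * 0.7)                # 428_282 bytes per text chunk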
@ -510,17 +542,17 @@ class AiObjects:
return [model.model_dump() for model in models if model.connectorType == connectorType]
return [model.model_dump() for model in models]
async def getModelInfo(self, modelName: str) -> Dict[str, Any]:
"""Get information about a specific model."""
model = modelRegistry.getModel(modelName)
async def getModelInfo(self, displayName: str) -> Dict[str, Any]:
"""Get information about a specific model by displayName."""
model = modelRegistry.getModel(displayName)
if not model:
raise ValueError(f"Model {modelName} not found")
raise ValueError(f"Model with displayName '{displayName}' not found")
return model.model_dump()
async def getModelsByTag(self, tag: str) -> List[str]:
"""Get model names that have a specific tag."""
"""Get model displayNames that have a specific tag. Returns displayNames (unique identifiers)."""
models = modelRegistry.getModelsByTag(tag)
return [model.name for model in models]
return [model.displayName for model in models]
def applyMerging(parts: List[ContentPart], strategy: MergeStrategy) -> List[ContentPart]:

View file

@ -491,7 +491,8 @@ Respond with ONLY a JSON object in this exact format:
async def callAiPlanning(
self,
prompt: str,
placeholders: Optional[List[PromptPlaceholder]] = None
placeholders: Optional[List[PromptPlaceholder]] = None,
debugType: Optional[str] = None
) -> str:
"""
Planning AI call for task planning, action planning, action selection, etc.
@ -500,6 +501,8 @@ Respond with ONLY a JSON object in this exact format:
Args:
prompt: The planning prompt
placeholders: Optional list of placeholder replacements
debugType: Optional debug file type identifier (e.g., 'taskplan', 'actionplan', 'intentanalysis')
If not provided, defaults to 'plan'
Returns:
Planning JSON response
@ -529,11 +532,12 @@ Respond with ONLY a JSON object in this exact format:
options=options
)
# Debug: persist prompt/response for analysis
self.services.utils.writeDebugFile(fullPrompt, "plan_prompt")
# Debug: persist prompt/response for analysis with context-specific naming
debugPrefix = debugType if debugType else "plan"
self.services.utils.writeDebugFile(fullPrompt, f"{debugPrefix}_prompt")
response = await self.aiObjects.call(request)
result = response.content or ""
self.services.utils.writeDebugFile(result, "plan_response")
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
return result
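A hypothetical call site showing how debugType changes the debug artifact names (the prompt variable is assumed to be assembled by the caller):

# Hypothetical caller: debug files become taskplan_prompt / taskplan_response
response = await self.services.ai.callAiPlanning(
    prompt=planningPrompt,
    debugType="taskplan"
)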
# Document Generation AI Call
@ -579,8 +583,10 @@ Respond with ONLY a JSON object in this exact format:
self.services.workflow.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
options = await self._analyzePromptAndCreateOptions(prompt)
# Handle image generation requests directly via generic path
# Check operationType FIRST - some operations need direct routing (before document generation checks)
opType = getattr(options, "operationType", None)
# Handle image generation requests directly via generic path
isImageRequest = (opType == OperationTypeEnum.IMAGE_GENERATE)
if isImageRequest:
@ -630,6 +636,43 @@ Respond with ONLY a JSON object in this exact format:
self.services.workflow.progressLogFinish(aiOperationId, False)
return {"success": False, "error": errorMsg}
# Handle WEB_SEARCH and WEB_CRAWL operations - route directly to connectors
# These operations require raw JSON prompts that connectors parse directly
# Must check BEFORE document generation to avoid wrapping the prompt
isWebOperation = (opType == OperationTypeEnum.WEB_SEARCH or opType == OperationTypeEnum.WEB_CRAWL)
if isWebOperation:
# Web operations: prompt is already structured JSON (AiCallPromptWebSearch/WebCrawl)
# Route directly through centralized AI call - model selector chooses appropriate connector
# Connector parses the JSON prompt and executes the operation
self.services.workflow.progressLogUpdate(aiOperationId, 0.4, f"Calling AI for {opType.name}")
request = AiCallRequest(
prompt=prompt, # Pass raw JSON prompt unchanged - connector will parse it
context="",
options=options
)
response = await self.aiObjects.call(request)
# Extract result from response
if response.content:
# Emit stats for web operation
self.services.workflow.storeWorkflowStat(
self.services.currentWorkflow,
response,
f"ai.{opType.name.lower()}"
)
self.services.workflow.progressLogUpdate(aiOperationId, 0.9, f"{opType.name} completed")
self.services.workflow.progressLogFinish(aiOperationId, True)
return response.content
else:
errorMsg = f"No content returned from {opType.name}: {response.content}"
logger.error(f"Error in {opType.name}: {errorMsg}")
self.services.workflow.progressLogFinish(aiOperationId, False)
return {"success": False, "error": errorMsg}
# CRITICAL: For document generation with JSON templates, NEVER compress the prompt
# Compressing would truncate the template structure and confuse the AI
if outputFormat: # Document generation with structured output

View file

@ -23,16 +23,17 @@ class ExtractionService:
self.services = services
self._extractorRegistry = ExtractorRegistry()
self._chunkerRegistry = ChunkerRegistry()
# Ensure AI connectors are discovered so pricing models are available
try:
# If internal model is missing, trigger discovery and registration
if modelRegistry.getModel("internal-extractor") is None:
discovered = modelRegistry.discoverConnectors()
for connector in discovered:
modelRegistry.registerConnector(connector)
except Exception:
# Propagate actual errors during use; init should be fast and side-effect free otherwise
pass
# Ensure connectors are registered
discovered = modelRegistry.discoverConnectors()
for connector in discovered:
modelRegistry.registerConnector(connector)
# Verify required internal model is available (used for pricing in extractContent)
modelDisplayName = "Internal Document Extractor"
model = modelRegistry.getModel(modelDisplayName)
if model is None or model.calculatePriceUsd is None:
raise RuntimeError(f"FATAL: Required internal model '{modelDisplayName}' is not available. Check connector registration.")
def extractContent(self, documents: List[ChatDocument], options: ExtractionOptions) -> List[ContentExtracted]:
"""
@ -115,17 +116,18 @@ class ExtractionService:
# Emit stats for extraction operation
# Use internal extraction model for pricing
modelName = "internal-extractor"
model = modelRegistry.getModel(modelName)
modelDisplayName = "Internal Document Extractor"
model = modelRegistry.getModel(modelDisplayName)
# Hard fail if model is missing; caller must ensure connectors are registered
if model is None or model.calculatePriceUsd is None:
raise RuntimeError(f"Pricing model not available: {modelName}")
raise RuntimeError(f"Pricing model not available: {modelDisplayName}")
priceUsd = model.calculatePriceUsd(processingTime, bytesSent, bytesReceived)
# Create AiCallResponse with real calculation
# Use model.name for the response (API identifier), not displayName
aiResponse = AiCallResponse(
content="", # No content for extraction stats needed
modelName=modelName,
modelName=model.name,
priceUsd=priceUsd,
processingTime=processingTime,
bytesSent=bytesSent,

View file

@ -89,7 +89,11 @@ class NormalizationService:
" \"Date\": {\"formats\": [\"DD.MM.YYYY\",\"YYYY-MM-DD\"]}\n }\n}\n"
)
response = await self.services.ai.callAiPlanning(prompt=prompt, placeholders=None)
response = await self.services.ai.callAiPlanning(
prompt=prompt,
placeholders=None,
debugType="normalization"
)
if not response:
return {"mapping": {}, "normalizationPolicy": {}}

View file

@ -59,8 +59,9 @@ class WebService:
countryCode = analysisResult.get("country", country)
languageCode = analysisResult.get("language", language)
finalResearchDepth = analysisResult.get("researchDepth", researchDepth)
suggestedFilename = analysisResult.get("filename", None)
logger.info(f"AI Analysis: instruction='{instruction[:100]}...', urls={len(extractedUrls)}, needsSearch={needsSearch}, maxNumberPages={maxNumberPages}, researchDepth={finalResearchDepth}")
logger.info(f"AI Analysis: instruction='{instruction[:100]}...', urls={len(extractedUrls)}, needsSearch={needsSearch}, maxNumberPages={maxNumberPages}, researchDepth={finalResearchDepth}, filename={suggestedFilename}")
# Combine URLs (from user + from prompt extraction)
allUrls = []
@ -109,7 +110,7 @@ class WebService:
self.services.workflow.progressLogUpdate(operationId, 0.9, "Consolidating results")
# Return consolidated result
return {
result = {
"instruction": instruction,
"urls_crawled": allUrls,
"total_urls": len(allUrls),
@ -117,6 +118,12 @@ class WebService:
"total_results": len(crawlResult) if isinstance(crawlResult, list) else 1
}
# Add suggested filename if available
if suggestedFilename:
result["suggested_filename"] = suggestedFilename
return result
except Exception as e:
logger.error(f"Error in web research: {str(e)}")
raise
@ -147,31 +154,41 @@ USER PROVIDED:
- Language: {language or "Not specified"}
Extract and provide a JSON response with:
1. instruction: The core research instruction (cleaned prompt without URLs)
2. urls: List of URLs found in the prompt text
1. instruction: Formulate directly WHAT you want to find on the web. Do not include URLs in the instruction. Good example: "What is the company Xyz doing?". Bad example: "Conduct web research on the company Xyz"
2. urls: List of URLs found in the prompt text, plus any URLs you know that are relevant to the research
3. needsSearch: true if a web search is needed to identify URLs to crawl, false if only the provided URLs should be crawled
4. maxNumberPages: Recommended number of URLs to crawl (based on research scope, typical: 2-20)
5. country: Country code if identified in the prompt (2-digit lowercase, e.g., ch, us, de)
6. language: Language identified from the prompt (lowercase, e.g., de, en, fr)
7. researchDepth: Research depth based on instruction complexity - "fast" (quick overview, maxDepth=1), "general" (standard research, maxDepth=2), or "deep" (comprehensive research, maxDepth=3)
8. filename: Generate a concise, descriptive filename (without extension) for the research results. Should be short (max 50 characters), descriptive of the research topic, use underscores instead of spaces, and only contain alphanumeric characters and underscores. Example: "WebResearch_Topic_Context"
Return ONLY valid JSON, no additional text:
{{
"instruction": "cleaned research instruction",
"instruction": "research instruction",
"urls": ["url1", "url2"],
"needsSearch": true,
"maxNumberPages": 10,
"country": "ch",
"language": "de",
"researchDepth": "general"
"language": "en",
"researchDepth": "general",
"filename": "descriptive_filename_without_extension"
}}"""
try:
# Call AI planning to analyze intent
analysisJson = await self.services.ai.callAiPlanning(analysisPrompt)
analysisJson = await self.services.ai.callAiPlanning(
analysisPrompt,
debugType="webresearchintent"
)
# Extract JSON from response (handles markdown code blocks)
extractedJson = self.services.utils.jsonExtractString(analysisJson)
if not extractedJson:
raise ValueError("No JSON found in AI response")
# Parse JSON response
result = json.loads(analysisJson)
result = json.loads(extractedJson)
logger.info(f"Intent analysis result: {result}")
return result
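The switch to jsonExtractString matters because planning models frequently wrap JSON in markdown fences; its exact behavior is an assumption based on its usage here:

# Sketch of the failure mode avoided: json.loads chokes on fenced output
raw = '```json\n{"needsSearch": true, "maxNumberPages": 10}\n```'
# json.loads(raw) would raise JSONDecodeError on the backticks;
# jsonExtractString is assumed to strip the fence and return the inner object:
# extracted = self.services.utils.jsonExtractString(raw)
# json.loads(extracted)  # -> {"needsSearch": True, "maxNumberPages": 10}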
@ -186,7 +203,8 @@ Return ONLY valid JSON, no additional text:
"maxNumberPages": 10,
"country": country,
"language": language,
"researchDepth": researchDepth
"researchDepth": researchDepth,
"filename": None
}
async def _performWebSearch(
@ -207,6 +225,9 @@ Return ONLY valid JSON, no additional text:
)
searchPrompt = searchPromptModel.model_dump_json(exclude_none=True, indent=2)
# Debug: persist search prompt
self.services.utils.writeDebugFile(searchPrompt, "websearch_prompt")
# Call AI with WEB_SEARCH operation
searchOptions = AiCallOptions(
operationType=OperationTypeEnum.WEB_SEARCH,
@ -220,9 +241,17 @@ Return ONLY valid JSON, no additional text:
outputFormat="json"
)
# Debug: persist search response
if isinstance(searchResult, str):
self.services.utils.writeDebugFile(searchResult, "websearch_response")
else:
self.services.utils.writeDebugFile(json.dumps(searchResult, indent=2), "websearch_response")
# Parse and extract URLs
if isinstance(searchResult, str):
searchData = json.loads(searchResult)
# Extract JSON from response (handles markdown code blocks)
extractedJson = self.services.utils.jsonExtractString(searchResult)
searchData = json.loads(extractedJson) if extractedJson else json.loads(searchResult)
else:
searchData = searchResult
@ -234,7 +263,14 @@ Return ONLY valid JSON, no additional text:
elif "results" in searchData:
urls = [r.get("url") for r in searchData["results"] if r.get("url")]
elif isinstance(searchData, list):
urls = [item.get("url") for item in searchData if item.get("url")]
# Handle both cases: list of URL strings or list of dicts with "url" key
for item in searchData:
if isinstance(item, str):
# Item is already a URL string
urls.append(item)
elif isinstance(item, dict) and item.get("url"):
# Item is a dict with "url" key
urls.append(item.get("url"))
logger.info(f"Web search returned {len(urls)} URLs")
return urls
@ -266,6 +302,10 @@ Return ONLY valid JSON, no additional text:
)
crawlPrompt = crawlPromptModel.model_dump_json(exclude_none=True, indent=2)
# Debug: persist crawl prompt (with URL identifier in content for clarity)
debugPrompt = f"URL: {url}\n\n{crawlPrompt}"
self.services.utils.writeDebugFile(debugPrompt, "webcrawl_prompt")
# Call AI with WEB_CRAWL operation
crawlOptions = AiCallOptions(
operationType=OperationTypeEnum.WEB_CRAWL,
@ -279,10 +319,18 @@ Return ONLY valid JSON, no additional text:
outputFormat="json"
)
# Debug: persist crawl response
if isinstance(crawlResult, str):
self.services.utils.writeDebugFile(crawlResult, "webcrawl_response")
else:
self.services.utils.writeDebugFile(json.dumps(crawlResult, indent=2), "webcrawl_response")
# Parse crawl result
if isinstance(crawlResult, str):
try:
crawlData = json.loads(crawlResult)
# Extract JSON from response (handles markdown code blocks)
extractedJson = self.services.utils.jsonExtractString(crawlResult)
crawlData = json.loads(extractedJson) if extractedJson else json.loads(crawlResult)
except Exception:
crawlData = {"url": url, "content": crawlResult}
else:

View file

@ -31,14 +31,15 @@ class MethodAi(MethodBase):
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Process a user prompt with optional unlimited input documents to produce one or many output documents of the SAME format.
- Input requirements: aiPrompt (required); optional documentList.
- Output format: Exactly one file format to select. For multiple output file formats you need to do different calls.
- Purpose: Universal AI document processing action - accepts MULTIPLE input documents in ANY format (docx, pdf, json, txt, xlsx, html, images, etc.) and processes them together with a prompt to produce MULTIPLE output documents in ANY specified format (via resultType). Use for document generation, format conversion, content transformation, analysis, summarization, translation, extraction, comparison, and any AI-powered document manipulation.
- Input requirements: aiPrompt (required); optional documentList (can contain multiple documents in any format).
- Output format: Multiple documents in the same format per call (via resultType: txt, json, pdf, docx, xlsx, pptx, png, jpg, etc.). The AI can generate multiple files based on the prompt (e.g., "create separate documents for each section"). Default: txt.
- Key capabilities: Can process any number of input documents together, extract data from mixed formats, combine information, generate multiple output files, transform between formats, perform analysis/comparison/summarization on document sets.
Parameters:
- aiPrompt (str, required): Instruction for the AI.
- documentList (list, optional): Document reference(s) for context.
- resultType (str, optional): Output file extension - only one extension allowed (e.g. txt, json, md, csv, xml, html, pdf, docx, xlsx, png, ...). Default: txt.
- aiPrompt (str, required): Instruction for the AI describing what processing to perform.
- documentList (list, optional): Document reference(s) in any format to use as input/context.
- resultType (str, optional): Output file extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png, etc.). All output documents will use this format. Default: txt.
"""
try:
# Init progress logger
@ -208,12 +209,33 @@ class MethodAi(MethodBase):
# Complete progress tracking
self.services.workflow.progressLogFinish(operationId, True)
# Create meaningful filename
meaningfulName = self._generateMeaningfulFileName(
base_name="web_research",
extension="json",
action_name="research"
)
# Get meaningful filename from research result (generated by intent analyzer)
suggestedFilename = result.get("suggested_filename")
if suggestedFilename:
# Clean and validate filename
import re
cleaned = suggestedFilename.strip().strip('"\'')
cleaned = cleaned.replace('\n', ' ').replace('\r', ' ').strip()
# Ensure it doesn't already have extension
if cleaned.lower().endswith('.json'):
cleaned = cleaned[:-5]
# Validate: should be reasonable length and contain only safe characters
if cleaned and len(cleaned) <= 60 and re.match(r'^[a-zA-Z0-9_\-]+$', cleaned):
meaningfulName = f"{cleaned}.json"
else:
# Fallback to generic meaningful filename
meaningfulName = self._generateMeaningfulFileName(
base_name="web_research",
extension="json",
action_name="research"
)
else:
# Fallback to generic meaningful filename
meaningfulName = self._generateMeaningfulFileName(
base_name="web_research",
extension="json",
action_name="research"
)
from modules.datamodels.datamodelChat import ActionDocument
actionDocument = ActionDocument(
@ -231,3 +253,504 @@ class MethodAi(MethodBase):
except:
pass
return ActionResult.isFailure(error=str(e))
# ============================================================================
# Document Transformation Wrappers
# ============================================================================
@action
async def summarizeDocument(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Summarize one or more documents, extracting key points and main ideas.
- Input requirements: documentList (required); optional summaryLength, focus.
- Output format: Text document with summary (default: txt, can be overridden with resultType).
Parameters:
- documentList (list, required): Document reference(s) to summarize.
- summaryLength (str, optional): Desired summary length - brief, medium, or detailed. Default: medium.
- focus (str, optional): Specific aspect to focus on in the summary (e.g., "financial data", "key decisions").
- resultType (str, optional): Output file extension (txt, md, docx, etc.). Default: txt.
"""
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
summaryLength = parameters.get("summaryLength", "medium")
focus = parameters.get("focus")
resultType = parameters.get("resultType", "txt")
lengthInstructions = {
"brief": "Create a brief summary (2-3 paragraphs)",
"medium": "Create a medium-length summary (comprehensive but concise)",
"detailed": "Create a detailed summary covering all major points"
}
lengthInstruction = lengthInstructions.get(summaryLength.lower(), lengthInstructions["medium"])
aiPrompt = f"Summarize the provided document(s). {lengthInstruction}."
if focus:
aiPrompt += f" Focus specifically on: {focus}."
aiPrompt += " Extract and present the key points, main ideas, and important information in a clear, well-structured format."
return await self.process({
"aiPrompt": aiPrompt,
"documentList": documentList,
"resultType": resultType
})
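A hypothetical invocation of this wrapper (document references are illustrative):

# Hypothetical call: summarize two documents into a brief markdown summary
result = await methodAi.summarizeDocument({
    "documentList": ["doc-ref-1", "doc-ref-2"],
    "summaryLength": "brief",
    "focus": "financial data",
    "resultType": "md"
})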
@action
async def translateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Translate documents to a target language while preserving formatting and structure.
- Input requirements: documentList (required); targetLanguage (required).
- Output format: Translated document in same format as input (default) or specified resultType.
Parameters:
- documentList (list, required): Document reference(s) to translate.
- targetLanguage (str, required): Target language code or name (e.g., "de", "German", "French", "es").
- sourceLanguage (str, optional): Source language if known (e.g., "en", "English"). If not provided, AI will detect.
- preserveFormatting (bool, optional): Whether to preserve original formatting. Default: True.
- resultType (str, optional): Output file extension. If not specified, uses same format as input.
"""
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
targetLanguage = parameters.get("targetLanguage")
if not targetLanguage:
return ActionResult.isFailure(error="targetLanguage is required")
sourceLanguage = parameters.get("sourceLanguage")
preserveFormatting = parameters.get("preserveFormatting", True)
resultType = parameters.get("resultType")
aiPrompt = f"Translate the provided document(s) to {targetLanguage}."
if sourceLanguage:
aiPrompt += f" The source language is {sourceLanguage}."
if preserveFormatting:
aiPrompt += " Preserve all formatting, structure, tables, and layout exactly as they appear in the original document."
else:
aiPrompt += " Focus on accurate translation of content."
aiPrompt += " Maintain the same document structure, headings, and organization."
processParams = {
"aiPrompt": aiPrompt,
"documentList": documentList
}
if resultType:
processParams["resultType"] = resultType
return await self.process(processParams)
@action
async def convertDocument(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Convert documents between different formats (PDF to Word, Excel to CSV, etc.).
- Input requirements: documentList (required); targetFormat (required).
- Output format: Document in target format.
Parameters:
- documentList (list, required): Document reference(s) to convert.
- targetFormat (str, required): Target format extension (docx, pdf, xlsx, csv, txt, html, json, md, etc.).
- preserveStructure (bool, optional): Whether to preserve document structure (headings, tables, etc.). Default: True.
"""
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
targetFormat = parameters.get("targetFormat")
if not targetFormat:
return ActionResult.isFailure(error="targetFormat is required")
preserveStructure = parameters.get("preserveStructure", True)
# Normalize format (remove leading dot if present)
normalizedFormat = targetFormat.strip().lstrip('.').lower()
aiPrompt = f"Convert the provided document(s) to {normalizedFormat.upper()} format."
if preserveStructure:
aiPrompt += " Preserve all document structure including headings, tables, formatting, lists, and layout."
aiPrompt += " Ensure the converted document maintains the same content and information as the original."
return await self.process({
"aiPrompt": aiPrompt,
"documentList": documentList,
"resultType": normalizedFormat
})
@action
async def extractData(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Extract structured data from documents (key-value pairs, entities, facts, etc.).
- Input requirements: documentList (required); optional dataStructure, fields.
- Output format: JSON by default, or specified resultType.
Parameters:
- documentList (list, required): Document reference(s) to extract data from.
- dataStructure (str, optional): Desired data structure - flat, nested, or list. Default: nested.
- fields (list, optional): Specific fields/properties to extract (e.g., ["name", "date", "amount"]).
- resultType (str, optional): Output format (json, csv, xlsx, etc.). Default: json.
"""
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
dataStructure = parameters.get("dataStructure", "nested")
fields = parameters.get("fields", [])
resultType = parameters.get("resultType", "json")
aiPrompt = "Extract structured data from the provided document(s)."
if fields:
fieldsStr = ", ".join(fields)
aiPrompt += f" Extract the following specific fields: {fieldsStr}."
else:
aiPrompt += " Extract all relevant data including names, dates, amounts, entities, and key information."
structureInstructions = {
"flat": "Use a flat key-value structure with simple properties.",
"nested": "Use a nested JSON structure with logical grouping of related data.",
"list": "Structure the data as a list/array of objects, one per entity or record."
}
aiPrompt += f" {structureInstructions.get(dataStructure.lower(), structureInstructions['nested'])}"
aiPrompt += " Ensure all extracted data is accurate and complete."
return await self.process({
"aiPrompt": aiPrompt,
"documentList": documentList,
"resultType": resultType
})
@action
async def extractTables(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Extract tables from documents, preserving structure and data.
- Input requirements: documentList (required); optional tableFormat.
- Output format: JSON by default (structured table data), or CSV/XLSX if specified.
Parameters:
- documentList (list, required): Document reference(s) to extract tables from.
- tableFormat (str, optional): Output format for tables - json, csv, or xlsx. Default: json.
- includeHeaders (bool, optional): Include table headers. Default: True.
"""
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
tableFormat = parameters.get("tableFormat", "json")
includeHeaders = parameters.get("includeHeaders", True)
# Map tableFormat to resultType
formatMap = {
"json": "json",
"csv": "csv",
"xlsx": "xlsx",
"xls": "xlsx"
}
resultType = formatMap.get(tableFormat.lower(), "json")
aiPrompt = "Extract all tables from the provided document(s)."
if includeHeaders:
aiPrompt += " Include table headers and preserve the table structure."
else:
aiPrompt += " Extract table data without headers."
aiPrompt += " Maintain accurate data types (numbers as numbers, dates as dates, etc.) and preserve all table relationships."
if resultType == "json":
aiPrompt += " Structure each table as a JSON object with headers and rows as arrays."
elif resultType == "csv":
aiPrompt += " Output each table as CSV format with proper comma separation."
elif resultType == "xlsx":
aiPrompt += " Structure the output as an Excel spreadsheet with tables properly formatted."
return await self.process({
"aiPrompt": aiPrompt,
"documentList": documentList,
"resultType": resultType
})
# ============================================================================
# Content Generation Wrappers
# ============================================================================
@action
async def generateReport(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Generate comprehensive reports from input documents/data with analysis and insights.
- Input requirements: documentList (optional, can generate from scratch); optional reportType, sections.
- Output format: Document in specified format (default: docx).
Parameters:
- documentList (list, optional): Input documents/data to base the report on.
- reportType (str, optional): Type of report - summary, analysis, executive, detailed. Default: analysis.
- sections (list, optional): Specific sections to include (e.g., ["introduction", "findings", "recommendations"]).
- title (str, optional): Report title.
- resultType (str, optional): Output format (docx, pdf, md, etc.). Default: docx.
"""
documentList = parameters.get("documentList", [])
reportType = parameters.get("reportType", "analysis")
sections = parameters.get("sections", [])
title = parameters.get("title")
resultType = parameters.get("resultType", "docx")
reportTypeInstructions = {
"summary": "Create a summary report with key highlights and main points.",
"analysis": "Create an analytical report with insights, findings, and detailed examination.",
"executive": "Create an executive summary report suitable for senior management with key insights and recommendations.",
"detailed": "Create a comprehensive detailed report covering all aspects with in-depth analysis."
}
aiPrompt = f"Generate a {reportType} report."
if title:
aiPrompt += f" Title: {title}."
aiPrompt += f" {reportTypeInstructions.get(reportType.lower(), reportTypeInstructions['analysis'])}"
if sections:
sectionsStr = ", ".join(sections)
aiPrompt += f" Include the following sections: {sectionsStr}."
else:
aiPrompt += " Include standard report sections such as introduction, main content, analysis, findings, and conclusions."
if documentList:
aiPrompt += " Base the report on the provided input documents, analyzing and synthesizing the information."
else:
aiPrompt += " Create a professional, well-structured report."
processParams = {
"aiPrompt": aiPrompt,
"resultType": resultType
}
if documentList:
processParams["documentList"] = documentList
return await self.process(processParams)
@action
async def generateChart(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Generate charts/graphs from data in documents or structured data.
- Input requirements: documentList (required); optional chartType, title, labels.
- Output format: Image (png or jpg).
Parameters:
- documentList (list, required): Documents containing data to visualize (CSV, Excel, JSON, etc.).
- chartType (str, optional): Type of chart - bar, line, pie, scatter, area, etc. Default: bar.
- title (str, optional): Chart title.
- xAxisLabel (str, optional): X-axis label.
- yAxisLabel (str, optional): Y-axis label.
- resultType (str, optional): Image format (png or jpg). Default: png.
"""
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
chartType = parameters.get("chartType", "bar")
title = parameters.get("title")
xAxisLabel = parameters.get("xAxisLabel")
yAxisLabel = parameters.get("yAxisLabel")
resultType = parameters.get("resultType", "png")
# Ensure resultType is an image format
if resultType.lower() not in ["png", "jpg", "jpeg"]:
resultType = "png"
aiPrompt = f"Generate a {chartType} chart from the provided data."
if title:
aiPrompt += f" Chart title: {title}."
if xAxisLabel:
aiPrompt += f" X-axis label: {xAxisLabel}."
if yAxisLabel:
aiPrompt += f" Y-axis label: {yAxisLabel}."
aiPrompt += " Create a clear, professional chart with appropriate labels, legends, and formatting. Ensure the chart is visually appealing and easy to read."
return await self.process({
"aiPrompt": aiPrompt,
"documentList": documentList,
"resultType": resultType
})
@action
async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Generate documents from scratch or based on templates/inputs.
- Input requirements: prompt or description (required); optional documentList (for templates/references).
- Output format: Document in specified format (default: docx).
Parameters:
- prompt (str, required): Description of the document to generate.
- documentList (list, optional): Template documents or reference documents to use as a guide.
- documentType (str, optional): Type of document - letter, memo, proposal, contract, etc.
- resultType (str, optional): Output format (docx, pdf, txt, md, etc.). Default: docx.
"""
prompt = parameters.get("prompt")
if not prompt:
return ActionResult.isFailure(error="prompt is required")
documentList = parameters.get("documentList", [])
documentType = parameters.get("documentType")
resultType = parameters.get("resultType", "docx")
aiPrompt = f"Generate a document based on the following requirements: {prompt}"
if documentType:
aiPrompt += f" Document type: {documentType}."
if documentList:
aiPrompt += " Use the provided template/reference documents as a guide for structure, format, and style."
aiPrompt += " Create a professional, well-structured document with appropriate formatting and organization."
processParams = {
"aiPrompt": aiPrompt,
"resultType": resultType
}
if documentList:
processParams["documentList"] = documentList
return await self.process(processParams)
# ============================================================================
# Analysis & Comparison Wrappers
# ============================================================================
@action
async def analyzeDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Analyze documents and find insights, patterns, trends, and key information.
- Input requirements: documentList (required); optional analysisType, focus.
- Output format: Analysis report in specified format (default: txt).
Parameters:
- documentList (list, required): Document(s) to analyze.
- analysisType (str, optional): Type of analysis - general, financial, technical, sentiment, etc. Default: general.
- focus (str, optional): Specific aspect to focus on (e.g., "trends", "risks", "opportunities").
- resultType (str, optional): Output format (txt, md, docx, json, etc.). Default: txt.
"""
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
analysisType = parameters.get("analysisType", "general")
focus = parameters.get("focus")
resultType = parameters.get("resultType", "txt")
aiPrompt = f"Analyze the provided document(s) and find insights, patterns, and key information."
aiPrompt += f" Perform a {analysisType} analysis."
if focus:
aiPrompt += f" Focus specifically on: {focus}."
aiPrompt += " Identify trends, important findings, relationships, and provide actionable insights. Present the analysis in a clear, structured format."
return await self.process({
"aiPrompt": aiPrompt,
"documentList": documentList,
"resultType": resultType
})
@action
async def compareDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Compare multiple documents and identify differences, similarities, and changes.
- Input requirements: documentList (required, should contain 2+ documents); optional comparisonType, focus.
- Output format: Comparison report in specified format (default: txt).
Parameters:
- documentList (list, required): Two or more documents to compare.
- comparisonType (str, optional): Type of comparison - differences, similarities, changes, full. Default: full.
- focus (str, optional): Specific aspect to focus on (e.g., "content", "structure", "data", "formatting").
- resultType (str, optional): Output format (txt, md, docx, json, etc.). Default: txt.
"""
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
if isinstance(documentList, str):
documentList = [documentList]
if len(documentList) < 2:
return ActionResult.isFailure(error="At least 2 documents are required for comparison")
comparisonType = parameters.get("comparisonType", "full")
focus = parameters.get("focus")
resultType = parameters.get("resultType", "txt")
comparisonInstructions = {
"differences": "Focus on identifying and highlighting all differences between the documents.",
"similarities": "Focus on identifying commonalities, shared content, and similarities.",
"changes": "Identify what has changed between versions, what was added, removed, or modified.",
"full": "Provide a comprehensive comparison including both differences and similarities."
}
aiPrompt = f"Compare the provided documents."
aiPrompt += f" {comparisonInstructions.get(comparisonType.lower(), comparisonInstructions['full'])}"
if focus:
aiPrompt += f" Focus specifically on: {focus}."
aiPrompt += " Present the comparison in a clear, structured format that makes differences and similarities easy to understand."
return await self.process({
"aiPrompt": aiPrompt,
"documentList": documentList,
"resultType": resultType
})
@action
async def validateData(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Validate data quality, structure, completeness, and correctness in documents/data files.
- Input requirements: documentList (required); optional validationRules, schema.
- Output format: Validation report in JSON or text format (default: json).
Parameters:
- documentList (list, required): Documents/data files to validate.
- validationRules (list, optional): Specific validation rules to check (e.g., ["required_fields", "data_types", "ranges"]).
- schema (dict, optional): Expected data schema/structure to validate against.
- resultType (str, optional): Output format (json, txt, md, etc.). Default: json.
"""
documentList = parameters.get("documentList", [])
if not documentList:
return ActionResult.isFailure(error="documentList is required")
validationRules = parameters.get("validationRules", [])
schema = parameters.get("schema")
resultType = parameters.get("resultType", "json")
aiPrompt = "Validate the data quality, structure, completeness, and correctness in the provided documents."
if validationRules:
rulesStr = ", ".join(validationRules)
aiPrompt += f" Apply the following validation rules: {rulesStr}."
else:
aiPrompt += " Check for data completeness, correct data types, required fields, data consistency, and any anomalies or errors."
if schema:
import json
schemaStr = json.dumps(schema, indent=2)
aiPrompt += f" Validate against the following expected schema: {schemaStr}."
if resultType == "json":
aiPrompt += " Provide the validation results as structured JSON with validation status, errors, warnings, and details for each check."
else:
aiPrompt += " Provide a detailed validation report listing all findings, errors, warnings, and pass/fail status for each validation check."
return await self.process({
"aiPrompt": aiPrompt,
"documentList": documentList,
"resultType": resultType
})
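A hypothetical invocation with an expected schema (all values are illustrative):

# Hypothetical call: validate a spreadsheet against a simple schema
result = await methodAi.validateData({
    "documentList": ["invoices.xlsx"],
    "validationRules": ["required_fields", "data_types"],
    "schema": {"invoiceId": "string", "amount": "number", "date": "date"},
    "resultType": "json"
})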

View file

@ -361,11 +361,11 @@ class MethodSharepoint(MethodBase):
async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
"""Make a Microsoft Graph API call with timeout and detailed logging"""
try:
if not hasattr(self.services, 'sharepoint') or not self.services.sharepoint._target.access_token:
if not hasattr(self.services, 'sharepoint') or not self.services.sharepoint._target.accessToken:
return {"error": "SharePoint service not configured with access token"}
headers = {
"Authorization": f"Bearer {self.services.sharepoint._target.access_token}",
"Authorization": f"Bearer {self.services.sharepoint._target.accessToken}",
"Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
}
@ -1019,7 +1019,7 @@ class MethodSharepoint(MethodBase):
# For content download, we need to handle binary data
try:
async with aiohttp.ClientSession() as session:
headers = {"Authorization": f"Bearer {self.services.sharepoint._target.access_token}"}
headers = {"Authorization": f"Bearer {self.services.sharepoint._target.accessToken}"}
async with session.get(f"https://graph.microsoft.com/v1.0/{content_endpoint}", headers=headers) as response:
if response.status == 200:
content = await response.text()
@ -1096,7 +1096,7 @@ class MethodSharepoint(MethodBase):
async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: Upload documents to SharePoint.
- Purpose: Upload documents to SharePoint. Only choose this action when a connectionReference is available.
- Input requirements: connectionReference (required); documentList (required); fileNames (required); optional pathObject or pathQuery.
- Output format: JSON with upload status and file info.

View file

@ -22,15 +22,16 @@ class ContentValidator:
self.services = services
self.learningEngine = learningEngine
async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None) -> Dict[str, Any]:
async def validateContent(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None) -> Dict[str, Any]:
"""Validates delivered content against user intent using AI (single attempt; parse-or-fail)
Args:
documents: List of documents to validate
intent: Workflow-level intent dict (for format requirements)
taskStep: Optional TaskStep object (preferred source for objective)
actionName: Optional action name (e.g., "ai.process", "ai.webResearch") that created the documents
"""
return await self._validateWithAI(documents, intent, taskStep)
return await self._validateWithAI(documents, intent, taskStep, actionName)
def _analyzeDocuments(self, documents: List[Any]) -> List[Dict[str, Any]]:
"""Generic document analysis - create simple summaries with metadata."""
@ -248,7 +249,7 @@ class ContentValidator:
return False
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None) -> Dict[str, Any]:
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any], taskStep: Optional[Any] = None, actionName: Optional[str] = None) -> Dict[str, Any]:
"""AI-based comprehensive validation - generic approach"""
try:
if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
@ -295,21 +296,26 @@ class ContentValidator:
successCriteria = intent.get('successCriteria', [])
criteriaCount = len(successCriteria)
# Build action name context
actionContext = ""
if actionName:
actionContext = f"\nACTION THAT CREATED DOCUMENTS: {actionName}"
promptBase = f"""TASK VALIDATION
{objectiveLabel}: '{objectiveText}'
EXPECTED DATA TYPE: {dataType}
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}
SUCCESS CRITERIA ({criteriaCount} items): {successCriteria}
SUCCESS CRITERIA ({criteriaCount} items): {successCriteria}{actionContext}
VALIDATION RULES:
IMPORTANT: You only have document METADATA (filename, format, size, mimeType) - NOT document content.
Validate based on metadata only:
1. Check if filenames are meaningful and match approximately the task objective
1. Check if filenames are APPROXIMATELY meaningful (generic names like "generated.docx" are acceptable if format matches)
2. Check if delivered formats are compatible with expected format
3. Check if document sizes are reasonable for the task objective
4. Assess if filename and size combination suggests correct data type
5. Rate overall quality (0.0-1.0) based on metadata indicators
5. Rate overall quality (0.0-1.0) based on metadata indicators, with format matching being the most important
6. Identify specific gaps based on what the user requested (infer from filename, size, format - NOT content)
OUTPUT FORMAT - JSON ONLY (no prose):
@ -319,7 +325,7 @@ OUTPUT FORMAT - JSON ONLY (no prose):
"dataTypeMatch": false,
"formatMatch": false,
"documentCount": {len(documents)},
"successCriteriaMet": {[False] * criteriaCount},
"successCriteriaMet": {"[false]" * criteriaCount},
"gapAnalysis": "Describe what is missing or incorrect based on filename, size, format metadata",
"improvementSuggestions": ["General action to improve overall result"],
"validationDetails": [
@ -354,80 +360,77 @@ DELIVERED DOCUMENTS ({len(documents)} items):
# Call AI service for validation
response = await self.services.ai.callAiPlanning(
prompt=validationPrompt,
placeholders=None
placeholders=None,
debugType="contentvalidation"
)
if not response or not response.strip():
logger.warning("AI validation returned empty response")
raise ValueError("AI validation failed - empty response")
# Clean and extract JSON from response
# Clean and extract JSON from response using proper JSON extraction utility
# This handles nested structures and markdown code blocks correctly
result = response.strip()
logger.debug(f"AI validation response length: {len(result)}")
# Try to find JSON in the response with multiple strategies
# Strategy 1: Look for JSON in markdown code blocks
json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', result, re.DOTALL)
if json_match:
result = json_match.group(1)
logger.debug(f"Extracted JSON from markdown code block: {result[:200]}...")
else:
# Strategy 2: Look for JSON object with proper structure
json_match = re.search(r'\{[^{}]*"overallSuccess"[^{}]*\}', result, re.DOTALL)
if not json_match:
# Strategy 3: Look for any JSON object
json_match = re.search(r'\{.*\}', result, re.DOTALL)
if json_match:
result = json_match.group(0)
logger.debug(f"Extracted JSON directly: {result[:200]}...")
else:
logger.debug(f"No JSON found in AI response: {result[:200]}...")
logger.debug(f"Full AI response: {result}")
raise ValueError("AI validation failed - no JSON in response")
# Extract JSON first
extractedJson = self.services.utils.jsonExtractString(result)
if not extractedJson:
logger.debug(f"No JSON found in AI response: {result[:200]}...")
logger.debug(f"Full AI response: {result}")
raise ValueError("AI validation failed - no JSON in response")
# Proactively fix Python-style booleans (False/True -> false/true) BEFORE parsing
# This handles booleans in any context: standalone, in lists, in dicts, etc.
import re
# Use word boundaries but also handle cases where booleans are in brackets/arrays
# Replace False/True regardless of context (word boundary handles string matching correctly)
normalizedJson = re.sub(r'\bFalse\b', 'false', extractedJson)
normalizedJson = re.sub(r'\bTrue\b', 'true', normalizedJson)
logger.debug(f"Extracted JSON (before normalization): {extractedJson[:200]}...")
logger.debug(f"Normalized JSON (after boolean fix): {normalizedJson[:200]}...")
# Now try to parse the normalized JSON
try:
aiResult = json.loads(result)
aiResult = json.loads(normalizedJson)
logger.info("AI validation JSON parsed successfully")
overall = aiResult.get("overallSuccess")
quality = aiResult.get("qualityScore")
details = aiResult.get("validationDetails")
gap = aiResult.get("gapAnalysis", "")
criteria = aiResult.get("successCriteriaMet")
improvements = aiResult.get("improvementSuggestions", [])
# Normalize while keeping failures explicit
normalized = {
"overallSuccess": overall if isinstance(overall, bool) else None,
"qualityScore": float(quality) if isinstance(quality, (int, float)) else None,
"documentCount": len(documentSummaries),
"validationDetails": details if isinstance(details, list) else [{
"documentName": "AI Validation",
"gapAnalysis": gap,
"successCriteriaMet": criteria if isinstance(criteria, list) else []
}],
"improvementSuggestions": improvements,
"schemaCompliant": True,
"originalType": "json",
"missingFields": []
}
if normalized["overallSuccess"] is None:
normalized["missingFields"].append("overallSuccess")
if normalized["qualityScore"] is None:
normalized["missingFields"].append("qualityScore")
if normalized["missingFields"]:
normalized["schemaCompliant"] = False
return normalized
except json.JSONDecodeError as json_error:
logger.warning(f"AI validation invalid JSON: {str(json_error)}")
logger.debug(f"JSON content: {result}")
raise
logger.warning(f"AI validation invalid JSON after normalization: {str(json_error)}")
logger.debug(f"JSON content that failed: {normalizedJson[:500]}...")
raise ValueError(f"AI validation failed - invalid JSON: {str(json_error)}")
raise ValueError("AI validation failed - no response")
overall = aiResult.get("overallSuccess")
quality = aiResult.get("qualityScore")
details = aiResult.get("validationDetails")
gap = aiResult.get("gapAnalysis", "")
criteria = aiResult.get("successCriteriaMet")
improvements = aiResult.get("improvementSuggestions", [])
# Normalize while keeping failures explicit
normalized = {
"overallSuccess": overall if isinstance(overall, bool) else None,
"qualityScore": float(quality) if isinstance(quality, (int, float)) else None,
"documentCount": len(documentSummaries),
"validationDetails": details if isinstance(details, list) else [{
"documentName": "AI Validation",
"gapAnalysis": gap,
"successCriteriaMet": criteria if isinstance(criteria, list) else []
}],
"improvementSuggestions": improvements,
"schemaCompliant": True,
"originalType": "json",
"missingFields": []
}
if normalized["overallSuccess"] is None:
normalized["missingFields"].append("overallSuccess")
if normalized["qualityScore"] is None:
normalized["missingFields"].append("qualityScore")
if normalized["missingFields"]:
normalized["schemaCompliant"] = False
return normalized
except Exception as e:
logger.error(f"AI validation failed: {str(e)}")
@ -76,7 +76,8 @@ CRITICAL: Respond with ONLY the JSON object below. Do not include any explanator
# Call AI service for analysis
response = await self.services.ai.callAiPlanning(
prompt=analysisPrompt,
placeholders=None
placeholders=None,
debugType="intentanalysis"
)
# No retries or correction prompts here; parse-or-fail below
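The debugType argument threaded through this and the following call sites is only a per-stage label; what callAiPlanning does with it is not shown in this diff. One plausible use, sketched under that assumption (the helper and file layout are hypothetical):

from datetime import datetime, timezone
from pathlib import Path

def dumpAiExchange(debugType: str, prompt: str, response: str, debugDir: str = "debug") -> None:
    # Hypothetical helper: one dump file per planning stage,
    # e.g. debug/intentanalysis_20251103T225120Z.txt
    stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    path = Path(debugDir) / f"{debugType}_{stamp}.txt"
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(f"PROMPT:\n{prompt}\n\nRESPONSE:\n{response}\n", encoding="utf-8")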
@ -141,14 +141,23 @@ class MessageCreator:
logger.info(f"Action message round number extraction: resultLabel='{resultLabel}', extractedRound={currentRound}, workflowRound={workflowContext.get('currentRound', 0)}")
# Build a user-friendly message based on success/failure
# Use userMessage from action if available (in user's language), otherwise fall back to task objective
userFriendlyText = None
if hasattr(action, 'userMessage') and action.userMessage:
userFriendlyText = action.userMessage
elif taskStep and hasattr(taskStep, 'userMessage') and taskStep.userMessage:
userFriendlyText = taskStep.userMessage
else:
userFriendlyText = taskObjective
if result.success:
messageText = f"**Action {currentAction} ({action.execMethod}.{action.execAction})**\n\n"
messageText += f"{taskObjective}\n\n"
messageText += f"{userFriendlyText}\n\n"
else:
# ⚠️ FAILURE MESSAGE - Show error details to user
errorDetails = result.error if result.error else "Unknown error occurred"
messageText = f"**Action {currentAction} ({action.execMethod}.{action.execAction})**\n\n"
messageText += f"{taskObjective}\n\n"
messageText += f"{userFriendlyText}\n\n"
messageText += f"{errorDetails}\n\n"
# Build concise summary to persist for history context
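The userMessage fallback chain above is a pattern worth naming; a sketch of the same logic as a helper (firstNonEmpty is not in the codebase):

def firstNonEmpty(*candidates):
    # Return the first truthy candidate, mirroring the chain
    # action.userMessage -> taskStep.userMessage -> taskObjective
    for value in candidates:
        if value:
            return value
    return None

# usage sketch:
# userFriendlyText = firstNonEmpty(
#     getattr(action, 'userMessage', None),
#     getattr(taskStep, 'userMessage', None) if taskStep else None,
#     taskObjective,
# )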
@ -112,7 +112,8 @@ class TaskPlanner:
prompt = await self.services.ai.callAiPlanning(
prompt=taskPlanningPromptTemplate,
placeholders=placeholders
placeholders=placeholders,
debugType="taskplan"
)
# Check if AI response is valid
@ -145,7 +145,11 @@ class ActionplanMode(BaseMode):
maxProcessingTime=30
)
prompt = await self.services.ai.callAiPlanning(prompt=actionPromptTemplate, placeholders=placeholders)
prompt = await self.services.ai.callAiPlanning(
prompt=actionPromptTemplate,
placeholders=placeholders,
debugType="actionplan"
)
# Check if AI response is valid
if not prompt:
@ -338,7 +342,9 @@ class ActionplanMode(BaseMode):
if getattr(self, 'workflowIntent', None) and result.documents:
# Pass ALL documents to validator - validator decides what to validate (generic approach)
# Pass taskStep so validator can use task.objective and format fields
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep)
# Pass action name so validator knows which action created the documents
actionName = f"{action.execMethod}.{action.execAction}"
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName)
qualityScore = validationResult.get('qualityScore', 0.0)
if qualityScore is None:
qualityScore = 0.0
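The two-step guard above is needed because dict.get's default only covers a missing key, not an explicit null returned by the AI. The same guard as a reusable sketch (safeScore is an assumed name):

def safeScore(validation: dict, key: str = "qualityScore") -> float:
    # .get's default handles a missing key; the isinstance check also
    # catches an explicit None (JSON null) or a non-numeric value
    value = validation.get(key, 0.0)
    return float(value) if isinstance(value, (int, float)) else 0.0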
@ -528,7 +534,11 @@ class ActionplanMode(BaseMode):
maxProcessingTime=30
)
response = await self.services.ai.callAiPlanning(prompt=promptTemplate, placeholders=placeholders)
response = await self.services.ai.callAiPlanning(
prompt=promptTemplate,
placeholders=placeholders,
debugType="resultreview"
)
# Log result review response received
logger.info("=== RESULT REVIEW AI RESPONSE RECEIVED ===")
@ -108,7 +108,9 @@ class DynamicMode(BaseMode):
if getattr(self, 'workflowIntent', None) and result.documents:
# Pass ALL documents to validator - validator decides what to validate (generic approach)
# Pass taskStep so validator can use task.objective and format fields
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep)
# Pass action name so validator knows which action created the documents
actionName = selection.get('action', 'unknown')
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent, taskStep, actionName)
observation.contentValidation = validationResult
quality_score = validationResult.get('qualityScore', 0.0)
if quality_score is None:
@ -217,7 +219,8 @@ class DynamicMode(BaseMode):
)
response = await self.services.ai.callAiPlanning(
prompt=promptTemplate,
placeholders=placeholders
placeholders=placeholders,
debugType="actionplan"
)
jsonStart = response.find('{') if response else -1
jsonEnd = response.rfind('}') + 1 if response else 0
@ -326,7 +329,8 @@ class DynamicMode(BaseMode):
)
paramsResp = await self.services.ai.callAiPlanning(
prompt=promptTemplate,
placeholders=placeholders
placeholders=placeholders,
debugType="paramplan"
)
# Parse JSON response
js = paramsResp[paramsResp.find('{'):paramsResp.rfind('}')+1] if paramsResp else '{}'
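This first-brace-to-last-brace slicing is fine for a single top-level object but has a sharp edge worth keeping in mind; a sketch that makes the behavior explicit (sliceJsonObject is illustrative):

def sliceJsonObject(text: str) -> str:
    # Slice from the first '{' to the last '}'. Works when the response
    # contains exactly one object; if two objects appear, everything
    # between them (including prose) ends up inside the slice.
    if not text:
        return '{}'
    start, end = text.find('{'), text.rfind('}') + 1
    return text[start:end] if start != -1 and end > start else '{}'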
@ -658,7 +662,8 @@ class DynamicMode(BaseMode):
)
resp = await self.services.ai.callAiPlanning(
prompt=promptTemplate,
placeholders=placeholders
placeholders=placeholders,
debugType="refinement"
)
# More robust JSON extraction
@ -773,7 +778,8 @@ Return only the user-friendly message, no technical details."""
# Call AI to generate user-friendly message
response = await self.services.ai.callAiPlanning(
prompt=prompt,
placeholders=None
placeholders=None,
debugType="userfriendlymessage"
)
return response.strip() if response else f"Executing {method}.{actionName} action..."
@ -807,7 +813,8 @@ Return only the user-friendly message, no technical details."""
# Call AI to generate user-friendly result message
response = await self.services.ai.callAiPlanning(
prompt=prompt,
placeholders=None
placeholders=None,
debugType="userfriendlyresult"
)
return response.strip() if response else f"{method}.{actionName} action completed"
@ -10,6 +10,8 @@ NAMING CONVENTION:
MAPPING TABLE (keys function) with usage [taskplan | actionplan | dynamic]:
{{KEY:USER_PROMPT}} -> extractUserPrompt() [taskplan, actionplan, dynamic]
{{KEY:OVERALL_TASK_CONTEXT}} -> extractOverallTaskContext() [dynamic]
{{KEY:TASK_OBJECTIVE}} -> extractTaskObjective() [dynamic]
{{KEY:USER_LANGUAGE}} -> extractUserLanguage() [actionplan, dynamic]
{{KEY:LANGUAGE_USER_DETECTED}} -> extractLanguageUserDetected() [taskplan]
{{KEY:WORKFLOW_HISTORY}} -> extractWorkflowHistory() [taskplan, actionplan, dynamic]
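A minimal sketch of how {{KEY:...}} markers like these could be resolved once each extractor has produced its content (the resolver below is illustrative, not the project's actual substitution code):

import re

def resolvePlaceholders(template: str, values: dict) -> str:
    # Replace each {{KEY:NAME}} with its extracted content;
    # unknown keys are left in place so they stay visible for debugging
    return re.sub(
        r'\{\{KEY:([A-Z_]+)\}\}',
        lambda m: str(values.get(m.group(1), m.group(0))),
        template,
    )

print(resolvePlaceholders(
    "OBJECTIVE:\n{{KEY:TASK_OBJECTIVE}}",
    {"TASK_OBJECTIVE": "Generate the report"},
))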
@ -36,6 +38,57 @@ from typing import Dict, Any, List
logger = logging.getLogger(__name__)
from modules.workflows.processing.shared.methodDiscovery import (methods, discoverMethods)
def extractOverallTaskContext(service: Any, context: Any) -> str:
"""Extract the original normalized user request (overall task context). Maps to {{KEY:OVERALL_TASK_CONTEXT}}.
Always returns the original user request, not the task objective.
"""
try:
# Always prefer the normalized user prompt from services (original request)
if service:
# Prefer normalized version if available
normalized = getattr(service, 'currentUserPromptNormalized', None)
if normalized:
return normalized
# Fallback to currentUserPrompt (original request)
currentPrompt = getattr(service, 'currentUserPrompt', None)
if currentPrompt:
return currentPrompt
# If no services available, try to get from workflow's first message
if hasattr(context, 'workflow') and context.workflow:
messages = getattr(context.workflow, 'messages', []) or []
if messages:
firstMessage = messages[0]
msgContent = getattr(firstMessage, 'message', None) or ''
if msgContent:
return msgContent
return 'No overall task context available'
except Exception:
return 'No overall task context available'
def extractTaskObjective(context: Any) -> str:
"""Extract the task objective from taskStep. Maps to {{KEY:TASK_OBJECTIVE}}.
Returns the specific task objective, not the overall user request.
"""
try:
if hasattr(context, 'taskStep') and context.taskStep:
objective = getattr(context.taskStep, 'objective', None)
if objective:
return objective
# Fallback: try to get from services
services = getattr(context, 'services', None)
if services:
currentPrompt = getattr(services, 'currentUserPrompt', None)
if currentPrompt:
return currentPrompt
return 'No task objective specified'
except Exception:
return 'No task objective specified'
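# Quick illustration of the split between the two extractors above, using
# stand-in objects (all names here are hypothetical):
#
#   class _Svc: currentUserPromptNormalized = "Create a Q3 report and email it"
#   class _Step: objective = "Generate the Q3 report as DOCX"
#   class _Ctx: taskStep = _Step(); services = None
#
#   extractOverallTaskContext(_Svc(), _Ctx())  # -> the full original request
#   extractTaskObjective(_Ctx())               # -> this step's objective only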
def extractUserPrompt(context: Any) -> str:
"""Extract user prompt from context. Maps to {{KEY:USER_PROMPT}}.
Prefer the cleaned intent stored on the services object if available via context.
@ -17,12 +17,16 @@ from modules.workflows.processing.shared.placeholderFactory import (
extractLearningsAndImprovements,
extractLatestRefinementFeedback,
extractWorkflowHistory,
extractOverallTaskContext,
extractTaskObjective,
)
from modules.workflows.processing.shared.methodDiscovery import methods, getActionParameterList
def generateDynamicPlanSelectionPrompt(services, context: Any, learningEngine=None) -> PromptBundle:
"""Define placeholders first, then the template; return PromptBundle."""
placeholders: List[PromptPlaceholder] = [
PromptPlaceholder(label="OVERALL_TASK_CONTEXT", content=extractOverallTaskContext(services, context), summaryAllowed=False),
PromptPlaceholder(label="TASK_OBJECTIVE", content=extractTaskObjective(context), summaryAllowed=False),
PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
PromptPlaceholder(label="USER_LANGUAGE", content=extractUserLanguage(services), summaryAllowed=False),
PromptPlaceholder(label="AVAILABLE_DOCUMENTS_SUMMARY", content=extractAvailableDocumentsSummary(services, context), summaryAllowed=True),
@ -51,8 +55,11 @@ def generateDynamicPlanSelectionPrompt(services, context: Any, learningEngine=No
template = """Select exactly one next action to advance the task incrementally.
OVERALL TASK CONTEXT:
{{KEY:OVERALL_TASK_CONTEXT}}
OBJECTIVE:
{{KEY:USER_PROMPT}}
{{KEY:TASK_OBJECTIVE}}
AVAILABLE_DOCUMENTS_SUMMARY:
{{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}
@ -188,6 +195,7 @@ Excludes documents/connections/history entirely.
learningsText = ""
placeholders: List[PromptPlaceholder] = [
PromptPlaceholder(label="OVERALL_TASK_CONTEXT", content=extractOverallTaskContext(services, context), summaryAllowed=False),
PromptPlaceholder(label="ACTION_OBJECTIVE", content=actionObjective, summaryAllowed=False),
PromptPlaceholder(label="SELECTED_ACTION", content=compoundActionName, summaryAllowed=False),
PromptPlaceholder(label="USER_LANGUAGE", content=extractUserLanguage(services), summaryAllowed=False),
@ -211,7 +219,12 @@ Excludes documents/connections/history entirely.
template = """You are a parameter generator. Set the parameters for this specific action.
CONTEXT AND OBJECTIVE:
OVERALL TASK CONTEXT:
-----------------
{{KEY:OVERALL_TASK_CONTEXT}}
-----------------
THIS ACTION'S SPECIFIC OBJECTIVE:
-----------------
{{KEY:ACTION_OBJECTIVE}}
-----------------
@ -249,7 +262,8 @@ REQUIRED PARAMETERS FOR THIS ACTION (use these exact parameter names):
INSTRUCTIONS:
- Use ONLY the parameter names listed in section REQUIRED PARAMETERS FOR THIS ACTION
- Fill in appropriate values based on the context and objective
- Fill in appropriate values based on the OVERALL TASK CONTEXT and THIS ACTION'S SPECIFIC OBJECTIVE
- Consider the overall task context when setting parameter values to ensure they align with the complete user request
- Do NOT invent new parameters
- Do NOT include: documentList, connectionReference, history, documents, connections
- CRITICAL: Follow the learning-based parameter guidance above
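One way to enforce these rules after parsing the model's reply, assuming the allowed names come from getActionParameterList (the filter itself is a sketch, not existing code):

def filterToAllowedParams(generated: dict, allowedNames: list) -> dict:
    # Keep only parameters the action declares; drop anything the model
    # invented, plus the context objects the template forbids outright
    forbidden = {"documentList", "connectionReference", "history", "documents", "connections"}
    return {
        name: value
        for name, value in generated.items()
        if name in allowedNames and name not in forbidden
    }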
@ -218,7 +218,11 @@ class WorkflowManager:
)
# Call AI analyzer (planning call - will use static parameters)
aiResponse = await self.services.ai.callAiPlanning(prompt=analyzerPrompt, placeholders=None)
aiResponse = await self.services.ai.callAiPlanning(
prompt=analyzerPrompt,
placeholders=None,
debugType="userintention"
)
detectedLanguage = None
normalizedRequest = None