Merge branch 'int' into feat/chatbot

This commit is contained in:
Christopher Gondek 2025-10-09 10:36:22 +02:00
commit 478e139730
95 changed files with 12300 additions and 7373 deletions

3
.gitignore vendored
View file

@ -167,4 +167,5 @@ cython_debug/
# local data # local data
gwserver/_database* gwserver/_database*
gwserver/results/* gwserver/results/*
*.log.* *.log.*
test-chat

View file

@ -64,12 +64,12 @@ Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
Connector_AiAnthropic_TEMPERATURE = 0.2 Connector_AiAnthropic_TEMPERATURE = 0.2
Connector_AiAnthropic_MAX_TOKENS = 2000 Connector_AiAnthropic_MAX_TOKENS = 2000
# LangDoc configuration # Perplexity AI configuration
Connector_AiLangdoc_API_URL = https://api.langdock.com/v1/chat/completions Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
Connector_AiLangdoc_API_SECRET = sk-9KaNH1FfEx7SkTijsFpXeTIc9_xOmoo7e0hW6SqrYavFq_bgjcULa7PXp3kWQpp4gfk8-U0B4L91CP6YpAJxZg Connector_AiPerplexity_API_SECRET = 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
Connector_AiLangdoc_MODEL_NAME = gpt-4o Connector_AiPerplexity_MODEL_NAME = sonar
Connector_AiLangdoc_TEMPERATURE = 0.2 Connector_AiPerplexity_TEMPERATURE = 0.2
Connector_AiLangdoc_MAX_TOKENS = 2000 Connector_AiPerplexity_MAX_TOKENS = 2000
# Agent Mail configuration # Agent Mail configuration
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c

View file

@ -64,12 +64,12 @@ Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
Connector_AiAnthropic_TEMPERATURE = 0.2 Connector_AiAnthropic_TEMPERATURE = 0.2
Connector_AiAnthropic_MAX_TOKENS = 2000 Connector_AiAnthropic_MAX_TOKENS = 2000
# LangDoc configuration # Perplexity AI configuration
Connector_AiLangdoc_API_URL = https://api.langdock.com/v1/chat/completions Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
Connector_AiLangdoc_API_SECRET = sk-9KaNH1FfEx7SkTijsFpXeTIc9_xOmoo7e0hW6SqrYavFq_bgjcULa7PXp3kWQpp4gfk8-U0B4L91CP6YpAJxZg Connector_AiPerplexity_API_SECRET = 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
Connector_AiLangdoc_MODEL_NAME = gpt-4o Connector_AiPerplexity_MODEL_NAME = sonar
Connector_AiLangdoc_TEMPERATURE = 0.2 Connector_AiPerplexity_TEMPERATURE = 0.2
Connector_AiLangdoc_MAX_TOKENS = 2000 Connector_AiPerplexity_MAX_TOKENS = 2000
# Agent Mail configuration # Agent Mail configuration
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c

View file

@ -64,12 +64,12 @@ Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
Connector_AiAnthropic_TEMPERATURE = 0.2 Connector_AiAnthropic_TEMPERATURE = 0.2
Connector_AiAnthropic_MAX_TOKENS = 2000 Connector_AiAnthropic_MAX_TOKENS = 2000
# LangDoc configuration # Perplexity AI configuration
Connector_AiLangdoc_API_URL = https://api.langdock.com/v1/chat/completions Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
Connector_AiLangdoc_API_SECRET = sk-9KaNH1FfEx7SkTijsFpXeTIc9_xOmoo7e0hW6SqrYavFq_bgjcULa7PXp3kWQpp4gfk8-U0B4L91CP6YpAJxZg Connector_AiPerplexity_API_SECRET = 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
Connector_AiLangdoc_MODEL_NAME = gpt-4o Connector_AiPerplexity_MODEL_NAME = sonar
Connector_AiLangdoc_TEMPERATURE = 0.2 Connector_AiPerplexity_TEMPERATURE = 0.2
Connector_AiLangdoc_MAX_TOKENS = 2000 Connector_AiPerplexity_MAX_TOKENS = 2000
# Agent Mail configuration # Agent Mail configuration
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c

View file

@ -62,13 +62,52 @@ class AiAnthropic:
if maxTokens is None: if maxTokens is None:
maxTokens = self.config.get("maxTokens", 2000) maxTokens = self.config.get("maxTokens", 2000)
# Transform OpenAI-style messages to Anthropic format:
# - Move any 'system' role content to top-level 'system'
# - Keep only 'user'/'assistant' messages in the list
system_contents: List[str] = []
converted_messages: List[Dict[str, Any]] = []
for m in messages:
role = m.get("role")
content = m.get("content", "")
if role == "system":
# Collect system content; Anthropic expects top-level 'system'
if isinstance(content, list):
# Join text parts if provided as blocks
joined = "\n\n".join(
[
(part.get("text") if isinstance(part, dict) else str(part))
for part in content
]
)
system_contents.append(joined)
else:
system_contents.append(str(content))
continue
# For Anthropic, content can be a string; pass through strings, collapse blocks
if isinstance(content, list):
# Collapse to text if blocks are provided
collapsed = "\n\n".join(
[
(part.get("text") if isinstance(part, dict) else str(part))
for part in content
]
)
converted_messages.append({"role": role, "content": collapsed})
else:
converted_messages.append({"role": role, "content": content})
system_prompt = "\n\n".join([s for s in system_contents if s]) if system_contents else None
# Create Anthropic API payload # Create Anthropic API payload
payload = { payload: Dict[str, Any] = {
"model": self.modelName, "model": self.modelName,
"messages": messages, "messages": converted_messages,
"temperature": temperature, "temperature": temperature,
"max_tokens": maxTokens "max_tokens": maxTokens,
} }
if system_prompt:
payload["system"] = system_prompt
response = await self.httpClient.post( response = await self.httpClient.post(
self.apiUrl, self.apiUrl,
@ -174,8 +213,8 @@ class AiAnthropic:
} }
] ]
# Use the existing callApi function with the Vision model # Use the existing callAiBasic function with the Vision model
response = await self.callApi(messages) response = await self.callAiBasic(messages)
# Extract and return content # Extract and return content
return response["choices"][0]["message"]["content"] return response["choices"][0]["message"]["content"]

View file

@ -1,406 +0,0 @@
import logging
import httpx
import asyncio
import re
from typing import Dict, Any, List, Union, Optional
from fastapi import HTTPException
from modules.shared.configuration import APP_CONFIG
# Configure logger
logger = logging.getLogger(__name__)
def loadConfigData():
    """Load configuration data for LangDoc connector"""
    # Read each setting individually so a missing key is easy to spot in a traceback.
    apiKey = APP_CONFIG.get('Connector_AiLangdoc_API_SECRET')
    apiUrl = APP_CONFIG.get('Connector_AiLangdoc_API_URL')
    modelName = APP_CONFIG.get('Connector_AiLangdoc_MODEL_NAME')
    # Numeric settings arrive as strings from the config store; coerce here once.
    temperature = float(APP_CONFIG.get('Connector_AiLangdoc_TEMPERATURE'))
    maxTokens = int(APP_CONFIG.get('Connector_AiLangdoc_MAX_TOKENS'))
    return {
        "apiKey": apiKey,
        "apiUrl": apiUrl,
        "modelName": modelName,
        "temperature": temperature,
        "maxTokens": maxTokens,
    }
class AiLangdoc:
    """Connector for communication with the LangDoc API (OpenAI-compatible)."""

    def __init__(self):
        # Load configuration
        self.config = loadConfigData()
        self.apiKey = self.config["apiKey"]
        self.apiUrl = self.config["apiUrl"]
        self.modelName = self.config["modelName"]
        # One shared async client so connections are reused across calls
        self.httpClient = httpx.AsyncClient(
            timeout=120.0,  # Longer timeout for complex requests
            headers={
                "Authorization": f"Bearer {self.apiKey}",
                "Content-Type": "application/json"
            }
        )
        logger.info(f"LangDoc Connector initialized with model: {self.modelName}")

    async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: Optional[float] = None, maxTokens: Optional[int] = None) -> str:
        """
        Calls the LangDoc API with the given messages.

        Args:
            messages: List of messages in OpenAI format (role, content)
            temperature: Temperature for response generation (0.0-1.0)
            maxTokens: Maximum number of tokens in the response

        Returns:
            The assistant message content from the LangDoc API

        Raises:
            HTTPException: For errors in API communication
        """
        try:
            # Use parameters from configuration if none were overridden
            if temperature is None:
                temperature = self.config.get("temperature", 0.2)
            if maxTokens is None:
                maxTokens = self.config.get("maxTokens", 2000)
            payload = {
                "model": self.modelName,
                "messages": messages,
                "temperature": temperature,
                "max_tokens": maxTokens
            }
            response = await self.httpClient.post(
                self.apiUrl,
                json=payload
            )
            if response.status_code != 200:
                error_detail = f"LangDoc API error: {response.status_code} - {response.text}"
                logger.error(error_detail)
                # Provide more specific error messages based on status code
                if response.status_code == 429:
                    error_message = "Rate limit exceeded. Please wait before making another request."
                elif response.status_code == 401:
                    error_message = "Invalid API key. Please check your LangDoc API configuration."
                elif response.status_code == 400:
                    error_message = f"Invalid request to LangDoc API: {response.text}"
                else:
                    error_message = f"LangDoc API error ({response.status_code}): {response.text}"
                raise HTTPException(status_code=500, detail=error_message)
            responseJson = response.json()
            content = responseJson["choices"][0]["message"]["content"]
            return content
        except HTTPException:
            # Fix: re-raise HTTPExceptions unchanged. Previously the broad
            # handler below caught them too and double-wrapped the specific
            # detail text built above into a generic message.
            raise
        except Exception as e:
            logger.error(f"Error calling LangDoc API: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error calling LangDoc API: {str(e)}")

    async def callAiImage(self, prompt: str, imageData: Union[str, bytes], mimeType: Optional[str] = None) -> str:
        """
        Analyzes an image using LangDoc's vision capabilities.

        Args:
            prompt: The prompt for analysis
            imageData: Either a file path (str) or image data (bytes)
            mimeType: The MIME type of the image (optional, only for binary data)

        Returns:
            The analysis response as text, or an "[Error ...]" string on failure.
            NOTE: this method is deliberately best-effort — it never raises.
        """
        try:
            # Distinguish between file path and binary data
            if isinstance(imageData, str):
                # It's a file path - import filehandling only when needed
                from modules import agentserviceFilemanager as fileHandler
                base64Data, autoMimeType = fileHandler.encodeFileToBase64(imageData)
                mimeType = mimeType or autoMimeType
            else:
                # It's binary data
                import base64
                base64Data = base64.b64encode(imageData).decode('utf-8')
                # MIME type must be specified for binary data
                if not mimeType:
                    # Fallback to generic image type
                    mimeType = "image/png"
            # Prepare the payload for the Vision API (OpenAI image_url format)
            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mimeType};base64,{base64Data}"
                            }
                        }
                    ]
                }
            ]
            # Use the existing callAiBasic function
            response = await self.callAiBasic(messages)
            return response
        except Exception as e:
            logger.error(f"Error during image analysis: {str(e)}", exc_info=True)
            return f"[Error during image analysis: {str(e)}]"

    async def listModels(self) -> List[Dict[str, Any]]:
        """
        Lists available models from the LangDoc API.

        Returns:
            List of available models with their details

        Raises:
            HTTPException: For errors in API communication
        """
        try:
            # LangDoc uses OpenAI-compatible endpoints
            modelsUrl = self.apiUrl.replace("/chat/completions", "/models")
            response = await self.httpClient.get(modelsUrl)
            if response.status_code != 200:
                error_detail = f"LangDoc API error listing models: {response.status_code} - {response.text}"
                logger.error(error_detail)
                raise HTTPException(status_code=500, detail=error_detail)
            responseJson = response.json()
            return responseJson.get("data", [])
        except HTTPException:
            # Fix: do not re-wrap the specific error raised above
            raise
        except Exception as e:
            logger.error(f"Error listing LangDoc models: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error listing LangDoc models: {str(e)}")

    async def getModelInfo(self, modelName: Optional[str] = None) -> Dict[str, Any]:
        """
        Gets information about a specific model.

        Args:
            modelName: Name of the model to get info for (uses default if None)

        Returns:
            Model information dictionary

        Raises:
            HTTPException: 404 if the model is unknown, 500 on API errors
        """
        try:
            if modelName is None:
                modelName = self.modelName
            models = await self.listModels()
            for model in models:
                if model.get("id") == modelName:
                    return model
            raise HTTPException(status_code=404, detail=f"Model {modelName} not found")
        except HTTPException:
            # Fix: previously the 404 above was caught by the broad handler
            # and converted into a 500; preserve the original status code.
            raise
        except Exception as e:
            logger.error(f"Error getting LangDoc model info: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error getting LangDoc model info: {str(e)}")

    async def generateImage(self, prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid") -> Dict[str, Any]:
        """
        Generates an image using LangDoc's DALL-E 3 integration.

        Args:
            prompt: Text description of the image to generate
            size: Image size - "1024x1024", "1792x1024", or "1024x1792"
            quality: Image quality - "standard" or "hd"
            style: Image style - "vivid" or "natural"

        Returns:
            Dictionary containing the generated image data and metadata

        Raises:
            HTTPException: For errors in API communication
        """
        try:
            # Use OpenAI-compatible images endpoint
            imagesUrl = self.apiUrl.replace("/chat/completions", "/images/generations")
            payload = {
                "model": "dall-e-3",
                "prompt": prompt,
                "size": size,
                "quality": quality,
                "style": style,
                "n": 1
            }
            response = await self.httpClient.post(
                imagesUrl,
                json=payload
            )
            if response.status_code != 200:
                error_detail = f"LangDoc Image Generation API error: {response.status_code} - {response.text}"
                logger.error(error_detail)
                # Provide more specific error messages
                if response.status_code == 429:
                    error_message = "Rate limit exceeded for image generation. Please wait before making another request."
                elif response.status_code == 401:
                    error_message = "Invalid API key for image generation. Please check your LangDoc API configuration."
                elif response.status_code == 400:
                    error_message = f"Invalid request to LangDoc Image API: {response.text}"
                else:
                    error_message = f"LangDoc Image API error ({response.status_code}): {response.text}"
                raise HTTPException(status_code=500, detail=error_message)
            responseJson = response.json()
            # Extract image data
            imageData = responseJson.get("data", [])
            if not imageData:
                raise HTTPException(status_code=500, detail="No image data returned from LangDoc API")
            imageInfo = imageData[0]
            return {
                "success": True,
                "image_url": imageInfo.get("url"),
                "revised_prompt": imageInfo.get("revised_prompt"),
                "size": size,
                "quality": quality,
                "style": style,
                "model": "dall-e-3",
                "created": responseJson.get("created"),
                "raw_response": responseJson
            }
        except HTTPException:
            # Fix: keep the specific error messages raised above intact
            raise
        except Exception as e:
            logger.error(f"Error generating image with LangDoc: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error generating image with LangDoc: {str(e)}")

    async def generateImageWithVariations(self, prompt: str, variations: int = 1, size: str = "1024x1024", quality: str = "standard", style: str = "vivid") -> List[Dict[str, Any]]:
        """
        Generates multiple image variations using LangDoc's DALL-E 3 integration.

        Args:
            prompt: Text description of the image to generate
            variations: Number of variations to generate (1-4)
            size: Image size - "1024x1024", "1792x1024", or "1024x1792"
            quality: Image quality - "standard" or "hd"
            style: Image style - "vivid" or "natural"

        Returns:
            List of dictionaries containing generated image data and metadata.
            Failed variations are skipped (logged as warnings), so the list may
            be shorter than requested.

        Raises:
            HTTPException: For unexpected errors in API communication
        """
        try:
            # Limit variations to reasonable number
            variations = min(max(variations, 1), 4)
            # Use OpenAI-compatible images endpoint
            imagesUrl = self.apiUrl.replace("/chat/completions", "/images/generations")
            results = []
            # Generate multiple variations by making multiple API calls
            for i in range(variations):
                # Add variation to prompt to get different results
                variationPrompt = f"{prompt} (variation {i+1})"
                payload = {
                    "model": "dall-e-3",
                    "prompt": variationPrompt,
                    "size": size,
                    "quality": quality,
                    "style": style,
                    "n": 1
                }
                response = await self.httpClient.post(
                    imagesUrl,
                    json=payload
                )
                if response.status_code != 200:
                    logger.warning(f"Failed to generate variation {i+1}: {response.status_code} - {response.text}")
                    continue
                responseJson = response.json()
                imageData = responseJson.get("data", [])
                if imageData:
                    imageInfo = imageData[0]
                    results.append({
                        "variation": i + 1,
                        "image_url": imageInfo.get("url"),
                        "revised_prompt": imageInfo.get("revised_prompt"),
                        "size": size,
                        "quality": quality,
                        "style": style,
                        "model": "dall-e-3",
                        "created": responseJson.get("created")
                    })
                # Add small delay between requests to avoid rate limiting
                if i < variations - 1:
                    await asyncio.sleep(1)
            return results
        except Exception as e:
            logger.error(f"Error generating image variations with LangDoc: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error generating image variations with LangDoc: {str(e)}")

    async def generateImageWithChat(self, prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid") -> str:
        """
        Generates an image using LangDoc's chat interface with image generation tools.
        This method uses the chat completions endpoint with image generation capabilities.

        Args:
            prompt: Text description of the image to generate
            size: Image size - "1024x1024", "1792x1024", or "1024x1792"
            quality: Image quality - "standard" or "hd"
            style: Image style - "vivid" or "natural"

        Returns:
            Response text from the chat model (may include image references)

        Raises:
            HTTPException: For errors in API communication
        """
        try:
            # Create a prompt that requests image generation
            imagePrompt = f"Please generate an image with the following description: {prompt}. Size: {size}, Quality: {quality}, Style: {style}"
            messages = [
                {
                    "role": "user",
                    "content": imagePrompt
                }
            ]
            # Use the chat completions endpoint
            response = await self.callAiBasic(messages)
            return response
        except HTTPException:
            # Fix: propagate callAiBasic's specific error without re-wrapping
            raise
        except Exception as e:
            logger.error(f"Error generating image with chat: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error generating image with chat: {str(e)}")

    async def _testConnection(self) -> bool:
        """
        Tests the connection to the LangDoc API.

        Returns:
            True if connection is successful, False otherwise
        """
        try:
            # Try to list models as a simple connection test
            await self.listModels()
            return True
        except Exception as e:
            logger.error(f"LangDoc connection test failed: {str(e)}")
            return False

View file

@ -0,0 +1,255 @@
import logging
import httpx
import asyncio
from typing import Dict, Any, List, Union, Optional
from fastapi import HTTPException
from modules.shared.configuration import APP_CONFIG
# Configure logger
logger = logging.getLogger(__name__)
def loadConfigData():
    """Load configuration data for Perplexity connector"""
    # Pull each value out of the shared config store; numeric values are
    # stored as strings and converted exactly once here.
    apiKey = APP_CONFIG.get('Connector_AiPerplexity_API_SECRET')
    apiUrl = APP_CONFIG.get('Connector_AiPerplexity_API_URL')
    modelName = APP_CONFIG.get('Connector_AiPerplexity_MODEL_NAME')
    temperature = float(APP_CONFIG.get('Connector_AiPerplexity_TEMPERATURE'))
    maxTokens = int(APP_CONFIG.get('Connector_AiPerplexity_MAX_TOKENS'))
    return {
        "apiKey": apiKey,
        "apiUrl": apiUrl,
        "modelName": modelName,
        "temperature": temperature,
        "maxTokens": maxTokens,
    }
class AiPerplexity:
    """Connector for communication with the Perplexity API."""

    def __init__(self):
        # Load configuration
        self.config = loadConfigData()
        self.apiKey = self.config["apiKey"]
        self.apiUrl = self.config["apiUrl"]
        self.modelName = self.config["modelName"]
        # One shared async client so connections are reused across calls
        self.httpClient = httpx.AsyncClient(
            timeout=120.0,  # Longer timeout for complex requests
            headers={
                "Authorization": f"Bearer {self.apiKey}",
                "Content-Type": "application/json",
                "Accept": "application/json"
            }
        )
        logger.info(f"Perplexity Connector initialized with model: {self.modelName}")

    async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: Optional[float] = None, maxTokens: Optional[int] = None) -> str:
        """
        Calls the Perplexity API with the given messages.

        Args:
            messages: List of messages in OpenAI format (role, content)
            temperature: Temperature for response generation (0.0-1.0)
            maxTokens: Maximum number of tokens in the response

        Returns:
            The assistant message content from the Perplexity API

        Raises:
            HTTPException: For errors in API communication
        """
        try:
            # Use parameters from configuration if none were overridden
            if temperature is None:
                temperature = self.config.get("temperature", 0.2)
            if maxTokens is None:
                maxTokens = self.config.get("maxTokens", 2000)
            payload = {
                "model": self.modelName,
                "messages": messages,
                "temperature": temperature,
                "max_tokens": maxTokens
            }
            response = await self.httpClient.post(
                self.apiUrl,
                json=payload
            )
            if response.status_code != 200:
                error_detail = f"Perplexity API error: {response.status_code} - {response.text}"
                logger.error(error_detail)
                # Provide more specific error messages based on status code
                if response.status_code == 429:
                    error_message = "Rate limit exceeded. Please wait before making another request."
                elif response.status_code == 401:
                    error_message = "Invalid API key. Please check your Perplexity API configuration."
                elif response.status_code == 400:
                    error_message = f"Invalid request to Perplexity API: {response.text}"
                else:
                    error_message = f"Perplexity API error ({response.status_code}): {response.text}"
                raise HTTPException(status_code=500, detail=error_message)
            responseJson = response.json()
            content = responseJson["choices"][0]["message"]["content"]
            return content
        except HTTPException:
            # Fix: re-raise HTTPExceptions unchanged. Previously the broad
            # handler below caught them too and double-wrapped the specific
            # detail text built above into a generic message.
            raise
        except Exception as e:
            logger.error(f"Error calling Perplexity API: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error calling Perplexity API: {str(e)}")

    async def callAiWithWebSearch(self, query: str, temperature: Optional[float] = None, maxTokens: Optional[int] = None) -> str:
        """
        Calls Perplexity API with web search capabilities for research.

        Args:
            query: The research query or question
            temperature: Temperature for response generation (0.0-1.0)
            maxTokens: Maximum number of tokens in the response

        Returns:
            The response from Perplexity with web search context

        Raises:
            HTTPException: For errors in API communication
        """
        try:
            # Use parameters from configuration if none were overridden
            if temperature is None:
                temperature = self.config.get("temperature", 0.2)
            if maxTokens is None:
                maxTokens = self.config.get("maxTokens", 2000)
            # For web search, we use the configured model name
            webSearchModel = self.modelName
            payload = {
                "model": webSearchModel,
                "messages": [
                    {
                        "role": "user",
                        "content": query
                    }
                ],
                "temperature": temperature,
                "max_tokens": maxTokens
            }
            response = await self.httpClient.post(
                self.apiUrl,
                json=payload
            )
            if response.status_code != 200:
                error_detail = f"Perplexity Web Search API error: {response.status_code} - {response.text}"
                logger.error(error_detail)
                if response.status_code == 429:
                    error_message = "Rate limit exceeded for web search. Please wait before making another request."
                elif response.status_code == 401:
                    error_message = "Invalid API key for web search. Please check your Perplexity API configuration."
                elif response.status_code == 400:
                    error_message = f"Invalid request to Perplexity Web Search API: {response.text}"
                else:
                    error_message = f"Perplexity Web Search API error ({response.status_code}): {response.text}"
                raise HTTPException(status_code=500, detail=error_message)
            responseJson = response.json()
            content = responseJson["choices"][0]["message"]["content"]
            return content
        except HTTPException:
            # Fix: keep the specific error messages raised above intact
            raise
        except Exception as e:
            logger.error(f"Error calling Perplexity Web Search API: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error calling Perplexity Web Search API: {str(e)}")

    async def researchTopic(self, topic: str, depth: str = "basic") -> str:
        """
        Research a topic using Perplexity's web search capabilities.

        Args:
            topic: The topic to research
            depth: Research depth - "basic", "detailed", or "comprehensive"

        Returns:
            Comprehensive research results on the topic

        Raises:
            HTTPException: For errors in API communication
        """
        try:
            # Create research prompts based on depth
            if depth == "basic":
                prompt = f"Provide a basic overview of: {topic}"
            elif depth == "detailed":
                prompt = f"Provide a detailed analysis of: {topic}. Include recent developments, key facts, and important information."
            else:  # comprehensive
                prompt = f"Provide a comprehensive research report on: {topic}. Include recent developments, key facts, statistics, expert opinions, and current trends."
            return await self.callAiWithWebSearch(prompt)
        except HTTPException:
            # Fix: propagate the specific error from callAiWithWebSearch
            raise
        except Exception as e:
            logger.error(f"Error researching topic: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error researching topic: {str(e)}")

    async def answerQuestion(self, question: str, context: Optional[str] = None) -> str:
        """
        Answer a question using web search for current information.

        Args:
            question: The question to answer
            context: Optional context to provide

        Returns:
            Answer with web search context

        Raises:
            HTTPException: For errors in API communication
        """
        try:
            if context:
                prompt = f"Context: {context}\n\nQuestion: {question}\n\nPlease provide a comprehensive answer using current information from the web."
            else:
                prompt = f"Question: {question}\n\nPlease provide a comprehensive answer using current information from the web."
            return await self.callAiWithWebSearch(prompt)
        except HTTPException:
            # Fix: propagate the specific error from callAiWithWebSearch
            raise
        except Exception as e:
            logger.error(f"Error answering question: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error answering question: {str(e)}")

    async def getCurrentNews(self, topic: Optional[str] = None, limit: int = 5) -> str:
        """
        Get current news on a specific topic.

        Args:
            topic: The topic to get news about (optional)
            limit: Number of news items to retrieve

        Returns:
            Current news information

        Raises:
            HTTPException: For errors in API communication
        """
        try:
            if topic:
                prompt = f"Get the latest news about {topic}. Provide {limit} recent news items with sources and dates."
            else:
                prompt = f"Get the latest news. Provide {limit} recent news items with sources and dates."
            return await self.callAiWithWebSearch(prompt)
        except HTTPException:
            # Fix: propagate the specific error from callAiWithWebSearch
            raise
        except Exception as e:
            logger.error(f"Error getting current news: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error getting current news: {str(e)}")

    async def _testConnection(self) -> bool:
        """
        Tests the connection to the Perplexity API.

        Returns:
            True if connection is successful, False otherwise
        """
        try:
            # Try a simple test message
            testMessages = [
                {"role": "user", "content": "Hello, please respond with just 'OK' to confirm the connection works."}
            ]
            response = await self.callAiBasic(testMessages)
            # Fix: coerce to bool — the previous expression could return
            # None or "" even though the signature promises a bool.
            return bool(response and response.strip())
        except Exception as e:
            logger.error(f"Perplexity connection test failed: {str(e)}")
            return False

View file

@ -4,6 +4,7 @@
import logging import logging
import asyncio import asyncio
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional
from tavily import AsyncTavilyClient from tavily import AsyncTavilyClient
from modules.shared.configuration import APP_CONFIG from modules.shared.configuration import APP_CONFIG
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import get_utc_timestamp
@ -29,6 +30,7 @@ logger = logging.getLogger(__name__)
class WebSearchResult: class WebSearchResult:
title: str title: str
url: str url: str
raw_content: Optional[str] = None
@dataclass @dataclass
class WebCrawlResult: class WebCrawlResult:
@ -83,7 +85,11 @@ class ConnectorWeb:
return WebSearchActionResult(success=False, error=str(e)) return WebSearchActionResult(success=False, error=str(e))
result_items = [ result_items = [
WebSearchResultItem(title=result.title, url=result.url) WebSearchResultItem(
title=result.title,
url=result.url,
raw_content=getattr(result, 'raw_content', None)
)
for result in raw_results for result in raw_results
] ]
@ -246,6 +252,15 @@ class ConnectorWeb:
urls = [result.url for result in search_results] urls = [result.url for result in search_results]
return await self._crawl(urls, extract_depth=extract_depth, format=format) return await self._crawl(urls, extract_depth=extract_depth, format=format)
def _clean_url(self, url: str) -> str:
"""Clean URL by removing extra text that might be appended."""
import re
# Extract just the URL part, removing any extra text after it
url_match = re.match(r'(https?://[^\s,]+)', url)
if url_match:
return url_match.group(1)
return url
async def _search( async def _search(
self, self,
query: str, query: str,
@ -289,7 +304,11 @@ class ConnectorWeb:
response = await self.client.search(**kwargs) response = await self.client.search(**kwargs)
return [ return [
WebSearchResult(title=result["title"], url=result["url"]) WebSearchResult(
title=result["title"],
url=self._clean_url(result["url"]),
raw_content=result.get("raw_content")
)
for result in response["results"] for result in response["results"]
] ]
@ -304,26 +323,53 @@ class ConnectorWeb:
retry_delay = self.crawl_retry_delay retry_delay = self.crawl_retry_delay
timeout = self.crawl_timeout timeout = self.crawl_timeout
logger.debug(f"Starting crawl of {len(urls)} URLs: {urls}")
logger.debug(f"Crawl settings: extract_depth={extract_depth}, format={format}, timeout={timeout}s")
for attempt in range(max_retries + 1): for attempt in range(max_retries + 1):
try: try:
logger.debug(f"Crawl attempt {attempt + 1}/{max_retries + 1}")
# Use asyncio.wait_for for timeout # Use asyncio.wait_for for timeout
# Build kwargs for extract # Build kwargs for extract
kwargs_extract: dict = {"urls": urls} kwargs_extract: dict = {"urls": urls}
kwargs_extract["extract_depth"] = extract_depth or "advanced" kwargs_extract["extract_depth"] = extract_depth or "advanced"
kwargs_extract["format"] = format or "text" kwargs_extract["format"] = format or "markdown" # Use markdown to get HTML structure
logger.debug(f"Sending request to Tavily with kwargs: {kwargs_extract}")
response = await asyncio.wait_for( response = await asyncio.wait_for(
self.client.extract(**kwargs_extract), self.client.extract(**kwargs_extract),
timeout=timeout timeout=timeout
) )
return [ logger.debug(f"Tavily response received: {list(response.keys())}")
WebCrawlResult(url=result["url"], content=result["raw_content"])
# Debug: Log what Tavily actually returns
if "results" in response and response["results"]:
logger.debug(f"Tavily returned {len(response['results'])} results")
logger.debug(f"First result keys: {list(response['results'][0].keys())}")
logger.debug(f"First result has raw_content: {'raw_content' in response['results'][0]}")
# Log each result
for i, result in enumerate(response["results"]):
logger.debug(f"Result {i+1}: URL={result.get('url', 'N/A')}, content_length={len(result.get('raw_content', result.get('content', '')))}")
else:
logger.warning(f"Tavily returned no results in response: {response}")
results = [
WebCrawlResult(
url=result["url"],
content=result.get("raw_content", result.get("content", "")) # Try raw_content first, fallback to content
)
for result in response["results"] for result in response["results"]
] ]
logger.debug(f"Crawl successful: extracted {len(results)} results")
return results
except asyncio.TimeoutError: except asyncio.TimeoutError:
logger.warning(f"Crawl attempt {attempt + 1} timed out after {timeout} seconds") logger.warning(f"Crawl attempt {attempt + 1} timed out after {timeout} seconds for URLs: {urls}")
if attempt < max_retries: if attempt < max_retries:
logger.info(f"Retrying in {retry_delay} seconds...") logger.info(f"Retrying in {retry_delay} seconds...")
await asyncio.sleep(retry_delay) await asyncio.sleep(retry_delay)
@ -331,7 +377,22 @@ class ConnectorWeb:
raise Exception(f"Crawl failed after {max_retries + 1} attempts due to timeout") raise Exception(f"Crawl failed after {max_retries + 1} attempts due to timeout")
except Exception as e: except Exception as e:
logger.warning(f"Crawl attempt {attempt + 1} failed: {str(e)}") logger.warning(f"Crawl attempt {attempt + 1} failed for URLs {urls}: {str(e)}")
logger.debug(f"Full error details: {type(e).__name__}: {str(e)}")
# Check if it's a validation error and log more details
if "validation" in str(e).lower():
logger.debug(f"URL validation failed. Checking URL format:")
for i, url in enumerate(urls):
logger.debug(f" URL {i+1}: '{url}' (length: {len(url)})")
# Check for common URL issues
if ' ' in url:
logger.debug(f" WARNING: URL contains spaces!")
if not url.startswith(('http://', 'https://')):
logger.debug(f" WARNING: URL doesn't start with http/https!")
if len(url) > 2000:
logger.debug(f" WARNING: URL is very long ({len(url)} chars)")
if attempt < max_retries: if attempt < max_retries:
logger.info(f"Retrying in {retry_delay} seconds...") logger.info(f"Retrying in {retry_delay} seconds...")
await asyncio.sleep(retry_delay) await asyncio.sleep(retry_delay)

View file

@ -384,6 +384,57 @@ class DatabaseConnector:
logger.info( logger.info(
f"Created table '{table}' with columns from Pydantic model" f"Created table '{table}' with columns from Pydantic model"
) )
else:
# Table exists: ensure all columns from model are present (simple additive migration)
try:
cursor.execute(
"""
SELECT column_name FROM information_schema.columns
WHERE LOWER(table_name) = LOWER(%s) AND table_schema = 'public'
""",
(table,),
)
existing_columns = {
row["column_name"] for row in cursor.fetchall()
}
# Desired columns based on model
model_fields = _get_model_fields(model_class)
desired_columns = (
set(["id"])
| set(model_fields.keys())
| {"_createdAt", "_modifiedAt", "_createdBy", "_modifiedBy"}
)
# Add missing columns
for col in sorted(desired_columns - existing_columns):
# Determine SQL type
if col in ["id"]:
continue # primary key exists already
sql_type = model_fields.get(col)
if col in ["_createdAt"]:
sql_type = "DOUBLE PRECISION"
elif col in ["_modifiedAt"]:
sql_type = "DOUBLE PRECISION"
elif col in ["_createdBy", "_modifiedBy"]:
sql_type = "VARCHAR(255)"
if not sql_type:
sql_type = "TEXT"
try:
cursor.execute(
f'ALTER TABLE "{table}" ADD COLUMN "{col}" {sql_type}'
)
logger.info(
f"Added missing column '{col}' ({sql_type}) to '{table}'"
)
except Exception as add_err:
logger.warning(
f"Could not add column '{col}' to '{table}': {add_err}"
)
except Exception as ensure_err:
logger.warning(
f"Could not ensure columns for existing table '{table}': {ensure_err}"
)
self.connection.commit() self.connection.commit()
return True return True

View file

@ -10,7 +10,6 @@ from . import datamodelWeb as web
from . import datamodelUam as uam from . import datamodelUam as uam
from . import datamodelSecurity as security from . import datamodelSecurity as security
from . import datamodelNeutralizer as neutralizer from . import datamodelNeutralizer as neutralizer
from . import datamodelWorkflow as workflow
from . import datamodelChat as chat from . import datamodelChat as chat
from . import datamodelFiles as files from . import datamodelFiles as files
from . import datamodelVoice as voice from . import datamodelVoice as voice

View file

@ -111,6 +111,11 @@ class AiCallOptions(BaseModel):
callType: Literal["planning", "text"] = Field(default="text", description="Call type: planning or text") callType: Literal["planning", "text"] = Field(default="text", description="Call type: planning or text")
safetyMargin: float = Field(default=0.1, ge=0.0, le=0.5, description="Safety margin for token limits (0.0-0.5)") safetyMargin: float = Field(default=0.1, ge=0.0, le=0.5, description="Safety margin for token limits (0.0-0.5)")
modelCapabilities: Optional[List[str]] = Field(default=None, description="Required model capabilities for filtering") modelCapabilities: Optional[List[str]] = Field(default=None, description="Required model capabilities for filtering")
# Model generation parameters
temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0, description="Temperature for response generation (0.0-2.0, lower = more consistent)")
maxTokens: Optional[int] = Field(default=None, ge=1, le=32000, description="Maximum tokens in response")
maxParts: Optional[int] = Field(default=1000, ge=1, le=1000, description="Maximum number of continuation parts to fetch")
class AiCallRequest(BaseModel): class AiCallRequest(BaseModel):

View file

@ -169,7 +169,7 @@ register_model_labels(
) )
class ExtractedContent(BaseModel, ModelMixin): class ChatContentExtracted(BaseModel, ModelMixin):
id: str = Field(description="Reference to source ChatDocument") id: str = Field(description="Reference to source ChatDocument")
contents: List[ContentItem] = Field( contents: List[ContentItem] = Field(
default_factory=list, description="List of content items" default_factory=list, description="List of content items"
@ -177,7 +177,7 @@ class ExtractedContent(BaseModel, ModelMixin):
register_model_labels( register_model_labels(
"ExtractedContent", "ChatContentExtracted",
{"en": "Extracted Content", "fr": "Contenu extrait"}, {"en": "Extracted Content", "fr": "Contenu extrait"},
{ {
"id": {"en": "Object ID", "fr": "ID de l'objet"}, "id": {"en": "Object ID", "fr": "ID de l'objet"},
@ -201,6 +201,9 @@ class ChatMessage(BaseModel, ModelMixin):
None, description="Label for the set of documents" None, description="Label for the set of documents"
) )
message: Optional[str] = Field(None, description="Message content") message: Optional[str] = Field(None, description="Message content")
summary: Optional[str] = Field(
None, description="Short summary of this message for planning/history"
)
role: str = Field(description="Role of the message sender") role: str = Field(description="Role of the message sender")
status: str = Field(description="Status of the message (first, step, last)") status: str = Field(description="Status of the message (first, step, last)")
sequenceNr: int = Field( sequenceNr: int = Field(
@ -244,6 +247,7 @@ register_model_labels(
"documents": {"en": "Documents", "fr": "Documents"}, "documents": {"en": "Documents", "fr": "Documents"},
"documentsLabel": {"en": "Documents Label", "fr": "Label des documents"}, "documentsLabel": {"en": "Documents Label", "fr": "Label des documents"},
"message": {"en": "Message", "fr": "Message"}, "message": {"en": "Message", "fr": "Message"},
"summary": {"en": "Summary", "fr": "Résumé"},
"role": {"en": "Role", "fr": "Rôle"}, "role": {"en": "Role", "fr": "Rôle"},
"status": {"en": "Status", "fr": "Statut"}, "status": {"en": "Status", "fr": "Statut"},
"sequenceNr": {"en": "Sequence Number", "fr": "Numéro de séquence"}, "sequenceNr": {"en": "Sequence Number", "fr": "Numéro de séquence"},
@ -419,34 +423,6 @@ register_model_labels(
) )
class WorkflowResult(BaseModel, ModelMixin):
status: str
completed_tasks: int
total_tasks: int
execution_time: float
final_results_count: int
error: Optional[str] = None
phase: Optional[str] = None
register_model_labels(
"WorkflowResult",
{"en": "Workflow Result", "fr": "Résultat du workflow"},
{
"status": {"en": "Status", "fr": "Statut"},
"completed_tasks": {"en": "Completed Tasks", "fr": "Tâches terminées"},
"total_tasks": {"en": "Total Tasks", "fr": "Total des tâches"},
"execution_time": {"en": "Execution Time", "fr": "Temps d'exécution"},
"final_results_count": {
"en": "Final Results Count",
"fr": "Nombre de résultats finaux",
},
"error": {"en": "Error", "fr": "Erreur"},
"phase": {"en": "Phase", "fr": "Phase"},
},
)
class UserInputRequest(BaseModel, ModelMixin): class UserInputRequest(BaseModel, ModelMixin):
prompt: str = Field(description="Prompt for the user") prompt: str = Field(description="Prompt for the user")
listFileId: List[str] = Field(default_factory=list, description="List of file IDs") listFileId: List[str] = Field(default_factory=list, description="List of file IDs")
@ -462,3 +438,519 @@ register_model_labels(
"userLanguage": {"en": "User Language", "fr": "Langue de l'utilisateur"}, "userLanguage": {"en": "User Language", "fr": "Langue de l'utilisateur"},
}, },
) )
class ActionDocument(BaseModel, ModelMixin):
    """Clear document structure for action results.

    Fields are self-documented via ``Field(description=...)``. The payload is
    typed ``Any`` — no schema is enforced here, so consumers should consult
    ``mimeType`` to interpret ``documentData``.
    """

    documentName: str = Field(description="Name of the document")
    documentData: Any = Field(description="Content/data of the document")
    mimeType: str = Field(description="MIME type of the document")


# i18n (en/fr) labels for ActionDocument and its fields.
register_model_labels(
    "ActionDocument",
    {"en": "Action Document", "fr": "Document d'action"},
    {
        "documentName": {"en": "Document Name", "fr": "Nom du document"},
        "documentData": {"en": "Document Data", "fr": "Données du document"},
        "mimeType": {"en": "MIME Type", "fr": "Type MIME"},
    },
)
class ActionResult(BaseModel, ModelMixin):
    """Clean action result with documents as primary output.

    IMPORTANT: Action methods should NOT set resultLabel in their return value.
    The resultLabel is managed by the action handler using the action's execResultLabel
    from the action plan. This ensures consistent document routing throughout the workflow.
    """

    success: bool = Field(description="Whether execution succeeded")
    error: Optional[str] = Field(None, description="Error message if failed")
    documents: List[ActionDocument] = Field(
        default_factory=list, description="Document outputs"
    )
    resultLabel: Optional[str] = Field(
        None,
        description="Label for document routing (set by action handler, not by action methods)",
    )

    @classmethod
    def isSuccess(cls, documents: Optional[List[ActionDocument]] = None) -> "ActionResult":
        """Build a successful result; ``documents`` defaults to an empty list."""
        return cls(success=True, documents=documents or [])

    @classmethod
    def isFailure(
        cls, error: str, documents: Optional[List[ActionDocument]] = None
    ) -> "ActionResult":
        """Build a failed result carrying ``error`` and any partial documents."""
        return cls(success=False, documents=documents or [], error=error)


# i18n (en/fr) labels for ActionResult and its fields.
register_model_labels(
    "ActionResult",
    {"en": "Action Result", "fr": "Résultat de l'action"},
    {
        "success": {"en": "Success", "fr": "Succès"},
        "error": {"en": "Error", "fr": "Erreur"},
        "documents": {"en": "Documents", "fr": "Documents"},
        "resultLabel": {"en": "Result Label", "fr": "Étiquette du résultat"},
    },
)
class ActionSelection(BaseModel, ModelMixin):
    """Pairs an execution method with a concrete action name inside it."""

    method: str = Field(description="Method to execute (e.g., web, document, ai)")
    name: str = Field(
        description="Action name within the method (e.g., search, extract)"
    )


# i18n (en/fr) labels for ActionSelection and its fields.
register_model_labels(
    "ActionSelection",
    {"en": "Action Selection", "fr": "Sélection d'action"},
    {
        "method": {"en": "Method", "fr": "Méthode"},
        "name": {"en": "Action Name", "fr": "Nom de l'action"},
    },
)
class ActionParameters(BaseModel, ModelMixin):
    """Free-form parameter bag for a selected action (no schema enforced)."""

    parameters: Dict[str, Any] = Field(
        default_factory=dict, description="Parameters to execute the selected action"
    )


# i18n (en/fr) labels for ActionParameters and its fields.
register_model_labels(
    "ActionParameters",
    {"en": "Action Parameters", "fr": "Paramètres d'action"},
    {
        "parameters": {"en": "Parameters", "fr": "Paramètres"},
    },
)
class ObservationPreview(BaseModel, ModelMixin):
    """Compact preview of one produced output (used inside ``Observation``)."""

    name: str = Field(description="Document name or URL label")
    mime: str = Field(description="MIME type or kind")
    snippet: str = Field(description="Short snippet or summary")


# i18n (en/fr) labels for ObservationPreview and its fields.
register_model_labels(
    "ObservationPreview",
    {"en": "Observation Preview", "fr": "Aperçu d'observation"},
    {
        "name": {"en": "Name", "fr": "Nom"},
        "mime": {"en": "MIME", "fr": "MIME"},
        "snippet": {"en": "Snippet", "fr": "Extrait"},
    },
)
class Observation(BaseModel, ModelMixin):
    """Summary of an action execution: outcome flag, routing label, and
    compact previews/notes describing the produced documents."""

    success: bool = Field(description="Action execution success flag")
    resultLabel: str = Field(description="Deterministic label for produced documents")
    documentsCount: int = Field(description="Number of produced documents")
    previews: List[ObservationPreview] = Field(
        default_factory=list, description="Compact previews of outputs"
    )
    notes: List[str] = Field(
        default_factory=list, description="Short notes or key facts"
    )


# i18n (en/fr) labels for Observation and its fields.
register_model_labels(
    "Observation",
    {"en": "Observation", "fr": "Observation"},
    {
        "success": {"en": "Success", "fr": "Succès"},
        "resultLabel": {"en": "Result Label", "fr": "Étiquette du résultat"},
        "documentsCount": {"en": "Documents Count", "fr": "Nombre de documents"},
        "previews": {"en": "Previews", "fr": "Aperçus"},
        "notes": {"en": "Notes", "fr": "Notes"},
    },
)
class TaskStatus(str):
    """Status constants for tasks and actions.

    NOTE(review): this is a plain ``str`` subclass used as a constant
    namespace, not an ``enum.Enum`` — the class attributes are ordinary
    strings, so fields annotated ``TaskStatus`` presumably accept any string.
    Consider ``class TaskStatus(str, Enum)``; confirm the serialization
    impact on ModelMixin/pydantic before changing.
    """

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"


# NOTE(review): register_model_labels is applied to a non-BaseModel here;
# the label map keys are the constant names rather than model fields.
register_model_labels(
    "TaskStatus",
    {"en": "Task Status", "fr": "Statut de la tâche"},
    {
        "PENDING": {"en": "Pending", "fr": "En attente"},
        "RUNNING": {"en": "Running", "fr": "En cours"},
        "COMPLETED": {"en": "Completed", "fr": "Terminé"},
        "FAILED": {"en": "Failed", "fr": "Échec"},
        "CANCELLED": {"en": "Cancelled", "fr": "Annulé"},
    },
)
class DocumentExchange(BaseModel, ModelMixin):
    """A labeled set of document references passed between tasks."""

    documentsLabel: str = Field(description="Label for the set of documents")
    documents: List[str] = Field(
        default_factory=list, description="List of document references"
    )


# i18n (en/fr) labels for DocumentExchange and its fields.
register_model_labels(
    "DocumentExchange",
    {"en": "Document Exchange", "fr": "Échange de documents"},
    {
        "documentsLabel": {"en": "Documents Label", "fr": "Label des documents"},
        "documents": {"en": "Documents", "fr": "Documents"},
    },
)
class ActionItem(BaseModel, ModelMixin):
    """One executable action within a task: what to run, its parameters,
    retry bookkeeping, and the execution outcome."""

    id: str = Field(..., description="Action ID")
    execMethod: str = Field(..., description="Method to execute")
    execAction: str = Field(..., description="Action to perform")
    execParameters: Dict[str, Any] = Field(
        default_factory=dict, description="Action parameters"
    )
    execResultLabel: Optional[str] = Field(
        None, description="Label for the set of result documents"
    )
    expectedDocumentFormats: Optional[List[Dict[str, str]]] = Field(
        None, description="Expected document formats (optional)"
    )
    userMessage: Optional[str] = Field(
        None, description="User-friendly message in user's language"
    )
    # NOTE: TaskStatus is a str subclass, so this is effectively a string field.
    status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status")
    error: Optional[str] = Field(None, description="Error message if action failed")
    retryCount: int = Field(default=0, description="Number of retries attempted")
    retryMax: int = Field(default=3, description="Maximum number of retries")
    processingTime: Optional[float] = Field(
        None, description="Processing time in seconds"
    )
    timestamp: float = Field(
        ..., description="When the action was executed (UTC timestamp in seconds)"
    )
    result: Optional[str] = Field(None, description="Result of the action")

    def setSuccess(self, result: Optional[str] = None) -> None:
        """Set the action as successful with optional result.

        Clears any previous error; leaves ``self.result`` untouched when
        ``result`` is None.
        """
        self.status = TaskStatus.COMPLETED
        self.error = None
        if result is not None:
            self.result = result

    def setError(self, error_message: str) -> None:
        """Set the action as failed with error message."""
        self.status = TaskStatus.FAILED
        self.error = error_message


# i18n (en/fr) labels for ActionItem and its fields.
register_model_labels(
    "ActionItem",
    {"en": "Task Action", "fr": "Action de tâche"},
    {
        "id": {"en": "Action ID", "fr": "ID de l'action"},
        "execMethod": {"en": "Method", "fr": "Méthode"},
        "execAction": {"en": "Action", "fr": "Action"},
        "execParameters": {"en": "Parameters", "fr": "Paramètres"},
        "execResultLabel": {"en": "Result Label", "fr": "Label du résultat"},
        "expectedDocumentFormats": {
            "en": "Expected Document Formats",
            "fr": "Formats de documents attendus",
        },
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"},
        "status": {"en": "Status", "fr": "Statut"},
        "error": {"en": "Error", "fr": "Erreur"},
        "retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"},
        "retryMax": {"en": "Max Retries", "fr": "Tentatives max"},
        "processingTime": {"en": "Processing Time", "fr": "Temps de traitement"},
        "timestamp": {"en": "Timestamp", "fr": "Horodatage"},
        "result": {"en": "Result", "fr": "Résultat"},
    },
)
class TaskResult(BaseModel, ModelMixin):
    """Final outcome of a task: status, success flag, and optional
    feedback/error messages."""

    taskId: str = Field(..., description="Task ID")
    status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status")
    success: bool = Field(..., description="Whether the task was successful")
    feedback: Optional[str] = Field(None, description="Task feedback message")
    error: Optional[str] = Field(None, description="Error message if task failed")


# i18n (en/fr) labels for TaskResult and its fields.
register_model_labels(
    "TaskResult",
    {"en": "Task Result", "fr": "Résultat de tâche"},
    {
        "taskId": {"en": "Task ID", "fr": "ID de la tâche"},
        "status": {"en": "Status", "fr": "Statut"},
        "success": {"en": "Success", "fr": "Succès"},
        "feedback": {"en": "Feedback", "fr": "Retour"},
        "error": {"en": "Error", "fr": "Erreur"},
    },
)
class TaskItem(BaseModel, ModelMixin):
    """A task within a workflow: triggering input, its ordered actions,
    dependency/retry configuration, and timing/result bookkeeping."""

    id: str = Field(..., description="Task ID")
    workflowId: str = Field(..., description="Workflow ID")
    userInput: str = Field(..., description="User input that triggered the task")
    status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status")
    error: Optional[str] = Field(None, description="Error message if task failed")
    startedAt: Optional[float] = Field(
        None, description="When the task started (UTC timestamp in seconds)"
    )
    finishedAt: Optional[float] = Field(
        None, description="When the task finished (UTC timestamp in seconds)"
    )
    actionList: List[ActionItem] = Field(
        default_factory=list, description="List of actions to execute"
    )
    retryCount: int = Field(default=0, description="Number of retries attempted")
    retryMax: int = Field(default=3, description="Maximum number of retries")
    rollbackOnFailure: bool = Field(
        default=True, description="Whether to rollback on failure"
    )
    dependencies: List[str] = Field(
        default_factory=list, description="List of task IDs this task depends on"
    )
    feedback: Optional[str] = Field(None, description="Task feedback message")
    processingTime: Optional[float] = Field(
        None, description="Total processing time in seconds"
    )
    resultLabels: Optional[Dict[str, Any]] = Field(
        default_factory=dict, description="Map of result labels to their values"
    )


# i18n (en/fr) labels for TaskItem.
# NOTE(review): label map omits 'rollbackOnFailure', 'dependencies',
# 'feedback', and 'resultLabels' — confirm whether that is intentional.
register_model_labels(
    "TaskItem",
    {"en": "Task", "fr": "Tâche"},
    {
        "id": {"en": "Task ID", "fr": "ID de la tâche"},
        "workflowId": {"en": "Workflow ID", "fr": "ID du workflow"},
        "userInput": {"en": "User Input", "fr": "Entrée utilisateur"},
        "status": {"en": "Status", "fr": "Statut"},
        "error": {"en": "Error", "fr": "Erreur"},
        "startedAt": {"en": "Started At", "fr": "Démarré à"},
        "finishedAt": {"en": "Finished At", "fr": "Terminé à"},
        "actionList": {"en": "Actions", "fr": "Actions"},
        "retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"},
        "retryMax": {"en": "Max Retries", "fr": "Tentatives max"},
        "processingTime": {"en": "Processing Time", "fr": "Temps de traitement"},
    },
)
class TaskStep(BaseModel, ModelMixin):
    """A planned step in a task plan: objective, dependencies, and success
    criteria.

    NOTE(review): uses snake_case field names (``success_criteria``,
    ``estimated_complexity``) while sibling models use camelCase — left
    unchanged because the names are part of the serialized interface.
    """

    id: str  # step identifier
    objective: str  # what this step should achieve
    dependencies: Optional[list[str]] = Field(default_factory=list)
    success_criteria: Optional[list[str]] = Field(default_factory=list)
    estimated_complexity: Optional[str] = None
    userMessage: Optional[str] = Field(
        None, description="User-friendly message in user's language"
    )


# i18n (en/fr) labels for TaskStep and its fields.
register_model_labels(
    "TaskStep",
    {"en": "Task Step", "fr": "Étape de tâche"},
    {
        "id": {"en": "ID", "fr": "ID"},
        "objective": {"en": "Objective", "fr": "Objectif"},
        "dependencies": {"en": "Dependencies", "fr": "Dépendances"},
        "success_criteria": {"en": "Success Criteria", "fr": "Critères de succès"},
        "estimated_complexity": {
            "en": "Estimated Complexity",
            "fr": "Complexité estimée",
        },
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"},
    },
)
class TaskHandover(BaseModel, ModelMixin):
    """Context package passed from one task (or phase/workflow) to the next:
    input/output documents, prior results, and summarized history."""

    taskId: str = Field(description="Target task ID")
    sourceTask: Optional[str] = Field(None, description="Source task ID")
    inputDocuments: List[DocumentExchange] = Field(
        default_factory=list, description="Available input documents"
    )
    outputDocuments: List[DocumentExchange] = Field(
        default_factory=list, description="Produced output documents"
    )
    context: Dict[str, Any] = Field(default_factory=dict, description="Task context")
    previousResults: List[str] = Field(
        default_factory=list, description="Previous result summaries"
    )
    improvements: List[str] = Field(
        default_factory=list, description="Improvement suggestions"
    )
    workflowSummary: Optional[str] = Field(
        None, description="Summarized workflow context"
    )
    messageHistory: List[str] = Field(
        default_factory=list, description="Key message summaries"
    )
    timestamp: float = Field(
        ..., description="When the handover was created (UTC timestamp in seconds)"
    )
    handoverType: str = Field(
        default="task", description="Type of handover: task, phase, or workflow"
    )


# i18n (en/fr) labels for TaskHandover and its fields.
register_model_labels(
    "TaskHandover",
    {"en": "Task Handover", "fr": "Transfert de tâche"},
    {
        "taskId": {"en": "Task ID", "fr": "ID de la tâche"},
        "sourceTask": {"en": "Source Task", "fr": "Tâche source"},
        "inputDocuments": {"en": "Input Documents", "fr": "Documents d'entrée"},
        "outputDocuments": {"en": "Output Documents", "fr": "Documents de sortie"},
        "context": {"en": "Context", "fr": "Contexte"},
        "previousResults": {"en": "Previous Results", "fr": "Résultats précédents"},
        "improvements": {"en": "Improvements", "fr": "Améliorations"},
        "workflowSummary": {"en": "Workflow Summary", "fr": "Résumé du workflow"},
        "messageHistory": {"en": "Message History", "fr": "Historique des messages"},
        "timestamp": {"en": "Timestamp", "fr": "Horodatage"},
        "handoverType": {"en": "Handover Type", "fr": "Type de transfert"},
    },
)
class TaskContext(BaseModel, ModelMixin):
    """Aggregated execution context for one task step: the step itself, the
    owning workflow, available documents/connections, prior results, and
    retry/regeneration bookkeeping."""

    task_step: TaskStep
    # Forward reference — resolved by the module-level update_forward_refs() call.
    workflow: Optional["ChatWorkflow"] = None
    workflow_id: Optional[str] = None
    available_documents: Optional[str] = "No documents available"
    available_connections: Optional[list[str]] = Field(default_factory=list)
    previous_results: Optional[list[str]] = Field(default_factory=list)
    previous_handover: Optional[TaskHandover] = None
    improvements: Optional[list[str]] = Field(default_factory=list)
    retry_count: Optional[int] = 0
    previous_action_results: Optional[list] = Field(default_factory=list)
    previous_review_result: Optional[dict] = None
    is_regeneration: Optional[bool] = False
    failure_patterns: Optional[list[str]] = Field(default_factory=list)
    failed_actions: Optional[list] = Field(default_factory=list)
    successful_actions: Optional[list] = Field(default_factory=list)
    criteria_progress: Optional[dict] = None

    def getDocumentReferences(self) -> List[str]:
        """Return deduplicated document references from the previous handover.

        Returns an empty list when there is no previous handover. Duplicates
        are removed while preserving first-occurrence order (the original
        implementation used ``set`` and returned an arbitrary order).
        """
        refs: List[str] = []
        if self.previous_handover:
            for doc_exchange in self.previous_handover.inputDocuments:
                refs.extend(doc_exchange.documents)
        return list(dict.fromkeys(refs))

    def addImprovement(self, improvement: str) -> None:
        """Record an improvement suggestion, skipping duplicates.

        Lazily initializes ``improvements`` since the field is Optional.
        """
        if self.improvements is None:
            self.improvements = []
        if improvement not in self.improvements:
            self.improvements.append(improvement)
class ReviewContext(BaseModel, ModelMixin):
    """Inputs for reviewing a completed task step: the step, its executed
    actions and results, and workflow context."""

    task_step: TaskStep
    task_actions: Optional[list] = Field(default_factory=list)
    action_results: Optional[list] = Field(default_factory=list)
    step_result: Optional[dict] = Field(default_factory=dict)
    workflow_id: Optional[str] = None
    previous_results: Optional[list[str]] = Field(default_factory=list)
class ReviewResult(BaseModel, ModelMixin):
    """Outcome of a step review: status, quality assessment, met/unmet
    criteria, and suggested improvements."""

    status: str  # review verdict (free-form string)
    reason: Optional[str] = None
    improvements: Optional[list[str]] = Field(default_factory=list)
    quality_score: Optional[int] = 5  # default mid-scale score
    missing_outputs: Optional[list[str]] = Field(default_factory=list)
    met_criteria: Optional[list[str]] = Field(default_factory=list)
    unmet_criteria: Optional[list[str]] = Field(default_factory=list)
    confidence: Optional[float] = 0.5  # reviewer confidence, defaults to neutral
    userMessage: Optional[str] = Field(
        None, description="User-friendly message in user's language"
    )


# i18n (en/fr) labels for ReviewResult and its fields.
register_model_labels(
    "ReviewResult",
    {"en": "Review Result", "fr": "Résultat de l'évaluation"},
    {
        "status": {"en": "Status", "fr": "Statut"},
        "reason": {"en": "Reason", "fr": "Raison"},
        "improvements": {"en": "Improvements", "fr": "Améliorations"},
        "quality_score": {"en": "Quality Score", "fr": "Score de qualité"},
        "missing_outputs": {"en": "Missing Outputs", "fr": "Sorties manquantes"},
        "met_criteria": {"en": "Met Criteria", "fr": "Critères respectés"},
        "unmet_criteria": {"en": "Unmet Criteria", "fr": "Critères non respectés"},
        "confidence": {"en": "Confidence", "fr": "Confiance"},
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"},
    },
)
class TaskPlan(BaseModel, ModelMixin):
    """A plan of task steps with an overview and optional user-facing
    message."""

    overview: str  # high-level description of the plan
    tasks: list[TaskStep]  # ordered planned steps
    userMessage: Optional[str] = Field(
        None, description="Overall user-friendly message for the task plan"
    )


# i18n (en/fr) labels for TaskPlan and its fields.
register_model_labels(
    "TaskPlan",
    {"en": "Task Plan", "fr": "Plan de tâches"},
    {
        "overview": {"en": "Overview", "fr": "Aperçu"},
        "tasks": {"en": "Tasks", "fr": "Tâches"},
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"},
    },
)
# Resolve forward references (TaskContext.workflow refers to "ChatWorkflow").
# NOTE(review): ``update_forward_refs`` is the Pydantic v1 API; on v2 this
# should be ``TaskContext.model_rebuild()`` — confirm the pydantic version.
TaskContext.update_forward_refs()
class PromptPlaceholder(BaseModel, ModelMixin):
    """A labeled content slot inside a prompt, with a flag controlling
    whether the host may summarize it before sending to the AI."""

    label: str  # placeholder label referenced by the prompt template
    content: str  # content substituted for the placeholder
    summaryAllowed: bool = Field(
        default=False,
        description="Whether host may summarize content before sending to AI",
    )


# i18n (en/fr) labels for PromptPlaceholder and its fields.
register_model_labels(
    "PromptPlaceholder",
    {"en": "Prompt Placeholder", "fr": "Espace réservé d'invite"},
    {
        "label": {"en": "Label", "fr": "Libellé"},
        "content": {"en": "Content", "fr": "Contenu"},
        "summaryAllowed": {"en": "Summary Allowed", "fr": "Résumé autorisé"},
    },
)
class PromptBundle(BaseModel, ModelMixin):
    """A prompt template together with its placeholder contents."""

    prompt: str  # prompt text, presumably referencing placeholder labels
    placeholders: List[PromptPlaceholder] = Field(default_factory=list)


# i18n (en/fr) labels for PromptBundle and its fields.
register_model_labels(
    "PromptBundle",
    {"en": "Prompt Bundle", "fr": "Lot d'invite"},
    {
        "prompt": {"en": "Prompt", "fr": "Invite"},
        "placeholders": {"en": "Placeholders", "fr": "Espaces réservés"},
    },
)

View file

@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional, Literal
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
@ -12,8 +12,114 @@ class ContentPart(BaseModel):
metadata: Dict[str, Any] = Field(default_factory=dict, description="Arbitrary metadata for the part") metadata: Dict[str, Any] = Field(default_factory=dict, description="Arbitrary metadata for the part")
class ExtractedContent(BaseModel): class ContentExtracted(BaseModel):
id: str = Field(description="Extraction id or source document id") id: str = Field(description="Extraction id or source document id")
parts: List[ContentPart] = Field(default_factory=list, description="List of extracted parts") parts: List[ContentPart] = Field(default_factory=list, description="List of extracted parts")
summary: Optional[Dict[str, Any]] = Field(default=None, description="Optional extraction summary") summary: Optional[Dict[str, Any]] = Field(default=None, description="Optional extraction summary")
class MergeStrategy(BaseModel):
    """Strategy configuration for merging content parts and AI results."""

    # --- Grouping / ordering ---
    groupBy: str = Field(
        default="typeGroup",
        description="Field to group parts by (typeGroup, parentId, label, etc.)"
    )
    orderBy: str = Field(
        default="id",
        description="Field to order parts within groups (id, order, pageIndex, etc.)"
    )
    # --- Merge behavior and limits ---
    mergeType: Literal["concatenate", "hierarchical", "intelligent"] = Field(
        default="concatenate",
        description="How to merge content within groups"
    )
    maxSize: Optional[int] = Field(
        default=None,
        description="Maximum size for merged content in bytes"
    )
    # --- Per-content-type merge settings ---
    textMerge: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Text-specific merge settings (separator, formatting, etc.)"
    )
    tableMerge: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Table-specific merge settings (header handling, etc.)"
    )
    structureMerge: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Structure-specific merge settings (hierarchy, etc.)"
    )
    aiResultMerge: Optional[Dict[str, Any]] = Field(
        default=None,
        description="AI result merging settings (prompt, context, etc.)"
    )
    # --- Chunk handling ---
    preserveChunks: bool = Field(
        default=False,
        description="Whether to preserve individual chunks or merge them"
    )
    chunkSeparator: str = Field(
        default="\n\n---\n\n",
        description="Separator between chunks when merging"
    )
    # --- Metadata handling ---
    preserveMetadata: bool = Field(
        default=True,
        description="Whether to preserve metadata from original parts"
    )
    metadataFields: Optional[List[str]] = Field(
        default=None,
        description="Specific metadata fields to preserve (None = all)"
    )
    # --- Error handling and validation ---
    onError: Literal["skip", "include", "fail"] = Field(
        default="skip",
        description="How to handle errors during merging"
    )
    validateContent: bool = Field(
        default=True,
        description="Whether to validate content before merging"
    )

    def getTypeSpecificSettings(self, typeGroup: str) -> Dict[str, Any]:
        """Get type-specific merge settings for a content type.

        Returns an empty dict for unknown types or when no settings are set.
        """
        per_type = {
            "text": self.textMerge,
            "table": self.tableMerge,
            "structure": self.structureMerge,
        }
        return per_type.get(typeGroup) or {}

    def shouldPreserveChunk(self, chunk: Dict[str, Any]) -> bool:
        """Determine if a chunk should be preserved based on strategy.

        Chunks are never preserved when ``preserveChunks`` is off; errored
        chunks are dropped under the "skip" error policy.
        """
        if not self.preserveChunks:
            return False
        has_error = bool(chunk.get("metadata", {}).get("error"))
        return not (self.onError == "skip" and has_error)

View file

@ -19,8 +19,6 @@ class FileItem(BaseModel, ModelMixin):
def to_dict(self) -> Dict[str, Any]: def to_dict(self) -> Dict[str, Any]:
return super().to_dict() return super().to_dict()
register_model_labels( register_model_labels(
"FileItem", "FileItem",
{"en": "File Item", "fr": "Élément de fichier"}, {"en": "File Item", "fr": "Élément de fichier"},
@ -35,7 +33,6 @@ register_model_labels(
}, },
) )
class FilePreview(BaseModel, ModelMixin): class FilePreview(BaseModel, ModelMixin):
content: Union[str, bytes] = Field(description="File content (text or binary)") content: Union[str, bytes] = Field(description="File content (text or binary)")
mimeType: str = Field(description="MIME type of the file") mimeType: str = Field(description="MIME type of the file")
@ -49,8 +46,6 @@ class FilePreview(BaseModel, ModelMixin):
if isinstance(data.get("content"), bytes): if isinstance(data.get("content"), bytes):
data["content"] = base64.b64encode(data["content"]).decode("utf-8") data["content"] = base64.b64encode(data["content"]).decode("utf-8")
return data return data
register_model_labels( register_model_labels(
"FilePreview", "FilePreview",
{"en": "File Preview", "fr": "Aperçu du fichier"}, {"en": "File Preview", "fr": "Aperçu du fichier"},
@ -64,13 +59,10 @@ register_model_labels(
}, },
) )
class FileData(BaseModel, ModelMixin): class FileData(BaseModel, ModelMixin):
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key") id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
data: str = Field(description="File data content") data: str = Field(description="File data content")
base64Encoded: bool = Field(description="Whether the data is base64 encoded") base64Encoded: bool = Field(description="Whether the data is base64 encoded")
register_model_labels( register_model_labels(
"FileData", "FileData",
{"en": "File Data", "fr": "Données de fichier"}, {"en": "File Data", "fr": "Données de fichier"},
@ -80,5 +72,3 @@ register_model_labels(
"base64Encoded": {"en": "Base64 Encoded", "fr": "Encodé en Base64"}, "base64Encoded": {"en": "Base64 Encoded", "fr": "Encodé en Base64"},
}, },
) )

View file

@ -14,8 +14,6 @@ class DataNeutraliserConfig(BaseModel, ModelMixin):
namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", frontend_type="textarea", frontend_readonly=False, frontend_required=False) namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", frontend_type="textarea", frontend_readonly=False, frontend_required=False)
sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", frontend_type="text", frontend_readonly=False, frontend_required=False) sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", frontend_type="text", frontend_readonly=False, frontend_required=False)
sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", frontend_type="text", frontend_readonly=False, frontend_required=False) sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", frontend_type="text", frontend_readonly=False, frontend_required=False)
register_model_labels( register_model_labels(
"DataNeutraliserConfig", "DataNeutraliserConfig",
{"en": "Data Neutralization Config", "fr": "Configuration de neutralisation des données"}, {"en": "Data Neutralization Config", "fr": "Configuration de neutralisation des données"},
@ -30,7 +28,6 @@ register_model_labels(
}, },
) )
class DataNeutralizerAttributes(BaseModel, ModelMixin): class DataNeutralizerAttributes(BaseModel, ModelMixin):
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the attribute mapping (used as UID in neutralized files)", frontend_type="text", frontend_readonly=True, frontend_required=False) id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the attribute mapping (used as UID in neutralized files)", frontend_type="text", frontend_readonly=True, frontend_required=False)
mandateId: str = Field(description="ID of the mandate this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=True) mandateId: str = Field(description="ID of the mandate this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=True)
@ -38,8 +35,6 @@ class DataNeutralizerAttributes(BaseModel, ModelMixin):
originalText: str = Field(description="Original text that was neutralized", frontend_type="text", frontend_readonly=True, frontend_required=True) originalText: str = Field(description="Original text that was neutralized", frontend_type="text", frontend_readonly=True, frontend_required=True)
fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", frontend_type="text", frontend_readonly=True, frontend_required=True) patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", frontend_type="text", frontend_readonly=True, frontend_required=True)
register_model_labels( register_model_labels(
"DataNeutralizerAttributes", "DataNeutralizerAttributes",
{"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"}, {"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"},

View file

@ -47,7 +47,8 @@ class Token(BaseModel, ModelMixin):
None, description="Mandate ID for tenant scoping of the token" None, description="Mandate ID for tenant scoping of the token"
) )
model_config = ConfigDict(use_enum_values=True) class Config:
use_enum_values = True
register_model_labels( register_model_labels(

View file

@ -9,7 +9,6 @@ class TicketFieldAttribute(BaseModel):
fieldName: str = Field(description="Human-readable field name") fieldName: str = Field(description="Human-readable field name")
field: str = Field(description="Ticket field ID/key") field: str = Field(description="Ticket field ID/key")
class TicketBase(ABC): class TicketBase(ABC):
@abstractmethod @abstractmethod
async def read_attributes(self) -> list[TicketFieldAttribute]: ... async def read_attributes(self) -> list[TicketFieldAttribute]: ...

View file

@ -13,20 +13,17 @@ class AuthAuthority(str, Enum):
GOOGLE = "google" GOOGLE = "google"
MSFT = "msft" MSFT = "msft"
class UserPrivilege(str, Enum): class UserPrivilege(str, Enum):
SYSADMIN = "sysadmin" SYSADMIN = "sysadmin"
ADMIN = "admin" ADMIN = "admin"
USER = "user" USER = "user"
class ConnectionStatus(str, Enum): class ConnectionStatus(str, Enum):
ACTIVE = "active" ACTIVE = "active"
EXPIRED = "expired" EXPIRED = "expired"
REVOKED = "revoked" REVOKED = "revoked"
PENDING = "pending" PENDING = "pending"
class Mandate(BaseModel, ModelMixin): class Mandate(BaseModel, ModelMixin):
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the mandate", frontend_type="text", frontend_readonly=True, frontend_required=False) id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the mandate", frontend_type="text", frontend_readonly=True, frontend_required=False)
name: str = Field(description="Name of the mandate", frontend_type="text", frontend_readonly=False, frontend_required=True) name: str = Field(description="Name of the mandate", frontend_type="text", frontend_readonly=False, frontend_required=True)
@ -37,8 +34,6 @@ class Mandate(BaseModel, ModelMixin):
{"value": "it", "label": {"en": "Italiano", "fr": "Italien"}}, {"value": "it", "label": {"en": "Italiano", "fr": "Italien"}},
]) ])
enabled: bool = Field(default=True, description="Indicates whether the mandate is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False) enabled: bool = Field(default=True, description="Indicates whether the mandate is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)
register_model_labels( register_model_labels(
"Mandate", "Mandate",
{"en": "Mandate", "fr": "Mandat"}, {"en": "Mandate", "fr": "Mandat"},
@ -50,7 +45,6 @@ register_model_labels(
}, },
) )
class UserConnection(BaseModel, ModelMixin): class UserConnection(BaseModel, ModelMixin):
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the connection", frontend_type="text", frontend_readonly=True, frontend_required=False) id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the connection", frontend_type="text", frontend_readonly=True, frontend_required=False)
userId: str = Field(description="ID of the user this connection belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) userId: str = Field(description="ID of the user this connection belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
@ -77,8 +71,6 @@ class UserConnection(BaseModel, ModelMixin):
{"value": "none", "label": {"en": "None", "fr": "Aucun"}}, {"value": "none", "label": {"en": "None", "fr": "Aucun"}},
]) ])
tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False) tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
register_model_labels( register_model_labels(
"UserConnection", "UserConnection",
{"en": "User Connection", "fr": "Connexion utilisateur"}, {"en": "User Connection", "fr": "Connexion utilisateur"},
@ -98,7 +90,6 @@ register_model_labels(
}, },
) )
class User(BaseModel, ModelMixin): class User(BaseModel, ModelMixin):
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the user", frontend_type="text", frontend_readonly=True, frontend_required=False) id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the user", frontend_type="text", frontend_readonly=True, frontend_required=False)
username: str = Field(description="Username for login", frontend_type="text", frontend_readonly=False, frontend_required=True) username: str = Field(description="Username for login", frontend_type="text", frontend_readonly=False, frontend_required=True)
@ -122,8 +113,6 @@ class User(BaseModel, ModelMixin):
{"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}}, {"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}},
]) ])
mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
register_model_labels( register_model_labels(
"User", "User",
{"en": "User", "fr": "Utilisateur"}, {"en": "User", "fr": "Utilisateur"},
@ -140,15 +129,10 @@ register_model_labels(
}, },
) )
class UserInDB(User): class UserInDB(User):
hashedPassword: Optional[str] = Field(None, description="Hash of the user password") hashedPassword: Optional[str] = Field(None, description="Hash of the user password")
register_model_labels( register_model_labels(
"UserInDB", "UserInDB",
{"en": "User Access", "fr": "Accès de l'utilisateur"}, {"en": "User Access", "fr": "Accès de l'utilisateur"},
{"hashedPassword": {"en": "Password hash", "fr": "Hachage de mot de passe"}}, {"hashedPassword": {"en": "Password hash", "fr": "Hachage de mot de passe"}},
) )

View file

@ -10,8 +10,6 @@ class Prompt(BaseModel, ModelMixin):
mandateId: str = Field(description="ID of the mandate this prompt belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False) mandateId: str = Field(description="ID of the mandate this prompt belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
content: str = Field(description="Content of the prompt", frontend_type="textarea", frontend_readonly=False, frontend_required=True) content: str = Field(description="Content of the prompt", frontend_type="textarea", frontend_readonly=False, frontend_required=True)
name: str = Field(description="Name of the prompt", frontend_type="text", frontend_readonly=False, frontend_required=True) name: str = Field(description="Name of the prompt", frontend_type="text", frontend_readonly=False, frontend_required=True)
register_model_labels( register_model_labels(
"Prompt", "Prompt",
{"en": "Prompt", "fr": "Invite"}, {"en": "Prompt", "fr": "Invite"},

View file

@ -22,7 +22,6 @@ class VoiceSettings(BaseModel, ModelMixin):
def to_dict(self) -> Dict[str, Any]: def to_dict(self) -> Dict[str, Any]:
return super().to_dict() return super().to_dict()
register_model_labels( register_model_labels(
"VoiceSettings", "VoiceSettings",
{"en": "Voice Settings", "fr": "Paramètres vocaux"}, {"en": "Voice Settings", "fr": "Paramètres vocaux"},

View file

@ -1,10 +1,8 @@
"""Web-related modules""" """Web-related modules"""
from abc import ABC, abstractmethod
from pydantic import BaseModel, Field, HttpUrl from pydantic import BaseModel, Field, HttpUrl
from typing import List, Optional, Literal from typing import List, Optional, Literal, Dict, Any
from modules.shared.configuration import APP_CONFIG from modules.shared.configuration import APP_CONFIG
from modules.datamodels.datamodelWorkflow import ActionDocument, ActionResult from modules.datamodels.datamodelChat import ActionDocument, ActionResult
WEB_SEARCH_MAX_QUERY_LENGTH: int = int(APP_CONFIG.get("Web_Search_MAX_QUERY_LENGTH", "400")) WEB_SEARCH_MAX_QUERY_LENGTH: int = int(APP_CONFIG.get("Web_Search_MAX_QUERY_LENGTH", "400"))
@ -12,130 +10,133 @@ WEB_SEARCH_MAX_RESULTS: int = int(APP_CONFIG.get("Web_Search_MAX_RESULTS", "20")
WEB_SEARCH_MIN_RESULTS: int = int(APP_CONFIG.get("Web_Search_MIN_RESULTS", "1")) WEB_SEARCH_MIN_RESULTS: int = int(APP_CONFIG.get("Web_Search_MIN_RESULTS", "1"))
class WebSearchRequest(BaseModel): class WebResearchOptions(BaseModel):
query: str = Field(min_length=1, max_length=WEB_SEARCH_MAX_QUERY_LENGTH) """Advanced options for web research workflow"""
max_results: int = Field(ge=WEB_SEARCH_MIN_RESULTS, le=WEB_SEARCH_MAX_RESULTS) max_pages: int = Field(default=10, ge=1, le=50, description="Maximum pages to crawl")
# Tavily tuning options search_depth: Literal["basic", "advanced"] = Field(default="basic", description="Tavily search depth")
search_depth: Optional[Literal["basic", "advanced"]] = Field(default=None) extract_depth: Literal["basic", "advanced"] = Field(default="advanced", description="Tavily extract depth")
time_range: Optional[Literal["d", "w", "m", "y"]] = Field( format: Literal["text", "markdown"] = Field(default="markdown", description="Content format")
default=None, description="Limit results to last day/week/month/year" return_report: bool = Field(default=True, description="Return formatted report or raw data")
) pages_search_depth: int = Field(default=1, ge=1, le=5, description="How deep to crawl: 1=main pages only, 2=main+sub-pages, 3=main+sub+sub-sub, etc.")
topic: Optional[Literal["general", "news", "academic"]] = Field(default=None) country: Optional[str] = Field(default=None, description="Country code for search bias")
include_domains: Optional[List[str]] = Field(default=None) time_range: Optional[Literal["d", "w", "m", "y"]] = Field(default=None, description="Time range for search")
exclude_domains: Optional[List[str]] = Field(default=None) topic: Optional[Literal["general", "news", "academic"]] = Field(default=None, description="Search topic")
language: Optional[str] = Field(default=None, description="ISO language code like 'en', 'de'") language: Optional[str] = Field(default=None, description="Language code")
include_answer: Optional[bool] = Field(default=None) include_answer: Optional[bool] = Field(default=None, description="Include AI answer")
include_raw_content: Optional[bool] = Field(default=None) include_raw_content: Optional[bool] = Field(default=None, description="Include raw content")
class WebResearchRequest(BaseModel):
"""Main web research request"""
user_prompt: str = Field(min_length=1, max_length=WEB_SEARCH_MAX_QUERY_LENGTH, description="User's research question or prompt")
urls: Optional[List[str]] = Field(default=None, description="Specific URLs to crawl (optional)")
max_results: int = Field(default=5, ge=1, le=WEB_SEARCH_MAX_RESULTS, description="Max search results")
options: WebResearchOptions = Field(default_factory=WebResearchOptions, description="Advanced options")
class WebSearchResultItem(BaseModel): class WebSearchResultItem(BaseModel):
"""Individual search result""" """Individual search result"""
title: str title: str
url: HttpUrl url: HttpUrl
raw_content: Optional[str] = Field(default=None, description="Raw HTML content")
class WebCrawlResultItem(BaseModel):
"""Individual crawl result"""
url: HttpUrl
content: str
class WebResearchDocumentData(BaseModel):
"""Complete web research results"""
user_prompt: str
websites_analyzed: int
additional_links_found: int
analysis_result: str
sources: List[WebSearchResultItem]
additional_links: List[str]
individual_content: Optional[Dict[str, str]] = None # URL -> content mapping
debug_info: Optional[Dict[str, Any]] = None
class WebResearchActionDocument(ActionDocument):
documentData: WebResearchDocumentData
class WebResearchActionResult(ActionResult):
documents: List[WebResearchActionDocument] = Field(default_factory=list)
# Legacy models for connector compatibility
class WebSearchDocumentData(BaseModel): class WebSearchDocumentData(BaseModel):
"""Complete search (and scrape) results document""" """Search results document data"""
query: str
query: str = Field(min_length=1, max_length=WEB_SEARCH_MAX_QUERY_LENGTH) results: List[WebSearchResultItem]
# Allow both WebSearchResultItem and WebScrapeResultItem to be stored here
results: List[object]
total_count: int total_count: int
class WebSearchActionDocument(ActionDocument): class WebSearchActionDocument(ActionDocument):
documentData: WebSearchDocumentData documentData: WebSearchDocumentData
class WebSearchActionResult(ActionResult): class WebSearchActionResult(ActionResult):
documents: List[WebSearchActionDocument] = Field(default_factory=list) documents: List[WebSearchActionDocument] = Field(default_factory=list)
class WebSearchBase(ABC):
@abstractmethod
async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult: ...
# --- Web crawl ---
class WebCrawlRequest(BaseModel):
urls: List[HttpUrl]
# Tavily extract options
extract_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
format: Optional[Literal["text", "markdown"]] = Field(default=None)
class WebCrawlResultItem(BaseModel):
"""Individual crawl result"""
url: HttpUrl
content: str
class WebCrawlDocumentData(BaseModel): class WebCrawlDocumentData(BaseModel):
"""Complete crawl results document""" """Crawl results document data"""
urls: List[HttpUrl] urls: List[HttpUrl]
results: List[WebCrawlResultItem] results: List[WebCrawlResultItem]
total_count: int total_count: int
class WebCrawlActionDocument(ActionDocument): class WebCrawlActionDocument(ActionDocument):
documentData: WebCrawlDocumentData = Field( documentData: WebCrawlDocumentData
description="The data extracted from crawled URLs"
)
class WebCrawlActionResult(ActionResult): class WebCrawlActionResult(ActionResult):
documents: List[WebCrawlActionDocument] = Field(default_factory=list) documents: List[WebCrawlActionDocument] = Field(default_factory=list)
class WebScrapeDocumentData(BaseModel):
class WebCrawlBase(ABC): """Scrape results document data"""
@abstractmethod query: str
async def crawl_urls(self, request: WebCrawlRequest) -> WebCrawlActionResult: ... results: List[WebSearchResultItem]
total_count: int
# --- Web scrape ---
class WebScrapeRequest(BaseModel):
query: str = Field(min_length=1, max_length=WEB_SEARCH_MAX_QUERY_LENGTH)
max_results: int = Field(ge=WEB_SEARCH_MIN_RESULTS, le=WEB_SEARCH_MAX_RESULTS)
# Pass-through search options
search_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
time_range: Optional[Literal["d", "w", "m", "y"]] = Field(default=None)
topic: Optional[Literal["general", "news", "academic"]] = Field(default=None)
include_domains: Optional[List[str]] = Field(default=None)
exclude_domains: Optional[List[str]] = Field(default=None)
language: Optional[str] = Field(default=None)
include_answer: Optional[bool] = Field(default=None)
include_raw_content: Optional[bool] = Field(default=None)
# Extract options
extract_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
format: Optional[Literal["text", "markdown"]] = Field(default=None)
class WebScrapeResultItem(BaseModel):
"""Individual scrape result"""
url: HttpUrl
content: str
class WebScrapeActionDocument(ActionDocument): class WebScrapeActionDocument(ActionDocument):
documentData: WebSearchDocumentData = Field( documentData: WebScrapeDocumentData
description="The data extracted from scraped URLs"
)
class WebScrapeActionResult(ActionResult): class WebScrapeActionResult(ActionResult):
documents: List[WebScrapeActionDocument] = Field(default_factory=list) documents: List[WebScrapeActionDocument] = Field(default_factory=list)
class WebSearchRequest(BaseModel):
"""Search request for Tavily"""
query: str
max_results: int = 5
search_depth: Optional[Literal["basic", "advanced"]] = None
time_range: Optional[Literal["d", "w", "m", "y"]] = None
topic: Optional[Literal["general", "news", "academic"]] = None
include_domains: Optional[List[str]] = None
exclude_domains: Optional[List[str]] = None
language: Optional[str] = None
include_answer: Optional[bool] = None
include_raw_content: Optional[bool] = None
auto_parameters: Optional[bool] = None
country: Optional[str] = None
class WebScrapeBase(ABC): class WebCrawlRequest(BaseModel):
@abstractmethod """Crawl request for Tavily"""
async def scrape(self, request: WebScrapeRequest) -> WebScrapeActionResult: ... urls: List[HttpUrl]
extract_depth: Optional[Literal["basic", "advanced"]] = None
format: Optional[Literal["text", "markdown"]] = None
class WebScrapeRequest(BaseModel):
"""Scrape request for Tavily"""
query: str
max_results: int = 5
search_depth: Optional[Literal["basic", "advanced"]] = None
time_range: Optional[Literal["d", "w", "m", "y"]] = None
topic: Optional[Literal["general", "news", "academic"]] = None
include_domains: Optional[List[str]] = None
exclude_domains: Optional[List[str]] = None
language: Optional[str] = None
include_answer: Optional[bool] = None
include_raw_content: Optional[bool] = None
auto_parameters: Optional[bool] = None
country: Optional[str] = None
extract_depth: Optional[Literal["basic", "advanced"]] = None
format: Optional[Literal["text", "markdown"]] = None
class WebScrapeResultItem(BaseModel):
"""Individual scrape result"""
url: HttpUrl
content: str

View file

@ -1,474 +0,0 @@
"""Workflow-related base datamodels and step/task structures."""
from enum import Enum
from typing import List, Dict, Any, Optional
from pydantic import BaseModel, Field
from modules.shared.attributeUtils import register_model_labels, ModelMixin
class ActionDocument(BaseModel, ModelMixin):
    """Clear document structure for action results.

    A single named output produced by an action; the payload is deliberately
    untyped so heterogeneous action outputs can share one envelope.
    """
    # Human-readable identifier used when routing/presenting the document.
    documentName: str = Field(description="Name of the document")
    # Payload is Any on purpose: actions emit text, dicts, model instances, etc.
    documentData: Any = Field(description="Content/data of the document")
    # Lets consumers interpret documentData without inspecting it.
    mimeType: str = Field(description="MIME type of the document")
# Register English/French display labels for the model and its fields (UI metadata).
register_model_labels(
    "ActionDocument",
    {"en": "Action Document", "fr": "Document d'action"},
    {
        "documentName": {"en": "Document Name", "fr": "Nom du document"},
        "documentData": {"en": "Document Data", "fr": "Données du document"},
        "mimeType": {"en": "MIME Type", "fr": "Type MIME"},
    },
)
class ActionResult(BaseModel, ModelMixin):
    """Clean action result with documents as primary output

    IMPORTANT: Action methods should NOT set resultLabel in their return value.
    The resultLabel is managed by the action handler using the action's execResultLabel
    from the action plan. This ensures consistent document routing throughout the workflow.
    """
    success: bool = Field(description="Whether execution succeeded")
    error: Optional[str] = Field(None, description="Error message if failed")
    documents: List[ActionDocument] = Field(
        default_factory=list, description="Document outputs"
    )
    resultLabel: Optional[str] = Field(
        None,
        description="Label for document routing (set by action handler, not by action methods)",
    )
    @classmethod
    def isSuccess(cls, documents: Optional[List[ActionDocument]] = None) -> "ActionResult":
        """Build a successful result wrapping *documents* (empty list when None)."""
        # `documents or []` also normalizes an explicitly-passed empty list.
        return cls(success=True, documents=documents or [])
    @classmethod
    def isFailure(
        cls, error: str, documents: Optional[List[ActionDocument]] = None
    ) -> "ActionResult":
        """Build a failed result carrying *error* plus any partial *documents*."""
        return cls(success=False, documents=documents or [], error=error)
register_model_labels(
    "ActionResult",
    {"en": "Action Result", "fr": "Résultat de l'action"},
    {
        "success": {"en": "Success", "fr": "Succès"},
        "error": {"en": "Error", "fr": "Erreur"},
        "documents": {"en": "Documents", "fr": "Documents"},
        "resultLabel": {"en": "Result Label", "fr": "Étiquette du résultat"},
    },
)
class ActionSelection(BaseModel, ModelMixin):
    """Identifies which action to run: a method namespace plus an action name."""
    # Method namespace the action belongs to (e.g., web, document, ai).
    method: str = Field(description="Method to execute (e.g., web, document, ai)")
    name: str = Field(
        description="Action name within the method (e.g., search, extract)"
    )
# Register English/French display labels for the model and its fields (UI metadata).
register_model_labels(
    "ActionSelection",
    {"en": "Action Selection", "fr": "Sélection d'action"},
    {
        "method": {"en": "Method", "fr": "Méthode"},
        "name": {"en": "Action Name", "fr": "Nom de l'action"},
    },
)
class ActionParameters(BaseModel, ModelMixin):
    """Free-form keyword parameters passed to a selected action."""
    parameters: Dict[str, Any] = Field(
        default_factory=dict, description="Parameters to execute the selected action"
    )
# Register English/French display labels for the model and its fields (UI metadata).
register_model_labels(
    "ActionParameters",
    {"en": "Action Parameters", "fr": "Paramètres d'action"},
    {
        "parameters": {"en": "Parameters", "fr": "Paramètres"},
    },
)
class ObservationPreview(BaseModel, ModelMixin):
    """Compact preview of one produced output (used inside Observation)."""
    name: str = Field(description="Document name or URL label")
    mime: str = Field(description="MIME type or kind")
    snippet: str = Field(description="Short snippet or summary")
# Register English/French display labels for the model and its fields (UI metadata).
register_model_labels(
    "ObservationPreview",
    {"en": "Observation Preview", "fr": "Aperçu d'observation"},
    {
        "name": {"en": "Name", "fr": "Nom"},
        "mime": {"en": "MIME", "fr": "MIME"},
        "snippet": {"en": "Snippet", "fr": "Extrait"},
    },
)
class Observation(BaseModel, ModelMixin):
    """Summary of one action execution: outcome, document routing label, and previews."""
    success: bool = Field(description="Action execution success flag")
    # Deterministic label so later steps can reference this action's documents.
    resultLabel: str = Field(description="Deterministic label for produced documents")
    documentsCount: int = Field(description="Number of produced documents")
    previews: List[ObservationPreview] = Field(
        default_factory=list, description="Compact previews of outputs"
    )
    notes: List[str] = Field(
        default_factory=list, description="Short notes or key facts"
    )
# Register English/French display labels for the model and its fields (UI metadata).
register_model_labels(
    "Observation",
    {"en": "Observation", "fr": "Observation"},
    {
        "success": {"en": "Success", "fr": "Succès"},
        "resultLabel": {"en": "Result Label", "fr": "Étiquette du résultat"},
        "documentsCount": {"en": "Documents Count", "fr": "Nombre de documents"},
        "previews": {"en": "Previews", "fr": "Aperçus"},
        "notes": {"en": "Notes", "fr": "Notes"},
    },
)
class TaskStatus(str, Enum):
    """Task status enumeration.

    Inherits from str so values serialize as plain strings (JSON/pydantic).
    """
    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
# Register English/French display labels for the enum and its members (UI metadata).
register_model_labels(
    "TaskStatus",
    {"en": "Task Status", "fr": "Statut de la tâche"},
    {
        "PENDING": {"en": "Pending", "fr": "En attente"},
        "RUNNING": {"en": "Running", "fr": "En cours"},
        "COMPLETED": {"en": "Completed", "fr": "Terminé"},
        "FAILED": {"en": "Failed", "fr": "Échec"},
        "CANCELLED": {"en": "Cancelled", "fr": "Annulé"},
    },
)
class DocumentExchange(BaseModel, ModelMixin):
    """A labeled set of document references handed between tasks."""
    documentsLabel: str = Field(description="Label for the set of documents")
    # References (not inline content) — presumably IDs or names; verify against callers.
    documents: List[str] = Field(
        default_factory=list, description="List of document references"
    )
# Register English/French display labels for the model and its fields (UI metadata).
register_model_labels(
    "DocumentExchange",
    {"en": "Document Exchange", "fr": "Échange de documents"},
    {
        "documentsLabel": {"en": "Documents Label", "fr": "Label des documents"},
        "documents": {"en": "Documents", "fr": "Documents"},
    },
)
class TaskAction(BaseModel, ModelMixin):
    """One executable action within a task: what to run, with what parameters,
    plus execution bookkeeping (status, retries, timing, result).
    """
    id: str = Field(..., description="Action ID")
    execMethod: str = Field(..., description="Method to execute")
    execAction: str = Field(..., description="Action to perform")
    execParameters: Dict[str, Any] = Field(
        default_factory=dict, description="Action parameters"
    )
    # Label under which this action's output documents are routed downstream.
    execResultLabel: Optional[str] = Field(
        None, description="Label for the set of result documents"
    )
    expectedDocumentFormats: Optional[List[Dict[str, str]]] = Field(
        None, description="Expected document formats (optional)"
    )
    userMessage: Optional[str] = Field(
        None, description="User-friendly message in user's language"
    )
    status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status")
    error: Optional[str] = Field(None, description="Error message if action failed")
    retryCount: int = Field(default=0, description="Number of retries attempted")
    retryMax: int = Field(default=3, description="Maximum number of retries")
    processingTime: Optional[float] = Field(
        None, description="Processing time in seconds"
    )
    timestamp: float = Field(
        ..., description="When the action was executed (UTC timestamp in seconds)"
    )
    result: Optional[str] = Field(None, description="Result of the action")
# Register English/French display labels for the model and its fields (UI metadata).
register_model_labels(
    "TaskAction",
    {"en": "Task Action", "fr": "Action de tâche"},
    {
        "id": {"en": "Action ID", "fr": "ID de l'action"},
        "execMethod": {"en": "Method", "fr": "Méthode"},
        "execAction": {"en": "Action", "fr": "Action"},
        "execParameters": {"en": "Parameters", "fr": "Paramètres"},
        "execResultLabel": {"en": "Result Label", "fr": "Label du résultat"},
        "expectedDocumentFormats": {
            "en": "Expected Document Formats",
            "fr": "Formats de documents attendus",
        },
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"},
        "status": {"en": "Status", "fr": "Statut"},
        "error": {"en": "Error", "fr": "Erreur"},
        "retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"},
        "retryMax": {"en": "Max Retries", "fr": "Tentatives max"},
        "processingTime": {"en": "Processing Time", "fr": "Temps de traitement"},
        "timestamp": {"en": "Timestamp", "fr": "Horodatage"},
        "result": {"en": "Result", "fr": "Résultat"},
    },
)
class TaskResult(BaseModel, ModelMixin):
    """Final outcome of a task: status, success flag, and optional feedback/error."""
    taskId: str = Field(..., description="Task ID")
    status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status")
    success: bool = Field(..., description="Whether the task was successful")
    feedback: Optional[str] = Field(None, description="Task feedback message")
    error: Optional[str] = Field(None, description="Error message if task failed")
# Register English/French display labels for the model and its fields (UI metadata).
register_model_labels(
    "TaskResult",
    {"en": "Task Result", "fr": "Résultat de tâche"},
    {
        "taskId": {"en": "Task ID", "fr": "ID de la tâche"},
        "status": {"en": "Status", "fr": "Statut"},
        "success": {"en": "Success", "fr": "Succès"},
        "feedback": {"en": "Feedback", "fr": "Retour"},
        "error": {"en": "Error", "fr": "Erreur"},
    },
)
class TaskItem(BaseModel, ModelMixin):
    """A unit of work inside a workflow: the triggering input, its ordered
    actions, retry/rollback policy, dependencies, and execution bookkeeping.
    """
    id: str = Field(..., description="Task ID")
    workflowId: str = Field(..., description="Workflow ID")
    userInput: str = Field(..., description="User input that triggered the task")
    status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status")
    error: Optional[str] = Field(None, description="Error message if task failed")
    startedAt: Optional[float] = Field(
        None, description="When the task started (UTC timestamp in seconds)"
    )
    finishedAt: Optional[float] = Field(
        None, description="When the task finished (UTC timestamp in seconds)"
    )
    actionList: List[TaskAction] = Field(
        default_factory=list, description="List of actions to execute"
    )
    retryCount: int = Field(default=0, description="Number of retries attempted")
    retryMax: int = Field(default=3, description="Maximum number of retries")
    rollbackOnFailure: bool = Field(
        default=True, description="Whether to rollback on failure"
    )
    dependencies: List[str] = Field(
        default_factory=list, description="List of task IDs this task depends on"
    )
    feedback: Optional[str] = Field(None, description="Task feedback message")
    processingTime: Optional[float] = Field(
        None, description="Total processing time in seconds"
    )
    resultLabels: Optional[Dict[str, Any]] = Field(
        default_factory=dict, description="Map of result labels to their values"
    )
# Register English/French display labels for the model and its fields (UI metadata).
register_model_labels(
    "TaskItem",
    {"en": "Task", "fr": "Tâche"},
    {
        "id": {"en": "Task ID", "fr": "ID de la tâche"},
        "workflowId": {"en": "Workflow ID", "fr": "ID du workflow"},
        "userInput": {"en": "User Input", "fr": "Entrée utilisateur"},
        "status": {"en": "Status", "fr": "Statut"},
        "error": {"en": "Error", "fr": "Erreur"},
        "startedAt": {"en": "Started At", "fr": "Démarré à"},
        "finishedAt": {"en": "Finished At", "fr": "Terminé à"},
        "actionList": {"en": "Actions", "fr": "Actions"},
        "retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"},
        "retryMax": {"en": "Max Retries", "fr": "Tentatives max"},
        "processingTime": {"en": "Processing Time", "fr": "Temps de traitement"},
    },
)
class TaskStep(BaseModel, ModelMixin):
    """Planning-level description of a step: its objective, dependencies,
    and success criteria (no execution state — see TaskItem/TaskAction).
    """
    id: str
    objective: str
    dependencies: Optional[list[str]] = Field(default_factory=list)
    success_criteria: Optional[list[str]] = Field(default_factory=list)
    # Free-form complexity hint; allowed values not constrained here.
    estimated_complexity: Optional[str] = None
    userMessage: Optional[str] = Field(
        None, description="User-friendly message in user's language"
    )
# Register English/French display labels for the model and its fields (UI metadata).
register_model_labels(
    "TaskStep",
    {"en": "Task Step", "fr": "Étape de tâche"},
    {
        "id": {"en": "ID", "fr": "ID"},
        "objective": {"en": "Objective", "fr": "Objectif"},
        "dependencies": {"en": "Dependencies", "fr": "Dépendances"},
        "success_criteria": {"en": "Success Criteria", "fr": "Critères de succès"},
        "estimated_complexity": {
            "en": "Estimated Complexity",
            "fr": "Complexité estimée",
        },
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"},
    },
)
class TaskHandover(BaseModel, ModelMixin):
    """Context package passed from one task (or phase/workflow) to the next:
    documents, prior results, improvement hints, and summarized history.
    """
    taskId: str = Field(description="Target task ID")
    sourceTask: Optional[str] = Field(None, description="Source task ID")
    inputDocuments: List[DocumentExchange] = Field(
        default_factory=list, description="Available input documents"
    )
    outputDocuments: List[DocumentExchange] = Field(
        default_factory=list, description="Produced output documents"
    )
    context: Dict[str, Any] = Field(default_factory=dict, description="Task context")
    previousResults: List[str] = Field(
        default_factory=list, description="Previous result summaries"
    )
    improvements: List[str] = Field(
        default_factory=list, description="Improvement suggestions"
    )
    workflowSummary: Optional[str] = Field(
        None, description="Summarized workflow context"
    )
    messageHistory: List[str] = Field(
        default_factory=list, description="Key message summaries"
    )
    timestamp: float = Field(
        ..., description="When the handover was created (UTC timestamp in seconds)"
    )
    # One of "task", "phase", or "workflow" per the description; not enum-enforced here.
    handoverType: str = Field(
        default="task", description="Type of handover: task, phase, or workflow"
    )
# Register English/French display labels for the model and its fields (UI metadata).
register_model_labels(
    "TaskHandover",
    {"en": "Task Handover", "fr": "Transfert de tâche"},
    {
        "taskId": {"en": "Task ID", "fr": "ID de la tâche"},
        "sourceTask": {"en": "Source Task", "fr": "Tâche source"},
        "inputDocuments": {"en": "Input Documents", "fr": "Documents d'entrée"},
        "outputDocuments": {"en": "Output Documents", "fr": "Documents de sortie"},
        "context": {"en": "Context", "fr": "Contexte"},
        "previousResults": {"en": "Previous Results", "fr": "Résultats précédents"},
        "improvements": {"en": "Improvements", "fr": "Améliorations"},
        "workflowSummary": {"en": "Workflow Summary", "fr": "Résumé du workflow"},
        "messageHistory": {"en": "Message History", "fr": "Historique des messages"},
        "timestamp": {"en": "Timestamp", "fr": "Horodatage"},
        "handoverType": {"en": "Handover Type", "fr": "Type de transfert"},
    },
)
class TaskContext(BaseModel, ModelMixin):
    """Mutable execution context for a single task step.

    Aggregates the step definition, prior results/handover data, retry
    bookkeeping, and review feedback consulted when (re)generating the step.
    """
    task_step: TaskStep
    workflow: Optional["ChatWorkflow"] = None
    workflow_id: Optional[str] = None
    available_documents: Optional[str] = "No documents available"
    available_connections: Optional[list[str]] = Field(default_factory=list)
    previous_results: Optional[list[str]] = Field(default_factory=list)
    previous_handover: Optional[TaskHandover] = None
    improvements: Optional[list[str]] = Field(default_factory=list)
    retry_count: Optional[int] = 0
    previous_action_results: Optional[list] = Field(default_factory=list)
    previous_review_result: Optional[dict] = None
    is_regeneration: Optional[bool] = False
    failure_patterns: Optional[list[str]] = Field(default_factory=list)
    failed_actions: Optional[list] = Field(default_factory=list)
    successful_actions: Optional[list] = Field(default_factory=list)
    criteria_progress: Optional[dict] = None

    def getDocumentReferences(self) -> List[str]:
        """Return deduplicated document references from the previous handover.

        Uses dict.fromkeys so deduplication preserves first-seen order and is
        deterministic (list(set(...)) yielded an arbitrary order).
        """
        docs: List[str] = []
        if self.previous_handover:
            for doc_exchange in self.previous_handover.inputDocuments:
                docs.extend(doc_exchange.documents)
        return list(dict.fromkeys(docs))

    def addImprovement(self, improvement: str) -> None:
        """Record an improvement suggestion, ignoring duplicates."""
        if self.improvements is None:
            self.improvements = []
        if improvement not in self.improvements:
            self.improvements.append(improvement)
class ReviewContext(BaseModel, ModelMixin):
    """Inputs assembled for reviewing the execution of a single task step."""
    task_step: TaskStep
    task_actions: Optional[list] = Field(default_factory=list)
    action_results: Optional[list] = Field(default_factory=list)
    step_result: Optional[dict] = Field(default_factory=dict)
    workflow_id: Optional[str] = None
    previous_results: Optional[list[str]] = Field(default_factory=list)
class ReviewResult(BaseModel, ModelMixin):
    """Result of a task-step review: status, quality/confidence scores, and a
    breakdown of met/unmet criteria plus suggested improvements."""
    status: str
    reason: Optional[str] = None
    improvements: Optional[list[str]] = Field(default_factory=list)
    quality_score: Optional[int] = 5  # defaults to a neutral mid score
    missing_outputs: Optional[list[str]] = Field(default_factory=list)
    met_criteria: Optional[list[str]] = Field(default_factory=list)
    unmet_criteria: Optional[list[str]] = Field(default_factory=list)
    confidence: Optional[float] = 0.5
    userMessage: Optional[str] = Field(
        None, description="User-friendly message in user's language"
    )
# English/French display labels for ReviewResult and its fields.
register_model_labels(
    "ReviewResult",
    {"en": "Review Result", "fr": "Résultat de l'évaluation"},
    {
        "status": {"en": "Status", "fr": "Statut"},
        "reason": {"en": "Reason", "fr": "Raison"},
        "improvements": {"en": "Improvements", "fr": "Améliorations"},
        "quality_score": {"en": "Quality Score", "fr": "Score de qualité"},
        "missing_outputs": {"en": "Missing Outputs", "fr": "Sorties manquantes"},
        "met_criteria": {"en": "Met Criteria", "fr": "Critères respectés"},
        "unmet_criteria": {"en": "Unmet Criteria", "fr": "Critères non respectés"},
        "confidence": {"en": "Confidence", "fr": "Confiance"},
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"},
    },
)
class TaskPlan(BaseModel, ModelMixin):
    """A planned sequence of task steps together with a high-level overview."""
    overview: str
    tasks: list[TaskStep]
    userMessage: Optional[str] = Field(
        None, description="Overall user-friendly message for the task plan"
    )
# English/French display labels for TaskPlan and its fields.
register_model_labels(
    "TaskPlan",
    {"en": "Task Plan", "fr": "Plan de tâches"},
    {
        "overview": {"en": "Overview", "fr": "Aperçu"},
        "tasks": {"en": "Tasks", "fr": "Tâches"},
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"},
    },
)

View file

@ -1,10 +1,12 @@
import logging import logging
from typing import Dict, Any, List, Union from typing import Dict, Any, List, Union, Tuple, Optional
from dataclasses import dataclass from dataclasses import dataclass
logger = logging.getLogger(__name__)
from modules.connectors.connectorAiOpenai import AiOpenai from modules.connectors.connectorAiOpenai import AiOpenai
from modules.connectors.connectorAiAnthropic import AiAnthropic from modules.connectors.connectorAiAnthropic import AiAnthropic
from modules.connectors.connectorAiLangdoc import AiLangdoc from modules.connectors.connectorAiPerplexity import AiPerplexity
from modules.connectors.connectorAiTavily import ConnectorWeb from modules.connectors.connectorAiTavily import ConnectorWeb
from modules.datamodels.datamodelAi import ( from modules.datamodels.datamodelAi import (
AiCallOptions, AiCallOptions,
@ -18,26 +20,14 @@ from modules.datamodels.datamodelAi import (
PROCESSING_MODE_PRIORITY_MAPPING PROCESSING_MODE_PRIORITY_MAPPING
) )
from modules.datamodels.datamodelWeb import ( from modules.datamodels.datamodelWeb import (
WebCrawlActionResult, WebResearchRequest,
WebCrawlActionDocument, WebResearchActionResult,
WebCrawlDocumentData,
WebCrawlRequest,
WebCrawlResultItem,
WebScrapeActionResult,
WebScrapeActionDocument,
WebSearchDocumentData as WebScrapeDocumentData,
WebScrapeRequest,
WebScrapeResultItem,
WebSearchActionResult,
WebSearchActionDocument,
WebSearchDocumentData,
WebSearchRequest,
WebSearchResultItem, WebSearchResultItem,
WebCrawlResultItem,
WebSearchRequest,
WebCrawlRequest,
) )
from modules.datamodels.datamodelWorkflow import ActionDocument from modules.datamodels.datamodelChat import ActionDocument
logger = logging.getLogger(__name__)
# Comprehensive model registry with capability tags and function mapping # Comprehensive model registry with capability tags and function mapping
@ -52,8 +42,8 @@ aiModels: Dict[str, Dict[str, Any]] = {
"costPer1kTokensOutput": 0.06, "costPer1kTokensOutput": 0.06,
"speedRating": 8, "speedRating": 8,
"qualityRating": 9, "qualityRating": 9,
"capabilities": ["text_generation", "chat", "reasoning"], "capabilities": ["text_generation", "chat", "reasoning", "analysis"],
"tags": ["text", "chat", "reasoning", "general"] "tags": ["text", "chat", "reasoning", "analysis", "general"]
}, },
"openai_callAiBasic_gpt35": { "openai_callAiBasic_gpt35": {
"connector": "openai", "connector": "openai",
@ -118,90 +108,66 @@ aiModels: Dict[str, Dict[str, Any]] = {
"tags": ["image", "vision", "multimodal", "high_quality"] "tags": ["image", "vision", "multimodal", "high_quality"]
}, },
# LangDoc Models # Perplexity Models
"langdoc_callAiBasic": { "perplexity_callAiBasic": {
"connector": "langdoc", "connector": "perplexity",
"function": "callAiBasic", "function": "callAiBasic",
"llmName": "gpt-4o", "llmName": "llama-3.1-sonar-large-128k-online",
"contextLength": 128000, "contextLength": 128000,
"costPer1kTokens": 0.02, "costPer1kTokens": 0.005,
"costPer1kTokensOutput": 0.04, "costPer1kTokensOutput": 0.005,
"speedRating": 8, "speedRating": 8,
"qualityRating": 9, "qualityRating": 8,
"capabilities": ["text_generation", "chat", "reasoning"], "capabilities": ["text_generation", "chat", "reasoning", "web_search"],
"tags": ["text", "chat", "reasoning", "general", "cost_effective"] "tags": ["text", "chat", "reasoning", "web_search", "cost_effective"]
}, },
"langdoc_callAiImage": { "perplexity_callAiWithWebSearch": {
"connector": "langdoc", "connector": "perplexity",
"function": "callAiImage", "function": "callAiWithWebSearch",
"llmName": "gpt-4o", "llmName": "sonar-pro",
"contextLength": 128000, "contextLength": 128000,
"costPer1kTokens": 0.02, "costPer1kTokens": 0.01,
"costPer1kTokensOutput": 0.04, "costPer1kTokensOutput": 0.01,
"speedRating": 7, "speedRating": 7,
"qualityRating": 9, "qualityRating": 9,
"capabilities": ["image_analysis", "vision", "multimodal"], "capabilities": ["text_generation", "web_search", "research"],
"tags": ["image", "vision", "multimodal", "cost_effective"] "tags": ["text", "web_search", "research", "high_quality"]
}, },
"langdoc_generateImage": { "perplexity_researchTopic": {
"connector": "langdoc", "connector": "perplexity",
"function": "generateImage", "function": "researchTopic",
"llmName": "dall-e-3", "llmName": "mistral-7b-instruct",
"contextLength": 0, "contextLength": 32000,
"costPer1kTokens": 0.04, "costPer1kTokens": 0.002,
"costPer1kTokensOutput": 0.0, "costPer1kTokensOutput": 0.002,
"speedRating": 6, "speedRating": 8,
"qualityRating": 9,
"capabilities": ["image_generation", "art", "visual_creation"],
"tags": ["image_generation", "art", "visual", "cost_effective"]
},
"langdoc_generateImageWithVariations": {
"connector": "langdoc",
"function": "generateImageWithVariations",
"llmName": "dall-e-3",
"contextLength": 0,
"costPer1kTokens": 0.04,
"costPer1kTokensOutput": 0.0,
"speedRating": 5,
"qualityRating": 9,
"capabilities": ["image_generation", "art", "visual_creation", "variations"],
"tags": ["image_generation", "art", "visual", "variations", "cost_effective"]
},
"langdoc_generateImageWithChat": {
"connector": "langdoc",
"function": "generateImageWithChat",
"llmName": "gpt-4o",
"contextLength": 128000,
"costPer1kTokens": 0.02,
"costPer1kTokensOutput": 0.04,
"speedRating": 6,
"qualityRating": 8, "qualityRating": 8,
"capabilities": ["image_generation", "chat", "visual_creation"], "capabilities": ["web_search", "research", "information_gathering"],
"tags": ["image_generation", "chat", "visual", "cost_effective"] "tags": ["web_search", "research", "information", "cost_effective"]
}, },
"langdoc_listModels": { "perplexity_answerQuestion": {
"connector": "langdoc", "connector": "perplexity",
"function": "listModels", "function": "answerQuestion",
"llmName": "api", "llmName": "mistral-7b-instruct",
"contextLength": 0, "contextLength": 32000,
"costPer1kTokens": 0.0, "costPer1kTokens": 0.002,
"costPer1kTokensOutput": 0.0, "costPer1kTokensOutput": 0.002,
"speedRating": 9, "speedRating": 8,
"qualityRating": 5, "qualityRating": 8,
"capabilities": ["model_listing", "api_info"], "capabilities": ["web_search", "question_answering", "research"],
"tags": ["api", "info", "models"] "tags": ["web_search", "qa", "research", "cost_effective"]
}, },
"langdoc_getModelInfo": { "perplexity_getCurrentNews": {
"connector": "langdoc", "connector": "perplexity",
"function": "getModelInfo", "function": "getCurrentNews",
"llmName": "api", "llmName": "mistral-7b-instruct",
"contextLength": 0, "contextLength": 32000,
"costPer1kTokens": 0.0, "costPer1kTokens": 0.002,
"costPer1kTokensOutput": 0.0, "costPer1kTokensOutput": 0.002,
"speedRating": 9, "speedRating": 8,
"qualityRating": 5, "qualityRating": 8,
"capabilities": ["model_info", "api_info"], "capabilities": ["web_search", "news", "current_events"],
"tags": ["api", "info", "models"] "tags": ["web_search", "news", "current_events", "cost_effective"]
}, },
# Tavily Web Models # Tavily Web Models
@ -250,7 +216,7 @@ class AiObjects:
openaiService: AiOpenai openaiService: AiOpenai
anthropicService: AiAnthropic anthropicService: AiAnthropic
langdocService: AiLangdoc perplexityService: AiPerplexity
tavilyService: ConnectorWeb tavilyService: ConnectorWeb
def __post_init__(self) -> None: def __post_init__(self) -> None:
@ -258,8 +224,8 @@ class AiObjects:
raise TypeError("openaiService must be provided") raise TypeError("openaiService must be provided")
if self.anthropicService is None: if self.anthropicService is None:
raise TypeError("anthropicService must be provided") raise TypeError("anthropicService must be provided")
if self.langdocService is None: if self.perplexityService is None:
raise TypeError("langdocService must be provided") raise TypeError("perplexityService must be provided")
if self.tavilyService is None: if self.tavilyService is None:
raise TypeError("tavilyService must be provided") raise TypeError("tavilyService must be provided")
@ -268,13 +234,13 @@ class AiObjects:
"""Create AiObjects instance with all connectors initialized.""" """Create AiObjects instance with all connectors initialized."""
openaiService = AiOpenai() openaiService = AiOpenai()
anthropicService = AiAnthropic() anthropicService = AiAnthropic()
langdocService = AiLangdoc() perplexityService = AiPerplexity()
tavilyService = await ConnectorWeb.create() tavilyService = await ConnectorWeb.create()
return cls( return cls(
openaiService=openaiService, openaiService=openaiService,
anthropicService=anthropicService, anthropicService=anthropicService,
langdocService=langdocService, perplexityService=perplexityService,
tavilyService=tavilyService tavilyService=tavilyService
) )
@ -330,11 +296,22 @@ class AiObjects:
elif options.operationType == OperationType.IMAGE_GENERATION: elif options.operationType == OperationType.IMAGE_GENERATION:
return "openai_generateImage" return "openai_generateImage"
elif options.operationType == OperationType.WEB_RESEARCH: elif options.operationType == OperationType.WEB_RESEARCH:
return "langdoc_callAiBasic" return "perplexity_callAiWithWebSearch"
else: else:
return "openai_callAiBasic_gpt35" return "openai_callAiBasic_gpt35"
# Select based on priority # Special handling for planning operations - use Claude for consistency
if options.operationType in [OperationType.GENERATE_PLAN, OperationType.ANALYSE_CONTENT]:
if "anthropic_callAiBasic" in candidates:
logger.info("Planning operation: Selected Claude (anthropic_callAiBasic) for highest quality")
return "anthropic_callAiBasic"
# Fallback to GPT-4o if Claude not available
if "openai_callAiBasic" in candidates:
logger.info("Planning operation: Selected GPT-4o (openai_callAiBasic) as fallback")
return "openai_callAiBasic"
# Select based on priority for other operations
if effectivePriority == Priority.SPEED: if effectivePriority == Priority.SPEED:
return max(candidates, key=lambda k: candidates[k]["speedRating"]) return max(candidates, key=lambda k: candidates[k]["speedRating"])
elif effectivePriority == Priority.QUALITY: elif effectivePriority == Priority.QUALITY:
@ -355,8 +332,8 @@ class AiObjects:
return self.openaiService return self.openaiService
elif connectorType == "anthropic": elif connectorType == "anthropic":
return self.anthropicService return self.anthropicService
elif connectorType == "langdoc": elif connectorType == "perplexity":
return self.langdocService return self.perplexityService
elif connectorType == "tavily": elif connectorType == "tavily":
return self.tavilyService return self.tavilyService
else: else:
@ -383,6 +360,17 @@ class AiObjects:
# Select model for text generation # Select model for text generation
modelName = self._selectModel(prompt, context, options) modelName = self._selectModel(prompt, context, options)
# Derive generation parameters
temperature = getattr(options, "temperature", None)
if temperature is None:
temperature = 0.2
maxTokens = getattr(options, "maxTokens", None)
# Provide a generous default to avoid truncation for long outputs
if maxTokens is None:
# If resultFormat suggests large outputs (e.g., html, json), allow more tokens
wants_large = str(getattr(options, "resultFormat", "")).lower() in ["html", "json", "md", "markdown"]
maxTokens = 8000 if wants_large else 2000
messages: List[Dict[str, Any]] = [] messages: List[Dict[str, Any]] = []
if context: if context:
messages.append({"role": "system", "content": f"Context from documents:\n{context}"}) messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
@ -394,10 +382,27 @@ class AiObjects:
# Call the appropriate function # Call the appropriate function
if functionName == "callAiBasic": if functionName == "callAiBasic":
if aiModels[modelName]["connector"] == "openai": if aiModels[modelName]["connector"] == "openai":
content = await connector.callAiBasic(messages) content = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
elif aiModels[modelName]["connector"] == "perplexity":
content = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
else: else:
response = await connector.callAiBasic(messages) response = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
content = response["choices"][0]["message"]["content"] content = response["choices"][0]["message"]["content"]
elif functionName == "callAiWithWebSearch":
# Perplexity web search function
query = prompt
if context:
query = f"Context: {context}\n\nQuery: {prompt}"
content = await connector.callAiWithWebSearch(query)
elif functionName == "researchTopic":
# Perplexity research function
content = await connector.researchTopic(prompt)
elif functionName == "answerQuestion":
# Perplexity question answering function
content = await connector.answerQuestion(prompt, context)
elif functionName == "getCurrentNews":
# Perplexity news function
content = await connector.getCurrentNews(prompt)
else: else:
raise ValueError(f"Function {functionName} not supported for text generation") raise ValueError(f"Function {functionName} not supported for text generation")
@ -446,21 +451,331 @@ class AiObjects:
else: else:
raise ValueError(f"Function {functionName} not supported for image generation") raise ValueError(f"Function {functionName} not supported for image generation")
# Web functionality methods # Web functionality methods - Simple interface to Tavily connector
async def webSearch(self, web_search_request: WebSearchRequest) -> WebSearchActionResult: async def search_websites(self, query: str, max_results: int = 5, **kwargs) -> List[WebSearchResultItem]:
"""Perform web search using Tavily.""" """Search for websites using Tavily."""
return await self.tavilyService.search(web_search_request) request = WebSearchRequest(
query=query,
max_results=max_results,
**kwargs
)
result = await self.tavilyService.search(request)
if result.success and result.documents:
return result.documents[0].documentData.results
return []
async def webCrawl(self, web_crawl_request: WebCrawlRequest) -> WebCrawlActionResult: async def crawl_websites(self, urls: List[str], extract_depth: str = "advanced", format: str = "markdown") -> List[WebCrawlResultItem]:
"""Crawl web pages using Tavily.""" """Crawl websites using Tavily."""
return await self.tavilyService.crawl(web_crawl_request) from pydantic import HttpUrl
from urllib.parse import urlparse
# Safely create HttpUrl objects with proper scheme handling
http_urls = []
for url in urls:
try:
# Ensure URL has a scheme
parsed = urlparse(url)
if not parsed.scheme:
url = f"https://{url}"
# Use HttpUrl with scheme parameter (this works for all URLs)
http_urls.append(HttpUrl(url, scheme="https"))
except Exception as e:
logger.warning(f"Skipping invalid URL {url}: {e}")
continue
if not http_urls:
return []
request = WebCrawlRequest(
urls=http_urls,
extract_depth=extract_depth,
format=format
)
result = await self.tavilyService.crawl(request)
if result.success and result.documents:
return result.documents[0].documentData.results
return []
async def webScrape(self, web_scrape_request: WebScrapeRequest) -> WebScrapeActionResult: async def extract_content(self, urls: List[str], extract_depth: str = "advanced", format: str = "markdown") -> Dict[str, str]:
"""Scrape web content using Tavily.""" """Extract content from URLs and return as dictionary."""
return await self.tavilyService.scrape(web_scrape_request) crawl_results = await self.crawl_websites(urls, extract_depth, format)
return {str(result.url): result.content for result in crawl_results}
# Core Web Tools - Clean interface for web operations
async def readPage(self, url: str, extract_depth: str = "advanced") -> Optional[str]:
    """Read a single web page and return its content (Markdown), or None.

    The query string is percent-encoded (spaces etc., keeping '=' and '&')
    before fetching. Fixes vs. previous version: the unconditional no-op
    urlunparse round-trip is removed, and when the fetched-content dict key
    does not match the requested URL (HttpUrl normalization, e.g. an added
    trailing slash, can change the key) we fall back to the single page's
    content instead of returning None.
    """
    logger.debug(f"Reading page: {url}")
    try:
        from urllib.parse import quote, urlparse, urlunparse

        parsed = urlparse(url)
        encoded_url = url
        if parsed.query:
            # Re-encode only the query component so separators survive.
            encoded_url = urlunparse((
                parsed.scheme,
                parsed.netloc,
                parsed.path,
                parsed.params,
                quote(parsed.query, safe='=&'),
                parsed.fragment,
            ))
            logger.debug(f"URL encoded: {url} -> {encoded_url}")

        content = await self.extract_content([encoded_url], extract_depth, "markdown")
        result = content.get(encoded_url)
        if result is None and content:
            # Key mismatch from URL normalization; single fetch -> use its value.
            result = next(iter(content.values()))
        if result:
            logger.debug(f"Successfully read page {encoded_url}: {len(result)} chars")
        else:
            logger.warning(f"No content returned for page {encoded_url}")
        return result
    except Exception as e:
        logger.warning(f"Failed to read page {url}: {e}")
        return None
async def getUrlsFromPage(self, url: str, extract_depth: str = "advanced") -> List[str]:
    """Return every URL found on a page, duplicates removed, order preserved."""
    try:
        page_content = await self.readPage(url, extract_depth)
        if not page_content:
            return []
        raw_links = self._extractLinksFromContent(page_content, url)
        # dict.fromkeys performs order-preserving deduplication.
        unique_links = list(dict.fromkeys(raw_links))
        logger.debug(f"Extracted {len(unique_links)} unique URLs from {url}")
        return unique_links
    except Exception as e:
        logger.warning(f"Failed to get URLs from page {url}: {e}")
        return []
def filterUrlsOnlyPages(self, urls: List[str], max_per_domain: int = 10) -> List[str]:
    """Keep only followable page links (drop images, media, archives, style/font
    assets), capped at max_per_domain unique URLs per domain."""
    from urllib.parse import urlparse

    blocked_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.ico', '.bmp',
                        '.mp4', '.mp3', '.avi', '.mov', '.mkv',
                        '.pdf', '.zip', '.rar', '.7z', '.tar', '.gz',
                        '.css', '.js', '.woff', '.woff2', '.ttf', '.eot')

    def _isHtmlCandidate(url: str) -> bool:
        # A link is followable unless it ends in a known non-HTML extension.
        return not url.lower().endswith(blocked_suffixes)

    # Bucket links by domain, preserving encounter order.
    domain_links: Dict[str, List[str]] = {}
    for link in urls:
        domain_links.setdefault(urlparse(link).netloc, []).append(link)

    # Per domain: drop duplicates and non-HTML links, stop at the cap.
    filtered_links: List[str] = []
    for domain, domain_link_list in domain_links.items():
        seen: set = set()
        domain_filtered: List[str] = []
        for link in domain_link_list:
            if link in seen or not _isHtmlCandidate(link):
                continue
            seen.add(link)
            domain_filtered.append(link)
            if len(domain_filtered) >= max_per_domain:
                break
        filtered_links.extend(domain_filtered)
        logger.debug(f"Domain {domain}: {len(domain_link_list)} -> {len(domain_filtered)} links")
    return filtered_links
def _extractLinksFromContent(self, content: str, base_url: str) -> List[str]:
    """Extract links from HTML/Markdown content.

    Three passes: HTML <a href> anchors, markdown [text](url) links, and
    bare http(s) URLs in the text. Relative URLs are resolved against
    base_url and query strings are percent-encoded.
    """
    try:
        import re
        from urllib.parse import urljoin, urlparse, quote, urlunparse

        def _cleanUrl(url: str) -> str:
            """Clean and encode URL to remove spaces and invalid characters."""
            # Remove quotes and extra spaces
            url = url.strip().strip('"\'')
            # If it's a relative URL, make it absolute first
            if not url.startswith(('http://', 'https://')):
                url = urljoin(base_url, url)
            # Parse and re-encode the URL properly
            parsed = urlparse(url)
            if parsed.query:
                # Encode query parameters properly
                encoded_query = quote(parsed.query, safe='=&')
                url = urlunparse((
                    parsed.scheme,
                    parsed.netloc,
                    parsed.path,
                    parsed.params,
                    encoded_query,
                    parsed.fragment
                ))
            return url

        links = []
        # Extract HTML links: <a href="url"> format
        # NOTE(review): HTML anchors are kept from ANY domain, whereas the
        # markdown and plain-URL passes below keep only links on base_url's
        # domain — confirm this asymmetry is intentional.
        html_link_pattern = r'<a[^>]+href=["\']([^"\']+)["\'][^>]*>'
        html_links = re.findall(html_link_pattern, content, re.IGNORECASE)
        for url in html_links:
            if url and not url.startswith('#') and not url.startswith('javascript:'):
                try:
                    cleaned_url = _cleanUrl(url)
                    links.append(cleaned_url)
                    logger.debug(f"Extracted HTML link: {url} -> {cleaned_url}")
                except Exception as e:
                    logger.debug(f"Failed to clean HTML link {url}: {e}")
        # Extract markdown links: [text](url) format
        markdown_link_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
        markdown_links = re.findall(markdown_link_pattern, content)
        for text, url in markdown_links:
            if url and not url.startswith('#'):
                try:
                    cleaned_url = _cleanUrl(url)
                    # Only keep URLs from the same domain
                    if urlparse(cleaned_url).netloc == urlparse(base_url).netloc:
                        links.append(cleaned_url)
                        logger.debug(f"Extracted markdown link: {url} -> {cleaned_url}")
                except Exception as e:
                    logger.debug(f"Failed to clean markdown link {url}: {e}")
        # Extract plain URLs in the text
        url_pattern = r'https?://[^\s\)]+'
        plain_urls = re.findall(url_pattern, content)
        for url in plain_urls:
            try:
                # Strip trailing punctuation that regex may have swallowed.
                clean_url = url.rstrip('.,;!?')
                cleaned_url = _cleanUrl(clean_url)
                if urlparse(cleaned_url).netloc == urlparse(base_url).netloc:
                    if cleaned_url not in links:  # Avoid duplicates
                        links.append(cleaned_url)
                        logger.debug(f"Extracted plain URL: {url} -> {cleaned_url}")
            except Exception as e:
                logger.debug(f"Failed to clean plain URL {url}: {e}")
        logger.debug(f"Total links extracted and cleaned: {len(links)}")
        return links
    except Exception as e:
        logger.warning(f"Failed to extract links from content: {e}")
        return []
async def crawlRecursively(self, urls: List[str], max_depth: int, extract_depth: str = "advanced", max_per_domain: int = 10) -> Dict[str, str]:
    """
    Recursively crawl URLs up to specified depth (breadth-first).

    Args:
        urls: List of starting URLs to crawl
        max_depth: Maximum depth to crawl (1=main pages only, 2=main+sub-pages, etc.)
        extract_depth: Tavily extract depth setting
        max_per_domain: Maximum URLs per domain per level

    Returns:
        Dictionary mapping URL -> content for all crawled pages
    """
    logger.info(f"Starting recursive crawl: {len(urls)} starting URLs, max_depth={max_depth}")
    # URL index to track all processed URLs (prevents revisits across levels)
    processed_urls = set()
    all_content = {}
    # Current level URLs to process
    current_level_urls = urls.copy()
    for depth in range(1, max_depth + 1):
        logger.info(f"=== DEPTH LEVEL {depth}/{max_depth} ===")
        logger.info(f"Processing {len(current_level_urls)} URLs at depth {depth}")
        # URLs found at this level (for next iteration)
        # NOTE(review): next_level_urls may contain duplicates gathered from
        # different pages; they are only skipped via processed_urls when
        # actually processed at the next depth.
        next_level_urls = []
        for url in current_level_urls:
            if url in processed_urls:
                logger.debug(f"URL {url} already processed, skipping")
                continue
            try:
                logger.info(f"Processing URL at depth {depth}: {url}")
                # Read page content
                content = await self.readPage(url, extract_depth)
                if content:
                    all_content[url] = content
                    processed_urls.add(url)
                    logger.info(f"✓ Successfully processed {url}: {len(content)} chars")
                    # Get URLs from this page for next level
                    page_urls = await self.getUrlsFromPage(url, extract_depth)
                    logger.info(f"Found {len(page_urls)} URLs on {url}")
                    # Filter URLs and add to next level
                    filtered_urls = self.filterUrlsOnlyPages(page_urls, max_per_domain)
                    logger.info(f"Filtered to {len(filtered_urls)} valid URLs")
                    # Add new URLs to next level (avoiding already processed ones)
                    new_urls_count = 0
                    for new_url in filtered_urls:
                        if new_url not in processed_urls:
                            next_level_urls.append(new_url)
                            new_urls_count += 1
                    logger.info(f"Added {new_urls_count} new URLs to next level from {url}")
                else:
                    logger.warning(f"✗ No content extracted from {url}")
                    processed_urls.add(url)  # Mark as processed to avoid retry
            except Exception as e:
                logger.warning(f"✗ Failed to process URL {url} at depth {depth}: {e}")
                processed_urls.add(url)  # Mark as processed to avoid retry
        # Prepare for next iteration
        current_level_urls = next_level_urls
        logger.info(f"Depth {depth} completed. Found {len(next_level_urls)} URLs for next level")
        # Stop if no more URLs to process
        if not current_level_urls:
            logger.info(f"No more URLs found at depth {depth}, stopping recursion")
            break
    logger.info(f"Recursive crawl completed: {len(all_content)} total pages crawled")
    return all_content
async def webQuery(self, query: str, context: str = "", options: AiCallOptions = None) -> str: async def webQuery(self, query: str, context: str = "", options: AiCallOptions = None) -> str:
"""Use LangDoc AI to provide the best answers for web-related queries.""" """Use Perplexity AI to provide the best answers for web-related queries."""
if options is None: if options is None:
options = AiCallOptions(operationType=OperationType.WEB_RESEARCH) options = AiCallOptions(operationType=OperationType.WEB_RESEARCH)
@ -480,14 +795,12 @@ Please provide:
Format your response in a clear, professional manner that would be helpful for someone researching this topic.""" Format your response in a clear, professional manner that would be helpful for someone researching this topic."""
messages = [{"role": "user", "content": webPrompt}]
try: try:
# Use LangDoc for the best answers # Use Perplexity for web research with search capabilities
response = await self.langdocService.callAiBasic(messages) response = await self.perplexityService.callAiWithWebSearch(webPrompt)
return response return response
except Exception as e: except Exception as e:
logger.error(f"LangDoc web query failed: {str(e)}") logger.error(f"Perplexity web query failed: {str(e)}")
raise Exception(f"Failed to process web query: {str(e)}") raise Exception(f"Failed to process web query: {str(e)}")
# Utility methods # Utility methods
@ -511,3 +824,157 @@ Format your response in a clear, professional manner that would be helpful for s
"""Get model names that have a specific tag.""" """Get model names that have a specific tag."""
return [name for name, info in aiModels.items() if tag in info.get("tags", [])] return [name for name, info in aiModels.items() if tag in info.get("tags", [])]
async def selectRelevantWebsites(self, websites: List[str], userQuestion: str) -> Tuple[List[str], str]:
    """Select most relevant websites using AI analysis. Returns (selected_websites, ai_response)."""
    if len(websites) <= 1:
        return websites, "Only one website available, no selection needed"
    try:
        # Create website summaries for AI analysis
        websiteSummaries = []
        for i, url in enumerate(websites, 1):
            from urllib.parse import urlparse
            domain = urlparse(url).netloc
            summary = f"{i}. {url} (Domain: {domain})"
            websiteSummaries.append(summary)
        selectionPrompt = f"""
Based on this user request: "{userQuestion}"
I have {len(websites)} websites found. Please select the most relevant website(s) for this request.
Available websites:
{chr(10).join(websiteSummaries)}
Please respond with the website number(s) (1, 2, 3, etc.) that are most relevant.
Format: 1,3,5 (or just 1 for single selection)
"""
        # Use Perplexity to select the best websites
        response = await self.webQuery(selectionPrompt)
        # Parse the selection
        # NOTE(review): this grabs EVERY digit run in the AI response, so
        # numbers appearing in explanatory prose are also treated as
        # selections — confirm acceptable.
        import re
        numbers = re.findall(r'\d+', response)
        if numbers:
            selectedWebsites = []
            for num in numbers:
                index = int(num) - 1
                if 0 <= index < len(websites):
                    selectedWebsites.append(websites[index])
            if selectedWebsites:
                logger.info(f"AI selected {len(selectedWebsites)} websites")
                return selectedWebsites, response
        # Fallback to first website
        logger.warning("AI selection failed, using first website")
        return websites[:1], f"AI selection failed, fallback to first website. AI response: {response}"
    except Exception as e:
        logger.error(f"Error in website selection: {str(e)}")
        return websites[:1], f"Error in website selection: {str(e)}"
async def analyzeContentWithChunking(self, allContent: Dict[str, str], userQuestion: str) -> str:
    """Analyze content using AI with chunking for large content.

    Filters each page, splits anything over the chunk size, analyzes each
    chunk via webQuery, then synthesizes the partial analyses into one answer.
    """
    logger.info(f"Analyzing {len(allContent)} websites with AI")
    # Process content in chunks to avoid token limits
    chunkSize = 50000  # 50k chars per chunk
    allChunks = []
    for url, content in allContent.items():
        filteredContent = self._filterContent(content)
        if len(filteredContent) <= chunkSize:
            allChunks.append((url, filteredContent))
            logger.info(f"Content from {url}: {len(filteredContent)} chars (single chunk)")
        else:
            # Split large content into chunks
            chunkCount = (len(filteredContent) + chunkSize - 1) // chunkSize
            logger.info(f"Content from {url}: {len(filteredContent)} chars (split into {chunkCount} chunks)")
            for i in range(0, len(filteredContent), chunkSize):
                chunk = filteredContent[i:i+chunkSize]
                chunkNum = i//chunkSize + 1
                allChunks.append((f"{url} (part {chunkNum})", chunk))
    logger.info(f"Processing {len(allChunks)} content chunks")
    # Analyze each chunk
    # NOTE(review): a chunk that fails analysis is logged and silently
    # dropped from the synthesis — confirm best-effort behavior is intended.
    chunkAnalyses = []
    for i, (url, chunk) in enumerate(allChunks, 1):
        logger.info(f"Analyzing chunk {i}/{len(allChunks)}: {url}")
        try:
            analysisPrompt = f"""
Analyze this web content and extract relevant information for: {userQuestion}
Source: {url}
Content: {chunk}
Please extract key information relevant to the query.
"""
            analysis = await self.webQuery(analysisPrompt)
            chunkAnalyses.append(analysis)
            logger.info(f"Chunk {i}/{len(allChunks)} analyzed successfully")
        except Exception as e:
            logger.error(f"Chunk {i}/{len(allChunks)} error: {e}")
    # Combine all chunk analyses
    if chunkAnalyses:
        logger.info(f"Combining {len(chunkAnalyses)} chunk analyses")
        combinedAnalysis = "\n\n".join(chunkAnalyses)
        # Final synthesis
        try:
            logger.info("Performing final synthesis of all analyses")
            synthesisPrompt = f"""
Based on these partial analyses, provide a comprehensive answer to: {userQuestion}
Partial analyses:
{combinedAnalysis}
Please provide a clear, well-structured answer to the query.
"""
            finalAnalysis = await self.webQuery(synthesisPrompt)
            logger.info("Final synthesis completed successfully")
            return finalAnalysis
        except Exception as e:
            logger.error(f"Synthesis error: {e}")
            return combinedAnalysis
    else:
        logger.error("No content could be analyzed")
        return "No content could be analyzed"
def _filterContent(self, content: str) -> str:
"""Filter out navigation, ads, and other nonsense content."""
lines = content.split('\n')
filteredLines = []
for line in lines:
line = line.strip()
# Skip empty lines
if not line:
continue
# Skip navigation elements
if any(skip in line.lower() for skip in [
'toggle navigation', 'log in', 'sign up', 'cookies', 'privacy policy',
'terms of service', 'subscribe', 'newsletter', 'follow us', 'share this',
'advertisement', 'sponsored', 'banner', 'popup', 'modal'
]):
continue
# Skip image references without context
if line.startswith('![Image') and '](' in line:
continue
# Skip pure links without context
if line.startswith('[') and line.endswith(')') and '---' in line:
continue
# Keep meaningful content
if len(line) > 10: # Skip very short lines
filteredLines.append(line)
return '\n'.join(filteredLines)

View file

@ -12,8 +12,8 @@ from typing import Dict, Any, List, Optional, Union, get_origin, get_args
import asyncio import asyncio
from modules.interfaces.interfaceDbChatAccess import ChatAccess from modules.interfaces.interfaceDbChatAccess import ChatAccess
from modules.datamodels.datamodelWorkflow import ( from modules.datamodels.datamodelChat import (
TaskAction, ActionItem,
TaskResult, TaskResult,
TaskItem, TaskItem,
TaskStatus, TaskStatus,
@ -549,7 +549,7 @@ class ChatObjects:
created_documents.append(created_doc) created_documents.append(created_doc)
# Convert to ChatMessage model # Convert to ChatMessage model
return ChatMessage( chat_message = ChatMessage(
id=createdMessage["id"], id=createdMessage["id"],
workflowId=createdMessage["workflowId"], workflowId=createdMessage["workflowId"],
parentMessageId=createdMessage.get("parentMessageId"), parentMessageId=createdMessage.get("parentMessageId"),
@ -570,6 +570,11 @@ class ChatObjects:
actionMethod=createdMessage.get("actionMethod"), actionMethod=createdMessage.get("actionMethod"),
actionName=createdMessage.get("actionName") actionName=createdMessage.get("actionName")
) )
# Debug: Store message and documents for debugging TODO REMOVE
self._storeDebugMessageAndDocuments(chat_message)
return chat_message
except Exception as e: except Exception as e:
logger.error(f"Error creating workflow message: {str(e)}") logger.error(f"Error creating workflow message: {str(e)}")
@ -1045,6 +1050,120 @@ class ChatObjects:
return {"items": items} return {"items": items}
    def _storeDebugMessageAndDocuments(self, message: ChatMessage) -> None:
        """
        Store message and documents for debugging purposes in fileshare.

        Debug-only helper (marked TODO REMOVE at the call site): dumps the chat
        message as JSON plus plain text, and metadata for each attached
        document, under a timestamped folder so a developer can inspect what
        was persisted for a given round/task/action.

        Structure: gateway/test-chat/messages/m_round_task_action_timestamp/documentlist_label/documents

        Args:
            message: ChatMessage object to store

        Returns:
            None. Every failure is caught and logged — this helper must never
            break the main message-creation flow.
        """
        try:
            # Local imports keep this debug-only dependency footprint out of
            # module import time.
            import os
            import json
            from datetime import datetime, UTC
            # Create base debug directory
            debug_root = "./test-chat/messages"
            os.makedirs(debug_root, exist_ok=True)
            # Generate timestamp (%f is microseconds; [:-3] trims to milliseconds)
            timestamp = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]
            # Create message folder name: m_round_task_action_timestamp
            # Use actual values from message, not defaults
            round_str = str(message.roundNumber) if message.roundNumber is not None else "0"
            task_str = str(message.taskNumber) if message.taskNumber is not None else "0"
            action_str = str(message.actionNumber) if message.actionNumber is not None else "0"
            message_folder = f"{timestamp}_m_{round_str}_{task_str}_{action_str}"
            message_path = os.path.join(debug_root, message_folder)
            os.makedirs(message_path, exist_ok=True)
            # Store message data - use dict() instead of model_dump() for compatibility
            message_file = os.path.join(message_path, "message.json")
            with open(message_file, "w", encoding="utf-8") as f:
                # Convert message to dict manually to avoid model_dump() issues
                # NOTE(review): "documents" is always serialized as an empty
                # list even when message.documents is populated — per-document
                # metadata is written separately below; confirm intentional.
                message_dict = {
                    "id": message.id,
                    "workflowId": message.workflowId,
                    "parentMessageId": message.parentMessageId,
                    "message": message.message,
                    "role": message.role,
                    "status": message.status,
                    "sequenceNr": message.sequenceNr,
                    "publishedAt": message.publishedAt,
                    "roundNumber": message.roundNumber,
                    "taskNumber": message.taskNumber,
                    "actionNumber": message.actionNumber,
                    "documentsLabel": message.documentsLabel,
                    "actionId": message.actionId,
                    "actionMethod": message.actionMethod,
                    "actionName": message.actionName,
                    "success": message.success,
                    "documents": []
                }
                # default=str stringifies non-JSON types (e.g. datetimes).
                json.dump(message_dict, f, indent=2, ensure_ascii=False, default=str)
            # Store message content as text
            if message.message:
                message_text_file = os.path.join(message_path, "message_text.txt")
                with open(message_text_file, "w", encoding="utf-8") as f:
                    f.write(str(message.message))
            # Store documents if provided
            if message.documents and len(message.documents) > 0:
                logger.info(f"Debug: Processing {len(message.documents)} documents")
                # Group documents by documentsLabel
                # NOTE(review): the key is the message-level documentsLabel for
                # every document, so all documents land in one group — confirm
                # whether per-document labels were intended here.
                documents_by_label = {}
                for doc in message.documents:
                    label = message.documentsLabel or 'default'
                    if label not in documents_by_label:
                        documents_by_label[label] = []
                    documents_by_label[label].append(doc)
                # Create subfolder for each document label
                for label, docs in documents_by_label.items():
                    # Sanitize label for filesystem (keep alphanumerics, space, dash, underscore)
                    safe_label = "".join(c for c in str(label) if c.isalnum() or c in (' ', '-', '_')).rstrip()
                    safe_label = safe_label.replace(' ', '_')
                    if not safe_label:
                        safe_label = "default"
                    label_folder = os.path.join(message_path, safe_label)
                    os.makedirs(label_folder, exist_ok=True)
                    logger.info(f"Debug: Created document folder: {label_folder}")
                    # Store each document
                    for i, doc in enumerate(docs):
                        # Create document metadata file (metadata only; the
                        # file bytes themselves are not dumped here)
                        doc_meta = {
                            "id": doc.id,
                            "messageId": doc.messageId,
                            "fileId": doc.fileId,
                            "fileName": doc.fileName,
                            "fileSize": doc.fileSize,
                            "mimeType": doc.mimeType,
                            "roundNumber": doc.roundNumber,
                            "taskNumber": doc.taskNumber,
                            "actionNumber": doc.actionNumber,
                            "actionId": doc.actionId
                        }
                        doc_meta_file = os.path.join(label_folder, f"document_{i+1:03d}_metadata.json")
                        with open(doc_meta_file, "w", encoding="utf-8") as f:
                            json.dump(doc_meta, f, indent=2, ensure_ascii=False, default=str)
                        logger.info(f"Debug: Stored document metadata for {doc.fileName}")
            logger.info(f"Debug: Stored message and documents in {message_path}")
        except Exception as e:
            # Never let debug tooling break message creation; log and move on.
            logger.error(f"Debug: Failed to store message and documents: {e}")
            import traceback
            logger.error(f"Debug: Traceback: {traceback.format_exc()}")
def getInterface(currentUser: Optional[User] = None) -> 'ChatObjects': def getInterface(currentUser: Optional[User] = None) -> 'ChatObjects':
""" """

View file

@ -41,6 +41,8 @@ class Services:
def __init__(self, user: User, workflow: ChatWorkflow = None): def __init__(self, user: User, workflow: ChatWorkflow = None):
self.user: User = user self.user: User = user
self.workflow: ChatWorkflow = workflow self.workflow: ChatWorkflow = workflow
self.currentUserPrompt: str = "" # Cleaned/normalized user intent for the current round
self.rawUserPrompt: str = "" # Original raw user message for the current round
# Initialize interfaces # Initialize interfaces

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,182 @@
from typing import Any, Dict, List
import base64
import io
from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Chunker
class ImageChunker(Chunker):
    """Chunker for reducing image size through resizing, compression, and tiling."""

    def chunk(self, part: ContentPart, options: Dict[str, Any]) -> list[Dict[str, Any]]:
        """
        Chunk an image by reducing its size through various strategies.

        Strategies are tried in order:
          1. Pass through unchanged if already within byte and pixel limits.
          2. Downscale (aspect-preserving) to fit the pixel limit.
          3. Re-encode as JPEG at decreasing quality until under the byte limit.
          4. Split into tiles if quality reduction alone is not enough.

        Args:
            part: ContentPart containing image data (base64 encoded)
            options: Chunking options including:
                - imageChunkSize: Maximum size in bytes for each chunk
                - imageMaxPixels: Maximum pixels (width*height) for the image
                - imageQuality: JPEG quality (0-100, default 85)
                - imageTileSize: Size for tiling if image is still too large

        Returns:
            List of image chunks with reduced size; on any processing error a
            single chunk carrying the original data with an "error_fallback"
            strategy marker.
        """
        maxBytes = int(options.get("imageChunkSize", 1000000))  # 1MB default
        maxPixels = int(options.get("imageMaxPixels", 1024 * 1024))  # 1MP default
        quality = int(options.get("imageQuality", 85))
        tileSize = int(options.get("imageTileSize", 512))  # 512x512 tiles
        chunks: List[Dict[str, Any]] = []
        try:
            # Lazy import PIL to avoid hanging during module import
            from PIL import Image
            # Decode base64 image data
            imageData = base64.b64decode(part.data)
            image = Image.open(io.BytesIO(imageData))
            # Get original dimensions
            originalWidth, originalHeight = image.size
            originalPixels = originalWidth * originalHeight
            # Strategy 1: If image is small enough, return as-is
            if len(part.data) <= maxBytes and originalPixels <= maxPixels:
                chunks.append({
                    "data": part.data,
                    "size": len(part.data),
                    "order": 0,
                    "metadata": {
                        "originalSize": len(part.data),
                        "originalPixels": originalPixels,
                        "strategy": "original"
                    }
                })
                return chunks
            # Strategy 2: Resize to fit within pixel limit
            if originalPixels > maxPixels:
                # Scale both dimensions by sqrt of the pixel ratio to keep
                # the aspect ratio; enforce a 64px minimum per side.
                scale = (maxPixels / originalPixels) ** 0.5
                newWidth = max(int(originalWidth * scale), 64)
                newHeight = max(int(originalHeight * scale), 64)
                image = image.resize((newWidth, newHeight), Image.Resampling.LANCZOS)
            # JPEG cannot encode alpha or palette modes; normalize up front so
            # the save calls below cannot raise for RGBA/P/LA input.
            # (Bug fix: such images previously hit the generic error fallback
            # and were returned uncompressed.)
            if image.mode not in ("RGB", "L"):
                image = image.convert("RGB")
            # Strategy 3: Re-encode at decreasing quality until under the byte
            # limit. The image is always encoded at least once, so a
            # merely-resized image is returned at the requested quality.
            # (Bug fix: the old code skipped the loop when the *original*
            # bytes fit and re-encoded the resized image at quality 10.)
            currentQuality = quality
            while currentQuality >= 10:
                output = io.BytesIO()
                image.save(output, format='JPEG', quality=currentQuality, optimize=True)
                compressedB64 = base64.b64encode(output.getvalue()).decode('utf-8')
                currentSize = len(compressedB64)
                if currentSize <= maxBytes:
                    chunks.append({
                        "data": compressedB64,
                        "size": currentSize,
                        "order": 0,
                        "metadata": {
                            "originalSize": len(part.data),
                            "originalPixels": originalPixels,
                            "compressedSize": currentSize,
                            "quality": currentQuality,
                            "strategy": "compressed"
                        }
                    })
                    return chunks
                currentQuality -= 10
            # Strategy 4: Tile the image if quality reduction was not enough
            chunks = self._tileImage(image, maxBytes, tileSize, quality, originalPixels)
        except Exception as e:
            # Fallback: return original data with error metadata rather than
            # failing the whole extraction pipeline.
            chunks.append({
                "data": part.data,
                "size": len(part.data),
                "order": 0,
                "metadata": {
                    "originalSize": len(part.data),
                    "strategy": "error_fallback",
                    "error": str(e)
                }
            })
        return chunks

    def _tileImage(self, image: "Image.Image", maxBytes: int, tileSize: int, quality: int, originalPixels: int) -> List[Dict[str, Any]]:
        """Split image into tiles if it's still too large after compression.

        Args:
            image: decoded (and possibly resized/converted) PIL image
            maxBytes: target maximum chunk size (informational; tiles are
                encoded once at the given quality without a further size check)
            tileSize: edge length of each square tile in pixels
            quality: JPEG quality used for every tile
            originalPixels: pixel count of the source image, kept in metadata

        Returns:
            One chunk dict per tile, ordered row-major (order = y * tilesX + x).
        """
        chunks = []
        width, height = image.size
        # Number of tiles per axis (ceiling division)
        tilesX = (width + tileSize - 1) // tileSize
        tilesY = (height + tileSize - 1) // tileSize
        for y in range(tilesY):
            for x in range(tilesX):
                # Tile boundaries, clamped to the image edges
                left = x * tileSize
                top = y * tileSize
                right = min(left + tileSize, width)
                bottom = min(top + tileSize, height)
                # Extract and JPEG-encode the tile
                tile = image.crop((left, top, right, bottom))
                output = io.BytesIO()
                tile.save(output, format='JPEG', quality=quality, optimize=True)
                tileData = output.getvalue()
                tileB64 = base64.b64encode(tileData).decode('utf-8')
                chunks.append({
                    "data": tileB64,
                    "size": len(tileB64),
                    "order": y * tilesX + x,
                    "metadata": {
                        # NOTE(review): tobytes() is the raw decoded size of
                        # the (possibly resized) image, not the original file
                        # size — confirm this is the intended value.
                        "originalSize": len(image.tobytes()),
                        "originalPixels": originalPixels,
                        "tileSize": tileSize,
                        "tilePosition": f"{x},{y}",
                        "tileBounds": f"{left},{top},{right},{bottom}",
                        "quality": quality,
                        "strategy": "tiled"
                    }
                })
        return chunks

View file

@ -1,12 +1,17 @@
from typing import Any, Dict, List from typing import Any, Dict, List
import logging
from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Chunker from ..subRegistry import Chunker
logger = logging.getLogger(__name__)
class TextChunker(Chunker): class TextChunker(Chunker):
def chunk(self, part: ContentPart, options: Dict[str, Any]) -> list[Dict[str, Any]]: def chunk(self, part: ContentPart, options: Dict[str, Any]) -> list[Dict[str, Any]]:
maxBytes = int(options.get("textChunkSize", 40000)) maxBytes = int(options.get("textChunkSize", 40000))
logger.debug(f"TextChunker: textChunkSize from options: {options.get('textChunkSize', 'NOT_FOUND')}")
logger.debug(f"TextChunker: using maxBytes: {maxBytes}")
chunks: List[Dict[str, Any]] = [] chunks: List[Dict[str, Any]] = []
current: List[str] = [] current: List[str] = []
size = 0 size = 0

View file

@ -1,7 +1,7 @@
from typing import Any, Dict, List from typing import Any, Dict, List
import base64 import base64
from ..utils import makeId from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor from ..subRegistry import Extractor

View file

@ -1,7 +1,7 @@
from typing import Any, Dict, List from typing import Any, Dict, List
from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..utils import makeId from ..subUtils import makeId
from ..subRegistry import Extractor from ..subRegistry import Extractor

View file

@ -1,7 +1,7 @@
from typing import Any, Dict, List from typing import Any, Dict, List
import io import io
from ..utils import makeId from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor from ..subRegistry import Extractor

View file

@ -2,7 +2,7 @@ from typing import Any, Dict, List
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..utils import makeId from ..subUtils import makeId
from ..subRegistry import Extractor from ..subRegistry import Extractor

View file

@ -1,7 +1,7 @@
from typing import Any, Dict, List from typing import Any, Dict, List
import base64 import base64
from ..utils import makeId from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor from ..subRegistry import Extractor

View file

@ -2,7 +2,7 @@ from typing import Any, Dict, List
import json import json
from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..utils import makeId from ..subUtils import makeId
from ..subRegistry import Extractor from ..subRegistry import Extractor

View file

@ -2,7 +2,7 @@ from typing import Any, Dict, List
import base64 import base64
import io import io
from ..utils import makeId from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor from ..subRegistry import Extractor

View file

@ -1,7 +1,7 @@
from typing import Any, Dict, List from typing import Any, Dict, List
from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..utils import makeId from ..subUtils import makeId
from ..subRegistry import Extractor from ..subRegistry import Extractor

View file

@ -2,7 +2,7 @@ from typing import Any, Dict, List
import io import io
from datetime import datetime from datetime import datetime
from ..utils import makeId from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor from ..subRegistry import Extractor
@ -75,7 +75,8 @@ class XlsxExtractor(Extractor):
elif isinstance(v, datetime): elif isinstance(v, datetime):
cells.append(v.strftime("%Y-%m-%d %H:%M:%S")) cells.append(v.strftime("%Y-%m-%d %H:%M:%S"))
else: else:
cells.append(f'"{str(v).replace("\"", "\"\"")}"') escaped_value = str(v).replace('"', '""')
cells.append(f'"{escaped_value}"')
lines.append(",".join(cells)) lines.append(",".join(cells))
csvData = "\n".join(lines) csvData = "\n".join(lines)
parts.append(ContentPart( parts.append(ContentPart(

View file

@ -2,7 +2,7 @@ from typing import Any, Dict, List
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..utils import makeId from ..subUtils import makeId
from ..subRegistry import Extractor from ..subRegistry import Extractor

View file

@ -1,9 +1,14 @@
from typing import Any, Dict, List, Optional, Union from typing import Any, Dict, List, Optional, Union
import uuid import uuid
import logging
from .subRegistry import ExtractorRegistry, ChunkerRegistry from .subRegistry import ExtractorRegistry, ChunkerRegistry
from .subPipeline import runExtraction, poolAndLimit, applyAiIfRequested from .subPipeline import runExtraction, poolAndLimit, applyAiIfRequested
from modules.datamodels.datamodelExtraction import ExtractedContent, ContentPart from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy
from modules.datamodels.datamodelChat import ChatDocument
logger = logging.getLogger(__name__)
class ExtractionService: class ExtractionService:
@ -12,45 +17,325 @@ class ExtractionService:
self._extractorRegistry = ExtractorRegistry() self._extractorRegistry = ExtractorRegistry()
self._chunkerRegistry = ChunkerRegistry() self._chunkerRegistry = ChunkerRegistry()
def extractContent(self, documentList: List[Dict[str, Any]], options: Dict[str, Any]) -> List[ExtractedContent]: def extractContent(self, documents: List[ChatDocument], options: Dict[str, Any]) -> List[ContentExtracted]:
results: List[ExtractedContent] = [] """
for doc in documentList: Extract content from a list of ChatDocument objects.
Args:
documents: List of ChatDocument objects to extract content from
options: Extraction options including maxSize, chunkAllowed, mergeStrategy, etc.
Returns:
List of ContentExtracted objects, one per input document
"""
results: List[ContentExtracted] = []
# Lazy import to avoid circular deps and heavy init at module import
from modules.interfaces.interfaceDbComponentObjects import getInterface
dbInterface = getInterface()
for i, doc in enumerate(documents):
logger.info(f"=== DOCUMENT {i}: {doc.fileName} ===")
logger.info(f"Initial MIME type: {doc.mimeType}")
# Resolve raw bytes for this document using interface
documentBytes = dbInterface.getFileData(doc.fileId)
if not documentBytes:
raise ValueError(f"No file data found for fileId={doc.fileId}")
# Convert ChatDocument to the format expected by runExtraction
documentData = {
"id": doc.id,
"bytes": documentBytes,
"fileName": doc.fileName,
"mimeType": doc.mimeType
}
ec = runExtraction( ec = runExtraction(
extractorRegistry=self._extractorRegistry, extractorRegistry=self._extractorRegistry,
chunkerRegistry=self._chunkerRegistry, chunkerRegistry=self._chunkerRegistry,
documentBytes=doc.get("bytes"), documentBytes=documentData["bytes"],
fileName=doc.get("fileName"), fileName=documentData["fileName"],
mimeType=doc.get("mimeType"), mimeType=documentData["mimeType"],
options=options options=options
) )
# Log content parts metadata
logger.debug(f"Content parts: {len(ec.parts)}")
for j, part in enumerate(ec.parts):
logger.debug(f" Part {j}: {part.typeGroup} ({part.mimeType}) - {len(part.data) if part.data else 0} chars")
if part.metadata:
logger.debug(f" Metadata: {part.metadata}")
# Attach document id to parts if missing # Attach document id to parts if missing
for p in ec.parts: for p in ec.parts:
if "documentId" not in p.metadata: if "documentId" not in p.metadata:
p.metadata["documentId"] = doc.get("id") or str(uuid.uuid4()) p.metadata["documentId"] = documentData["id"] or str(uuid.uuid4())
# Log chunking information
chunked_parts = [p for p in ec.parts if p.metadata.get("chunk", False)]
if chunked_parts:
logger.debug(f"=== CHUNKING RESULTS ===")
logger.debug(f"Total parts: {len(ec.parts)}")
logger.debug(f"Chunked parts: {len(chunked_parts)}")
for chunk in chunked_parts:
logger.debug(f" Chunk: {chunk.label} - {len(chunk.data)} chars (parent: {chunk.parentId})")
else:
logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits")
ec = applyAiIfRequested(ec, options) ec = applyAiIfRequested(ec, options)
results.append(ec) results.append(ec)
return results return results
async def extractContentFromDocument(self, prompt: str, documents: List[Dict[str, Any]], options: Optional[Dict[str, Any]] = None) -> List[ExtractedContent]: def mergeAiResults(
self,
extractedContent: List[ContentExtracted],
aiResults: List[str],
strategy: MergeStrategy
) -> ContentExtracted:
""" """
Batch extract content from multiple documents. Merge AI results from chunked content back into a single ContentExtracted.
Args: Args:
prompt: Instructional prompt for optional AI post-processing/selection. extractedContent: List of ContentExtracted objects that were processed
documents: List of dicts with keys: id, bytes, fileName, mimeType. aiResults: List of AI response strings, one per chunk
options: Optional extraction options. "ai" config may be provided. strategy: Merge strategy configuration (dict or MergeStrategy object)
Returns: Returns:
List[ExtractedContent]: one per input document in order. Single ContentExtracted with merged AI results
""" """
# Build options safely and inject prompt for downstream AI selection if desired logger.debug(f"=== MERGING AI RESULTS ===")
effectiveOptions: Dict[str, Any] = options.copy() if options else {} logger.debug(f"Extracted content: {len(extractedContent)} documents")
aiCfg = effectiveOptions.get("ai") or {} logger.debug(f"AI results: {len(aiResults)} responses")
if prompt: logger.debug(f"Merge strategy: {strategy.mergeType}")
aiCfg["prompt"] = prompt
effectiveOptions["ai"] = aiCfg mergeStrategy = strategy
# Delegate to existing synchronous pipeline # Collect all parts from all extracted content
return self.extractContent(documents, effectiveOptions) allParts: List[ContentPart] = []
for ec in extractedContent:
allParts.extend(ec.parts)
logger.debug(f"Total original parts: {len(allParts)}")
# Create AI result parts
aiResultParts: List[ContentPart] = []
for i, aiResult in enumerate(aiResults):
aiPart = ContentPart(
id=f"ai_result_{i}",
parentId=None, # Will be set based on strategy
label="ai_result",
typeGroup="text",
mimeType="text/plain",
data=aiResult,
metadata={
"aiResult": True,
"order": i,
"size": len(aiResult.encode('utf-8'))
}
)
aiResultParts.append(aiPart)
logger.debug(f"Created {len(aiResultParts)} AI result parts")
# Apply merging strategy
if mergeStrategy.mergeType == "concatenate":
mergedParts = self._mergeConcatenate(allParts, aiResultParts, mergeStrategy)
elif mergeStrategy.mergeType == "hierarchical":
mergedParts = self._mergeHierarchical(allParts, aiResultParts, mergeStrategy)
elif mergeStrategy.mergeType == "intelligent":
mergedParts = self._mergeIntelligent(allParts, aiResultParts, mergeStrategy)
else:
# Default to concatenate
mergedParts = self._mergeConcatenate(allParts, aiResultParts, mergeStrategy)
# Create final ContentExtracted
mergedContent = ContentExtracted(
id=f"merged_{uuid.uuid4()}",
parts=mergedParts
)
logger.debug(f"=== MERGE COMPLETED ===")
logger.debug(f"Final merged parts: {len(mergedParts)}")
logger.debug(f"Merged content ID: {mergedContent.id}")
return mergedContent
def _mergeConcatenate(
self,
originalParts: List[ContentPart],
aiResultParts: List[ContentPart],
strategy: MergeStrategy
) -> List[ContentPart]:
"""Merge parts by simple concatenation."""
mergedParts = []
# Add original parts (filtered if needed)
for part in originalParts:
if strategy.preserveChunks or not part.metadata.get("chunk", False):
mergedParts.append(part)
# Add AI results
if aiResultParts:
# Group AI results by parentId if available
aiResultsByParent = {}
for aiPart in aiResultParts:
parentId = aiPart.parentId or "root"
if parentId not in aiResultsByParent:
aiResultsByParent[parentId] = []
aiResultsByParent[parentId].append(aiPart)
# Merge AI results for each parent
for parentId, aiParts in aiResultsByParent.items():
if len(aiParts) == 1:
mergedParts.append(aiParts[0])
else:
# Concatenate multiple AI results for same parent
combinedData = strategy.chunkSeparator.join([p.data for p in aiParts])
combinedPart = ContentPart(
id=f"merged_ai_{parentId}",
parentId=parentId if parentId != "root" else None,
label="merged_ai_result",
typeGroup="text",
mimeType="text/plain",
data=combinedData,
metadata={
"aiResult": True,
"merged": True,
"sourceCount": len(aiParts),
"size": len(combinedData.encode('utf-8'))
}
)
mergedParts.append(combinedPart)
return mergedParts
def _mergeHierarchical(
self,
originalParts: List[ContentPart],
aiResultParts: List[ContentPart],
strategy: MergeStrategy
) -> List[ContentPart]:
"""Merge parts hierarchically based on parentId relationships."""
# Group parts by parentId
partsByParent = {}
for part in originalParts:
parentId = part.parentId or "root"
if parentId not in partsByParent:
partsByParent[parentId] = []
partsByParent[parentId].append(part)
# Group AI results by parentId
aiResultsByParent = {}
for aiPart in aiResultParts:
parentId = aiPart.parentId or "root"
if parentId not in aiResultsByParent:
aiResultsByParent[parentId] = []
aiResultsByParent[parentId].append(aiPart)
mergedParts = []
# Process each parent group
for parentId in set(list(partsByParent.keys()) + list(aiResultsByParent.keys())):
originalGroup = partsByParent.get(parentId, [])
aiGroup = aiResultsByParent.get(parentId, [])
# Add original parts
mergedParts.extend(originalGroup)
# Add AI results for this parent
if aiGroup:
if len(aiGroup) == 1:
mergedParts.append(aiGroup[0])
else:
# Merge multiple AI results
combinedData = strategy.chunkSeparator.join([p.data for p in aiGroup])
combinedPart = ContentPart(
id=f"hierarchical_ai_{parentId}",
parentId=parentId if parentId != "root" else None,
label="hierarchical_ai_result",
typeGroup="text",
mimeType="text/plain",
data=combinedData,
metadata={
"aiResult": True,
"hierarchical": True,
"sourceCount": len(aiGroup),
"size": len(combinedData.encode('utf-8'))
}
)
mergedParts.append(combinedPart)
return mergedParts
def _mergeIntelligent(
self,
originalParts: List[ContentPart],
aiResultParts: List[ContentPart],
strategy: MergeStrategy
) -> List[ContentPart]:
"""Merge parts using intelligent strategies based on content type."""
mergedParts = []
# Group by typeGroup for intelligent merging
partsByType = {}
for part in originalParts:
typeGroup = part.typeGroup
if typeGroup not in partsByType:
partsByType[typeGroup] = []
partsByType[typeGroup].append(part)
# Process each type group
for typeGroup, parts in partsByType.items():
if typeGroup == "text":
mergedParts.extend(self._mergeTextIntelligent(parts, aiResultParts, strategy))
elif typeGroup == "table":
mergedParts.extend(self._mergeTableIntelligent(parts, aiResultParts, strategy))
elif typeGroup == "structure":
mergedParts.extend(self._mergeStructureIntelligent(parts, aiResultParts, strategy))
else:
# Default handling for other types
mergedParts.extend(parts)
# Add any remaining AI results that weren't merged
for aiPart in aiResultParts:
if not any(p.id == aiPart.id for p in mergedParts):
mergedParts.append(aiPart)
return mergedParts
def _mergeTextIntelligent(
self,
textParts: List[ContentPart],
aiResultParts: List[ContentPart],
strategy: MergeStrategy
) -> List[ContentPart]:
"""Intelligent merging for text content."""
# For now, use concatenate strategy
# This could be enhanced with semantic analysis, summarization, etc.
return self._mergeConcatenate(textParts, aiResultParts, strategy)
def _mergeTableIntelligent(
self,
tableParts: List[ContentPart],
aiResultParts: List[ContentPart],
strategy: MergeStrategy
) -> List[ContentPart]:
"""Intelligent merging for table content."""
# For now, use concatenate strategy
# This could be enhanced with table merging logic
return self._mergeConcatenate(tableParts, aiResultParts, strategy)
def _mergeStructureIntelligent(
self,
structureParts: List[ContentPart],
aiResultParts: List[ContentPart],
strategy: MergeStrategy
) -> List[ContentPart]:
"""Intelligent merging for structured content."""
# For now, use concatenate strategy
# This could be enhanced with structure-aware merging
return self._mergeConcatenate(structureParts, aiResultParts, strategy)

View file

@ -1,6 +1,6 @@
from typing import Any, Dict, List from typing import Any, Dict, List
from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..utils import makeId from ..subUtils import makeId
class TableMerger: class TableMerger:

View file

@ -1,6 +1,6 @@
from typing import Any, Dict, List from typing import Any, Dict, List
from modules.datamodels.datamodelExtraction import ContentPart from modules.datamodels.datamodelExtraction import ContentPart
from ..utils import makeId from ..subUtils import makeId
class TextMerger: class TextMerger:

View file

@ -1,14 +1,61 @@
from typing import Any, Dict, List from typing import Any, Dict, List
import logging
import os
from modules.datamodels.datamodelExtraction import ExtractedContent, ContentPart from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
from .utils import makeId from .subUtils import makeId
from .subRegistry import ExtractorRegistry, ChunkerRegistry from .subRegistry import ExtractorRegistry, ChunkerRegistry
from .merging.text_merger import TextMerger from .merging.text_merger import TextMerger
from .merging.table_merger import TableMerger from .merging.table_merger import TableMerger
from .merging.default_merger import DefaultMerger from .merging.default_merger import DefaultMerger
logger = logging.getLogger(__name__)
def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: ChunkerRegistry, documentBytes: bytes, fileName: str, mimeType: str, options: Dict[str, Any]) -> ExtractedContent:
def _mergeParts(parts: List[ContentPart], mergeStrategy: Dict[str, Any]) -> List[ContentPart]:
"""Merge parts based on the provided strategy."""
if not parts or not mergeStrategy:
return parts
groupBy = mergeStrategy.get("groupBy", "typeGroup")
orderBy = mergeStrategy.get("orderBy", "id")
# Group parts by the specified field
groups = {}
for part in parts:
key = getattr(part, groupBy, "unknown")
if key not in groups:
groups[key] = []
groups[key].append(part)
# Merge each group
merged_parts = []
for group_key, group_parts in groups.items():
if len(group_parts) == 1:
merged_parts.extend(group_parts)
else:
# Sort by orderBy field if specified
if orderBy:
group_parts.sort(key=lambda p: getattr(p, orderBy, ""))
# Use appropriate merger based on type
type_group = group_parts[0].typeGroup if group_parts else "unknown"
if type_group == "text":
merger = TextMerger()
elif type_group == "table":
merger = TableMerger()
else:
merger = DefaultMerger()
# Merge the group
merged = merger.merge(group_parts, mergeStrategy)
merged_parts.extend(merged)
return merged_parts
def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: ChunkerRegistry, documentBytes: bytes, fileName: str, mimeType: str, options: Dict[str, Any]) -> ContentExtracted:
extractor = extractorRegistry.resolve(mimeType, fileName) extractor = extractorRegistry.resolve(mimeType, fileName)
if extractor is None: if extractor is None:
# fallback: single binary part # fallback: single binary part
@ -21,14 +68,66 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker
data="", data="",
metadata={"warning": "No extractor registered"} metadata={"warning": "No extractor registered"}
) )
return ExtractedContent(id=makeId(), parts=[part]) return ContentExtracted(id=makeId(), parts=[part])
parts = extractor.extract(documentBytes, {"fileName": fileName, "mimeType": mimeType, "options": options}) parts = extractor.extract(documentBytes, {"fileName": fileName, "mimeType": mimeType, "options": options})
# Optional merge step
# Apply chunking and size limiting
parts = poolAndLimit(parts, chunkerRegistry, options)
# Optional merge step - but preserve chunks
mergeStrategy = options.get("mergeStrategy", {}) mergeStrategy = options.get("mergeStrategy", {})
if mergeStrategy: if mergeStrategy:
parts = _mergeParts(parts, mergeStrategy)
return ExtractedContent(id=makeId(), parts=parts) # Don't merge chunks - they should stay separate for processing
non_chunk_parts = [p for p in parts if not p.metadata.get("chunk", False)]
chunk_parts = [p for p in parts if p.metadata.get("chunk", False)]
logger.debug(f"runExtraction: Preserving {len(chunk_parts)} chunks from merging")
if non_chunk_parts:
non_chunk_parts = _mergeParts(non_chunk_parts, mergeStrategy)
# Combine non-chunk parts with chunk parts (chunks stay separate)
parts = non_chunk_parts + chunk_parts
logger.debug(f"runExtraction: Final parts after merging: {len(parts)} (chunks: {len(chunk_parts)})")
# DEBUG: dump parts and chunks to files TODO TO REMOVE
try:
base_dir = "./test-chat/ai"
os.makedirs(base_dir, exist_ok=True)
# Generate timestamp for consistent naming
from datetime import datetime, UTC
ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]
# Write a summary file
summary_lines: List[str] = [f"fileName: {fileName}", f"mimeType: {mimeType}", f"totalParts: {len(parts)}"]
text_index = 0
for idx, part in enumerate(parts):
is_texty = part.typeGroup in ("text", "table", "structure")
size = int(part.metadata.get("size", 0) or 0)
is_chunk = bool(part.metadata.get("chunk", False))
summary_lines.append(
f"part[{idx}]: typeGroup={part.typeGroup}, label={part.label}, size={size}, chunk={is_chunk}"
)
if is_texty and getattr(part, "data", None):
text_index += 1
fname = f"{ts}_extract_{fileName}_part_{idx:03d}_{'chunk' if is_chunk else 'full'}_{text_index:03d}.txt"
fpath = os.path.join(base_dir, fname)
with open(fpath, "w", encoding="utf-8") as f:
f.write(f"# typeGroup: {part.typeGroup}\n# label: {part.label}\n# chunk: {is_chunk}\n# size: {size}\n\n")
f.write(str(part.data))
# Write summary file
summary_fname = f"{ts}_extract_{fileName}_summary.txt"
summary_fpath = os.path.join(base_dir, summary_fname)
with open(summary_fpath, "w", encoding="utf-8") as f:
f.write("\n".join(summary_lines))
except Exception as _e:
logger.debug(f"Debug dump skipped: {_e}")
return ContentExtracted(id=makeId(), parts=parts)
def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, options: Dict[str, Any]) -> List[ContentPart]: def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, options: Dict[str, Any]) -> List[ContentPart]:
@ -57,28 +156,54 @@ def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, opt
# If we have remaining parts and chunking is allowed, try chunking # If we have remaining parts and chunking is allowed, try chunking
if remaining and chunkAllowed: if remaining and chunkAllowed:
logger.debug(f"=== CHUNKING ACTIVATED ===")
logger.debug(f"Remaining parts to chunk: {len(remaining)}")
logger.debug(f"Max size limit: {maxSize} bytes")
logger.debug(f"Current size used: {current} bytes")
for p in remaining: for p in remaining:
if p.typeGroup in ("text", "table", "structure"): if p.typeGroup in ("text", "table", "structure", "image"):
logger.debug(f"Chunking {p.typeGroup} part: {len(p.data)} chars")
chunks = chunkerRegistry.resolve(p.typeGroup).chunk(p, options) chunks = chunkerRegistry.resolve(p.typeGroup).chunk(p, options)
logger.debug(f"Created {len(chunks)} chunks")
chunks_added = 0
for ch in chunks: for ch in chunks:
chSize = int(ch.get("size", 0) or 0) chSize = int(ch.get("size", 0) or 0)
if current + chSize <= maxSize: # Add all chunks - don't limit by maxSize since they'll be processed separately
kept.append(ContentPart( kept.append(ContentPart(
id=makeId(), id=makeId(),
parentId=p.id, parentId=p.id,
label=f"chunk_{ch.get('order', 0)}", label=f"chunk_{ch.get('order', 0)}",
typeGroup=p.typeGroup, typeGroup=p.typeGroup,
mimeType=p.mimeType, mimeType=p.mimeType,
data=ch.get("data", ""), data=ch.get("data", ""),
metadata={"size": chSize, "chunk": True} metadata={
)) "size": chSize,
current += chSize "chunk": True,
else: **ch.get("metadata", {})
break }
))
chunks_added += 1
logger.debug(f"Added chunk {ch.get('order', 0)}: {chSize} bytes")
logger.debug(f"Added {chunks_added} chunks from {p.typeGroup} part")
# Apply merging strategy if provided # Apply merging strategy if provided, but preserve chunks
if mergeStrategy: if mergeStrategy:
kept = _applyMerging(kept, mergeStrategy) # Don't merge chunks - they should stay separate for processing
non_chunk_parts = [p for p in kept if not p.metadata.get("chunk", False)]
chunk_parts = [p for p in kept if p.metadata.get("chunk", False)]
logger.debug(f"Preserving {len(chunk_parts)} chunks from merging")
if non_chunk_parts:
non_chunk_parts = _applyMerging(non_chunk_parts, mergeStrategy)
# Combine non-chunk parts with chunk parts (chunks stay separate)
kept = non_chunk_parts + chunk_parts
logger.debug(f"Final parts after merging: {len(kept)} (chunks: {len(chunk_parts)})")
# Re-check size after merging # Re-check size after merging
totalSize = sum(int(p.metadata.get("size", 0) or 0) for p in kept) totalSize = sum(int(p.metadata.get("size", 0) or 0) for p in kept)
@ -151,7 +276,7 @@ def _applySizeLimit(parts: List[ContentPart], maxSize: int) -> List[ContentPart]
return kept return kept
def applyAiIfRequested(extracted: ExtractedContent, options: Dict[str, Any]) -> ExtractedContent: def applyAiIfRequested(extracted: ContentExtracted, options: Dict[str, Any]) -> ContentExtracted:
""" """
Apply AI processing if requested in options. Apply AI processing if requested in options.
This is a placeholder for actual AI integration. This is a placeholder for actual AI integration.

View file

@ -59,8 +59,11 @@ class ExtractorRegistry:
self.register("xlsm", XlsxExtractor()) self.register("xlsm", XlsxExtractor())
# fallback # fallback
self.setFallback(BinaryExtractor()) self.setFallback(BinaryExtractor())
except Exception: print(f"✅ ExtractorRegistry: Successfully registered {len(self._map)} extractors")
pass except Exception as e:
print(f"❌ ExtractorRegistry: Failed to register extractors: {str(e)}")
import traceback
traceback.print_exc()
def register(self, key: str, extractor: Extractor): def register(self, key: str, extractor: Extractor):
self._map[key] = extractor self._map[key] = extractor
@ -88,11 +91,16 @@ class ChunkerRegistry:
from .chunking.text_chunker import TextChunker from .chunking.text_chunker import TextChunker
from .chunking.table_chunker import TableChunker from .chunking.table_chunker import TableChunker
from .chunking.structure_chunker import StructureChunker from .chunking.structure_chunker import StructureChunker
# Skip ImageChunker for now to avoid PIL import hang
# from .chunking.image_chunker import ImageChunker
self.register("text", TextChunker()) self.register("text", TextChunker())
self.register("table", TableChunker()) self.register("table", TableChunker())
self.register("structure", StructureChunker()) self.register("structure", StructureChunker())
except Exception: # self.register("image", ImageChunker())
pass except Exception as e:
print(f"❌ ChunkerRegistry: Failed to register chunkers: {str(e)}")
import traceback
traceback.print_exc()
def register(self, typeGroup: str, chunker: Chunker): def register(self, typeGroup: str, chunker: Chunker):
self._map[typeGroup] = chunker self._map[typeGroup] = chunker

View file

@ -3,5 +3,3 @@ import uuid
def makeId() -> str: def makeId() -> str:
return str(uuid.uuid4()) return str(uuid.uuid4())

View file

@ -105,12 +105,49 @@ class GenerationService:
logger.info(f"Document {document_name} has content: {len(content)} characters") logger.info(f"Document {document_name} has content: {len(content)} characters")
# Normalize file extension based on mime type if missing or incorrect
try:
mime_to_ext = {
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
"application/pdf": ".pdf",
"text/html": ".html",
"text/markdown": ".md",
"text/plain": ".txt",
"application/json": ".json",
}
expected_ext = mime_to_ext.get(mime_type)
if expected_ext:
if not document_name.lower().endswith(expected_ext):
# Append/replace extension to match mime type
if "." in document_name:
document_name = document_name.rsplit(".", 1)[0] + expected_ext
else:
document_name = document_name + expected_ext
except Exception:
pass
# Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text
base64encoded = False
try:
binary_mime_types = {
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
"application/pdf",
}
if isinstance(document_data, str) and mime_type in binary_mime_types:
base64encoded = True
except Exception:
base64encoded = False
# Create document with file in one step using interfaces directly # Create document with file in one step using interfaces directly
document = self._createDocument( document = self._createDocument(
fileName=document_name, fileName=document_name,
mimeType=mime_type, mimeType=mime_type,
content=content, content=content,
base64encoded=False, base64encoded=base64encoded,
messageId=message_id messageId=message_id
) )
if document: if document:
@ -257,4 +294,109 @@ class GenerationService:
'totalActions': 0, 'totalActions': 0,
'workflowStatus': 'unknown', 'workflowStatus': 'unknown',
'workflowId': 'unknown' 'workflowId': 'unknown'
} }
async def renderReport(self, extracted_content: str, output_format: str, title: str) -> tuple[str, str]:
    """
    Render extracted content to the specified output format.

    Args:
        extracted_content: Content extracted by AI using format-specific prompt
        output_format: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
        title: Report title

    Returns:
        tuple: (rendered_content, mime_type)

    Raises:
        ValueError: If no renderer exists for *output_format*.
    """
    try:
        # DEBUG: dump renderer input to diagnose JSON+HTML mixtures TODO REMOVE
        # debug_dir stays None when the input dump fails, so the output dump
        # below is skipped instead of raising a (silently swallowed) NameError.
        debug_dir = None
        try:
            import os
            ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
            debug_root = "./test-chat/ai"
            debug_dir = os.path.join(debug_root, f"render_input_{ts}")
            os.makedirs(debug_dir, exist_ok=True)
            with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f:
                f.write(f"title: {title}\nformat: {output_format}\nlength: {len(extracted_content or '')}\nstarts_with_brace: {str(extracted_content.strip().startswith('{') if extracted_content else False)}\n")
            with open(os.path.join(debug_dir, "extracted_content.txt"), "w", encoding="utf-8") as f:
                f.write(extracted_content or "")
        except Exception:
            debug_dir = None

        # Get the appropriate renderer for the format
        renderer = self._getFormatRenderer(output_format)
        if not renderer:
            raise ValueError(f"Unsupported output format: {output_format}")

        # Render the content
        rendered_content, mime_type = await renderer.render(extracted_content, title)

        # DEBUG: dump rendered output (best effort; only when input dump succeeded)
        if debug_dir is not None:
            try:
                import os
                with open(os.path.join(debug_dir, "rendered_output.txt"), "w", encoding="utf-8") as f:
                    f.write(rendered_content or "")
            except Exception:
                pass

        logger.info(f"Successfully rendered report to {output_format} format: {len(rendered_content)} characters")
        return rendered_content, mime_type
    except Exception as e:
        logger.error(f"Error rendering report to {output_format}: {str(e)}")
        raise
def getExtractionPrompt(self, output_format: str, user_prompt: str, title: str) -> str:
    """
    Get the format-specific extraction prompt for AI content extraction.

    Args:
        output_format: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
        user_prompt: User's original prompt for report generation
        title: Report title

    Returns:
        str: Format-specific prompt for AI extraction

    Raises:
        ValueError: If no renderer exists for *output_format*.
    """
    try:
        target_renderer = self._getFormatRenderer(output_format)
        if not target_renderer:
            raise ValueError(f"Unsupported output format: {output_format}")

        # The centralized builder combines the generic cross-format rules
        # with the renderer's format-specific guideline snippet.
        from .prompt_builder import buildExtractionPrompt

        prompt_text = buildExtractionPrompt(
            output_format=output_format,
            renderer=target_renderer,
            user_prompt=user_prompt,
            title=title
        )
        logger.info(f"Generated {output_format}-specific extraction prompt: {len(prompt_text)} characters")
        return prompt_text
    except Exception as e:
        logger.error(f"Error getting extraction prompt for {output_format}: {str(e)}")
        raise
def _getFormatRenderer(self, output_format: str):
    """Get the appropriate renderer for the specified format using auto-discovery."""
    try:
        from .renderers.registry import get_renderer

        primary = get_renderer(output_format)
        if primary:
            return primary

        # No dedicated renderer: degrade gracefully to the text renderer.
        logger.warning(f"No renderer found for format {output_format}, falling back to text")
        text_fallback = get_renderer('text')
        if text_fallback:
            return text_fallback

        logger.error("Even text renderer fallback failed")
        return None
    except Exception as e:
        logger.error(f"Error getting renderer for {output_format}: {str(e)}")
        return None

View file

@ -0,0 +1,72 @@
"""
Centralized prompt builder for document generation across formats.
Builds a robust prompt that:
- Accepts any user intent (no fixed structure assumptions)
- Injects format-specific guidelines from the selected renderer
- Adds a common policy section to always use real data from source docs
- Requires the AI to output a filename header that we can parse and use
"""
from typing import Protocol
class _RendererLike(Protocol):
    """Structural type: any object exposing a guideline-producing prompt hook."""

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only the format-specific guideline snippet."""
        ...


def buildExtractionPrompt(
    output_format: str,
    renderer: _RendererLike,
    user_prompt: str,
    title: str
) -> str:
    """
    Assemble the final extraction prompt from three pieces:
    - The raw user prompt (verbatim, placed first)
    - Generic cross-format instructions (filename header + real-data policy)
    - The renderer's format-specific guideline snippet

    The AI must place a single filename header at the very top:
        FILENAME: <safe-file-name-with-extension>
    followed by a blank line and then ONLY the document content for the
    target format.
    """
    specific_rules = renderer.getExtractionPrompt(user_prompt, title).strip()

    # Generic block, shared by every format.
    header = f"""
{user_prompt}

You are generating a document in {output_format.upper()} format for the title: "{title}".

Rules:
- The user's intent fully defines the structure. Do not assume a fixed template or headings.
- Use only factual information extracted from the supplied source documents.
- Do not invent, hallucinate, or include placeholders (e.g., "lorem ipsum", "TBD").
- The output must strictly follow the target format and be ready for saving without extra wrapping.
- At the VERY TOP output exactly one line with the filename header:
  FILENAME: <safe-file-name-with-extension>
- The base name should be short, descriptive, and kebab-case or snake-case without spaces.
- Include the correct extension for the requested format (e.g., .html, .pdf, .docx, .md, .txt, .json, .csv, .xlsx).
- Avoid special characters beyond [a-zA-Z0-9-_].
- After this header, insert a single blank line and then provide ONLY the document content.

Common policy:
- Use the actual data from the source documents to create the content.
- Do not generate placeholder text or templates.
- Extract and use the real data provided in the source documents to create meaningful content.
""".strip()

    return (
        f"{header}\n\nFORMAT-SPECIFIC GUIDELINES:\n{specific_rules}"
        "\n\nGenerate the complete document content now based on the source documents below:"
    )

View file

@ -0,0 +1,86 @@
"""
Base renderer class for all format renderers.
"""
from abc import ABC, abstractmethod
from typing import Dict, Any, Tuple, List
import logging
logger = logging.getLogger(__name__)


class BaseRenderer(ABC):
    """Common base for all output-format renderers.

    Subclasses advertise themselves to the registry via the class-level
    hooks (``get_supported_formats`` / ``get_format_aliases`` /
    ``get_priority``) and must implement the two abstract members:
    ``getExtractionPrompt`` and ``render``.
    """

    def __init__(self):
        # Every renderer logs through the shared module-level logger.
        self.logger = logger

    # ---- registry hooks -------------------------------------------------

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Format names handled by this renderer; override in subclasses."""
        return []

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Alternative names for the supported formats; override in subclasses."""
        return []

    @classmethod
    def get_priority(cls) -> int:
        """Tie-break priority when several renderers claim a format (higher wins)."""
        return 0

    # ---- abstract API ---------------------------------------------------

    @abstractmethod
    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """
        Build the format-specific extraction prompt for AI content extraction.

        Args:
            user_prompt: User's original prompt for report generation
            title: Report title

        Returns:
            str: Format-specific prompt for AI extraction
        """

    @abstractmethod
    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """
        Render AI-extracted content into the target format.

        Args:
            extracted_content: Raw content extracted by AI using format-specific prompt
            title: Report title

        Returns:
            tuple: (rendered_content, mime_type)
        """

    # ---- shared helpers -------------------------------------------------

    def _extract_sections(self, report_data: Dict[str, Any]) -> list:
        """Sections list from *report_data*, or ``[]`` when absent."""
        return report_data.get('sections', [])

    def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
        """Metadata mapping from *report_data*, or ``{}`` when absent."""
        return report_data.get('metadata', {})

    def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
        """Title from *report_data*, falling back to *fallback_title*."""
        return report_data.get('title', fallback_title)

    def _format_timestamp(self, timestamp: str = None) -> str:
        """Echo *timestamp* when truthy, else the current UTC time string."""
        if timestamp:
            return timestamp
        from datetime import datetime, UTC
        return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")

View file

@ -0,0 +1,64 @@
"""
CSV renderer for report generation.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import csv
import io
class CsvRenderer(BaseRenderer):
    """Renders content to CSV format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported CSV formats."""
        return ['csv']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['spreadsheet', 'table']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for CSV renderer."""
        return 70

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only CSV-specific guidelines; global prompt is built centrally."""
        return (
            "CSV FORMAT GUIDELINES:\n"
            "- Emit ONLY CSV text without fences or commentary.\n"
            "- Include a single header row with clear column names.\n"
            "- Quote fields containing commas, quotes, or newlines; escape quotes by doubling them.\n"
            "- Use rows to represent items/records derived from sources.\n"
            "- Keep cells concise; include units in headers when useful.\n"
            "OUTPUT: Return ONLY valid CSV content that can be imported."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to CSV format.

        The AI already emits CSV; this only strips markdown fences. On any
        failure a minimal two-row error CSV is returned instead of raising.
        """
        try:
            csv_content = self._clean_csv_content(extracted_content, title)
            return csv_content, "text/csv"
        except Exception as e:
            self.logger.error(f"Error rendering CSV: {str(e)}")
            # Build the fallback with csv.writer so commas/quotes in the
            # title cannot corrupt the CSV structure.
            buf = io.StringIO()
            writer = csv.writer(buf)
            writer.writerow(["Title", "Content"])
            writer.writerow([title, f"Error rendering report: {str(e)}"])
            return buf.getvalue(), "text/csv"

    def _clean_csv_content(self, content: str, title: str) -> str:
        """Strip markdown code fences the AI may have wrapped around the CSV.

        Handles an opening fence with or without a language tag, and a
        missing closing fence (previously only the both-fences case was cleaned).
        """
        content = content.strip()
        if content.startswith("```"):
            lines = content.split('\n')
            # Drop the opening fence line (e.g. ``` or ```csv).
            lines = lines[1:]
            # Drop the closing fence if present.
            if lines and lines[-1].strip() == "```":
                lines = lines[:-1]
            content = '\n'.join(lines).strip()
        return content

View file

@ -0,0 +1,249 @@
"""
DOCX renderer for report generation using python-docx.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC
try:
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.oxml.shared import OxmlElement, qn
from docx.oxml.ns import nsdecls
from docx.oxml import parse_xml
DOCX_AVAILABLE = True
except ImportError:
DOCX_AVAILABLE = False
class DocxRenderer(BaseRenderer):
    """Renders content to DOCX format using python-docx.

    The AI supplies structured plain text; this renderer heuristically maps
    it to Word headings, lists, and tables. When python-docx is missing
    (DOCX_AVAILABLE is False), render() falls back to the HTML renderer.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported DOCX formats."""
        return ['docx', 'doc']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['word', 'document']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for DOCX renderer."""
        return 115

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only DOCX-specific guidelines; global prompt is built centrally."""
        return (
            "DOCX FORMAT GUIDELINES:\n"
            "- Provide plain text content suitable for Word generation (no markdown/HTML).\n"
            "- Use clear section hierarchy; bullet and numbered lists where needed.\n"
            "- Include tables as simple pipe-delimited lines if tabular data is needed.\n"
            "OUTPUT: Return ONLY the structured plain text to be converted into DOCX."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to DOCX format.

        Returns:
            tuple: (content, mime_type). NOTE(review): on the DOCX path the
            content is a base64-encoded string of the .docx bytes (see
            _generate_docx) even though the mime type is the raw DOCX type —
            callers presumably decode it; confirm against the upload path.
        """
        try:
            if not DOCX_AVAILABLE:
                # Fallback to HTML if python-docx not available
                from .html_renderer import HtmlRenderer
                html_renderer = HtmlRenderer()
                html_content, _ = await html_renderer.render(extracted_content, title)
                return html_content, "text/html"
            # Generate DOCX using python-docx
            docx_content = self._generate_docx(extracted_content, title)
            return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        except Exception as e:
            self.logger.error(f"Error rendering DOCX: {str(e)}")
            # Return minimal fallback
            return f"DOCX Generation Error: {str(e)}", "text/plain"

    def _generate_docx(self, content: str, title: str) -> str:
        """Generate DOCX content using python-docx.

        Builds a title page, then scans *content* line by line, promoting
        lines to headings by heuristic (ALL CAPS -> H1, Title Case -> H2,
        markdown #/##/### as fallback) and buffering everything else into
        sections handed to _process_section.

        Returns:
            str: base64-encoded bytes of the generated .docx file.
        """
        try:
            # Create new document
            doc = Document()
            # Set up document styles
            self._setup_document_styles(doc)
            # Add title
            title_para = doc.add_heading(title, 0)
            title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
            # Add generation date
            date_para = doc.add_paragraph(f"Generated: {self._format_timestamp()}")
            date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
            # Add page break
            doc.add_page_break()
            # Process content
            lines = content.split('\n')
            current_section = []  # lines buffered until the next heading
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                # Check for ALL CAPS headings (major headings); len > 3 avoids
                # promoting short tokens like acronyms.
                if line.isupper() and len(line) > 3 and not line.startswith('-') and not line.startswith('*'):
                    if current_section:
                        self._process_section(doc, current_section)
                        current_section = []
                    doc.add_heading(line, level=1)
                # Check for Title Case headings (subheadings); excludes list
                # markers and numbered items 1.-5.
                elif line.istitle() and len(line) > 5 and not line.startswith('-') and not line.startswith('*') and not line.startswith(('1.', '2.', '3.', '4.', '5.')):
                    if current_section:
                        self._process_section(doc, current_section)
                        current_section = []
                    doc.add_heading(line, level=2)
                # Check for markdown headings (fallback)
                elif line.startswith('# '):
                    # H1 heading
                    if current_section:
                        self._process_section(doc, current_section)
                        current_section = []
                    doc.add_heading(line[2:], level=1)
                elif line.startswith('## '):
                    # H2 heading
                    if current_section:
                        self._process_section(doc, current_section)
                        current_section = []
                    doc.add_heading(line[3:], level=2)
                elif line.startswith('### '):
                    # H3 heading
                    if current_section:
                        self._process_section(doc, current_section)
                        current_section = []
                    doc.add_heading(line[4:], level=3)
                else:
                    current_section.append(line)
            # Process remaining content
            if current_section:
                self._process_section(doc, current_section)
            # Save to buffer
            buffer = io.BytesIO()
            doc.save(buffer)
            buffer.seek(0)
            # Convert to base64 so the binary document can travel as a string
            docx_bytes = buffer.getvalue()
            docx_base64 = base64.b64encode(docx_bytes).decode('utf-8')
            return docx_base64
        except Exception as e:
            self.logger.error(f"Error generating DOCX: {str(e)}")
            raise

    def _setup_document_styles(self, doc):
        """Set up document styles (Calibri body, bold descending-size headings).

        Style failures are logged and ignored; the document is still produced.
        """
        try:
            # Set default font
            style = doc.styles['Normal']
            font = style.font
            font.name = 'Calibri'
            font.size = Pt(11)
            # Set heading styles: H1=14pt, H2=12pt, H3=10pt, all bold
            for i in range(1, 4):
                heading_style = doc.styles[f'Heading {i}']
                heading_font = heading_style.font
                heading_font.name = 'Calibri'
                heading_font.size = Pt(16 - i * 2)
                heading_font.bold = True
        except Exception as e:
            self.logger.warning(f"Could not set up document styles: {str(e)}")

    def _process_section(self, doc, lines: list):
        """Process a section of content into DOCX elements.

        NOTE: once a pipe-bearing line is seen, the WHOLE section is
        re-scanned as one table and the method returns — any non-table
        lines after that point in this section are dropped.
        """
        for line in lines:
            if not line.strip():
                continue
            # Check for tables (lines with |)
            if '|' in line and not line.startswith('|'):
                # This might be part of a table, process as table
                table_data = self._extract_table_data(lines)
                if table_data:
                    self._add_table(doc, table_data)
                return
            # Check for lists
            if line.startswith('- ') or line.startswith('* '):
                # This is a list item
                doc.add_paragraph(line[2:], style='List Bullet')
            elif line.startswith(('1. ', '2. ', '3. ', '4. ', '5. ')):
                # This is a numbered list item (only 1.-5. are recognized)
                doc.add_paragraph(line[3:], style='List Number')
            else:
                # Regular paragraph
                doc.add_paragraph(line)

    def _extract_table_data(self, lines: list) -> list:
        """Extract table data from lines.

        Collects pipe-delimited rows; an empty line after the table ends it.
        Returns [] unless at least two rows (header + data) were found.
        """
        table_data = []
        in_table = False
        for line in lines:
            if '|' in line:
                if not in_table:
                    in_table = True
                # Split by | and clean up; empty cells are discarded
                cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                if cells:
                    table_data.append(cells)
            elif in_table and not line.strip():
                # Empty line, might be end of table
                break
        return table_data if len(table_data) > 1 else []

    def _add_table(self, doc, table_data: list):
        """Add a table to the document; column count comes from the first row."""
        try:
            if not table_data:
                return
            # Create table
            table = doc.add_table(rows=len(table_data), cols=len(table_data[0]))
            table.alignment = WD_TABLE_ALIGNMENT.CENTER
            # Add data to table; extra cells in a row beyond the column count
            # of row 0 are silently dropped
            for row_idx, row_data in enumerate(table_data):
                for col_idx, cell_data in enumerate(row_data):
                    if col_idx < len(table.rows[row_idx].cells):
                        table.rows[row_idx].cells[col_idx].text = cell_data
            # Style the table
            self._style_table(table)
        except Exception as e:
            self.logger.warning(f"Could not add table: {str(e)}")

    def _style_table(self, table):
        """Apply styling to the table (bolds every run in the header row)."""
        try:
            # Style header row
            if len(table.rows) > 0:
                header_cells = table.rows[0].cells
                for cell in header_cells:
                    for paragraph in cell.paragraphs:
                        for run in paragraph.runs:
                            run.bold = True
        except Exception as e:
            self.logger.warning(f"Could not style table: {str(e)}")

View file

@ -0,0 +1,210 @@
"""
Excel renderer for report generation using openpyxl.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC
try:
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.table import Table, TableStyleInfo
OPENPYXL_AVAILABLE = True
except ImportError:
OPENPYXL_AVAILABLE = False
class ExcelRenderer(BaseRenderer):
    """Renders content to Excel format using openpyxl.

    Produces a three-sheet workbook (Summary, Data, Analysis) from the AI's
    pipe-delimited plain text. When openpyxl is missing (OPENPYXL_AVAILABLE
    is False), render() falls back to the CSV renderer.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported Excel formats."""
        return ['xlsx', 'xls', 'excel']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['spreadsheet', 'workbook']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for Excel renderer."""
        return 110

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only Excel-specific guidelines; global prompt is built centrally."""
        return (
            "EXCEL FORMAT GUIDELINES:\n"
            "- Output one or more pipe-delimited tables with a single header row.\n"
            "- Let user intent define columns; use clear names and ISO dates.\n"
            "- Separate multiple tables by a single blank line.\n"
            "- No markdown/HTML/code fences; tables only unless user explicitly asks for notes.\n"
            "OUTPUT: Return ONLY pipe-delimited tables suitable for import."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to Excel format.

        Returns:
            tuple: (content, mime_type). NOTE(review): on the xlsx path the
            content is a base64-encoded string of the workbook bytes (see
            _generate_excel) while the mime type is the raw xlsx type —
            callers presumably decode it; confirm against the upload path.
        """
        try:
            if not OPENPYXL_AVAILABLE:
                # Fallback to CSV if openpyxl not available
                from .csv_renderer import CsvRenderer
                csv_renderer = CsvRenderer()
                csv_content, _ = await csv_renderer.render(extracted_content, title)
                return csv_content, "text/csv"
            # Generate Excel using openpyxl
            excel_content = self._generate_excel(extracted_content, title)
            return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        except Exception as e:
            self.logger.error(f"Error rendering Excel: {str(e)}")
            # Return CSV fallback (single unescaped row; title with commas
            # would shift columns — acceptable for an error artifact)
            return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv"

    def _generate_excel(self, content: str, title: str) -> str:
        """Generate Excel content using openpyxl.

        Builds the Summary/Data/Analysis sheets, then serializes the
        workbook to bytes.

        Returns:
            str: base64-encoded bytes of the generated .xlsx file.
        """
        try:
            # Create workbook
            wb = Workbook()
            # Remove default sheet so the explicit sheets below define order
            wb.remove(wb.active)
            # Create sheets
            summary_sheet = wb.create_sheet("Summary", 0)
            data_sheet = wb.create_sheet("Data", 1)
            analysis_sheet = wb.create_sheet("Analysis", 2)
            # Add content to sheets
            self._populate_summary_sheet(summary_sheet, title)
            self._populate_data_sheet(data_sheet, content)
            self._populate_analysis_sheet(analysis_sheet, content)
            # Save to buffer
            buffer = io.BytesIO()
            wb.save(buffer)
            buffer.seek(0)
            # Convert to base64 so the binary workbook can travel as a string
            excel_bytes = buffer.getvalue()
            excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
            return excel_base64
        except Exception as e:
            self.logger.error(f"Error generating Excel: {str(e)}")
            raise

    def _populate_summary_sheet(self, sheet, title: str):
        """Populate the summary sheet (title, timestamp, item-count formula).

        Failures are logged and ignored; the workbook is still produced.
        """
        try:
            # Title
            sheet['A1'] = title
            sheet['A1'].font = Font(size=16, bold=True)
            sheet['A1'].alignment = Alignment(horizontal='center')
            # Generation info
            sheet['A3'] = "Generated:"
            sheet['B3'] = self._format_timestamp()
            sheet['A4'] = "Status:"
            sheet['B4'] = "Generated Successfully"
            # Key metrics placeholder
            sheet['A6'] = "Key Metrics:"
            sheet['A6'].font = Font(bold=True)
            sheet['A7'] = "Total Items:"
            # Live formula: counts non-empty cells in Data!A, minus the header row
            sheet['B7'] = "=COUNTA(Data!A:A)-1"  # Count non-empty cells in Data sheet
            # Auto-adjust column widths
            sheet.column_dimensions['A'].width = 20
            sheet.column_dimensions['B'].width = 30
        except Exception as e:
            self.logger.warning(f"Could not populate summary sheet: {str(e)}")

    def _populate_data_sheet(self, sheet, content: str):
        """Populate the data sheet.

        Writes a fixed five-column header, then fills rows: pipe-delimited
        lines become one row per line (first 5 cells), anything else goes
        into column A as-is.
        """
        try:
            # Headers (fixed layout regardless of the AI's actual columns)
            headers = ["Item/Category", "Value/Amount", "Percentage", "Source Document", "Notes/Comments"]
            for col, header in enumerate(headers, 1):
                cell = sheet.cell(row=1, column=col, value=header)
                cell.font = Font(bold=True)
                cell.fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")
            # Process content
            lines = content.split('\n')
            row = 2
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                # Check for table data (lines with |)
                if '|' in line:
                    cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                    for col, cell_data in enumerate(cells[:5], 1):  # Limit to 5 columns
                        sheet.cell(row=row, column=col, value=cell_data)
                    row += 1
                else:
                    # Regular content
                    sheet.cell(row=row, column=1, value=line)
                    row += 1
            # Auto-adjust column widths
            for col in range(1, 6):
                sheet.column_dimensions[get_column_letter(col)].width = 20
        except Exception as e:
            self.logger.warning(f"Could not populate data sheet: {str(e)}")

    def _populate_analysis_sheet(self, sheet, content: str):
        """Populate the analysis sheet.

        Writes simple line-type counts (table/list/text) derived from the
        raw content, followed by fixed recommendation rows.
        """
        try:
            # Title
            sheet['A1'] = "Analysis & Insights"
            sheet['A1'].font = Font(size=14, bold=True)
            # Content analysis
            lines = content.split('\n')
            row = 3
            sheet['A3'] = "Content Analysis:"
            sheet['A3'].font = Font(bold=True)
            row += 1
            # Count different types of content
            table_lines = sum(1 for line in lines if '|' in line)
            list_lines = sum(1 for line in lines if line.startswith(('- ', '* ')))
            text_lines = len(lines) - table_lines - list_lines
            sheet[f'A{row}'] = f"Total Lines: {len(lines)}"
            row += 1
            sheet[f'A{row}'] = f"Table Rows: {table_lines}"
            row += 1
            sheet[f'A{row}'] = f"List Items: {list_lines}"
            row += 1
            sheet[f'A{row}'] = f"Text Lines: {text_lines}"
            row += 2
            # Recommendations (static boilerplate)
            sheet[f'A{row}'] = "Recommendations:"
            sheet[f'A{row}'].font = Font(bold=True)
            row += 1
            sheet[f'A{row}'] = "1. Review data accuracy"
            row += 1
            sheet[f'A{row}'] = "2. Consider additional analysis"
            row += 1
            sheet[f'A{row}'] = "3. Update regularly"
            # Auto-adjust column width
            sheet.column_dimensions['A'].width = 30
        except Exception as e:
            self.logger.warning(f"Could not populate analysis sheet: {str(e)}")

View file

@ -0,0 +1,69 @@
"""
HTML renderer for report generation.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
class HtmlRenderer(BaseRenderer):
    """Renders content to HTML format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Primary formats served by this renderer."""
        return ['html', 'htm']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Alternative names that resolve to the HTML formats."""
        return ['web', 'webpage']

    @classmethod
    def get_priority(cls) -> int:
        """Selection priority of this renderer."""
        return 100

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only HTML-specific guidelines; global prompt is built centrally."""
        return (
            "HTML FORMAT GUIDELINES:\n"
            "- Output a complete HTML5 document starting with <!DOCTYPE html>.\n"
            "- Include <html>, <head> with <meta charset=\"UTF-8\"> and <title>, and <body>.\n"
            "- Use semantic elements: <header>, <main>, <section>, <article>, <footer>.\n"
            "- Provide professional CSS in a <style> block; responsive, clean typography.\n"
            "- Use h1/h2/h3 for headings; tables and lists for structure.\n"
            "OUTPUT: Return ONLY valid HTML (no markdown, no code fences)."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to HTML format.

        Returns a (document, mime_type) tuple; on failure a minimal error
        document is returned instead of raising.
        """
        try:
            # The AI output is expected to already be HTML; normalize it
            # into a valid standalone document.
            return self._clean_html_content(extracted_content, title), "text/html"
        except Exception as e:
            self.logger.error(f"Error rendering HTML: {str(e)}")
            # Minimal fallback document describing the failure.
            return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"

    def _clean_html_content(self, content: str, title: str) -> str:
        """Strip code fences and guarantee a complete HTML document."""
        content = content.strip()
        # A fenced response wraps the real HTML; keep only the inner lines.
        if content.startswith("```") and content.endswith("```"):
            fence_lines = content.split('\n')
            if len(fence_lines) > 2:
                content = '\n'.join(fence_lines[1:-1]).strip()
        # Already a full document: nothing more to do.
        if content.startswith('<!DOCTYPE'):
            return content
        # Bare <html> document: just prepend the doctype.
        if content.startswith('<html'):
            return '<!DOCTYPE html>\n' + content
        # Fragment: wrap it in a minimal document shell.
        return f'<!DOCTYPE html>\n<html>\n<head><meta charset="UTF-8"><title>{title}</title></head>\n<body>\n{content}\n</body>\n</html>'

View file

@ -0,0 +1,74 @@
"""
JSON renderer for report generation.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import json
class JsonRenderer(BaseRenderer):
    """Renders content to JSON format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Primary formats served by this renderer."""
        return ['json']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Alternative names that resolve to the JSON format."""
        return ['data']

    @classmethod
    def get_priority(cls) -> int:
        """Selection priority of this renderer."""
        return 80

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only JSON-specific guidelines; global prompt is built centrally."""
        return (
            "JSON FORMAT GUIDELINES:\n"
            "- Output ONLY a single valid JSON object (no fences, no pre/post text).\n"
            "- Choose a structure that best fits the user's intent; include a top-level title and data.\n"
            "- Prefer arrays/objects that map cleanly to the extracted facts.\n"
            "- Include minimal metadata only if useful (e.g., generatedAt, sources).\n"
            "OUTPUT: Return ONLY valid, parseable JSON."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to JSON format.

        Returns a (json_text, mime_type) tuple; on failure a minimal error
        payload is returned instead of raising.
        """
        try:
            # The AI output is expected to already be JSON; validate and
            # pretty-print it.
            return self._clean_json_content(extracted_content, title), "application/json"
        except Exception as e:
            self.logger.error(f"Error rendering JSON: {str(e)}")
            # Minimal fallback payload describing the failure.
            fallback_data = {
                "title": title,
                "sections": [{"type": "text", "content": f"Error rendering report: {str(e)}"}],
                "metadata": {"error": str(e)}
            }
            return json.dumps(fallback_data, indent=2), "application/json"

    def _clean_json_content(self, content: str, title: str) -> str:
        """Strip code fences and pretty-print the JSON if it parses."""
        content = content.strip()
        # A fenced response wraps the real JSON; keep only the inner lines.
        if content.startswith("```") and content.endswith("```"):
            fence_lines = content.split('\n')
            if len(fence_lines) > 2:
                content = '\n'.join(fence_lines[1:-1]).strip()
        try:
            # Round-trip to normalize indentation and validate syntax.
            return json.dumps(json.loads(content), indent=2, ensure_ascii=False)
        except json.JSONDecodeError:
            # Not parseable JSON - hand it back untouched.
            return content

View file

@ -0,0 +1,65 @@
"""
Markdown renderer for report generation.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
class MarkdownRenderer(BaseRenderer):
    """Renders content to Markdown format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Primary formats served by this renderer."""
        return ['md', 'markdown']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Alternative names that resolve to the Markdown formats."""
        return ['mdown', 'mkd']

    @classmethod
    def get_priority(cls) -> int:
        """Selection priority of this renderer."""
        return 95

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only Markdown-specific guidelines; global prompt is built centrally."""
        return (
            "MARKDOWN FORMAT GUIDELINES:\n"
            "- Use proper Markdown syntax only (no HTML wrappers).\n"
            "- # for main title, ## for sections, ### for subsections.\n"
            "- Tables with | separators and a header row.\n"
            "- Bullet lists with - or *.\n"
            "- Emphasis with **bold** and *italic*.\n"
            "- Code blocks with ```language.\n"
            "- Horizontal rules (---) to separate major sections when helpful.\n"
            "- Include links [text](url) and images ![alt](url) when referenced by sources.\n"
            "OUTPUT: Return ONLY raw Markdown content without code fences."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to Markdown format.

        Returns a (markdown, mime_type) tuple; on failure a minimal error
        document is returned instead of raising.
        """
        try:
            # The AI output is expected to already be Markdown; only normalize it.
            return self._clean_markdown_content(extracted_content, title), "text/markdown"
        except Exception as e:
            self.logger.error(f"Error rendering markdown: {str(e)}")
            # Minimal fallback document describing the failure.
            return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"

    def _clean_markdown_content(self, content: str, title: str) -> str:
        """Strip a surrounding code fence, if any, and trim whitespace."""
        content = content.strip()
        # A fenced response wraps the real markdown; keep only the inner lines.
        if content.startswith("```") and content.endswith("```"):
            fence_lines = content.split('\n')
            if len(fence_lines) > 2:
                content = '\n'.join(fence_lines[1:-1]).strip()
        return content

View file

@ -0,0 +1,225 @@
"""
PDF renderer for report generation using reportlab.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC
try:
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
REPORTLAB_AVAILABLE = True
except ImportError:
REPORTLAB_AVAILABLE = False
class PdfRenderer(BaseRenderer):
    """Renders content to PDF format using reportlab.

    The extracted text is parsed line by line: markdown-style headings
    ('# ', '## ', '### ') become styled paragraphs, '|'-separated lines
    become tables, and '- '/'* ' lines become list items. The finished
    PDF is returned as a base64-encoded string; if reportlab is not
    installed, rendering falls back to the HTML renderer.
    """
    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported PDF formats."""
        return ['pdf']
    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['document', 'print']
    @classmethod
    def get_priority(cls) -> int:
        """Return priority for PDF renderer."""
        return 120
    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only PDF-specific guidelines; global prompt is built centrally."""
        return (
            "PDF FORMAT GUIDELINES:\n"
            "- Provide structured content suitable for pagination and headings (H1/H2/H3-like).\n"
            "- Use bullet lists and tables where useful; separate major sections clearly.\n"
            "- Avoid markdown/HTML; produce clean, plain content that can be laid out as PDF.\n"
            "OUTPUT: Return ONLY the PDF-ready textual content (no fences)."
        )
    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to PDF format.

        Returns:
            (base64_pdf, "application/pdf") on success; (html, "text/html")
            when reportlab is missing; (error_text, "text/plain") on failure.
        """
        try:
            if not REPORTLAB_AVAILABLE:
                # Fallback to HTML if reportlab not available
                from .html_renderer import HtmlRenderer
                html_renderer = HtmlRenderer()
                html_content, _ = await html_renderer.render(extracted_content, title)
                return html_content, "text/html"
            # Generate PDF using reportlab
            pdf_content = self._generate_pdf(extracted_content, title)
            return pdf_content, "application/pdf"
        except Exception as e:
            self.logger.error(f"Error rendering PDF: {str(e)}")
            # Return minimal fallback
            return f"PDF Generation Error: {str(e)}", "text/plain"
    def _generate_pdf(self, content: str, title: str) -> str:
        """Generate PDF content using reportlab.

        Builds a title page, then walks the content line by line, routing
        heading lines to styled paragraphs and accumulating everything else
        into sections handed to _process_section. Returns the PDF bytes as
        a base64 string; re-raises on any reportlab failure.
        """
        try:
            # Create a buffer to hold the PDF
            buffer = io.BytesIO()
            # Create PDF document (A4, 1-inch side margins)
            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18
            )
            # Get styles
            styles = getSampleStyleSheet()
            # Create custom styles
            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=24,
                spaceAfter=30,
                alignment=TA_CENTER,
                textColor=colors.darkblue
            )
            heading_style = ParagraphStyle(
                'CustomHeading',
                parent=styles['Heading2'],
                fontSize=16,
                spaceAfter=12,
                spaceBefore=12,
                textColor=colors.darkblue
            )
            # Build PDF content
            story = []
            # Title page: title + generation timestamp, then a page break
            story.append(Paragraph(title, title_style))
            story.append(Spacer(1, 20))
            story.append(Paragraph(f"Generated: {self._format_timestamp()}", styles['Normal']))
            story.append(PageBreak())
            # Process content: headings flush the accumulated section first,
            # so body text always lands under the heading that precedes it.
            lines = content.split('\n')
            current_section = []
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                # Check for headings
                if line.startswith('# '):
                    # H1 heading (NOTE: reuses the large title style)
                    if current_section:
                        story.extend(self._process_section(current_section, styles))
                        current_section = []
                    story.append(Paragraph(line[2:], title_style))
                    story.append(Spacer(1, 12))
                elif line.startswith('## '):
                    # H2 heading
                    if current_section:
                        story.extend(self._process_section(current_section, styles))
                        current_section = []
                    story.append(Paragraph(line[3:], heading_style))
                    story.append(Spacer(1, 8))
                elif line.startswith('### '):
                    # H3 heading
                    if current_section:
                        story.extend(self._process_section(current_section, styles))
                        current_section = []
                    story.append(Paragraph(line[4:], styles['Heading3']))
                    story.append(Spacer(1, 6))
                else:
                    current_section.append(line)
            # Process remaining content
            if current_section:
                story.extend(self._process_section(current_section, styles))
            # Build PDF
            doc.build(story)
            # Get PDF content as base64
            buffer.seek(0)
            pdf_bytes = buffer.getvalue()
            pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
            return pdf_base64
        except Exception as e:
            self.logger.error(f"Error generating PDF: {str(e)}")
            raise
    def _process_section(self, lines: list, styles) -> list:
        """Process a section of content into PDF elements.

        Lines containing '|' trigger table extraction over the WHOLE section;
        '- '/'* ' lines and plain lines become paragraphs.
        """
        elements = []
        for line in lines:
            if not line.strip():
                continue
            # Check for tables (lines with |)
            if '|' in line and not line.startswith('|'):
                # This might be part of a table, process as table
                table_data = self._extract_table_data(lines)
                if table_data:
                    table = Table(table_data)
                    table.setStyle(TableStyle([
                        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                        ('FONTSIZE', (0, 0), (-1, 0), 14),
                        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                        ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                        ('GRID', (0, 0), (-1, -1), 1, colors.black)
                    ]))
                    elements.append(table)
                    elements.append(Spacer(1, 12))
                # NOTE(review): this returns from inside the loop on the FIRST
                # table-like line, so any lines after the table in this section
                # are silently dropped — confirm that is intended.
                return elements
            # Check for lists
            if line.startswith('- ') or line.startswith('* '):
                # This is a list item
                elements.append(Paragraph(f"{line[2:]}", styles['Normal']))
            else:
                # Regular paragraph
                elements.append(Paragraph(line, styles['Normal']))
                elements.append(Spacer(1, 6))
        return elements
    def _extract_table_data(self, lines: list) -> list:
        """Extract table data from lines.

        Collects cells from every '|'-containing line until a blank line ends
        the table. Returns [] unless at least two rows (header + data) exist.
        NOTE(review): callers pass sections that were built from non-blank
        lines only, so the blank-line terminator likely never fires here.
        """
        table_data = []
        in_table = False
        for line in lines:
            if '|' in line:
                if not in_table:
                    in_table = True
                # Split by | and clean up
                cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                if cells:
                    table_data.append(cells)
            elif in_table and not line.strip():
                # Empty line, might be end of table
                break
        return table_data if len(table_data) > 1 else []

View file

@ -0,0 +1,157 @@
"""
Renderer registry for automatic discovery and registration of renderers.
"""
import logging
import importlib
import pkgutil
from typing import Dict, Type, List, Optional
from .base_renderer import BaseRenderer
logger = logging.getLogger(__name__)
class RendererRegistry:
    """Registry for automatic renderer discovery and management.

    Scans its own package directory for modules defining BaseRenderer
    subclasses, registers them by the formats they declare, and hands out
    renderer instances on demand. Discovery is lazy and runs at most once.
    """
    def __init__(self):
        # format name (lowercase) -> renderer class
        self._renderers: Dict[str, Type[BaseRenderer]] = {}
        # alias (lowercase) -> canonical format name
        self._format_mappings: Dict[str, str] = {}
        # guards against repeated filesystem scans
        self._discovered = False
    def discover_renderers(self) -> None:
        """Automatically discover and register all renderers by scanning files."""
        if self._discovered:
            return
        try:
            import os
            import sys
            from pathlib import Path
            # Get the directory containing this registry file
            current_dir = Path(__file__).parent
            renderers_dir = current_dir
            # Get the package name dynamically
            package_name = __name__.rsplit('.', 1)[0]
            # Scan all Python files in the renderers directory
            for file_path in renderers_dir.glob("*.py"):
                # Skip infrastructure modules that cannot contain renderers
                if file_path.name in ['registry.py', 'base_renderer.py', '__init__.py']:
                    continue
                # Extract module name from filename
                module_name = file_path.stem
                try:
                    # Import the module dynamically
                    full_module_name = f"{package_name}.{module_name}"
                    module = importlib.import_module(full_module_name)
                    # Look for renderer classes in the module
                    for attr_name in dir(module):
                        attr = getattr(module, attr_name)
                        if (isinstance(attr, type) and
                            issubclass(attr, BaseRenderer) and
                            attr != BaseRenderer and
                            hasattr(attr, 'get_supported_formats')):
                            # Register the renderer
                            self._register_renderer_class(attr)
                            logger.info(f"Discovered renderer: {attr.__name__} from {module_name}")
                except Exception as e:
                    # A broken renderer module must not abort discovery of the rest
                    logger.warning(f"Could not load renderer from {module_name}: {str(e)}")
                    continue
            self._discovered = True
            logger.info(f"Renderer discovery completed. Found {len(self._renderers)} renderers.")
        except Exception as e:
            logger.error(f"Error during renderer discovery: {str(e)}")
            self._discovered = True  # Mark as discovered to avoid repeated attempts
    def _register_renderer_class(self, renderer_class: Type[BaseRenderer]) -> None:
        """Register a renderer class with its supported formats.

        NOTE(review): the alias loop runs once per supported format, so each
        alias ends up mapped to the LAST format in get_supported_formats();
        also, a class registered later silently overwrites an earlier one for
        the same format — get_priority() is never consulted here. Confirm
        both behaviors are intended.
        """
        try:
            # Get supported formats from the renderer class
            supported_formats = renderer_class.get_supported_formats()
            for format_name in supported_formats:
                # Register primary format
                self._renderers[format_name.lower()] = renderer_class
                # Register aliases if any
                if hasattr(renderer_class, 'get_format_aliases'):
                    aliases = renderer_class.get_format_aliases()
                    for alias in aliases:
                        self._format_mappings[alias.lower()] = format_name.lower()
            logger.debug(f"Registered {renderer_class.__name__} for formats: {supported_formats}")
        except Exception as e:
            logger.error(f"Error registering renderer {renderer_class.__name__}: {str(e)}")
    def get_renderer(self, output_format: str) -> Optional[BaseRenderer]:
        """Get a renderer instance for the specified format.

        Returns a fresh instance per call, or None when the format is unknown
        or the renderer cannot be constructed.
        """
        if not self._discovered:
            self.discover_renderers()
        # Normalize format name
        format_name = output_format.lower().strip()
        # Check for aliases first
        if format_name in self._format_mappings:
            format_name = self._format_mappings[format_name]
        # Get renderer class
        renderer_class = self._renderers.get(format_name)
        if renderer_class:
            try:
                return renderer_class()
            except Exception as e:
                logger.error(f"Error creating renderer instance for {format_name}: {str(e)}")
                return None
        logger.warning(f"No renderer found for format: {output_format}")
        return None
    def get_supported_formats(self) -> List[str]:
        """Get list of all supported formats (primary names plus aliases), sorted."""
        if not self._discovered:
            self.discover_renderers()
        formats = list(self._renderers.keys())
        formats.extend(self._format_mappings.keys())
        return sorted(set(formats))
    def get_renderer_info(self) -> Dict[str, Dict[str, str]]:
        """Get information about all registered renderers.

        Returns:
            Mapping of format name -> {class_name, module, description},
            where description is the first line of the class docstring.
        """
        if not self._discovered:
            self.discover_renderers()
        info = {}
        for format_name, renderer_class in self._renderers.items():
            info[format_name] = {
                'class_name': renderer_class.__name__,
                'module': renderer_class.__module__,
                'description': getattr(renderer_class, '__doc__', 'No description').strip().split('\n')[0] if renderer_class.__doc__ else 'No description'
            }
        return info
# Global registry instance shared by the module-level convenience functions below
_registry = RendererRegistry()
def get_renderer(output_format: str) -> Optional[BaseRenderer]:
    """Get a renderer instance for the specified format (delegates to the shared registry)."""
    return _registry.get_renderer(output_format)
def get_supported_formats() -> List[str]:
    """Get list of all supported formats (delegates to the shared registry)."""
    return _registry.get_supported_formats()
def get_renderer_info() -> Dict[str, Dict[str, str]]:
    """Get information about all registered renderers (delegates to the shared registry)."""
    return _registry.get_renderer_info()

View file

@ -0,0 +1,94 @@
"""
Text renderer for report generation.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
class TextRenderer(BaseRenderer):
    """Renders content to plain text format with format-specific extraction.

    Acts as the catch-all renderer for plain text, source code, config and
    documentation formats that have no dedicated renderer.
    """
    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported text formats (excluding formats with dedicated renderers)."""
        return [
            'txt', 'text', 'plain',
            # Programming languages
            'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx',
            'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp',
            'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust',
            'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc',
            'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd',
            # Web technologies (excluding html/htm which have dedicated renderer)
            'css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg',
            # Data formats (excluding csv, md/markdown which have dedicated renderers)
            'tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore',
            # Configuration files
            'env', 'properties', 'conf', 'config', 'rc',
            'gitattributes', 'editorconfig', 'eslintrc',
            # Documentation
            'readme', 'changelog', 'license', 'authors',
            'contributing', 'todo', 'notes', 'docs'
        ]
    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return [
            'ascii', 'utf8', 'utf-8', 'code', 'source',
            'script', 'program', 'file', 'document',
            'raw', 'unformatted', 'plaintext'
        ]
    @classmethod
    def get_priority(cls) -> int:
        """Return priority for text renderer."""
        return 90
    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only plain-text guidelines; global prompt is built centrally."""
        return (
            "TEXT FORMAT GUIDELINES:\n"
            "- Output ONLY plain text (no markdown or HTML).\n"
            "- Use clear headings (you may underline with === or --- when helpful).\n"
            "- Use simple bullet lists with '-' and tables with '|' when needed.\n"
            "- Preserve indentation for code-like content if present.\n"
            "OUTPUT: Return ONLY the raw text content."
        )
    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to plain text format.

        Returns:
            (text, "text/plain"); on failure a minimal error text is
            returned instead of raising.
        """
        try:
            # The AI output is already formatted text; only clean it up.
            text_content = self._clean_text_content(extracted_content, title)
            return text_content, "text/plain"
        except Exception as e:
            self.logger.error(f"Error rendering text: {str(e)}")
            # Return minimal text fallback
            return f"{title}\n\nError rendering report: {str(e)}", "text/plain"
    def _clean_text_content(self, content: str, title: str) -> str:
        """Clean and validate text content from AI.

        Strips surrounding code fences, removes paired markdown emphasis
        markers and HTML-like tags, and normalizes line endings.

        Args:
            content: Raw text produced by the AI.
            title: Report title (unused here; kept for renderer symmetry).

        Returns:
            The cleaned plain-text content.
        """
        import re
        content = content.strip()
        # Remove markdown code blocks if present
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()
        # Strip only PAIRED markdown emphasis markers. The previous blanket
        # replace('*', '') / replace('_', '') destroyed literal asterisks and
        # underscores (snake_case identifiers, dunder names, multiplication),
        # which matters because this renderer also serves code formats.
        content = re.sub(r'\*\*(.+?)\*\*', r'\1', content)
        content = re.sub(r'__(.+?)__', r'\1', content)
        content = re.sub(r'(?<![\w*])\*([^*\n]+)\*(?![\w*])', r'\1', content)
        content = re.sub(r'(?<![\w_])_([^_\n]+)_(?![\w_])', r'\1', content)
        # Clean up any HTML-like tags that might have slipped through
        content = re.sub(r'<[^>]+>', '', content)
        # Ensure proper line endings
        content = content.replace('\r\n', '\n').replace('\r', '\n')
        return content

View file

@ -3,7 +3,7 @@ import uuid
from typing import Dict, Any, List, Optional from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelUam import User, UserConnection from modules.datamodels.datamodelUam import User, UserConnection
from modules.datamodels.datamodelChat import ChatDocument, ChatMessage from modules.datamodels.datamodelChat import ChatDocument, ChatMessage
from modules.datamodels.datamodelChat import ExtractedContent from modules.datamodels.datamodelChat import ChatContentExtracted
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
from modules.services.serviceGeneration.subDocumentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData from modules.services.serviceGeneration.subDocumentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import get_utc_timestamp
@ -78,6 +78,12 @@ class WorkflowService:
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]: def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
"""Get ChatDocuments from a list of document references using all three formats.""" """Get ChatDocuments from a list of document references using all three formats."""
try: try:
# Get the current workflow from services (same pattern as setWorkflowContext)
workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
if not workflow:
logger.error("No workflow available for document list resolution")
return []
all_documents = [] all_documents = []
for doc_ref in documentList: for doc_ref in documentList:
if doc_ref.startswith("docItem:"): if doc_ref.startswith("docItem:"):
@ -86,7 +92,7 @@ class WorkflowService:
if len(parts) >= 2: if len(parts) >= 2:
doc_id = parts[1] doc_id = parts[1]
# Find the document by ID # Find the document by ID
for message in self.workflow.messages: for message in workflow.messages:
if message.documents: if message.documents:
for doc in message.documents: for doc in message.documents:
if doc.id == doc_id: if doc.id == doc_id:
@ -101,9 +107,15 @@ class WorkflowService:
# Format: docList:<messageId>:<label> # Format: docList:<messageId>:<label>
message_id = parts[1] message_id = parts[1]
label = parts[2] label = parts[2]
logger.debug(f"Looking for message with ID: {message_id} and label: {label}")
# Find the message by ID and get all its documents # Find the message by ID and get all its documents
for message in self.workflow.messages: message_found = False
for message in workflow.messages:
logger.debug(f"Checking message ID: {message.id} (looking for: {message_id})")
if str(message.id) == message_id: if str(message.id) == message_id:
message_found = True
logger.debug(f"Found message {message.id} with documentsLabel: {getattr(message, 'documentsLabel', 'None')}")
if message.documents: if message.documents:
doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')] doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
logger.debug(f"Found docList reference {doc_ref}: {len(message.documents)} documents - {doc_names}") logger.debug(f"Found docList reference {doc_ref}: {len(message.documents)} documents - {doc_names}")
@ -111,13 +123,16 @@ class WorkflowService:
else: else:
logger.debug(f"Found docList reference {doc_ref} but message has no documents") logger.debug(f"Found docList reference {doc_ref} but message has no documents")
break break
if not message_found:
logger.warning(f"Message with ID {message_id} not found in workflow. Available message IDs: {[str(msg.id) for msg in workflow.messages]}")
elif len(parts) >= 2: elif len(parts) >= 2:
# Format: docList:<label> - find message by documentsLabel # Format: docList:<label> - find message by documentsLabel
label = parts[1] label = parts[1]
logger.debug(f"Looking for message with documentsLabel: {label}") logger.debug(f"Looking for message with documentsLabel: {label}")
# Find messages with matching documentsLabel # Find messages with matching documentsLabel
matching_messages = [] matching_messages = []
for message in self.workflow.messages: for message in workflow.messages:
# Check both attribute and raw data for documentsLabel # Check both attribute and raw data for documentsLabel
msg_label = getattr(message, 'documentsLabel', None) msg_label = getattr(message, 'documentsLabel', None)
if msg_label == label: if msg_label == label:
@ -158,7 +173,7 @@ class WorkflowService:
# Find messages with matching documentsLabel (this is the correct way!) # Find messages with matching documentsLabel (this is the correct way!)
# In case of retries, we want the NEWEST message (most recent publishedAt) # In case of retries, we want the NEWEST message (most recent publishedAt)
matching_messages = [] matching_messages = []
for message in self.workflow.messages: for message in workflow.messages:
msg_documents_label = getattr(message, 'documentsLabel', '') msg_documents_label = getattr(message, 'documentsLabel', '')
# Check if this message's documentsLabel matches our reference # Check if this message's documentsLabel matches our reference
@ -187,7 +202,7 @@ class WorkflowService:
# Fallback: also check if any message has this documentsLabel as a prefix # Fallback: also check if any message has this documentsLabel as a prefix
logger.debug(f"Trying fallback search for messages with documentsLabel containing: {doc_ref}") logger.debug(f"Trying fallback search for messages with documentsLabel containing: {doc_ref}")
fallback_messages = [] fallback_messages = []
for message in self.workflow.messages: for message in workflow.messages:
msg_documents_label = getattr(message, 'documentsLabel', '') msg_documents_label = getattr(message, 'documentsLabel', '')
if msg_documents_label and msg_documents_label.startswith(doc_ref): if msg_documents_label and msg_documents_label.startswith(doc_ref):
fallback_messages.append(message) fallback_messages.append(message)
@ -422,24 +437,30 @@ class WorkflowService:
def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None): def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
"""Set current workflow context for document generation and routing""" """Set current workflow context for document generation and routing"""
try: try:
# Get the current workflow from services
workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
if not workflow:
logger.error("No workflow available for context setting")
return
# Prepare update data # Prepare update data
update_data = {} update_data = {}
if round_number is not None: if round_number is not None:
self.workflow.currentRound = round_number workflow.currentRound = round_number
update_data["currentRound"] = round_number update_data["currentRound"] = round_number
if task_number is not None: if task_number is not None:
self.workflow.currentTask = task_number workflow.currentTask = task_number
update_data["currentTask"] = task_number update_data["currentTask"] = task_number
if action_number is not None: if action_number is not None:
self.workflow.currentAction = action_number workflow.currentAction = action_number
update_data["currentAction"] = action_number update_data["currentAction"] = action_number
# Persist changes to database if any updates were made # Persist changes to database if any updates were made
if update_data: if update_data:
self.interfaceDbChat.updateWorkflow(self.workflow.id, update_data) self.interfaceDbChat.updateWorkflow(workflow.id, update_data)
logger.debug(f"Updated workflow context: Round {self.workflow.currentRound if hasattr(self.workflow, 'currentRound') else 'N/A'}, Task {self.workflow.currentTask if hasattr(self.workflow, 'currentTask') else 'N/A'}, Action {self.workflow.currentAction if hasattr(self.workflow, 'currentAction') else 'N/A'}") logger.debug(f"Updated workflow context: Round {workflow.currentRound if hasattr(workflow, 'currentRound') else 'N/A'}, Task {workflow.currentTask if hasattr(workflow, 'currentTask') else 'N/A'}, Action {workflow.currentAction if hasattr(workflow, 'currentAction') else 'N/A'}")
except Exception as e: except Exception as e:
logger.error(f"Error setting workflow context: {str(e)}") logger.error(f"Error setting workflow context: {str(e)}")
@ -467,3 +488,376 @@ class WorkflowService:
'workflowStatus': 'unknown', 'workflowStatus': 'unknown',
'workflowId': 'unknown' 'workflowId': 'unknown'
} }
    def createWorkflow(self, workflowData: Dict[str, Any]):
        """Create a new workflow by delegating to the chat interface.

        Logs and re-raises any failure from the DB layer.
        """
        try:
            return self.interfaceDbChat.createWorkflow(workflowData)
        except Exception as e:
            logger.error(f"Error creating workflow: {str(e)}")
            raise
    def updateWorkflow(self, workflowId: str, updateData: Dict[str, Any]):
        """Update workflow by delegating to the chat interface.

        Logs and re-raises any failure from the DB layer.
        """
        try:
            return self.interfaceDbChat.updateWorkflow(workflowId, updateData)
        except Exception as e:
            logger.error(f"Error updating workflow: {str(e)}")
            raise
    def updateWorkflowStats(self, workflowId: str, **kwargs):
        """Update workflow statistics by delegating to the chat interface.

        All keyword arguments are passed through unchanged; logs and
        re-raises any failure from the DB layer.
        """
        try:
            return self.interfaceDbChat.updateWorkflowStats(workflowId, **kwargs)
        except Exception as e:
            logger.error(f"Error updating workflow stats: {str(e)}")
            raise
    def getWorkflow(self, workflowId: str):
        """Get workflow by ID by delegating to the chat interface.

        Logs and re-raises any failure from the DB layer.
        """
        try:
            return self.interfaceDbChat.getWorkflow(workflowId)
        except Exception as e:
            logger.error(f"Error getting workflow: {str(e)}")
            raise
    def createMessage(self, messageData: Dict[str, Any]):
        """Create a new message by delegating to the chat interface.

        Logs and re-raises any failure from the DB layer.
        """
        try:
            return self.interfaceDbChat.createMessage(messageData)
        except Exception as e:
            logger.error(f"Error creating message: {str(e)}")
            raise
    def updateMessage(self, messageId: str, messageData: Dict[str, Any]):
        """Update message by delegating to the chat interface.

        Logs and re-raises any failure from the DB layer.
        """
        try:
            return self.interfaceDbChat.updateMessage(messageId, messageData)
        except Exception as e:
            logger.error(f"Error updating message: {str(e)}")
            raise
def createLog(self, logData: Dict[str, Any]):
    """Write a new log entry.

    Delegates to the chat database interface; failures are logged and
    propagated.
    """
    try:
        return self.interfaceDbChat.createLog(logData)
    except Exception as err:
        logger.error(f"Error creating log: {str(err)}")
        raise
def getDocumentCount(self) -> str:
    """Summarize how many documents are attached across all workflow messages.

    Uses the service center's currentWorkflow when set, otherwise this
    service's own workflow. Returns a human-readable count string used
    for task planning; falls back to "No documents available" when there
    is no workflow, no documents, or an error occurs.
    """
    try:
        # Prefer the service center's active workflow, fall back to ours.
        workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
        if not workflow:
            return "No documents available"
        # Count documents over every message; messages without a
        # `documents` attribute (or with an empty list) contribute nothing.
        total_docs = sum(
            len(message.documents)
            for message in workflow.messages
            if getattr(message, 'documents', None)
        )
        if total_docs == 0:
            return "No documents available"
        return f"{total_docs} document(s) available"
    except Exception as e:
        logger.error(f"Error getting document count: {str(e)}")
        return "No documents available"
def getWorkflowHistoryContext(self) -> str:
    """Describe documents from previous workflow rounds for task planning.

    A message with status "first" marks a round boundary; without one
    there is no history. For each historical exchange a
    docList:<messageId>:<label> reference line is emitted (falling back
    to docList:<label> when the owning message cannot be located).
    Returns "No previous round context available" when there is no
    workflow, no previous rounds, no history documents, or on error.
    """
    try:
        # Prefer the service center's active workflow, fall back to ours.
        workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
        if not workflow:
            return "No previous round context available"
        # A "first" status message marks a round boundary; no such
        # message means this is still the first round (no history).
        has_previous_rounds = any(
            getattr(message, 'status', None) == "first"
            for message in workflow.messages
        )
        if not has_previous_rounds:
            return "No previous round context available"
        # Reuse the shared exchange grouping (chat vs. history).
        document_list = self._getDocumentReferenceList(workflow)
        if not document_list["history"]:
            return "No previous round context available"
        context = "Previous workflow rounds contain documents:\n"
        for exchange in document_list["history"]:
            # Locate the message that owns this exchange by its label so the
            # reference can carry the message id.
            message_id = next(
                (message.id for message in workflow.messages
                 if getattr(message, 'documentsLabel', None) == exchange['documentsLabel']),
                None,
            )
            if message_id:
                doc_list_ref = f"docList:{message_id}:{exchange['documentsLabel']}"
            else:
                doc_list_ref = f"docList:{exchange['documentsLabel']}"
            context += f"- {doc_list_ref} ({len(exchange['documents'])} documents)\n"
        return context
    except Exception as e:
        logger.error(f"Error getting workflow history context: {str(e)}")
        return "No previous round context available"
def getAvailableDocuments(self, workflow) -> str:
    """Format all workflow documents as docList/docItem references for AI prompts.

    Builds the technical "AVAILABLE DOCUMENTS" context used by action
    planning: one section for the current round and one for workflow
    history, each listing docList references with their contained docItem
    entries. Returns a fallback string when there is no workflow, no
    documents, or on error.
    """
    try:
        if not workflow or not hasattr(workflow, 'messages'):
            return "No documents available"
        # Same exchange grouping (chat vs. history) the legacy system used.
        document_list = self._getDocumentReferenceList(workflow)
        context = "AVAILABLE DOCUMENTS:\n\n"
        # The current-round and history sections share identical formatting,
        # so render both through one loop instead of two duplicated blocks.
        sections = (
            ("CURRENT ROUND DOCUMENTS:\n", document_list["chat"]),
            ("WORKFLOW HISTORY DOCUMENTS:\n", document_list["history"]),
        )
        for header, exchanges in sections:
            if not exchanges:
                continue
            context += header
            for exchange in exchanges:
                # Prefer the docList:<messageId>:<label> form; fall back to
                # docList:<label> when the owning message cannot be located.
                message_id = next(
                    (message.id for message in workflow.messages
                     if getattr(message, 'documentsLabel', None) == exchange['documentsLabel']),
                    None,
                )
                if message_id:
                    doc_list_ref = f"docList:{message_id}:{exchange['documentsLabel']}"
                else:
                    doc_list_ref = f"docList:{exchange['documentsLabel']}"
                context += f"- {doc_list_ref} contains:\n"
                for doc_ref in exchange['documents']:
                    # Normalize every entry to a docItem: reference.
                    if doc_ref.startswith("docItem:"):
                        context += f" - {doc_ref}\n"
                    else:
                        context += f" - docItem:{doc_ref}\n"
                context += "\n"
        if not document_list["chat"] and not document_list["history"]:
            context += "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n"
        return context
    except Exception as e:
        logger.error(f"Error getting available documents: {str(e)}")
        return "NO DOCUMENTS AVAILABLE - Error generating document context."
def _getDocumentReferenceList(self, workflow) -> Dict[str, List]:
    """Group each message's documents into labeled exchanges, split into
    the current round ("chat") and earlier rounds ("history").

    Messages are walked newest-to-oldest; a message with status "first"
    marks the round boundary — it is still counted as part of the current
    round, and everything older goes to history. Each exchange is a dict
    with 'documentsLabel' and a list of docItem reference strings. Both
    lists are sorted most-recent-first by message sequence number.
    """
    # Collect all documents first so their file attributes (name, size,
    # mime type) can be refreshed in one pass before references are built.
    all_documents = []
    for message in workflow.messages:
        if message.documents:
            all_documents.extend(message.documents)
    # Refresh file attributes for all documents
    if all_documents:
        self._refreshDocumentFileAttributes(all_documents)
    chat_exchanges = []
    history_exchanges = []
    # Process messages in reverse order; "first" marks the round boundary.
    in_current_round = True
    for message in reversed(workflow.messages):
        is_first = message.status == "first" if hasattr(message, 'status') else False
        # Build a document exchange only when the message carries documents.
        doc_exchange = None
        if message.documents:
            if message.actionId and message.documentsLabel:
                # Message has an explicit label: validate and reuse it so
                # references stay consistent with the message itself.
                validated_label = self._validateDocumentLabelConsistency(message)
                # Use the message's actual documentsLabel
                doc_refs = []
                for doc in message.documents:
                    doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
                    doc_refs.append(doc_ref)
                doc_exchange = {
                    'documentsLabel': validated_label,
                    'documents': doc_refs
                }
            else:
                # No explicit label: synthesize one from the message's
                # round/task/action position in the workflow.
                doc_refs = []
                for doc in message.documents:
                    doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
                    doc_refs.append(doc_ref)
                if doc_refs:
                    # Create a label based on message context
                    context_prefix = self._generateWorkflowContextPrefix(message)
                    context_label = f"{context_prefix}_context"
                    doc_exchange = {
                        'documentsLabel': context_label,
                        'documents': doc_refs
                    }
        # Append to appropriate container based on boundary
        if doc_exchange:
            if in_current_round:
                chat_exchanges.append(doc_exchange)
            else:
                history_exchanges.append(doc_exchange)
        # Flip the boundary AFTER handling the "first" message, so the
        # "first" message itself is still included in the current round.
        if in_current_round and is_first:
            in_current_round = False
    # Sort by recency: most recent first, then current round, then earlier rounds
    # Sort chat exchanges by message sequence number (most recent first)
    chat_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x, workflow), reverse=True)
    # Sort history exchanges by message sequence number (most recent first)
    history_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x, workflow), reverse=True)
    return {
        "chat": chat_exchanges,
        "history": history_exchanges
    }
def _refreshDocumentFileAttributes(self, documents) -> None:
"""Update file attributes (fileName, fileSize, mimeType) for documents"""
for doc in documents:
try:
# Use the proper WorkflowService method to get file info
file_info = self.getFileInfo(doc.fileId)
if file_info:
doc.fileName = file_info.get("fileName", doc.fileName)
doc.fileSize = file_info.get("size", doc.fileSize)
doc.mimeType = file_info.get("mimeType", doc.mimeType)
else:
logger.warning(f"File not found for document {doc.id}, fileId: {doc.fileId}")
except Exception as e:
logger.error(f"Error refreshing file attributes for document {doc.id}: {e}")
def _generateWorkflowContextPrefix(self, message) -> str:
"""Generate workflow context prefix: round{num}_task{num}_action{num}"""
round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1
task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0
action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0
return f"round{round_num}_task{task_num}_action{action_num}"
def _getDocumentReferenceFromChatDocument(self, document, message) -> str:
"""Get document reference using document ID and filename."""
try:
# Use document ID and filename for simple reference
return f"docItem:{document.id}:{document.fileName}"
except Exception as e:
logger.error(f"Critical error creating document reference for document {document.id}: {str(e)}")
# Re-raise the error to prevent workflow from continuing with invalid data
raise
def _getMessageSequenceForExchange(self, exchange, workflow) -> int:
    """Resolve an exchange to its owning message's sequence number.

    Used as the sort key when ordering exchanges by recency. The owning
    message is found via the exchange's FIRST document reference:
    'docItem:<docId>:...' is matched against message documents, while
    'docList:<messageId>:...' is matched against message ids. Returns 0
    when no owner is found, the message has no sequenceNr, or on error.
    """
    try:
        # Extract the owning message from the first document reference.
        if exchange['documents'] and len(exchange['documents']) > 0:
            first_doc_ref = exchange['documents'][0]
            if first_doc_ref.startswith("docItem:"):
                # docItem:<id>:<label> - extract the document id.
                parts = first_doc_ref.split(':')
                if len(parts) >= 2:
                    doc_id = parts[1]
                    # Find the message containing this document.
                    for message in workflow.messages:
                        if message.documents:
                            for doc in message.documents:
                                if doc.id == doc_id:
                                    return message.sequenceNr if hasattr(message, 'sequenceNr') else 0
            elif first_doc_ref.startswith("docList:"):
                # docList:<message_id>:<label> - extract the message id.
                parts = first_doc_ref.split(':')
                if len(parts) >= 2:
                    message_id = parts[1]
                    # Find the message by id (string-compared, since the
                    # reference is always textual).
                    for message in workflow.messages:
                        if str(message.id) == message_id:
                            return message.sequenceNr if hasattr(message, 'sequenceNr') else 0
        # No owner found: sort such exchanges last.
        return 0
    except Exception as e:
        logger.error(f"Error getting message sequence for exchange: {str(e)}")
        return 0
def _validateDocumentLabelConsistency(self, message) -> str:
"""Validate that the document label used for references matches the message's actual label"""
if not hasattr(message, 'documentsLabel') or not message.documentsLabel:
return None
# Simply return the message's actual documentsLabel - no correction, just validation
return message.documentsLabel
def getConnectionReferenceList(self) -> List[str]:
    """List the user's connections as reference strings.

    Each entry has the shape 'connection:{authority}:{username}:{id} [...]'
    expected by getUserConnectionFromConnectionReference(). Returns an
    empty list when the service center lacks a database interface or
    user, when there are no connections, or on error.
    """
    try:
        center = self.serviceCenter
        # Both the app-database interface and a user are required.
        if not (hasattr(center, 'interfaceDbApp') and hasattr(center, 'user')):
            return []
        connections = center.interfaceDbApp.getUserConnections(center.user.id)
        if not connections:
            return []
        return [self.getConnectionReferenceFromUserConnection(conn) for conn in connections]
    except Exception as err:
        logger.error(f"Error getting connection reference list: {str(err)}")
        return []

View file

@ -1,226 +0,0 @@
import asyncio
import os
import sys
from typing import List, Dict, Any
# Ensure relative imports work when running directly
CURRENT_DIR = os.path.dirname(__file__)
GATEWAY_DIR = os.path.dirname(os.path.dirname(CURRENT_DIR))
if GATEWAY_DIR not in sys.path:
sys.path.append(GATEWAY_DIR)
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
from modules.services.serviceGeneration.mainServiceGeneration import DocumentGenerationService
from modules.datamodels.datamodelWorkflow import ActionResult, ActionDocument
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
from modules.services.serviceAi.mainServiceAi import AiService
TESTDATA_DIR = os.path.join(GATEWAY_DIR, "testdata")
def _read_test_files() -> List[Dict[str, Any]]:
    """Load every regular file in TESTDATA_DIR as an upload-style dict.

    Each entry carries id/fileName (the basename), raw bytes, and a MIME
    type guessed from the extension. Unreadable files are skipped
    silently (best-effort loader for test data).
    """
    loaded: List[Dict[str, Any]] = []
    for entry in os.listdir(TESTDATA_DIR):
        full_path = os.path.join(TESTDATA_DIR, entry)
        if not os.path.isfile(full_path):
            continue
        try:
            with open(full_path, "rb") as handle:
                payload = handle.read()
        except Exception:
            # Best-effort: skip anything that cannot be read.
            continue
        loaded.append({
            "id": entry,
            "bytes": payload,
            "fileName": entry,
            "mimeType": _guess_mime(entry),
        })
    return loaded
def _guess_mime(name: str) -> str:
lower = name.lower()
if lower.endswith(".pdf"):
return "application/pdf"
if lower.endswith(".xlsx"):
return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
if lower.endswith(".jpg") or lower.endswith(".jpeg"):
return "image/jpeg"
if lower.endswith(".png"):
return "image/png"
return "application/octet-stream"
def run_extraction_1000_bytes() -> None:
    """Exercise ExtractionService with a 1000-byte pooled-size cap.

    Feeds every file from TESTDATA_DIR through extractContent with
    chunking enabled and prints the per-document part count and pooled
    byte total.
    """
    service = ExtractionService()
    documents = _read_test_files()
    extraction_options = {
        # cap total pooled size per document set
        "maxSize": 1000,
        # allow chunking to respect the cap across parts
        "chunkAllowed": True,
        # chunk sizes for different content types to help fit under the cap
        "textChunkSize": 500,
        "tableChunkSize": 500,
        "structureChunkSize": 500,
        # simple merge strategy if supported
        "mergeStrategy": {},
    }
    extracted = service.extractContent(documents, extraction_options)
    print("[extraction] documents:", len(documents), "results:", len(extracted))
    for index, content in enumerate(extracted):
        pooled = sum(int(part.metadata.get("size", 0) or 0) for part in content.parts)
        print(f" - doc[{index}] parts={len(content.parts)} pooledBytes={pooled}")
async def main():
    """Run the service smoke tests sequentially: extraction, generation, AI."""
    print("=== serviceExtraction: compress to 1000 bytes ===")
    run_extraction_1000_bytes()
    print("\n=== serviceGeneration: create ActionResult and write output to testdata ===")
    await run_generation_write_file()
    print("\n=== serviceAi: planning call + image + pdf extraction ===")
    await run_ai_tests()


# NOTE(review): this guard executes as soon as the interpreter reaches it,
# but run_generation_write_file and run_ai_tests are defined further down
# the file — running this module as a script raises NameError inside main().
# The guard (or the later definitions) should be moved so all functions are
# defined before it runs.
if __name__ == "__main__":
    asyncio.run(main())
async def run_generation_write_file() -> None:
    """Smoke-test DocumentGenerationService: build an ActionResult with one
    text document and write the generated output into TESTDATA_DIR.
    """
    # Minimal stubs for interfaces expected by DocumentGenerationService
    class _FileItem:
        # In-memory stand-in for a stored file record.
        def __init__(self, file_id: str, file_name: str, mime_type: str, content: bytes):
            self.id = file_id
            self.fileName = file_name
            self.mimeType = mime_type
            self.fileSize = len(content)

    class _ComponentInterface:
        # Fake component DB: keeps file records in a dict and persists
        # file data to disk instead of a database.
        def __init__(self):
            self._files = {}

        def createFile(self, name: str, mimeType: str, content: bytes):
            fid = f"test_{len(self._files)+1}"
            item = _FileItem(fid, name, mimeType, content)
            self._files[fid] = item
            return item

        def createFileData(self, fileId: str, content: bytes):
            # Persist into testdata directory as requested
            item = self._files[fileId]
            out_path = os.path.join(TESTDATA_DIR, f"output_{fileId}_{item.fileName}")
            with open(out_path, "wb") as f:
                f.write(content)

        def getFile(self, fileId: str):
            return self._files.get(fileId)

    class _ServiceCenter:
        # Minimal service center exposing just the attributes the
        # generation service reads (component interface + workflow state).
        def __init__(self, comp):
            self.interfaceDbComponent = comp
            self.interfaceDbChat = None
            self.workflow = type("_Wf", (), {"id": "wf_test", "currentRound": 1, "currentTask": 1, "currentAction": 1, "status": "running", "totalTasks": 1, "totalActions": 1})()

    component = _ComponentInterface()
    center = _ServiceCenter(component)
    gen = DocumentGenerationService(center)

    # Build a fake action and ActionResult with a small text document
    class _Action:
        def __init__(self):
            self.id = "action_test"
            self.execMethod = "document"
            self.execAction = "generate"
            self.execParameters = {}
            self.execResultLabel = "round1_task1_action1_results"

    action = _Action()
    content = "This is a generated test file from serviceGeneration test."
    action_doc = ActionDocument(documentName="test_generated.txt", documentData=content, mimeType="text/plain")
    action_result = ActionResult(success=True, documents=[action_doc])
    docs = gen.createDocumentsFromActionResult(action_result, action, center.workflow, message_id="msg_test")
    print("[generation] created documents:", len(docs))
async def run_ai_tests() -> None:
    """Smoke-test AiService: a planning call, image analysis on a test JPEG,
    and PDF content extraction. Image/PDF steps are skipped when the test
    files are missing, and their errors are printed rather than raised.
    """
    # Create AiService instance (uses internal default model registry; no external creds required for this test)
    ai = await AiService.create()
    # Planning AI call (like in handlingTasks.generateTaskPlan)
    plan_options = AiCallOptions(
        operationType=OperationType.GENERATE_PLAN,
        priority=Priority.QUALITY,
        compressPrompt=False,
        compressContext=False,
        processingMode=ProcessingMode.DETAILED,
        maxCost=0.05,
        maxProcessingTime=10,
    )
    plan_prompt = """
You are a planning assistant. Return a compact JSON with fields: tasks:[{id, objective, success_criteria:[]}], languageUserDetected:"en".
Create exactly one simple task id:"task_1" objective:"Test planning" success_criteria:["done"].
""".strip()
    plan_resp = await ai.callAi(prompt=plan_prompt, placeholders=None, options=plan_options)
    print("[ai] planning response length:", len(plan_resp) if plan_resp else 0)
    # Image content extraction prompt using test JPEG
    img_path = os.path.join(TESTDATA_DIR, "00Untitled.jpg")
    img_resp = None
    if os.path.exists(img_path):
        try:
            with open(img_path, "rb") as f:
                img_bytes = f.read()
            img_options = AiCallOptions(
                operationType=OperationType.ANALYSE_CONTENT,
                priority=Priority.BALANCED,
                compressPrompt=True,
                compressContext=False,
                processingMode=ProcessingMode.ADVANCED,
                maxCost=0.02,
                maxProcessingTime=10,
            )
            img_resp = await ai.callAiImage(
                prompt="Describe the content of this image succinctly.",
                imageData=img_bytes,
                mimeType="image/jpeg",
                options=img_options,
            )
            print("[ai] image analysis response length:", len(img_resp) if img_resp else 0)
        except Exception as e:
            # Best-effort smoke test: report and continue to the PDF step.
            print("[ai] image analysis error:", str(e))
    else:
        print("[ai] image test file not found; skipping")
    # PDF extraction prompt: emulate text call with document context built via ExtractionService
    pdf_path = os.path.join(TESTDATA_DIR, "diagramm_komponenten.pdf")
    if os.path.exists(pdf_path):
        try:
            # Build a minimal ChatDocument-like shim that AiService._callAiText expects via extraction
            class _Doc:
                def __init__(self, file_path: str, mime: str):
                    self.id = "doc_pdf"
                    self.fileName = os.path.basename(file_path)
                    self.mimeType = mime
                    with open(file_path, "rb") as f:
                        self.fileData = f.read()
            pdf_doc = _Doc(pdf_path, "application/pdf")
            pdf_options = AiCallOptions(
                operationType=OperationType.ANALYSE_CONTENT,
                priority=Priority.BALANCED,
                compressPrompt=True,
                compressContext=True,
                processingMode=ProcessingMode.ADVANCED,
                maxContextBytes=1000,
                chunkAllowed=True,
                maxCost=0.02,
                maxProcessingTime=10,
            )
            pdf_prompt = "Extract key information from the attached PDF."
            pdf_resp = await ai.callAi(prompt=pdf_prompt, documents=[pdf_doc], options=pdf_options)
            print("[ai] pdf extraction response length:", len(pdf_resp) if pdf_resp else 0)
        except Exception as e:
            print("[ai] pdf extraction error:", str(e))
    else:
        print("[ai] pdf test file not found; skipping")

View file

@ -324,7 +324,7 @@ def getModelClasses() -> Dict[str, Type[BaseModel]]:
os.path.dirname(os.path.dirname(__file__)), "interfaces" os.path.dirname(os.path.dirname(__file__)), "interfaces"
) )
# Find all model files # Find all model files in interfaces directory
for fileName in os.listdir(interfaces_dir): for fileName in os.listdir(interfaces_dir):
if fileName.endswith("Model.py"): if fileName.endswith("Model.py"):
# Convert fileName to module name (e.g., gatewayModel.py -> gatewayModel) # Convert fileName to module name (e.g., gatewayModel.py -> gatewayModel)
@ -342,6 +342,29 @@ def getModelClasses() -> Dict[str, Type[BaseModel]]:
): ):
modelClasses[name] = obj modelClasses[name] = obj
# Also get models from datamodels directory
datamodels_dir = os.path.join(
os.path.dirname(os.path.dirname(__file__)), "datamodels"
)
# Find all model files in datamodels directory
for fileName in os.listdir(datamodels_dir):
if fileName.startswith("datamodel") and fileName.endswith(".py"):
# Convert fileName to module name (e.g., datamodelUtils.py -> datamodelUtils)
module_name = fileName[:-3]
# Import the module dynamically
module = importlib.import_module(f"modules.datamodels.{module_name}")
# Get all classes from the module
for name, obj in inspect.getmembers(module):
if (
inspect.isclass(obj)
and issubclass(obj, BaseModel)
and obj != BaseModel
):
modelClasses[name] = obj
return modelClasses return modelClasses

View file

@ -8,8 +8,10 @@ from typing import Dict, Any, List, Optional
from datetime import datetime, UTC from datetime import datetime, UTC
from modules.workflows.methods.methodBase import MethodBase, action from modules.workflows.methods.methodBase import MethodBase, action
from modules.datamodels.datamodelWorkflow import ActionResult from modules.datamodels.datamodelChat import ActionResult
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, Priority from modules.datamodels.datamodelAi import AiCallOptions, OperationType, Priority
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelWeb import WebResearchRequest, WebResearchOptions
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -28,26 +30,36 @@ class MethodAi(MethodBase):
@action @action
async def process(self, parameters: Dict[str, Any]) -> ActionResult: async def process(self, parameters: Dict[str, Any]) -> ActionResult:
""" """
Perform an AI call for any type of task with optional document references GENERAL:
- Purpose: AI-based analysis and content generation with optional document context.
- Input requirements: aiPrompt (required); optional documentList, resultType, processingMode, includeMetadata, operationType, priority, maxCost, maxProcessingTime, requiredTags.
- Output format: Single or multiple documents in requested format.
Parameters: Parameters:
aiPrompt (str): The AI prompt for processing - aiPrompt (str, required): Instruction for the AI.
documentList (list, optional): List of document references to include in context - documentList (list, optional): Document reference(s) for context.
expectedDocumentFormat (str, optional): Expected document output format with extension, mimeType, description - resultType (str, optional): Output extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png). Default: txt.
processingMode (str, optional): Processing mode - use 'basic', 'advanced', or 'detailed' (defaults to 'basic') - processingMode (str, optional): basic | advanced | detailed. Default: basic.
includeMetadata (bool, optional): Whether to include metadata (default: True) - includeMetadata (bool, optional): Include metadata when available. Default: True.
operationType (str, optional): Operation type - use 'general', 'generate_plan', 'analyse_content', 'generate_content', 'web_research', 'image_analysis', or 'image_generation' - operationType (str, optional): general | generate_plan | analyse_content | generate_content | web_research | image_analysis | image_generation. Default: general.
priority (str, optional): Priority level - use 'speed', 'quality', 'cost', or 'balanced' - priority (str, optional): speed | quality | cost | balanced. Default: balanced.
maxCost (float, optional): Maximum cost budget for the AI call - maxCost (float, optional): Cost limit.
maxProcessingTime (int, optional): Maximum processing time in seconds - maxProcessingTime (int, optional): Time limit in seconds.
requiredTags (list, optional): Required model tags - use 'text', 'chat', 'reasoning', 'analysis', 'image', 'vision', 'web', 'search', etc. - requiredTags (list, optional): Capability tags (e.g., text, chat, reasoning, analysis, image, vision, web, search).
""" """
try: try:
# Debug logging to see what parameters are received
logger.info(f"MethodAi.process received parameters: {parameters}")
logger.info(f"Parameters type: {type(parameters)}")
logger.info(f"Parameters keys: {list(parameters.keys()) if isinstance(parameters, dict) else 'Not a dict'}")
aiPrompt = parameters.get("aiPrompt") aiPrompt = parameters.get("aiPrompt")
logger.info(f"aiPrompt extracted: '{aiPrompt}' (type: {type(aiPrompt)})")
documentList = parameters.get("documentList", []) documentList = parameters.get("documentList", [])
if isinstance(documentList, str): if isinstance(documentList, str):
documentList = [documentList] documentList = [documentList]
expectedDocumentFormat = parameters.get("expectedDocumentFormat", "") resultType = parameters.get("resultType", "txt")
processingMode = parameters.get("processingMode", "basic") processingMode = parameters.get("processingMode", "basic")
includeMetadata = parameters.get("includeMetadata", True) includeMetadata = parameters.get("includeMetadata", True)
operationType = parameters.get("operationType", "general") operationType = parameters.get("operationType", "general")
@ -57,102 +69,23 @@ class MethodAi(MethodBase):
requiredTags = parameters.get("requiredTags") requiredTags = parameters.get("requiredTags")
if not aiPrompt: if not aiPrompt:
logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
return ActionResult.isFailure( return ActionResult.isFailure(
error="AI prompt is required" error="AI prompt is required"
) )
# Determine output format first (needed for context building) # Determine output extension and default MIME type without duplicating service logic
output_extension = ".txt" # Default normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
output_mime_type = "text/plain" # Default output_extension = f".{normalized_result_type}"
output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
logger.info(f"Using result type: {resultType} -> {output_extension}")
if expectedDocumentFormat: # Get ChatDocuments for AI service - let AI service handle all document processing
output_extension = expected_format.get("extension", ".txt") chatDocuments = []
output_mime_type = expected_format.get("mimeType", "text/plain")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
# Build context from documents if provided
context = ""
if documentList: if documentList:
chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList) chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
if chatDocuments: if chatDocuments:
context_parts = [] logger.info(f"Prepared {len(chatDocuments)} documents for AI processing")
# Build batch payload for extraction
batch_docs = []
for doc in chatDocuments:
try:
fileBytes = self.services.workflow.getFileData(doc.fileId) if hasattr(doc, 'fileId') else None
except Exception:
fileBytes = None
batch_docs.append({
"id": getattr(doc, 'id', None),
"bytes": fileBytes or b"",
"fileName": getattr(doc, 'fileName', 'unknown'),
"mimeType": getattr(doc, 'mimeType', None) or "application/octet-stream"
})
extraction_prompt = (
f"Extract content for AI task context. Task: {aiPrompt}. Mode: {processingMode}."
)
try:
extracted_list = await self.services.extraction.extractContentFromDocuments(
prompt=extraction_prompt,
documents=batch_docs,
options={"ai": {"enabled": False}, "mergeStrategy": {}}
)
except Exception:
extracted_list = []
# Helper to aggregate readable text from parts
def _partsToText(parts) -> str:
lines: List[str] = []
for p in (parts or []):
try:
if getattr(p, 'typeGroup', '') in ("text", "table", "structure") and getattr(p, 'data', None):
lines.append(p.data)
except Exception:
continue
return "\n\n".join(lines)
for i, doc in enumerate(chatDocuments):
file_info = self.services.workflow.getFileInfo(doc.fileId)
content = ""
try:
ec = extracted_list[i] if i < len(extracted_list) else None
if ec:
content = _partsToText(getattr(ec, 'parts', []))
except Exception:
content = ""
if content.strip():
metadata_info = ""
if file_info and includeMetadata:
metadata_info = f" (Size: {file_info.get('fileSize', 'unknown')}, Type: {file_info.get('mimeType', 'unknown')})"
base_length = 5000 if processingMode == "detailed" else 3000 if processingMode == "advanced" else 2000
if processingMode == "detailed":
context_parts.append(
f"Document: {doc.fileName}{metadata_info}\nRelevance to AI Task: This document contains content directly related to '{aiPrompt[:100]}...'\nContent:\n{content[:base_length]}..."
)
else:
context_parts.append(
f"Document: {doc.fileName}{metadata_info}\nContent:\n{content[:base_length]}..."
)
else:
context_parts.append(f"Document: {doc.fileName} [No readable text content - binary file]")
if context_parts:
context_header = f"""
=== DOCUMENT CONTEXT FOR AI PROCESSING ===
AI Task: {aiPrompt[:100]}...
Processing Mode: {processingMode}
Expected Output Format: {output_extension.upper()}
Total Documents: {len(chatDocuments)}
The following documents contain content relevant to your task.
Use this information to provide the most accurate and helpful response.
================================================
"""
context = context_header + "\n\n" + "\n\n".join(context_parts)
logger.info(f"Included {len(chatDocuments)} documents in AI context with task-specific extraction")
# Build enhanced prompt # Build enhanced prompt
enhanced_prompt = aiPrompt enhanced_prompt = aiPrompt
@ -163,52 +96,14 @@ class MethodAi(MethodBase):
elif processingMode == "advanced": elif processingMode == "advanced":
enhanced_prompt += "\n\nPlease provide an advanced response with deep insights." enhanced_prompt += "\n\nPlease provide an advanced response with deep insights."
# Add custom instructions if provided # Note: customInstructions parameter was removed as it's not defined in the method signature
if customInstructions:
enhanced_prompt += f"\n\nAdditional Instructions: {customInstructions}"
# Add format-specific instructions only if non-text format is requested # Add format guidance to prompt
if output_extension != ".txt": if normalized_result_type != "txt":
if output_extension == ".csv": enhanced_prompt += f"\n\nPlease deliver the result in {normalized_result_type.upper()} format. Ensure the output follows the proper {normalized_result_type.upper()} syntax and structure."
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows."
elif output_extension == ".json":
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure JSON data without any markdown formatting, code blocks, or additional text. Output only the JSON content."
elif output_extension == ".xml":
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure XML data without any markdown formatting, code blocks, or additional text. Output only the XML content."
else:
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure {output_extension.upper()} data without any markdown formatting, code blocks, or additional text."
# Call appropriate AI service based on processing mode # Build options and delegate document handling to AI/Extraction/Generation services
logger.info(f"Executing AI call with mode: {processingMode}, prompt length: {len(enhanced_prompt)}")
if context:
logger.info(f"Including context from {len(documentList)} documents")
# Encourage longer, structured outputs with a min-length hint
min_tokens_hint = "\n\nPlease ensure the response is substantial and complete."
call_prompt = enhanced_prompt + min_tokens_hint
# Centralized AI call with optional document context
documents = []
try:
if documentList:
for d in (chatDocuments or []):
try:
file_data = self.services.workflow.getFileData(d.fileId)
documents.append(
ChatDocument(
fileData=file_data,
fileName=d.fileName,
mimeType=d.mimeType
)
)
except Exception:
continue
except Exception:
documents = None
output_format = output_extension.replace('.', '') or 'txt' output_format = output_extension.replace('.', '') or 'txt'
# Build options using new AiCallOptions format
options = AiCallOptions( options = AiCallOptions(
operationType=operationType, operationType=operationType,
priority=priority, priority=priority,
@ -221,76 +116,240 @@ class MethodAi(MethodBase):
maxProcessingTime=maxProcessingTime, maxProcessingTime=maxProcessingTime,
requiredTags=requiredTags requiredTags=requiredTags
) )
supported_generation_formats = {"html", "pdf", "docx", "txt", "md", "json", "csv", "xlsx"}
output_format_arg = output_format if output_format in supported_generation_formats else None
result = await self.services.ai.callAi( result = await self.services.ai.callAi(
prompt=call_prompt, prompt=enhanced_prompt,
documents=documents or None, documents=chatDocuments if chatDocuments else None,
options=options options=options,
outputFormat=output_format_arg
) )
# If expected JSON and too short/not JSON, retry with stricter JSON guardrails from modules.datamodels.datamodelChat import ActionDocument
if output_extension == ".json":
import json
cleaned = (result or "").strip()
if cleaned.startswith('```json'):
cleaned = cleaned[7:]
if cleaned.endswith('```'):
cleaned = cleaned[:-3]
cleaned = cleaned.strip()
needs_retry = False
try:
parsed = json.loads(cleaned)
# Heuristic: small dict -> possibly underfilled
if isinstance(parsed, dict) and len(parsed.keys()) <= 2:
needs_retry = True
except Exception:
needs_retry = True
if needs_retry: if isinstance(result, dict) and isinstance(result.get("documents"), list):
guardrail_prompt = ( action_documents = []
enhanced_prompt for d in result["documents"]:
+ "\n\nCRITICAL: Return ONLY valid JSON, no markdown, no code fences. " action_documents.append(ActionDocument(
"Include all requested fields with detailed content." documentName=d.get("documentName"),
) documentData=d.get("documentData"),
try: mimeType=d.get("mimeType") or output_mime_type
result = await self.services.ai.callAi( ))
prompt=guardrail_prompt, return ActionResult.isSuccess(documents=action_documents)
documents=context or None,
options=AiCallOptions(
operationType=OperationType.GENERATE_CONTENT,
priority=Priority.QUALITY,
compressPrompt=False,
compressContext=True,
processDocumentsIndividually=True,
processingMode="detailed",
resultFormat="json",
maxCost=0.03,
maxProcessingTime=30
)
)
except Exception:
result = cleaned # fallback to first attempt
# Create result document
fileName = f"ai_{processingMode}_{self._format_timestamp_for_filename()}{output_extension}"
extension = output_extension.lstrip('.')
# Return result in the standard ActionResult format meaningful_name = self._generateMeaningfulFileName(
return ActionResult.isSuccess( base_name="ai",
documents=[{ extension=extension,
"documentName": fileName, action_name="result"
"documentData": {
"result": result,
"fileName": fileName,
"processedDocuments": len(documentList) if documentList else 0
},
"mimeType": output_mime_type
}]
) )
action_document = ActionDocument(
documentName=meaningful_name,
documentData=result,
mimeType=output_mime_type
)
return ActionResult.isSuccess(documents=[action_document])
except Exception as e: except Exception as e:
logger.error(f"Error in AI processing: {str(e)}") logger.error(f"Error in AI processing: {str(e)}")
return ActionResult.isFailure( return ActionResult.isFailure(
error=str(e) error=str(e)
) )
@action
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    GENERAL:
    - Purpose: Web research and information gathering with basic analysis and sources.
    - Input requirements: user_prompt (required); optional urls, max_results, max_pages, search_depth, extract_depth, pages_search_depth, country, time_range, topic, language.
    - Output format: JSON with results and sources.
    Parameters:
    - user_prompt (str, required): Research question or topic.
    - urls (list, optional): Specific URLs to crawl.
    - max_results (int, optional): Max search results. Default: 10.
    - max_pages (int, optional): Max pages to crawl per site. Default: 10.
    - search_depth (str, optional): basic | advanced. Default: basic.
    - extract_depth (str, optional): basic | advanced. Default: advanced.
    - pages_search_depth (int, optional): Crawl depth level. Default: 2.
    - country (str, optional): Country code for bias.
    - time_range (str, optional): d | w | m | y.
    - topic (str, optional): general | news | academic.
    - language (str, optional): Language code (e.g., de, en, fr).
    """
    try:
        # Guard clause: a research question is the only hard requirement.
        query = parameters.get("user_prompt")
        if not query:
            return ActionResult.isFailure(
                error="Search query is required"
            )

        target_urls = parameters.get("urls")
        result_limit = parameters.get("max_results", 10)
        page_limit = parameters.get("max_pages", 10)

        # Tuning knobs are forwarded verbatim to the research service.
        research_options = WebResearchOptions(
            max_pages=page_limit,
            search_depth=parameters.get("search_depth", "basic"),
            extract_depth=parameters.get("extract_depth", "advanced"),
            pages_search_depth=parameters.get("pages_search_depth", 2),
            country=parameters.get("country"),
            time_range=parameters.get("time_range"),
            topic=parameters.get("topic"),
            language=parameters.get("language")
        )
        research_request = WebResearchRequest(
            user_prompt=query,
            urls=target_urls,
            max_results=result_limit,
            options=research_options
        )

        logger.info(f"Performing comprehensive web research for: {query}")
        logger.info(f"Max results: {result_limit}, Max pages: {page_limit}")
        if target_urls:
            logger.info(f"Using provided URLs: {len(target_urls)}")

        research_result = await self.services.ai.webResearch(research_request)
        if not research_result.success:
            return ActionResult.isFailure(error=research_result.error)

        # Flatten the service's typed documents into the plain-dict shape
        # used by the standard ActionResult contract.
        converted_documents = [
            {
                "documentName": doc.documentName,
                "documentData": {
                    "user_prompt": doc.documentData.user_prompt,
                    "websites_analyzed": doc.documentData.websites_analyzed,
                    "additional_links_found": doc.documentData.additional_links_found,
                    "analysis_result": doc.documentData.analysis_result,
                    "sources": [{"title": s.title, "url": str(s.url)} for s in doc.documentData.sources],
                    "additional_links": doc.documentData.additional_links,
                    "debug_info": doc.documentData.debug_info
                },
                "mimeType": doc.mimeType
            }
            for doc in research_result.documents
        ]

        return ActionResult.isSuccess(
            documents=converted_documents
        )
    except Exception as e:
        logger.error(f"Error in web research: {str(e)}")
        return ActionResult.isFailure(
            error=str(e)
        )
def _mergeDataChunks(self, chunks: List[str], resultType: str, mimeType: str) -> str:
"""Intelligently merge data chunks using strategies based on content type"""
try:
if resultType == "json":
return self._mergeJsonChunks(chunks)
elif resultType in ["csv", "table"]:
return self._mergeTableChunks(chunks)
elif resultType in ["txt", "md", "text"]:
return self._mergeTextChunks(chunks)
else:
# Default: simple concatenation
return "\n".join(str(chunk) for chunk in chunks)
except Exception as e:
logger.warning(f"Failed to merge chunks intelligently: {str(e)}, using simple concatenation")
return "\n".join(str(chunk) for chunk in chunks)
def _mergeJsonChunks(self, chunks: List[str]) -> str:
"""Merge JSON chunks intelligently"""
import json
merged_data = []
for i, chunk in enumerate(chunks):
try:
if isinstance(chunk, str):
chunk_data = json.loads(chunk)
else:
chunk_data = chunk
if isinstance(chunk_data, list):
merged_data.extend(chunk_data)
elif isinstance(chunk_data, dict):
# For objects, merge by combining keys
if not merged_data:
merged_data = chunk_data
else:
if isinstance(merged_data, dict):
merged_data.update(chunk_data)
else:
merged_data.append(chunk_data)
else:
merged_data.append(chunk_data)
except Exception as e:
logger.warning(f"Failed to parse chunk {i}: {str(e)}")
# Add as string if JSON parsing fails
merged_data.append(str(chunk))
return json.dumps(merged_data, indent=2)
def _mergeTableChunks(self, chunks: List[str]) -> str:
"""Merge table chunks (CSV) intelligently"""
import csv
import io
merged_rows = []
headers = None
for i, chunk in enumerate(chunks):
try:
# Parse CSV chunk
reader = csv.reader(io.StringIO(str(chunk)))
rows = list(reader)
if not rows:
continue
# First chunk: capture headers
if i == 0:
headers = rows[0] if rows else []
merged_rows.extend(rows)
else:
# Subsequent chunks: skip header if it matches
if rows and rows[0] == headers:
merged_rows.extend(rows[1:]) # Skip duplicate header
else:
merged_rows.extend(rows)
except Exception as e:
logger.warning(f"Failed to parse table chunk {i}: {str(e)}")
# Add as raw text if CSV parsing fails
merged_rows.append([f"Raw chunk {i}: {str(chunk)[:100]}..."])
# Convert back to CSV
output = io.StringIO()
writer = csv.writer(output)
writer.writerows(merged_rows)
return output.getvalue()
def _mergeTextChunks(self, chunks: List[str]) -> str:
"""Merge text chunks intelligently"""
# Simple concatenation with proper spacing
merged = []
for chunk in chunks:
chunk_str = str(chunk).strip()
if chunk_str:
merged.append(chunk_str)
return "\n\n".join(merged) # Double newline between chunks for readability

View file

@ -130,6 +130,9 @@ class MethodBase:
# Extract parameter name and type # Extract parameter name and type
if '(' in paramPart: if '(' in paramPart:
paramName = paramPart.split('(')[0].strip() paramName = paramPart.split('(')[0].strip()
# Normalize bullet-prefixed parameter names like "- aiPrompt" or "* aiPrompt"
if paramName.startswith('-') or paramName.startswith('*'):
paramName = paramName[1:].strip()
paramType = paramPart[paramPart.find('(')+1:paramPart.find(')')].strip() paramType = paramPart[paramPart.find('(')+1:paramPart.find(')')].strip()
descriptions[paramName] = descPart descriptions[paramName] = descPart
types[paramName] = paramType types[paramName] = paramType
@ -165,4 +168,54 @@ class MethodBase:
elif hasattr(type_annotation, '_name'): elif hasattr(type_annotation, '_name'):
return type_annotation._name return type_annotation._name
else: else:
return str(type_annotation) return str(type_annotation)
def _generateMeaningfulFileName(self, base_name: str, extension: str, workflow_context: Dict[str, Any] = None, action_name: str = None) -> str:
"""
Generate a meaningful file name with round/task/action information.
Format: {base_name}_alpha_r{round}t{task}a{action}.{extension}
Example: report_alpha_r1t3a4.json
Args:
base_name: Base name for the file (e.g., "report", "analysis", "summary")
extension: File extension without dot (e.g., "json", "html", "txt")
workflow_context: Dictionary with currentRound, currentTask, currentAction
action_name: Name of the action being performed (optional, for additional context)
Returns:
Formatted file name string
"""
try:
# Get workflow context from services if not provided
if workflow_context is None and hasattr(self.services, 'workflow'):
workflow_context = self.services.workflow.getWorkflowContext()
# Extract round, task, action numbers
round_num = workflow_context.get('currentRound', 0) if workflow_context else 0
task_num = workflow_context.get('currentTask', 0) if workflow_context else 0
action_num = workflow_context.get('currentAction', 0) if workflow_context else 0
# Clean base name (remove special characters, spaces)
clean_base = base_name.lower().replace(' ', '_').replace('-', '_')
# Remove any non-alphanumeric characters except underscores
import re
clean_base = re.sub(r'[^a-z0-9_]', '', clean_base)
# Add action name if provided
if action_name:
clean_action = action_name.lower().replace(' ', '_').replace('-', '_')
clean_action = re.sub(r'[^a-z0-9_]', '', clean_action)
clean_base = f"{clean_base}_{clean_action}"
# Generate the meaningful file name
meaningful_name = f"{clean_base}_r{round_num}t{task_num}a{action_num}.{extension}"
self.logger.debug(f"Generated meaningful file name: {meaningful_name} (Round: {round_num}, Task: {task_num}, Action: {action_num})")
return meaningful_name
except Exception as e:
self.logger.warning(f"Error generating meaningful file name, using fallback: {str(e)}")
# Fallback to timestamp-based naming
timestamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
return f"{base_name}_{timestamp}.{extension}"

View file

@ -9,7 +9,8 @@ from typing import Dict, Any, List, Optional
from datetime import datetime, UTC from datetime import datetime, UTC
from modules.workflows.methods.methodBase import MethodBase, action from modules.workflows.methods.methodBase import MethodBase, action
from modules.datamodels.datamodelWorkflow import ActionResult, ChatDocument from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, Priority from modules.datamodels.datamodelAi import AiCallOptions, OperationType, Priority
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -30,19 +31,34 @@ class MethodDocument(MethodBase):
@action @action
async def extract(self, parameters: Dict[str, Any]) -> ActionResult: async def extract(self, parameters: Dict[str, Any]) -> ActionResult:
""" """
Extract content from any document using AI prompt. GENERAL:
- Purpose: Extract and analyze content from existing documents using AI.
- Input requirements: documentList (required); prompt (required).
- Output format: Plain text per source document (.txt by default).
Parameters: Parameters:
documentList (list): Document list reference(s) - documentList (list, required): Document reference(s) to extract from.
aiPrompt (str): AI prompt for extraction - prompt (str, required): Instruction describing what to extract.
expectedDocumentFormats (list, optional): Output formats - operationType (str, optional): extract_content | analyze_document | summarize_content. Default: extract_content.
includeMetadata (bool, optional): Include metadata (default: True) - processDocumentsIndividually (bool, optional): Process each document separately. Default: True.
- chunkAllowed (bool, optional): Allow chunking for large inputs. Default: True.
- mergeStrategy (dict, optional): Merge strategy for chunked content.
- expectedDocumentFormats (list, optional): Desired output format specs.
- includeMetadata (bool, optional): Include file metadata. Default: True.
""" """
try: try:
documentList = parameters.get("documentList") documentList = parameters.get("documentList")
if isinstance(documentList, str): if isinstance(documentList, str):
documentList = [documentList] documentList = [documentList]
aiPrompt = parameters.get("aiPrompt") prompt = parameters.get("prompt")
operationType = parameters.get("operationType", "extract_content")
processDocumentsIndividually = parameters.get("processDocumentsIndividually", True)
chunkAllowed = parameters.get("chunkAllowed", True)
mergeStrategy = parameters.get("mergeStrategy", {
"groupBy": "typeGroup",
"orderBy": "id",
"mergeType": "concatenate"
})
expectedDocumentFormats = parameters.get("expectedDocumentFormats", []) expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
includeMetadata = parameters.get("includeMetadata", True) includeMetadata = parameters.get("includeMetadata", True)
@ -51,9 +67,9 @@ class MethodDocument(MethodBase):
error="Document list reference is required" error="Document list reference is required"
) )
if not aiPrompt: if not prompt:
return ActionResult.isFailure( return ActionResult.isFailure(
error="AI prompt is required" error="Prompt is required"
) )
chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList) chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
@ -62,120 +78,77 @@ class MethodDocument(MethodBase):
error="No documents found for the provided reference" error="No documents found for the provided reference"
) )
# Batch extract content from all documents at once # Use enhanced AI service with integrated extraction
all_extracted_content = []
file_infos = []
batch_docs = []
for chatDocument in chatDocuments:
file_info = self.services.workflow.getFileInfo(chatDocument.fileId)
if includeMetadata:
file_infos.append(file_info)
try:
data = self.services.workflow.getFileData(chatDocument.fileId) if hasattr(chatDocument, 'fileId') else None
except Exception:
data = None
batch_docs.append({
"id": getattr(chatDocument, 'id', None),
"bytes": data or b"",
"fileName": getattr(chatDocument, 'fileName', 'unknown'),
"mimeType": getattr(chatDocument, 'mimeType', None) or "application/octet-stream"
})
try: try:
extracted_list = await self.services.extraction.extractContentFromDocuments( # Build AI call options
prompt=aiPrompt, ai_options = AiCallOptions(
documents=batch_docs, operationType=operationType,
options={"ai": {"enabled": False}} processDocumentsIndividually=processDocumentsIndividually,
compressContext=not chunkAllowed
) )
# Add format instructions to prompt if expected formats are provided
enhanced_prompt = prompt
if expectedDocumentFormats:
format_instructions = []
for fmt in expectedDocumentFormats:
extension = fmt.get("extension", ".txt")
mime_type = fmt.get("mimeType", "text/plain")
description = fmt.get("description", "")
format_instructions.append(f"- {extension} ({mime_type}): {description}")
if format_instructions:
enhanced_prompt += f"\n\nPlease format the output as: {', '.join([fmt.get('extension', '.txt') for fmt in expectedDocumentFormats])}"
enhanced_prompt += f"\nExpected formats:\n" + "\n".join(format_instructions)
# Use enhanced AI service for extraction
ai_response = await self.services.ai.callAi(
prompt=enhanced_prompt,
documents=chatDocuments,
options=ai_options
)
logger.info(f"AI extraction completed: {len(ai_response)} characters")
except Exception as e: except Exception as e:
logger.error(f"Batch extraction failed: {str(e)}") logger.error(f"AI extraction failed: {str(e)}")
extracted_list = [] ai_response = ""
all_extracted_content = extracted_list or []
if not all_extracted_content: if not ai_response or ai_response.strip() == "":
return ActionResult.isFailure( return ActionResult.isFailure(
error="No content could be extracted from any documents" error="No content could be extracted from any documents"
) )
# Process each document individually with its own format conversion # Process each document individually with extracted content
output_documents = [] action_documents = []
for i, chatDocument in enumerate(chatDocuments): for i, chatDocument in enumerate(chatDocuments):
# Extract text content from this document # Use the AI response directly - it already contains processed content
text_content = "" final_content = ai_response
try: final_mime_type = "text/plain"
ec = all_extracted_content[i] if i < len(all_extracted_content) else None final_extension = ".txt"
if ec and hasattr(ec, 'parts'):
text_parts = []
for part in getattr(ec, 'parts', []):
try:
if getattr(part, 'typeGroup', '') in ("text", "table", "structure") and getattr(part, 'data', None):
text_parts.append(part.data)
except Exception:
continue
text_content = "\n".join(text_parts)
else:
text_content = ""
except Exception:
text_content = ""
# Get the expected format for this document (or use default) # Create meaningful output fileName with workflow context
target_format = None
if expectedDocumentFormats and i < len(expectedDocumentFormats):
target_format = expectedDocumentFormats[i]
elif expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# If fewer formats than documents, use the last format for remaining documents
target_format = expectedDocumentFormats[-1]
# Determine output format and fileName
if target_format:
target_extension = target_format.get("extension", ".txt")
target_mime_type = target_format.get("mimeType", "text/plain")
# Check if format conversion is needed
if target_extension not in [".txt", ".text"] or target_mime_type != "text/plain":
logger.info(f"Converting document {i+1} to format: {target_extension} ({target_mime_type})")
# Use AI to convert format
formatted_content = await self._convertContentToFormat(text_content, target_format)
final_content = formatted_content
final_mime_type = target_mime_type
final_extension = target_extension
else:
logger.info(f"Document {i+1}: No format conversion needed, using plain text")
final_content = text_content
final_mime_type = "text/plain"
final_extension = ".txt"
else:
logger.info(f"Document {i+1}: No expected format specified, using plain text")
final_content = text_content
final_mime_type = "text/plain"
final_extension = ".txt"
# Create output fileName based on original fileName and target format
original_fileName = chatDocument.fileName original_fileName = chatDocument.fileName
base_name = original_fileName.rsplit('.', 1)[0] if '.' in original_fileName else original_fileName base_name = original_fileName.rsplit('.', 1)[0] if '.' in original_fileName else original_fileName
output_fileName = f"{base_name}_extracted_{self._format_timestamp_for_filename()}{final_extension}" extension = final_extension.lstrip('.') # Remove leading dot for meaningful naming
output_fileName = self._generateMeaningfulFileName(
# Create result data for this document base_name=f"{base_name}_extracted",
result_data = { extension=extension,
"documentCount": 1, action_name="extract"
"content": final_content, )
"originalfileName": original_fileName,
"fileInfos": [file_infos[i]] if includeMetadata and i < len(file_infos) else None,
"timestamp": self.services.utils.getUtcTimestamp()
}
logger.info(f"Created output document: {output_fileName} with {len(final_content)} characters") logger.info(f"Created output document: {output_fileName} with {len(final_content)} characters")
output_documents.append({ # Create proper ActionDocument object
"documentName": output_fileName, action_documents.append(ActionDocument(
"documentData": result_data, documentName=output_fileName,
"mimeType": final_mime_type documentData=final_content,
}) mimeType=final_mime_type
))
return ActionResult.isSuccess( return ActionResult.isSuccess(
documents=output_documents documents=action_documents
) )
except Exception as e: except Exception as e:
logger.error(f"Error extracting content: {str(e)}") logger.error(f"Error extracting content: {str(e)}")
@ -183,454 +156,27 @@ class MethodDocument(MethodBase):
error=str(e) error=str(e)
) )
@action @action
async def generate(self, parameters: Dict[str, Any]) -> ActionResult: async def generate(self, parameters: Dict[str, Any]) -> ActionResult:
""" """
Convert TEXT-ONLY documents to target formats (NO AI usage). GENERAL:
- Purpose: Generate formatted documents and reports from source documents.
- Input requirements: documentList (required); prompt (required); optional title and outputFormat.
- Any output format, e.g.: html | pdf | docx | txt | md | json | csv | xlsx
Parameters: Parameters:
documentList (list): TEXT-ONLY documents only - documentList (list, required): Document reference(s) to include as context.
expectedDocumentFormats (list): Target formats - prompt (str, required): Instruction describing the desired document/report.
originalDocuments (list, optional): Original names - title (str, optional): Title for the generated document. Default: "Summary Report".
includeMetadata (bool, optional): Include metadata (default: True) - outputFormat (str, optional): html | pdf | docx | txt | md | json | csv | xlsx. Default: html.
mergeDocuments (bool, optional): Merge all documents into single output (default: False) - operationType (str, optional): generate_report | analyze_documents. Default: generate_report.
""" - processDocumentsIndividually (bool, optional): Process per document. Default: True.
try: - chunkAllowed (bool, optional): Allow chunking for large inputs. Default: True.
document_list = parameters.get("documentList", []) - mergeStrategy (dict, optional): Merging rules for multi-part generation.
if isinstance(document_list, str): - includeMetadata (bool, optional): Include file metadata. Default: True.
document_list = [document_list]
expected_document_formats = parameters.get("expectedDocumentFormats", [])
original_documents = parameters.get("originalDocuments", [])
include_metadata = parameters.get("includeMetadata", True)
merge_documents = parameters.get("mergeDocuments", False)
if not document_list:
return ActionResult.isFailure(
error="Document list is required for generation"
)
if not expected_document_formats or len(expected_document_formats) == 0:
return ActionResult.isFailure(
error="Expected document formats specification is required"
)
# Get chat documents for original documents list
chat_documents = self.services.workflow.getChatDocumentsFromDocumentList(document_list)
logger.info(f"Found {len(chat_documents)} chat documents")
if not chat_documents:
return ActionResult.isFailure(
error="No documents found for the provided documentList reference"
)
# Update original documents list if not provided
if not original_documents:
original_documents = [doc.fileName if hasattr(doc, 'fileName') else str(doc.id) for doc in chat_documents]
# Extract content from all documents first
document_contents = []
for i, chat_document in enumerate(chat_documents):
# Extract content from this document directly - NO AI, just read the data as-is
# This ensures we get the original text content for format conversion
content = ""
if hasattr(chat_document, 'fileId') and chat_document.fileId:
try:
# Get file data directly without AI processing
file_data = self.services.workflow.getFileData(chat_document.fileId)
if file_data:
# Check if it's text data and convert to string
if isinstance(file_data, bytes):
try:
# Try to decode as UTF-8 to check if it's text
content = file_data.decode('utf-8')
logger.info(f"Document {i+1} ({chat_document.fileName}): Successfully decoded as UTF-8 text")
except UnicodeDecodeError:
logger.info(f"Document {i+1} ({chat_document.fileName}): Binary data, not text - skipping")
continue
else:
# Already a string
content = str(file_data)
logger.info(f"Document {i+1} ({chat_document.fileName}): Already text data")
else:
logger.warning(f"Document {i+1} ({chat_document.fileName}): No file data found")
continue
if not content.strip():
logger.info(f"Document {i+1} ({chat_document.fileName}): Empty text content, skipping")
continue
except Exception as e:
logger.warning(f"Error reading document {i+1} ({chat_document.fileName}): {str(e)}")
continue
else:
logger.warning(f"Document {i+1} has no fileId, skipping")
continue
logger.info(f"Extracted content from document {i+1}: {len(content)} characters")
document_contents.append({
"document": chat_document,
"content": content,
"index": i,
"original_name": original_documents[i] if i < len(original_documents) else f"document_{i+1}"
})
if not document_contents:
return ActionResult.isFailure(
error="No valid text content could be extracted from any documents"
)
if merge_documents and len(document_contents) > 1:
# Merge all documents into single output
logger.info("Merging all documents into single output")
return await self._mergeDocuments(document_contents, expected_document_formats, include_metadata)
else:
# Process each document individually with its own format conversion
logger.info("Processing documents individually")
output_documents = []
for item in document_contents:
chat_document = item["document"]
content = item["content"]
i = item["index"]
original_name = item["original_name"]
# Get the expected format for this document (or use default)
target_format = None
if i < len(expected_document_formats):
target_format = expected_document_formats[i]
elif len(expected_document_formats) > 0:
# If fewer formats than documents, use the last format for remaining documents
target_format = expected_document_formats[-1]
if not target_format:
logger.warning(f"No expected format for document {i+1}, skipping")
continue
# Use AI to convert format
formatted_content = await self._convertContentToFormat(content, target_format)
if not formatted_content:
logger.warning(f"Failed to format document {i+1}, skipping")
continue
target_extension = target_format.get("extension", ".txt")
target_mime_type = target_format.get("mimeType", "text/plain")
# Create output fileName
base_name = original_name.rsplit('.', 1)[0] if '.' in original_name else original_name
output_fileName = f"{base_name}_generated_{self._format_timestamp_for_filename()}{target_extension}"
# Create result data
result_data = {
"documentCount": 1,
"content": formatted_content,
"outputFormat": target_format,
"originalDocument": original_name,
"timestamp": self.services.utils.getUtcTimestamp()
}
logger.info(f"Generated document: {output_fileName} with {len(formatted_content)} characters")
output_documents.append({
"documentName": output_fileName,
"documentData": result_data,
"mimeType": target_mime_type
})
if not output_documents:
return ActionResult.isFailure(
error="No documents could be generated"
)
return ActionResult.isSuccess(
documents=output_documents
)
except Exception as e:
logger.error(f"Error generating document: {str(e)}")
return ActionResult.isFailure(
error=str(e)
)
async def _mergeDocuments(self, document_contents: List[Dict[str, Any]],
expected_document_formats: List[Dict[str, Any]],
include_metadata: bool) -> ActionResult:
"""
Merge all documents into a single output document.
"""
try:
# Combine all document content
combined_content_parts = []
original_file_names = []
for item in document_contents:
chat_document = item["document"]
content = item["content"]
original_name = item["original_name"]
if content.strip():
combined_content_parts.append(f"=== Document: {original_name} ===\n{content}\n")
original_file_names.append(original_name)
if not combined_content_parts:
return ActionResult.isFailure(
error="No content could be extracted from any documents for merging"
)
# Combine all content
combined_content = "\n".join(combined_content_parts)
logger.info(f"Combined content from {len(original_file_names)} documents: {len(combined_content)} characters")
# Get the expected format for the merged output
target_format = None
if expected_document_formats and len(expected_document_formats) > 0:
target_format = expected_document_formats[0] # Use first format for merged output
if not target_format:
logger.warning("No expected format specified for merged output, using plain text")
target_format = {"extension": ".txt", "mimeType": "text/plain"}
# Use AI to convert format
formatted_content = await self._convertContentToFormat(combined_content, target_format)
if not formatted_content:
logger.warning("Failed to format merged content, using raw content")
formatted_content = combined_content
target_extension = target_format.get("extension", ".txt")
target_mime_type = target_format.get("mimeType", "text/plain")
# Create output fileName for merged document
timestamp = self._format_timestamp_for_filename()
output_fileName = f"merged_documents_{timestamp}{target_extension}"
# Create result data for merged document
result_data = {
"documentCount": len(document_contents),
"content": formatted_content,
"outputFormat": target_format,
"originalDocuments": original_file_names,
"timestamp": self.services.utils.getUtcTimestamp(),
"merged": True
}
logger.info(f"Created merged document: {output_fileName} with {len(formatted_content)} characters")
return ActionResult.isSuccess(
documents=[{
"documentName": output_fileName,
"documentData": result_data,
"mimeType": target_mime_type
}]
)
except Exception as e:
logger.error(f"Error merging documents: {str(e)}")
return ActionResult.isFailure(
error=f"Failed to merge documents: {str(e)}"
)
async def _convertContentToFormat(self, content: str, target_format: Dict[str, Any]) -> str:
"""
Helper function to convert content to the specified format using AI.
"""
try:
extension = target_format.get("extension", ".txt")
mime_type = target_format.get("mimeType", "text/plain")
logger.info(f"Converting content to format: {extension} ({mime_type})")
# Create AI prompt for format conversion
format_prompts = {
".csv": f"""
Convert the following content into a proper CSV format.
Requirements:
1. Output ONLY the CSV data without any markdown, code blocks, or additional text
2. Use appropriate headers based on the content
3. Ensure proper CSV formatting with commas and quotes where needed
4. Make the data easily readable and importable into spreadsheet applications
Content to convert:
{content}
Generate ONLY the CSV data:
""",
".json": f"""
Convert the following content into a proper JSON format.
Requirements:
1. Output ONLY the JSON data without any markdown, code blocks, or additional text
2. Structure the data logically with appropriate keys and values
3. Ensure valid JSON syntax
4. Make the data easily parseable and readable
Content to convert:
{content}
Generate ONLY the JSON data:
""",
".xml": f"""
Convert the following content into a proper XML format.
Requirements:
1. Output ONLY the XML data without any markdown, code blocks, or additional text
2. Use appropriate XML tags and structure
3. Ensure valid XML syntax
4. Make the data easily parseable and readable
Content to convert:
{content}
Generate ONLY the XML data:
""",
".html": f"""
Convert the following content into a proper HTML format.
Requirements:
1. Output ONLY the HTML data without any markdown, code blocks, or additional text
2. Use appropriate HTML tags and structure
3. Ensure valid HTML syntax
4. Make the data easily readable in web browsers
Content to convert:
{content}
Generate ONLY the HTML data:
""",
".md": f"""
Convert the following content into a proper Markdown format.
Requirements:
1. Output ONLY the Markdown data without any code blocks or additional text
2. Use appropriate Markdown syntax for headers, lists, emphasis, etc.
3. Structure the content logically
4. Make the data easily readable and convertible to other formats
Content to convert:
{content}
Generate ONLY the Markdown data:
"""
}
# Get the appropriate prompt for the target format
if extension in format_prompts:
ai_prompt = format_prompts[extension]
else:
# Generic format conversion
ai_prompt = f"""
Convert the following content into {extension.upper()} format.
Requirements:
1. Output ONLY the {extension.upper()} data without any markdown, code blocks, or additional text
2. Use appropriate formatting for {extension.upper()} files
3. Ensure the output is valid and usable
4. Make the data easily readable and importable
Content to convert:
{content}
Generate ONLY the {extension.upper()} data:
"""
# Call AI to generate the formatted content
logger.info(f"Calling AI for {extension} format conversion")
formatted_content = await self.services.ai.callAi(
prompt=ai_prompt,
documents=None,
options=AiCallOptions(
operationType=OperationType.GENERATE_CONTENT,
priority=Priority.SPEED,
compressPrompt=True,
compressContext=False,
maxCost=0.02
)
)
if not formatted_content or formatted_content.strip() == "":
logger.warning("AI format conversion failed, using fallback")
return self._generateFallbackFormattedContent(content, extension, mime_type)
# Clean up the AI response
formatted_content = formatted_content.strip()
# Remove markdown code blocks if present
if formatted_content.startswith("```") and formatted_content.endswith("```"):
lines = formatted_content.split('\n')
if len(lines) > 2:
formatted_content = '\n'.join(lines[1:-1])
# For HTML format, check if AI returned complete HTML document
if extension == ".html" and (formatted_content.startswith('<!DOCTYPE') or formatted_content.startswith('<html')):
return formatted_content
return formatted_content
except Exception as e:
logger.error(f"Error in AI format conversion: {str(e)}")
return self._generateFallbackFormattedContent(content, extension, mime_type)
def _generateFallbackFormattedContent(self, content: str, extension: str, mime_type: str) -> str:
"""
Generate fallback formatted content when AI conversion fails.
"""
try:
if extension == ".csv":
# Simple CSV fallback - split by lines and create basic CSV
lines = content.strip().split('\n')
if lines:
# Create a simple CSV with line numbers and content
csv_lines = ["Line,Content"]
for i, line in enumerate(lines, 1):
# Escape quotes and wrap in quotes if comma present
if ',' in line:
line = f'"{line.replace(chr(34), chr(34) + chr(34))}"'
csv_lines.append(f"{i},{line}")
return '\n'.join(csv_lines)
return "Line,Content\n1,No content available"
elif extension == ".json":
# Simple JSON fallback
content_escaped = content.replace('"', '\\"')
timestamp = self.services.utils.getUtcTimestamp()
return f'{{"content": "{content_escaped}", "format": "json", "timestamp": {timestamp}}}'
elif extension == ".xml":
# Simple XML fallback
timestamp = self.services.utils.getUtcTimestamp()
return f'<?xml version="1.0" encoding="UTF-8"?>\n<document>\n<content>{content}</content>\n<format>xml</format>\n<timestamp>{timestamp}</timestamp>\n</document>'
elif extension == ".html":
# Simple HTML fallback
timestamp = int(self.services.utils.getUtcTimestamp())
return f'<!DOCTYPE html>\n<html>\n<head><meta charset="UTF-8"><title>Generated Document</title></head>\n<body>\n<pre>{content}</pre>\n<p><em>Generated on {timestamp}</em></p>\n</body>\n</html>'
elif extension == ".md":
# Simple Markdown fallback
timestamp = int(self.services.utils.getUtcTimestamp())
return f"# Generated Document\n\n{content}\n\n---\n*Generated on {timestamp}*"
else:
# Generic fallback - return content as-is
return content
except Exception as e:
logger.error(f"Error in fallback format conversion: {str(e)}")
return content
@action
async def generateReport(self, parameters: Dict[str, Any]) -> ActionResult:
"""
Generate HTML report from multiple documents using AI.
Parameters:
documentList (list): Document list reference(s)
prompt (str): AI prompt for report generation
title (str, optional): Report title (default: "Summary Report")
includeMetadata (bool, optional): Include metadata (default: True)
""" """
try: try:
documentList = parameters.get("documentList") documentList = parameters.get("documentList")
@ -638,6 +184,15 @@ class MethodDocument(MethodBase):
documentList = [documentList] documentList = [documentList]
prompt = parameters.get("prompt") prompt = parameters.get("prompt")
title = parameters.get("title", "Summary Report") title = parameters.get("title", "Summary Report")
outputFormat = parameters.get("outputFormat", "html")
operationType = parameters.get("operationType", "generate_report")
processDocumentsIndividually = parameters.get("processDocumentsIndividually", True)
chunkAllowed = parameters.get("chunkAllowed", True)
mergeStrategy = parameters.get("mergeStrategy", {
"groupBy": "typeGroup",
"orderBy": "id",
"mergeType": "concatenate"
})
includeMetadata = parameters.get("includeMetadata", True) includeMetadata = parameters.get("includeMetadata", True)
if not documentList: if not documentList:
@ -658,179 +213,54 @@ class MethodDocument(MethodBase):
error="No documents found for the provided reference" error="No documents found for the provided reference"
) )
# Generate HTML report # Use enhanced AI service with document generation
html_content = await self._generateHtmlReport(chatDocuments, title, includeMetadata, prompt) try:
# Build AI call options
ai_options = AiCallOptions(
operationType=operationType,
processDocumentsIndividually=processDocumentsIndividually,
compressContext=not chunkAllowed
)
# Use enhanced AI service with document generation
result = await self.services.ai.callAi(
prompt=prompt,
documents=chatDocuments,
options=ai_options,
outputFormat=outputFormat,
title=title
)
if isinstance(result, dict) and result.get("success"):
# Extract document information from result
documents = result.get("documents", [])
if documents:
# Convert to ActionDocument format
action_documents = []
for doc in documents:
action_documents.append(ActionDocument(
documentName=doc["documentName"],
documentData=doc["documentData"],
mimeType=doc["mimeType"]
))
logger.info(f"Generated {outputFormat.upper()} report: {len(action_documents)} documents")
return ActionResult.isSuccess(documents=action_documents)
else:
return ActionResult.isFailure(error="No documents generated")
else:
error_msg = result.get("error", "Unknown error") if isinstance(result, dict) else "AI generation failed"
return ActionResult.isFailure(error=error_msg)
except Exception as e:
logger.error(f"AI generation failed: {str(e)}")
return ActionResult.isFailure(error=str(e))
# Create output fileName
timestamp = int(self.services.utils.getUtcTimestamp())
output_fileName = f"report_{self._format_timestamp_for_filename()}.html"
result_data = {
"documentCount": len(chatDocuments),
"content": html_content,
"title": title,
"timestamp": self.services.utils.getUtcTimestamp()
}
logger.info(f"Generated HTML report: {output_fileName} with {len(html_content)} characters")
return ActionResult.isSuccess(
documents=[{
"documentName": output_fileName,
"documentData": result_data,
"mimeType": "text/html"
}]
)
except Exception as e: except Exception as e:
logger.error(f"Error generating report: {str(e)}") logger.error(f"Error generating report: {str(e)}")
return ActionResult.isFailure( return ActionResult.isFailure(
error=str(e) error=str(e)
) )
    async def _generateHtmlReport(self, chatDocuments: List[Any], title: str, includeMetadata: bool, prompt: str) -> str:
        """
        Generate a comprehensive HTML report using AI from all input documents.

        Extracts readable text from each document (skipping binaries), then asks
        the AI service to produce a complete, self-contained HTML document based
        on the caller-supplied prompt.

        Args:
            chatDocuments: Documents to report on; each is expected to expose
                fileId/fileName/mimeType attributes (accessed defensively).
            title: Report title embedded in the prompt and fallback HTML.
            includeMetadata: Accepted for interface compatibility; not read in
                this implementation.
            prompt: User prompt that drives the report content.

        Returns:
            A complete HTML document string.

        Raises:
            Exception: If the AI call fails or returns empty output (AI is
                required for report generation; there is no non-AI fallback).
        """
        try:
            # Filter out empty documents and collect content
            validDocuments = []
            allContent = []
            for doc in chatDocuments:
                content = ""
                logger.info(f"Processing document: type={type(doc)}")
                # NOTE(review): despite the "batch" wording, extraction is
                # invoked once per document (a single-element list each time).
                try:
                    # Best-effort fetch of raw bytes; None/failure falls back to b"".
                    try:
                        data = self.services.workflow.getFileData(doc.fileId) if hasattr(doc, 'fileId') else None
                    except Exception:
                        data = None
                    extracted_list = await self.services.extraction.extractContentFromDocuments(
                        prompt="Extract readable text content for HTML report generation",
                        documents=[{
                            "id": getattr(doc, 'id', None),
                            "bytes": data or b"",
                            "fileName": getattr(doc, 'fileName', 'unknown'),
                            "mimeType": getattr(doc, 'mimeType', None) or "application/octet-stream"
                        }],
                        # AI-assisted extraction disabled here; plain extraction only.
                        options={"ai": {"enabled": False}}
                    )
                    ec = extracted_list[0] if extracted_list else None
                    if ec and hasattr(ec, 'parts'):
                        # Concatenate only textual part types; other parts
                        # (e.g. images) are ignored.
                        for part in getattr(ec, 'parts', []):
                            try:
                                if getattr(part, 'typeGroup', '') in ("text", "table", "structure") and getattr(part, 'data', None):
                                    content += part.data + " "
                            except Exception:
                                continue
                        if content.strip():
                            logger.info(f" Retrieved content from file: {len(content)} characters")
                        else:
                            logger.info(f" No readable text content found (binary file)")
                    else:
                        logger.info(f" No content extracted (binary file)")
                except Exception as e:
                    logger.info(f" Could not extract content (binary file): {str(e)}")
                # Skip empty documents
                if content and content.strip():
                    validDocuments.append(doc)
                    allContent.append(f"Document: {doc.fileName}\n{content}\n")
                    logger.info(f" Added document to valid documents list")
                else:
                    logger.info(f" Skipping document with no readable text content")
            if not validDocuments:
                # No readable content; return a minimal valid HTML document
                # (body text is German: "no evaluable content found").
                timestamp = int(self.services.utils.getUtcTimestamp())
                return f"<!DOCTYPE html><html><head><meta charset=\"UTF-8\"><title>{title}</title></head><body><h1>{title}</h1><p>Keine auswertbaren Inhalte gefunden.</p><p>Generated: {timestamp}</p></body></html>"
            # Create AI prompt for comprehensive report generation using user's prompt
            combinedContent = "\n\n".join(allContent)
            aiPrompt = f"""
            {prompt}
            Report Title: {title}
            OUTPUT POLICY:
            - Return ONLY a complete, raw HTML document.
            - Start with: <!DOCTYPE html>
            - Must include: <html>, <head> (with <meta charset="UTF-8"> and <title>), and <body>.
            - The response must be valid, self-contained HTML suitable for saving as .html.
            Structure:
            - Title and short subtitle
            - Executive summary
            - Sections with clear headings
            - Use tables for structured data when helpful
            - Key findings and recommendations
            - Generation date and number of documents
            Quality and design requirements:
            - Use clear, professional, and accessible styling in a <style> block
            - Apply clean layout, spacing, and visual hierarchy for headings
            - Keep HTML and CSS standards-compliant and lightweight
            SOURCE DOCUMENT CONTENT:
            ---START---
            {combinedContent}
            ---END---
            """
            # Call AI to generate the report
            logger.info(f"Generating AI report for {len(validDocuments)} documents")
            # Build ChatDocument list from chatDocuments so the AI service can
            # also see the raw files; failures degrade to prompt-only.
            documents = []
            try:
                for d in validDocuments:
                    try:
                        data = self.services.workflow.getFileData(d.fileId) if hasattr(d, 'fileId') else None
                        if data:
                            documents.append(ChatDocument(fileData=data, fileName=d.fileName, mimeType=d.mimeType))
                    except Exception:
                        continue
            except Exception:
                documents = None
            aiReport = await self.services.ai.callAi(
                prompt=aiPrompt,
                documents=documents or None,
                options=AiCallOptions(
                    operationType=OperationType.GENERATE_CONTENT,  # Using GENERATE_CONTENT for report generation
                    priority=Priority.QUALITY,
                    compressPrompt=False,
                    compressContext=True,
                    processDocumentsIndividually=True,
                    resultFormat="html",
                    processingMode="detailed",
                    maxCost=0.08,
                    maxProcessingTime=90
                )
            )
            # If AI call fails, return error - AI is crucial for report generation
            if not aiReport or aiReport.strip() == "":
                logger.error("AI report generation failed - AI is crucial for this action")
                raise Exception("AI report generation failed - AI is required for report generation")
            # Clean up the AI response and ensure it's valid HTML
            aiReport = aiReport.strip()
            # Normalize: strip code fences if present
            if aiReport.startswith("```") and aiReport.endswith("```"):
                lines = aiReport.split('\n')
                if len(lines) >= 2:
                    aiReport = '\n'.join(lines[1:-1]).strip()
            cleaned = aiReport.strip()
            # Return exactly what we have (no wrapping)
            return cleaned
        except Exception as e:
            logger.error(f"Error generating AI report: {str(e)}")
            # Re-raise the error - AI is crucial for report generation
            raise

File diff suppressed because it is too large Load diff

View file

@ -14,7 +14,7 @@ import aiohttp
import asyncio import asyncio
from modules.workflows.methods.methodBase import MethodBase, action from modules.workflows.methods.methodBase import MethodBase, action
from modules.datamodels.datamodelWorkflow import ActionResult from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -443,13 +443,16 @@ class MethodSharepoint(MethodBase):
@action @action
async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult: async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
""" """
Find documents/folders by searching their NAMES across SharePoint sites. GENERAL:
- Purpose: Find documents and folders by name/path across sites.
- Input requirements: connectionReference (required); searchQuery (required); optional site, maxResults.
- Output format: JSON with found items and paths.
Parameters: Parameters:
connectionReference (str): Microsoft connection reference - connectionReference (str, required): Microsoft connection label.
site (str, optional): Site hint (e.g., "SSS", "KM XYZ") - site (str, optional): Site hint.
searchQuery (str): Search query - "budget", "folders:alpha", "files:budget", "/Documents/Project1", "namepart1 namepart2 namepart3". Use "folders:" prefix when user wants to store files or find folders - searchQuery (str, required): Search terms or path.
maxResults (int, optional): Max results (default: 100) - maxResults (int, optional): Maximum items to return. Default: 100.
""" """
try: try:
connectionReference = parameters.get("connectionReference") connectionReference = parameters.get("connectionReference")
@ -799,11 +802,11 @@ class MethodSharepoint(MethodBase):
return ActionResult( return ActionResult(
success=True, success=True,
documents=[ documents=[
{ ActionDocument(
"documentName": f"sharepoint_find_path_{self._format_timestamp_for_filename()}{output_extension}", documentName=f"sharepoint_find_path_{self._format_timestamp_for_filename()}{output_extension}",
"documentData": result_data, documentData=json.dumps(result_data, indent=2),
"mimeType": output_mime_type mimeType=output_mime_type
} )
] ]
) )
@ -814,14 +817,17 @@ class MethodSharepoint(MethodBase):
@action @action
async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult: async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
""" """
Read documents from SharePoint across all accessible sites GENERAL:
- Purpose: Read documents from SharePoint and extract content/metadata.
- Input requirements: connectionReference (required); documentList (required); optional pathObject or pathQuery; includeMetadata.
- Output format: JSON with read results per document.
Parameters: Parameters:
documentList (list): Reference(s) to the document list to read - documentList (list, required): Document list reference(s) to read.
connectionReference (str): Reference to the Microsoft connection - connectionReference (str, required): Microsoft connection label.
pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action - pathObject (str, optional): Reference to a previous path result.
pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites) - pathQuery (str, optional): Path query if no pathObject.
includeMetadata (bool, optional): Whether to include metadata (default: True) - includeMetadata (bool, optional): Include metadata. Default: True.
""" """
try: try:
documentList = parameters.get("documentList") documentList = parameters.get("documentList")
@ -1073,11 +1079,11 @@ class MethodSharepoint(MethodBase):
return ActionResult( return ActionResult(
success=True, success=True,
documents=[ documents=[
{ ActionDocument(
"documentName": f"sharepoint_documents_{self._format_timestamp_for_filename()}{output_extension}", documentName=f"sharepoint_documents_{self._format_timestamp_for_filename()}{output_extension}",
"documentData": result_data, documentData=json.dumps(result_data, indent=2),
"mimeType": output_mime_type mimeType=output_mime_type
} )
] ]
) )
except Exception as e: except Exception as e:
@ -1090,14 +1096,17 @@ class MethodSharepoint(MethodBase):
@action @action
async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult: async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
""" """
Upload documents to SharePoint across accessible sites GENERAL:
- Purpose: Upload documents to SharePoint.
- Input requirements: connectionReference (required); documentList (required); fileNames (required); optional pathObject or pathQuery.
- Output format: JSON with upload status and file info.
Parameters: Parameters:
connectionReference (str): Reference to the Microsoft connection - connectionReference (str, required): Microsoft connection label.
pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action - pathObject (str, optional): Reference to a previous path result.
pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites) - pathQuery (str, optional): Upload target path if no pathObject.
documentList (list): Reference(s) to the document list to upload - documentList (list, required): Document reference(s) to upload.
fileNames (List[str]): List of names for the uploaded files - fileNames (list, required): Output file names.
""" """
try: try:
connectionReference = parameters.get("connectionReference") connectionReference = parameters.get("connectionReference")
@ -1424,11 +1433,11 @@ class MethodSharepoint(MethodBase):
return ActionResult( return ActionResult(
success=True, success=True,
documents=[ documents=[
{ ActionDocument(
"documentName": f"sharepoint_upload_{self._format_timestamp_for_filename()}{output_extension}", documentName=f"sharepoint_upload_{self._format_timestamp_for_filename()}{output_extension}",
"documentData": result_data, documentData=json.dumps(result_data, indent=2),
"mimeType": output_mime_type mimeType=output_mime_type
} )
] ]
) )
@ -1442,13 +1451,16 @@ class MethodSharepoint(MethodBase):
@action @action
async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult: async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
""" """
List documents in SharePoint folders across accessible sites GENERAL:
- Purpose: List documents and folders in SharePoint paths across sites.
- Input requirements: connectionReference (required); optional pathObject or pathQuery; includeSubfolders.
- Output format: JSON with folder items and metadata.
Parameters: Parameters:
connectionReference (str): Reference to the Microsoft connection - connectionReference (str, required): Microsoft connection label.
pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action - pathObject (str, optional): Reference to a previous path result.
pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites) - pathQuery (str, optional): Path query if no pathObject.
includeSubfolders (bool, optional): Whether to include subfolders (default: False) - includeSubfolders (bool, optional): Include one level of subfolders. Default: False.
""" """
try: try:
connectionReference = parameters.get("connectionReference") connectionReference = parameters.get("connectionReference")
@ -1817,11 +1829,11 @@ class MethodSharepoint(MethodBase):
return ActionResult( return ActionResult(
success=True, success=True,
documents=[ documents=[
{ ActionDocument(
"documentName": f"sharepoint_document_list_{self._format_timestamp_for_filename()}{output_extension}", documentName=f"sharepoint_document_list_{self._format_timestamp_for_filename()}{output_extension}",
"documentData": result_data, documentData=json.dumps(result_data, indent=2),
"mimeType": output_mime_type mimeType=output_mime_type
} )
] ]
) )

View file

@ -1,437 +0,0 @@
import logging
import csv
import io
import json as _json
from typing import Any, Dict
from modules.workflows.methods.methodBase import MethodBase, action
from modules.datamodels.datamodelWorkflow import ActionResult, ActionDocument
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, Priority
from modules.datamodels.datamodelWeb import (
WebSearchRequest,
WebCrawlRequest,
WebScrapeRequest,
)
logger = logging.getLogger(__name__)
class MethodWeb(MethodBase):
"""Web method implementation for web operations."""
def __init__(self, services):
super().__init__(services)
self.name = "web"
self.description = "Web search, crawling, and scraping operations using Tavily"
@action
async def search(self, parameters: Dict[str, Any]) -> ActionResult:
"""Perform a web search and output a CSV with the found URLs. Each result row contains columns "url" and "title".
Parameters:
query (str, required): Search query.
maxResults (int, optional): Max number of results. Default: 10.
searchDepth ("basic"|"advanced", optional): Search depth. Default: provider default.
timeRange ("d"|"w"|"m"|"y", optional): Limit to last day/week/month/year.
topic ("general"|"news"|"academic", optional): Result domain preference.
includeDomains (list[str], optional): Only include these domains.
excludeDomains (list[str], optional): Exclude these domains.
language (str, optional): ISO code like "de", "en" to bias results.
includeAnswer (bool, optional): Ask provider to generate a short answer.
includeRawContent (bool, optional): Include raw content where possible.
"""
try:
# Prepare request data (generic, no region/language bias)
raw_query = parameters.get("query")
max_results = parameters.get("maxResults", 10)
if not raw_query or not isinstance(raw_query, str):
return ActionResult(success=False, error="Search query is required")
web_search_request = WebSearchRequest(
query=raw_query.strip(),
max_results=max_results,
search_depth=parameters.get("searchDepth"),
time_range=parameters.get("timeRange"),
topic=parameters.get("topic"),
include_domains=parameters.get("includeDomains"),
exclude_domains=parameters.get("excludeDomains"),
language=parameters.get("language"),
include_answer=parameters.get("includeAnswer"),
include_raw_content=parameters.get("includeRawContent"),
)
# Perform request via centralized service wrappers
web_search_result = await self.services.web.webSearch(web_search_request)
# Convert search results to CSV format (generic)
if web_search_result.success and web_search_result.documents:
csv_content = self._convert_web_result_to_csv(web_search_result)
csv_document = ActionDocument(
documentName=f"web_search_results.csv",
documentData=csv_content,
mimeType="text/csv"
)
return ActionResult(success=True, documents=[csv_document])
else:
return web_search_result
except Exception as e:
return ActionResult(success=False, error=str(e))
def _read_csv_with_urls(self, csv_content: str) -> list:
"""Read CSV content and extract URLs from url,title or title,url format (both ; and , delimiters)"""
urls = []
# Try both semicolon and comma delimiters
for delimiter in [';', ',']:
try:
reader = csv.DictReader(io.StringIO(csv_content), delimiter=delimiter)
for row in reader:
# Look for url column (case insensitive)
url = None
for key in row.keys():
if key.lower() == 'url':
url = row[key].strip()
break
if url and (url.startswith('http://') or url.startswith('https://')):
urls.append(url)
# If we found URLs with this delimiter, return them
if urls:
return urls
except Exception:
# Try next delimiter
continue
# If no valid CSV found, try simple text parsing as fallback
lines = csv_content.split('\n')
for line in lines:
line = line.strip()
if line and (line.startswith('http://') or line.startswith('https://')):
urls.append(line)
return urls
    @action
    async def crawl(self, parameters: Dict[str, Any]) -> ActionResult:
        """Crawl a list of URLs and extract text content.
        Parameters:
            documentList (list[str]|str, required): Reference(s) to documents containing URLs (e.g., CSV from search). Can be a single ref or list.
            expectedDocumentFormats (list, optional): Hint for downstream handling.
            extractDepth ("basic"|"advanced", optional): Extraction depth. Default: "advanced".
            format ("text"|"markdown", optional): Output format. Default: "text".
        """
        try:
            document_list = parameters.get("documentList")
            # Normalize to list if a single string reference is provided
            if isinstance(document_list, str):
                document_list = [document_list]
            if not document_list:
                return ActionResult(
                    success=False, error="No document list reference provided."
                )
            # Resolve document list reference to ChatDocument objects
            chat_documents = self.services.workflow.getChatDocumentsFromDocumentList(document_list)
            if not chat_documents:
                return ActionResult(
                    success=False,
                    error=f"No documents found for reference: {document_list}",
                )
            # Extract URLs from all documents and combine them.
            # URL sources tried per document, in order: CSV column parsing,
            # JSON search-result structures, then plain-text/regex scanning.
            all_urls = []
            import json
            import re
            for i, doc in enumerate(chat_documents):
                logger.info(f"Processing document {i+1}/{len(chat_documents)}: {doc.fileName}")
                # Get file data using the service center
                file_data = self.services.workflow.getFileData(doc.fileId)
                if not file_data:
                    logger.warning(f"Could not retrieve file data for document: {doc.fileName}")
                    continue
                content = file_data.decode("utf-8")
                # Try to parse as CSV first (for new CSV format)
                if doc.fileName.lower().endswith('.csv') or 'csv' in doc.mimeType.lower():
                    logger.info(f"Processing CSV file: {doc.fileName}")
                    doc_urls = self._read_csv_with_urls(content)
                else:
                    # Parse JSON to extract URLs from search results
                    try:
                        # The document structure from WebSearchActionResult
                        search_data = json.loads(content)
                        # Extract URLs from the search results structure
                        doc_urls = []
                        if isinstance(search_data, dict):
                            # Handle the document structure: documentData contains the actual search results
                            doc_data = search_data.get("documentData", search_data)
                            if "results" in doc_data and isinstance(doc_data["results"], list):
                                doc_urls = [
                                    result["url"]
                                    for result in doc_data["results"]
                                    if isinstance(result, dict) and "url" in result
                                ]
                            elif "urls" in doc_data and isinstance(doc_data["urls"], list):
                                # Fallback: if URLs are stored directly in a 'urls' field
                                doc_urls = [url for url in doc_data["urls"] if isinstance(url, str)]
                        # Fallback: try to parse as plain text with regex (for backward compatibility)
                        if not doc_urls:
                            logger.warning(
                                f"Could not extract URLs from JSON structure in {doc.fileName}, trying plain text parsing"
                            )
                            doc_urls = re.split(r"[\n,;]+", content)
                            doc_urls = [
                                u.strip()
                                for u in doc_urls
                                if u.strip()
                                and (
                                    u.strip().startswith("http://")
                                    or u.strip().startswith("https://")
                                )
                            ]
                    except json.JSONDecodeError:
                        # Fallback to plain text parsing if JSON parsing fails
                        logger.warning(f"Document {doc.fileName} is not valid JSON, trying plain text parsing")
                        doc_urls = re.split(r"[\n,;]+", content)
                        doc_urls = [
                            u.strip()
                            for u in doc_urls
                            if u.strip()
                            and (
                                u.strip().startswith("http://")
                                or u.strip().startswith("https://")
                            )
                        ]
                if doc_urls:
                    all_urls.extend(doc_urls)
                    logger.info(f"Extracted {len(doc_urls)} URLs from {doc.fileName}")
                else:
                    logger.warning(f"No valid URLs found in document: {doc.fileName}")
            if not all_urls:
                return ActionResult(
                    success=False, error="No valid URLs found in any of the documents."
                )
            # Remove duplicates while preserving order
            unique_urls = list(dict.fromkeys(all_urls))
            logger.info(f"Extracted {len(unique_urls)} unique URLs from {len(chat_documents)} documents")
            # Prepare request data with normalization: invalid enum values are
            # coerced to safe defaults rather than rejected.
            allowed_extract_depth = {"basic", "advanced"}
            allowed_formats = {"text", "markdown"}
            extract_depth = parameters.get("extractDepth")
            if extract_depth and extract_depth not in allowed_extract_depth:
                logger.warning(f"Invalid extractDepth '{extract_depth}' provided. Falling back to 'advanced'.")
                extract_depth = "advanced"
            fmt = parameters.get("format")
            if fmt and fmt not in allowed_formats:
                logger.warning(f"Invalid format '{fmt}' provided. Falling back to 'text'.")
                fmt = "text"
            web_crawl_request = WebCrawlRequest(
                urls=unique_urls,
                extract_depth=extract_depth,
                format=fmt,
            )
            # Perform request via centralized service wrappers
            web_crawl_result = await self.services.web.webCrawl(web_crawl_request)
            # Convert and enrich with concise summaries per URL for better context
            if web_crawl_result.success:
                try:
                    doc = web_crawl_result.documents[0]
                    results = getattr(doc.documentData, "results", [])
                    enriched = []
                    # Summarize each result briefly using AI for added context;
                    # any per-item failure degrades to an empty summary.
                    for item in results:
                        url = str(getattr(item, "url", ""))
                        content = str(getattr(item, "content", ""))
                        summary = ""
                        try:
                            if content:
                                prompt = (
                                    "Summarize the following webpage content in 3-5 concise bullet points. "
                                    "Focus on key points, figures, named entities (companies/institutions), and location context. "
                                    "Return only bullet points without any preface."
                                )
                                # NOTE(review): 'context' is computed but never
                                # passed to callAi (documents=None, and the
                                # prompt does not embed it) — the summary call
                                # appears to receive no page content; confirm
                                # whether this is intended.
                                context = content[:4000]
                                # Centralized AI summary (balanced analyse_content)
                                summary = await self.services.ai.callAi(
                                    prompt=prompt,
                                    documents=None,
                                    options=AiCallOptions(
                                        operationType=OperationType.ANALYSE_CONTENT,
                                        priority=Priority.BALANCED,
                                        compressPrompt=True,
                                        compressContext=False,
                                        processingMode="advanced",
                                        maxCost=0.05,
                                        maxProcessingTime=30
                                    )
                                )
                                summary = summary.strip()
                        except Exception:
                            summary = ""
                        enriched.append({
                            "url": url,
                            "summary": summary,
                            "snippet": content[:500]
                        })
                    import json as _json
                    payload = {
                        "success": True,
                        "total_count": len(enriched),
                        "results": enriched,
                    }
                    json_content = _json.dumps(payload, ensure_ascii=False, indent=2)
                except Exception:
                    # Fallback to original conversion
                    json_content = self._convert_web_result_to_json(web_crawl_result)
                json_document = ActionDocument(
                    documentName=f"web_crawl_results.json",
                    documentData=json_content,
                    mimeType="application/json"
                )
                return ActionResult(success=True, documents=[json_document])
            else:
                return web_crawl_result
        except Exception as e:
            logger.error(f"Error in crawl method: {str(e)}")
            return ActionResult(success=False, error=str(e))
@action
async def scrape(self, parameters: Dict[str, Any]) -> ActionResult:
"""Search and then crawl the found URLs in one step. To use for market analysis, web research, internet searches
Parameters:
query (str, required): Search query.
maxResults (int, optional): Max number of results. Default: 10.
searchDepth ("basic"|"advanced", optional): Search depth.
timeRange ("d"|"w"|"m"|"y", optional): Time window.
topic ("general"|"news"|"academic", optional): Result domain preference.
includeDomains (list[str], optional): Only include these domains.
excludeDomains (list[str], optional): Exclude these domains.
language (str, optional): ISO language bias.
includeAnswer (bool, optional): Ask provider to include an answer.
includeRawContent (bool, optional): Include raw content where possible.
extractDepth ("basic"|"advanced", optional): Crawl extraction depth. Default: "advanced".
format ("text"|"markdown", optional): Crawl output format. Default: "text".
"""
try:
query = parameters.get("query")
max_results = parameters.get("maxResults", 10)
# Normalize optional enums to avoid validation errors
allowed_search_depth = {"basic", "advanced"}
allowed_extract_depth = {"basic", "advanced"}
allowed_formats = {"text", "markdown"}
search_depth = parameters.get("searchDepth")
if search_depth and search_depth not in allowed_search_depth:
logger.warning(f"Invalid searchDepth '{search_depth}' provided. Falling back to None.")
search_depth = None
extract_depth = parameters.get("extractDepth")
if extract_depth and extract_depth not in allowed_extract_depth:
logger.warning(f"Invalid extractDepth '{extract_depth}' provided. Falling back to 'advanced'.")
extract_depth = "advanced"
fmt = parameters.get("format")
if fmt and fmt not in allowed_formats:
logger.warning(f"Invalid format '{fmt}' provided. Falling back to 'text'.")
fmt = "text"
if not query:
return ActionResult(success=False, error="Search query is required")
# Prepare request data
web_scrape_request = WebScrapeRequest(
query=query,
max_results=max_results,
search_depth=search_depth,
time_range=parameters.get("timeRange"),
topic=parameters.get("topic"),
include_domains=parameters.get("includeDomains"),
exclude_domains=parameters.get("excludeDomains"),
language=parameters.get("language"),
include_answer=parameters.get("includeAnswer"),
include_raw_content=parameters.get("includeRawContent"),
extract_depth=extract_depth,
format=fmt,
)
# Perform request via centralized service wrappers
web_scrape_result = await self.services.web.webScrape(web_scrape_request)
# Convert to proper JSON format
if web_scrape_result.success:
json_content = self._convert_web_result_to_json(web_scrape_result)
json_document = ActionDocument(
documentName=f"web_scrape_results.json",
documentData=json_content,
mimeType="application/json"
)
return ActionResult(
success=True,
documents=[json_document]
)
else:
return web_scrape_result
except Exception as e:
return ActionResult(success=False, error=str(e))
# Helpers
def _convert_web_result_to_json(self, web_result):
if not getattr(web_result, 'success', False) or not getattr(web_result, 'documents', None):
return _json.dumps({"success": getattr(web_result, 'success', False), "error": getattr(web_result, 'error', None)})
document_data = web_result.documents[0].documentData
result_dict = {
"success": True,
"results": [
{
"url": str(getattr(result, 'url', "")),
"content": getattr(result, 'content', "")
}
for result in getattr(document_data, 'results', [])
],
"total_count": getattr(document_data, 'total_count', 0)
}
if hasattr(document_data, 'urls'):
result_dict["urls"] = [str(url) for url in getattr(document_data, 'urls', [])]
elif hasattr(document_data, 'query'):
result_dict["query"] = getattr(document_data, 'query', None)
return _json.dumps(result_dict, indent=2, ensure_ascii=False)
def _convert_web_result_to_csv(self, web_search_result):
if not getattr(web_search_result, 'success', False) or not getattr(web_search_result, 'documents', None):
return ""
output = io.StringIO()
writer = csv.writer(output, delimiter=';')
writer.writerow(['url', 'title'])
document_data = web_search_result.documents[0].documentData
for result in getattr(document_data, 'results', []):
writer.writerow([str(getattr(result, 'url', "")), getattr(result, 'title', "")])
return output.getvalue()

View file

@ -0,0 +1,9 @@
# adaptive module for React mode
# Provides adaptive learning capabilities
from .intentAnalyzer import IntentAnalyzer, DataType, ExpectedFormat
from .contentValidator import ContentValidator
from .learningEngine import LearningEngine
from .progressTracker import ProgressTracker
__all__ = ['IntentAnalyzer', 'ContentValidator', 'LearningEngine', 'ProgressTracker', 'DataType', 'ExpectedFormat']

View file

@ -0,0 +1,308 @@
# contentValidator.py
# Content validation for adaptive React mode
import re
import logging
from typing import List, Dict, Any
logger = logging.getLogger(__name__)
class ContentValidator:
    """Validates delivered content against user intent.

    Each delivered document is checked for data-type match, format match,
    success criteria and quality; per-document details are aggregated into
    a single report consumed by the adaptive React loop.
    """
    def __init__(self):
        # Stateless validator; nothing to initialize.
        pass
    def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
        """Validates delivered content against user intent.

        Args:
            documents: Delivered documents (objects exposing documentData).
            intent: Intent analysis dict (dataType, expectedFormat,
                successCriteria, ...).

        Returns:
            Dict with overallSuccess (bool), qualityScore (0.0-1.0),
            validationDetails (one entry per document) and
            improvementSuggestions.
        """
        try:
            validationDetails = []
            for doc in documents:
                content = self._extractContent(doc)
                detail = self._validateSingleDocument(content, doc, intent)
                validationDetails.append(detail)
            # Overall success: every criterion of every document must be met.
            # Bug fix: the previous code truth-tested the criteria *lists*,
            # so any non-empty list (even all-False) counted as success, and
            # an empty document set was reported as success too.
            overallSuccess = bool(validationDetails) and all(
                all(detail.get("successCriteriaMet", [False]))
                for detail in validationDetails
            )
            # Calculate quality score (average of per-document scores)
            qualityScore = self._calculateQualityScore(validationDetails)
            # Generate improvement suggestions
            improvementSuggestions = self._generateImprovementSuggestions(validationDetails, intent)
            return {
                "overallSuccess": overallSuccess,
                "qualityScore": qualityScore,
                "validationDetails": validationDetails,
                "improvementSuggestions": improvementSuggestions
            }
        except Exception as e:
            logger.error(f"Error validating content: {str(e)}")
            return self._createFailedValidationResult(str(e))
    def _extractContent(self, doc: Any) -> str:
        """Extracts the text content from a document, falling back to ''."""
        try:
            if hasattr(doc, 'documentData'):
                data = doc.documentData
                # Dict payloads keep the text under 'content'; anything else
                # is stringified as-is.
                if isinstance(data, dict) and 'content' in data:
                    return str(data['content'])
                else:
                    return str(data)
            return ""
        except Exception:
            return ""
    def _validateSingleDocument(self, content: str, doc: Any, intent: Dict[str, Any]) -> Dict[str, Any]:
        """Validates a single document against intent."""
        # Check data type match
        dataTypeMatch = self._checkDataTypeMatch(content, intent.get("dataType", "unknown"))
        # Check format match
        formatMatch = self._checkFormatMatch(content, intent.get("expectedFormat", "unknown"))
        # Calculate quality score
        qualityScore = self._calculateDocumentQualityScore(content, intent)
        # Check success criteria
        successCriteriaMet = self._checkSuccessCriteria(content, intent)
        # Identify specific issues
        specificIssues = self._identifySpecificIssues(content, intent)
        # Generate improvement suggestions
        improvementSuggestions = self._generateDocumentImprovementSuggestions(content, intent)
        return {
            "documentName": getattr(doc, 'documentName', 'Unknown'),
            "dataTypeMatch": dataTypeMatch,
            "formatMatch": formatMatch,
            "qualityScore": qualityScore,
            "successCriteriaMet": successCriteriaMet,
            "specificIssues": specificIssues,
            "improvementSuggestions": improvementSuggestions
        }
    def _checkDataTypeMatch(self, content: str, dataType: str) -> bool:
        """Checks if content matches the expected data type."""
        if dataType == "numbers":
            return self._containsNumbers(content)
        elif dataType == "text":
            return self._containsText(content)
        elif dataType == "documents":
            return self._containsDocumentContent(content)
        elif dataType == "analysis":
            return self._containsAnalysis(content)
        elif dataType == "code":
            return self._containsCode(content)
        else:
            return True  # Unknown type, assume match
    def _containsNumbers(self, content: str) -> bool:
        """Checks if content contains actual numbers (not code)."""
        # Look for actual numbers in the content
        numbers = re.findall(r'\b\d+\b', content)
        # Check if it's code (contains function definitions, etc.)
        isCode = any(keyword in content.lower() for keyword in [
            'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
            'return', 'print(', 'console.log', 'public ', 'private '
        ])
        # If it's code, it doesn't contain actual numbers
        if isCode:
            return False
        # If it has numbers and it's not code, it contains actual numbers
        return len(numbers) > 0
    def _containsText(self, content: str) -> bool:
        """Checks if content contains readable text."""
        # Remove numbers and special characters
        textContent = re.sub(r'[^\w\s]', '', content)
        words = textContent.split()
        # Check if there are enough words to be considered text
        return len(words) > 5
    def _containsDocumentContent(self, content: str) -> bool:
        """Checks if content is suitable for document creation."""
        # Check for structured content.
        # NOTE(review): the trailing '' entries look like garbled unicode
        # bullet characters; empty strings match any content — confirm the
        # original markers.
        hasStructure = any(indicator in content for indicator in [
            '\n', '\t', '|', '-', '*', '1.', '2.', '', ''
        ])
        # Check for meaningful content
        hasMeaningfulContent = len(content.strip()) > 50
        return hasStructure and hasMeaningfulContent
    def _containsAnalysis(self, content: str) -> bool:
        """Checks if content contains analysis."""
        analysisIndicators = [
            'analysis', 'findings', 'conclusion', 'summary', 'insights',
            'trends', 'patterns', 'comparison', 'evaluation', 'assessment'
        ]
        contentLower = content.lower()
        return any(indicator in contentLower for indicator in analysisIndicators)
    def _containsCode(self, content: str) -> bool:
        """Checks if content contains code."""
        codeIndicators = [
            'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
            'return', 'print(', 'console.log', 'public ', 'private ', 'void ',
            'int ', 'string ', 'var ', 'let ', 'const '
        ]
        contentLower = content.lower()
        return any(indicator in contentLower for indicator in codeIndicators)
    def _checkFormatMatch(self, content: str, expectedFormat: str) -> bool:
        """Checks if content matches expected format."""
        if expectedFormat == "raw_data":
            # Raw data should be simple, not heavily formatted
            return not any(indicator in content for indicator in [
                '<html>', '<div>', '<table>', '## ', '### ', '**', '__'
            ])
        elif expectedFormat == "formatted":
            # Formatted content should have structure
            return any(indicator in content for indicator in [
                '\n', '\t', '|', '-', '*', '1.', '2.', ''
            ])
        elif expectedFormat == "structured":
            # Structured content should have clear organization
            return any(indicator in content for indicator in [
                '{', '}', '[', ']', '|', '\t', ' '
            ])
        else:
            return True  # Unknown format, assume match
    def _checkSuccessCriteria(self, content: str, intent: Dict[str, Any]) -> List[bool]:
        """Checks if content meets success criteria.

        Returns one boolean per criterion in intent["successCriteria"].
        """
        criteriaMet = []
        successCriteria = intent.get("successCriteria", [])
        for criterion in successCriteria:
            if 'prime numbers' in criterion.lower():
                # Check if content contains actual prime numbers, not code
                hasNumbers = bool(re.search(r'\b\d+\b', content))
                isNotCode = not any(keyword in content.lower() for keyword in [
                    'def ', 'function', 'import ', 'class '
                ])
                criteriaMet.append(hasNumbers and isNotCode)
            elif 'document' in criterion.lower():
                # Check if content is suitable for document creation
                hasStructure = any(indicator in content for indicator in [
                    '\n', '\t', '|', '-', '*', '1.', '2.'
                ])
                criteriaMet.append(hasStructure)
            elif 'format' in criterion.lower():
                # Check if content is properly formatted
                hasFormatting = any(indicator in content for indicator in [
                    '\n', '\t', '|', '-', '*', '1.', '2.', ''
                ])
                criteriaMet.append(hasFormatting)
            else:
                # Generic check - content should not be empty
                criteriaMet.append(len(content.strip()) > 0)
        return criteriaMet
    def _calculateDocumentQualityScore(self, content: str, intent: Dict[str, Any]) -> float:
        """Calculates a 0.0-1.0 quality score for a single document."""
        score = 0.0
        # Base score for having content
        if len(content.strip()) > 0:
            score += 0.2
        # Score for data type match
        if self._checkDataTypeMatch(content, intent.get("dataType", "unknown")):
            score += 0.3
        # Score for format match
        if self._checkFormatMatch(content, intent.get("expectedFormat", "unknown")):
            score += 0.2
        # Score for success criteria (proportional to criteria met)
        successCriteriaMet = self._checkSuccessCriteria(content, intent)
        if successCriteriaMet:
            successRate = sum(successCriteriaMet) / len(successCriteriaMet)
            score += 0.3 * successRate
        return min(score, 1.0)
    def _calculateQualityScore(self, validationDetails: List[Dict[str, Any]]) -> float:
        """Calculates overall quality score (mean of per-document scores)."""
        if not validationDetails:
            return 0.0
        totalScore = sum(detail.get("qualityScore", 0) for detail in validationDetails)
        return totalScore / len(validationDetails)
    def _identifySpecificIssues(self, content: str, intent: Dict[str, Any]) -> List[str]:
        """Identifies specific issues with the content."""
        issues = []
        # Check for common issues
        if intent.get("dataType") == "numbers" and self._containsCode(content):
            issues.append("Content contains code instead of actual numbers")
        if intent.get("expectedFormat") == "raw_data" and any(indicator in content for indicator in ['<html>', '## ', '**']):
            issues.append("Content is formatted when raw data was requested")
        if len(content.strip()) == 0:
            issues.append("Content is empty")
        return issues
    def _generateDocumentImprovementSuggestions(self, content: str, intent: Dict[str, Any]) -> List[str]:
        """Generates improvement suggestions for a single document."""
        suggestions = []
        dataType = intent.get("dataType", "unknown")
        expectedFormat = intent.get("expectedFormat", "unknown")
        if dataType == "numbers" and self._containsCode(content):
            suggestions.append("Deliver actual numbers, not code to generate them")
        if expectedFormat == "raw_data" and any(indicator in content for indicator in ['<html>', '## ']):
            suggestions.append("Provide raw data without formatting")
        if len(content.strip()) == 0:
            suggestions.append("Provide actual content")
        return suggestions
    def _generateImprovementSuggestions(self, validationDetails: List[Dict[str, Any]],
                                        intent: Dict[str, Any]) -> List[str]:
        """Generates improvement suggestions based on validation results."""
        suggestions = []
        # Check for common issues
        if not any(detail.get("dataTypeMatch", False) for detail in validationDetails):
            dataType = intent.get("dataType", "unknown")
            suggestions.append(f"Content should contain {dataType} data, not code or other formats")
        if not any(detail.get("formatMatch", False) for detail in validationDetails):
            expectedFormat = intent.get("expectedFormat", "unknown")
            suggestions.append(f"Content should be in {expectedFormat} format")
        # Add specific suggestions from validation details
        for detail in validationDetails:
            suggestions.extend(detail.get("improvementSuggestions", []))
        return list(set(suggestions))  # Remove duplicates (order not guaranteed)
    def _createFailedValidationResult(self, error: str) -> Dict[str, Any]:
        """Creates a failed validation result carrying the error message."""
        return {
            "overallSuccess": False,
            "qualityScore": 0.0,
            "validationDetails": [],
            "improvementSuggestions": [f"Validation failed: {error}"]
        }

View file

@ -0,0 +1,239 @@
# intentAnalyzer.py
# Intent analysis for adaptive React mode
import re
import logging
from typing import Dict, Any, List
from enum import Enum
logger = logging.getLogger(__name__)
class DataType(Enum):
    """Kind of data a user prompt asks for, as classified by IntentAnalyzer."""
    NUMBERS = "numbers"
    ANALYSIS = "analysis"
    TEXT = "text"
    DOCUMENTS = "documents"
    CODE = "code"
    # Fallback when no classification pattern matches the prompt.
    UNKNOWN = "unknown"
class ExpectedFormat(Enum):
    """Expected presentation of the output, as classified by IntentAnalyzer."""
    RAW_DATA = "raw_data"
    FORMATTED = "formatted"
    STRUCTURED = "structured"
    VISUAL = "visual"
    # Fallback when no format pattern matches the prompt.
    UNKNOWN = "unknown"
class IntentAnalyzer:
    """Analyzes user intent to understand what they actually want.

    Classification is pattern based: the prompt is matched against regex
    tables for the requested data type and output format, and success
    criteria are derived from prompt keywords plus the task context.
    """
    def __init__(self):
        # Regex tables; first matching entry (in insertion order) wins.
        self.dataTypePatterns = {
            DataType.NUMBERS: [
                r'\b(numbers?|digits?|count|list|sequence)\b',
                r'\b(prime|fibonacci|random|even|odd)\s+(numbers?)\b',
                r'\b(calculate|compute|generate)\s+(numbers?)\b',
                r'\b(first|last)\s+\d+\s+(numbers?)\b'
            ],
            DataType.TEXT: [
                r'\b(text|content|words?|sentences?|paragraphs?)\b',
                r'\b(write|create|generate)\s+(text|content)\b',
                r'\b(summary|description|explanation)\b',
                r'\b(article|essay|report)\b'
            ],
            DataType.DOCUMENTS: [
                r'\b(document|file|report|pdf|word|excel)\b',
                r'\b(create|generate|make)\s+(document|file|report)\b',
                r'\b(format|structure|organize)\s+(document)\b',
                r'\b(presentation|slides?)\b'
            ],
            DataType.ANALYSIS: [
                r'\b(analyze|analysis|examine|study|evaluate)\b',
                r'\b(insights?|findings?|results?)\b',
                r'\b(compare|contrast|evaluate)\b',
                r'\b(trends?|patterns?)\b'
            ],
            DataType.CODE: [
                r'\b(code|program|script|algorithm|function)\b',
                r'\b(write|create|develop)\s+(code|program|script)\b',
                r'\b(implement|build|construct)\b',
                r'\b(debug|fix|optimize)\s+(code)\b'
            ]
        }
        self.formatPatterns = {
            ExpectedFormat.RAW_DATA: [
                r'\b(raw|plain|simple|basic)\b',
                r'\b(numbers?|data|list)\b(?!\s+(in|as|with))',
                r'\b(just|only)\s+(numbers?|data)\b'
            ],
            ExpectedFormat.FORMATTED: [
                r'\b(formatted|structured|organized|presented)\b',
                r'\b(table|chart|graph|visual)\b',
                r'\b(pretty|nice|clean)\s+(format|presentation)\b',
                r'\b(professional|polished)\b'
            ],
            ExpectedFormat.STRUCTURED: [
                r'\b(json|xml|csv|structured)\b',
                r'\b(organized|categorized|grouped)\b',
                r'\b(systematic|methodical)\b',
                r'\b(database|spreadsheet)\b'
            ]
        }
    def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]:
        """Analyzes user intent from prompt and context.

        Returns a dict with primaryGoal, dataType, expectedFormat,
        qualityRequirements, successCriteria and confidenceScore; falls
        back to a low-confidence default on any error.
        """
        try:
            dataKind = self._classifyDataType(userPrompt)
            outputFormat = self._determineExpectedFormat(userPrompt)
            criteria = self._extractSuccessCriteria(userPrompt, context)
            return {
                "primaryGoal": self._extractPrimaryGoal(userPrompt),
                "dataType": dataKind.value,
                "expectedFormat": outputFormat.value,
                "qualityRequirements": self._assessQualityRequirements(userPrompt, context),
                "successCriteria": criteria,
                "confidenceScore": self._calculateConfidenceScore(dataKind, outputFormat, criteria)
            }
        except Exception as e:
            logger.error(f"Error analyzing user intent: {str(e)}")
            return self._createDefaultIntentAnalysis(userPrompt)
    def _extractPrimaryGoal(self, userPrompt: str) -> str:
        """Extracts the primary goal from user prompt (the trimmed prompt)."""
        return userPrompt.strip()
    def _classifyDataType(self, userPrompt: str) -> DataType:
        """Classifies the type of data the user wants."""
        lowered = userPrompt.lower()
        return next(
            (kind for kind, patterns in self.dataTypePatterns.items()
             if any(re.search(pattern, lowered) for pattern in patterns)),
            DataType.UNKNOWN
        )
    def _determineExpectedFormat(self, userPrompt: str) -> ExpectedFormat:
        """Determines the expected format of the output."""
        lowered = userPrompt.lower()
        return next(
            (fmt for fmt, patterns in self.formatPatterns.items()
             if any(re.search(pattern, lowered) for pattern in patterns)),
            ExpectedFormat.UNKNOWN
        )
    def _assessQualityRequirements(self, userPrompt: str, context: Any) -> Dict[str, Any]:
        """Assesses accuracy/completeness/format requirements from the prompt."""
        lowered = userPrompt.lower()
        def mentions(words):
            return any(word in lowered for word in words)
        # Accuracy requirements
        if mentions(['exact', 'precise', 'accurate', 'correct']):
            accuracyThreshold = 0.95
        elif mentions(['approximate', 'rough', 'estimate']):
            accuracyThreshold = 0.7
        else:
            accuracyThreshold = 0.8
        # Completeness requirements
        if mentions(['complete', 'full', 'comprehensive', 'all']):
            completenessThreshold = 0.95
        elif mentions(['summary', 'brief', 'overview']):
            completenessThreshold = 0.6
        else:
            completenessThreshold = 0.8
        # Format requirements
        if mentions(['formatted', 'structured', 'organized']):
            formatRequirement = "formatted"
        elif mentions(['raw', 'plain', 'simple']):
            formatRequirement = "raw"
        else:
            formatRequirement = "any"
        return {
            "accuracyThreshold": accuracyThreshold,
            "completenessThreshold": completenessThreshold,
            "formatRequirement": formatRequirement
        }
    def _extractSuccessCriteria(self, userPrompt: str, context: Any) -> List[str]:
        """Extracts success criteria from prompt keywords and task context."""
        lowered = userPrompt.lower()
        criteria: List[str] = []
        if 'first' in lowered and 'numbers' in lowered:
            criteria.append("Contains the first N numbers as requested")
        if 'prime' in lowered:
            criteria.append("Contains actual prime numbers, not code to generate them")
        if 'document' in lowered:
            criteria.append("Creates a properly formatted document")
        if 'format' in lowered:
            criteria.append("Content is properly formatted as requested")
        # Derive extra criteria from the current task step, when available.
        if getattr(context, 'task_step', None):
            objective = context.task_step.objective.lower()
            if 'word' in objective:
                criteria.append("Creates a Word document")
            if 'excel' in objective:
                criteria.append("Creates an Excel spreadsheet")
        return criteria or ["Delivers what the user requested"]
    def _calculateConfidenceScore(self, dataType: DataType, expectedFormat: ExpectedFormat,
                                  successCriteria: List[str]) -> float:
        """Calculates confidence (0.0-1.0) for the intent analysis."""
        contributions = [
            0.3 if dataType != DataType.UNKNOWN else 0.0,
            0.2 if expectedFormat != ExpectedFormat.UNKNOWN else 0.0,
            0.3 if len(successCriteria) > 0 else 0.0,
            0.2 if len(successCriteria) > 1 else 0.0,
        ]
        return min(sum(contributions), 1.0)
    def _createDefaultIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]:
        """Creates a low-confidence default analysis when analysis fails."""
        return {
            "primaryGoal": userPrompt,
            "dataType": "unknown",
            "expectedFormat": "unknown",
            "qualityRequirements": {
                "accuracyThreshold": 0.8,
                "completenessThreshold": 0.8,
                "formatRequirement": "any"
            },
            "successCriteria": ["Delivers what the user requested"],
            "confidenceScore": 0.1
        }

View file

@ -0,0 +1,166 @@
# learningEngine.py
# Learning engine for adaptive React mode
import json
import logging
from typing import Dict, Any, List
from datetime import datetime, timezone
logger = logging.getLogger(__name__)
class LearningEngine:
    """Learns from feedback and adapts future behavior.

    Strategies are keyed by "<dataType>_<expectedFormat>" and updated as
    feedback arrives: well-rated actions are reinforced, poorly-rated ones
    are recorded so they can be avoided next time.
    """
    def __init__(self):
        # strategy key -> strategy dict (see _createDefaultStrategy)
        self.strategies = {}
        # chronological record of every piece of feedback received
        self.feedbackHistory = []
    def learnFromFeedback(self, feedback: Dict[str, Any], context: Any, intent: Dict[str, Any]):
        """Learns from feedback and updates strategies.

        Args:
            feedback: Dict with actionAttempted, qualityScore, intentMatchScore.
            context: Workflow context (serialized for the history record).
            intent: Intent analysis dict used to key the strategy.
        """
        try:
            # Store feedback
            self.feedbackHistory.append({
                "feedback": feedback,
                "context": self._serializeContext(context),
                "intent": intent,
                "timestamp": datetime.now(timezone.utc).timestamp()
            })
            # Update strategies based on feedback
            self._updateStrategies(feedback, intent)
            logger.info(f"Learning from feedback: {feedback.get('actionAttempted', 'unknown')} - "
                       f"Quality: {feedback.get('qualityScore', 0):.2f}, Intent Match: {feedback.get('intentMatchScore', 0):.2f}")
        except Exception as e:
            logger.error(f"Error learning from feedback: {str(e)}")
    def getImprovedStrategy(self, context: Any, intent: Dict[str, Any]) -> Dict[str, Any]:
        """Returns the learned strategy for this intent, creating a default
        one (and caching it) on first use."""
        try:
            # Get strategy key based on intent
            strategyKey = self._getStrategyKey(intent)
            # Get existing strategy or create default
            if strategyKey in self.strategies:
                strategy = self.strategies[strategyKey]
                logger.info(f"Using learned strategy for {strategyKey}: {strategy}")
                return strategy
            else:
                # Create default strategy
                defaultStrategy = self._createDefaultStrategy(intent)
                self.strategies[strategyKey] = defaultStrategy
                logger.info(f"Created default strategy for {strategyKey}")
                return defaultStrategy
        except Exception as e:
            logger.error(f"Error getting improved strategy: {str(e)}")
            return self._createDefaultStrategy(intent)
    def _updateStrategies(self, feedback: Dict[str, Any], intent: Dict[str, Any]):
        """Updates the strategy for this intent based on one feedback entry."""
        strategyKey = self._getStrategyKey(intent)
        actionAttempted = feedback.get('actionAttempted', 'unknown')
        qualityScore = feedback.get('qualityScore', 0)
        intentMatchScore = feedback.get('intentMatchScore', 0)
        # Get or create strategy
        if strategyKey not in self.strategies:
            self.strategies[strategyKey] = self._createDefaultStrategy(intent)
        strategy = self.strategies[strategyKey]
        # Update based on success/failure; scores in (0.3, 0.7] leave the
        # strategy unchanged (ambiguous signal).
        if qualityScore > 0.7 and intentMatchScore > 0.7:
            # Successful action - reinforce it
            if 'successfulActions' not in strategy:
                strategy['successfulActions'] = []
            if actionAttempted not in strategy['successfulActions']:
                strategy['successfulActions'].append(actionAttempted)
            strategy['successRate'] = min(strategy.get('successRate', 0.5) + 0.1, 1.0)
            logger.info(f"Reinforced successful action: {actionAttempted}")
        elif qualityScore < 0.3 or intentMatchScore < 0.3:
            # Failed action - avoid it
            if 'failedActions' not in strategy:
                strategy['failedActions'] = []
            if actionAttempted not in strategy['failedActions']:
                strategy['failedActions'].append(actionAttempted)
            strategy['successRate'] = max(strategy.get('successRate', 0.5) - 0.1, 0.0)
            logger.info(f"Marked failed action to avoid: {actionAttempted}")
        # Update last modified
        strategy['lastModified'] = datetime.now(timezone.utc).timestamp()
    def _getStrategyKey(self, intent: Dict[str, Any]) -> str:
        """Gets the strategy key ("<dataType>_<expectedFormat>") for an intent."""
        dataType = intent.get('dataType', 'unknown')
        expectedFormat = intent.get('expectedFormat', 'unknown')
        return f"{dataType}_{expectedFormat}"
    def _createDefaultStrategy(self, intent: Dict[str, Any]) -> Dict[str, Any]:
        """Creates a default strategy for the intent."""
        dataType = intent.get('dataType', 'unknown')
        expectedFormat = intent.get('expectedFormat', 'unknown')
        # Create strategy based on intent type
        if dataType == 'numbers':
            return {
                'strategyId': f"numbers_{expectedFormat}",
                'successfulActions': [],
                'failedActions': [],
                'successRate': 0.5,
                'lastModified': datetime.now(timezone.utc).timestamp(),
                'recommendedPrompt': f"Deliver {dataType} data in {expectedFormat} format. Provide actual numbers, not code to generate them.",
                'avoidPrompt': "Do not ask AI to write code when user wants data. Deliver the data directly."
            }
        elif dataType == 'text':
            return {
                'strategyId': f"text_{expectedFormat}",
                'successfulActions': [],
                'failedActions': [],
                'successRate': 0.5,
                'lastModified': datetime.now(timezone.utc).timestamp(),
                'recommendedPrompt': f"Generate {dataType} content in {expectedFormat} format.",
                'avoidPrompt': "Ensure content is readable and well-structured."
            }
        elif dataType == 'documents':
            return {
                'strategyId': f"documents_{expectedFormat}",
                'successfulActions': [],
                'failedActions': [],
                'successRate': 0.5,
                'lastModified': datetime.now(timezone.utc).timestamp(),
                'recommendedPrompt': f"Create {dataType} in {expectedFormat} format with proper structure.",
                'avoidPrompt': "Ensure document is properly formatted and organized."
            }
        else:
            return {
                'strategyId': f"unknown_{expectedFormat}",
                'successfulActions': [],
                'failedActions': [],
                'successRate': 0.5,
                'lastModified': datetime.now(timezone.utc).timestamp(),
                'recommendedPrompt': f"Deliver {dataType} content in {expectedFormat} format.",
                'avoidPrompt': "Ensure content matches user requirements."
            }
    def _serializeContext(self, context: Any) -> Dict[str, Any]:
        """Serializes context for storage.

        Bug fix: task_step is an object with an 'objective' attribute, not a
        dict; the previous dict-style .get() call always raised and the
        except clause silently discarded the whole context.
        """
        try:
            task_step = getattr(context, 'task_step', None)
            return {
                "taskObjective": getattr(task_step, 'objective', '') if task_step is not None else '',
                "workflowId": getattr(context, 'workflow_id', ''),
                "availableDocuments": getattr(context, 'available_documents', [])
            }
        except Exception:
            return {}
    def getLearningSummary(self) -> Dict[str, Any]:
        """Gets a summary of what has been learned so far."""
        return {
            "totalStrategies": len(self.strategies),
            "totalFeedback": len(self.feedbackHistory),
            "strategies": list(self.strategies.keys()),
            "averageSuccessRate": sum(s.get('successRate', 0) for s in self.strategies.values()) / max(len(self.strategies), 1)
        }

View file

@ -0,0 +1,142 @@
# progressTracker.py
# Progress tracking for adaptive React mode
import logging
from typing import Dict, Any, List
from datetime import datetime, timezone
logger = logging.getLogger(__name__)
class ProgressTracker:
"""Tracks what has been accomplished and what's still needed"""
def __init__(self):
self.completedObjectives = []
self.partialAchievements = []
self.failedAttempts = []
self.learningInsights = []
self.currentPhase = "planning"
def updateProgress(self, result: Any, validation: Dict[str, Any], intent: Dict[str, Any]):
"""Updates progress tracking based on action result"""
try:
overallSuccess = validation.get('overallSuccess', False)
qualityScore = validation.get('qualityScore', 0)
improvementSuggestions = validation.get('improvementSuggestions', [])
if overallSuccess and qualityScore > 0.7:
# Successful completion
self.completedObjectives.append({
"objective": intent.get('primaryGoal', 'Unknown'),
"achievement": f"Quality score: {qualityScore:.2f}",
"qualityScore": qualityScore,
"timestamp": datetime.now(timezone.utc).timestamp()
})
self.currentPhase = "completed"
logger.info(f"Objective completed: {intent.get('primaryGoal', 'Unknown')}")
elif qualityScore > 0.3:
# Partial achievement
self.partialAchievements.append({
"objective": intent.get('primaryGoal', 'Unknown'),
"partialAchievement": f"Quality score: {qualityScore:.2f}",
"missingParts": improvementSuggestions,
"timestamp": datetime.now(timezone.utc).timestamp()
})
self.currentPhase = "partial"
logger.info(f"Partial achievement: {intent.get('primaryGoal', 'Unknown')}")
else:
# Failed attempt
self.failedAttempts.append({
"objective": intent.get('primaryGoal', 'Unknown'),
"failureReason": f"Quality score: {qualityScore:.2f}",
"learningOpportunity": improvementSuggestions,
"timestamp": datetime.now(timezone.utc).timestamp()
})
self.currentPhase = "failed"
logger.info(f"Failed attempt: {intent.get('primaryGoal', 'Unknown')}")
# Extract learning insights
if improvementSuggestions:
for suggestion in improvementSuggestions:
if suggestion not in self.learningInsights:
self.learningInsights.append(suggestion)
except Exception as e:
logger.error(f"Error updating progress: {str(e)}")
def getCurrentProgress(self) -> Dict[str, Any]:
"""Gets current progress state"""
return {
"completedObjectives": self.completedObjectives,
"partialAchievements": self.partialAchievements,
"failedAttempts": self.failedAttempts,
"learningInsights": self.learningInsights,
"currentPhase": self.currentPhase,
"nextActionsSuggested": self._getNextActionSuggestions()
}
def shouldContinue(self, progress: Dict[str, Any], validation: Dict[str, Any]) -> bool:
"""Determines if the task should continue"""
try:
# If we have completed objectives, don't continue
if progress.get('completedObjectives'):
return False
# If we have too many failed attempts, don't continue
if len(progress.get('failedAttempts', [])) >= 3:
return False
# If validation shows success, don't continue
if validation.get('overallSuccess', False):
return False
# Otherwise, continue
return True
except Exception as e:
logger.error(f"Error checking if should continue: {str(e)}")
return True # Default to continue on error
def _getNextActionSuggestions(self) -> List[str]:
"""Suggests next actions based on progress"""
suggestions = []
# If we have failed attempts, suggest avoiding those actions
if self.failedAttempts:
suggestions.append("Avoid actions that have failed before")
# If we have partial achievements, suggest building on them
if self.partialAchievements:
suggestions.append("Build on partial achievements")
# If we have learning insights, suggest applying them
if self.learningInsights:
suggestions.extend(self.learningInsights[:3]) # Top 3 insights
# Default suggestions
if not suggestions:
suggestions.append("Try a different approach")
suggestions.append("Focus on user intent")
return suggestions
def getProgressSummary(self) -> Dict[str, Any]:
    """Summarize tracked progress as counts, current phase and a success rate."""
    completed = len(self.completedObjectives)
    failed = len(self.failedAttempts)
    return {
        "totalCompleted": completed,
        "totalPartial": len(self.partialAchievements),
        "totalFailed": failed,
        "totalInsights": len(self.learningInsights),
        "currentPhase": self.currentPhase,
        # max(..., 1) guards against division by zero before any attempt
        "successRate": completed / max(completed + failed, 1),
    }
def reset(self):
    """Clear all tracked progress and return to the planning phase."""
    # Every history list starts out empty again
    for attr in ("completedObjectives", "partialAchievements",
                 "failedAttempts", "learningInsights"):
        setattr(self, attr, [])
    self.currentPhase = "planning"

View file

@ -0,0 +1 @@
# Core workflow processing modules

View file

@ -0,0 +1,302 @@
# actionExecutor.py
# Action execution functionality for workflows
import logging
from typing import Dict, Any, List
from modules.datamodels.datamodelChat import ActionResult, ActionItem, TaskStep
from modules.datamodels.datamodelChat import ChatWorkflow
from modules.workflows.processing.shared.methodDiscovery import methods
logger = logging.getLogger(__name__)
class ActionExecutor:
    """Handles execution of workflow actions.

    Resolves method/action names against the discovered method registry,
    executes single actions with tracing, and turns their results into
    ActionResult objects plus user-facing completion messages.
    """
    def __init__(self, services):
        # Shared services container (database interface, generation, utils, ...)
        self.services = services
    def _checkWorkflowStopped(self, workflow):
        """Check if workflow has been stopped by user and raise exception if so.

        Fix: the previous version raised the stop exception inside its own
        try-block, so its own `except Exception` caught it and - when the
        in-memory object was stale - silently swallowed the user's stop
        request. The status checks now happen outside the try, and the
        in-memory object is only consulted when the database lookup fails.
        """
        current_workflow = None
        try:
            # Get the current workflow status from the database to avoid stale data
            current_workflow = self.services.interfaceDbChat.getWorkflow(workflow.id)
        except Exception as e:
            # Database unavailable - fall back to the in-memory object below
            logger.warning(f"Could not check current workflow status from database: {str(e)}")
        if current_workflow is not None:
            if current_workflow.status == "stopped":
                logger.info("Workflow stopped by user, aborting action execution")
                raise Exception("Workflow was stopped by user")
        elif workflow and workflow.status == "stopped":
            logger.info("Workflow stopped by user (from in-memory object), aborting action execution")
            raise Exception("Workflow was stopped by user")
    async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
        """Execute a method action.

        Raises:
            ValueError: if the method or action name is not registered.
        """
        try:
            if methodName not in methods:
                raise ValueError(f"Unknown method: {methodName}")
            method = methods[methodName]
            if actionName not in method['actions']:
                raise ValueError(f"Unknown action: {actionName} for method {methodName}")
            action = method['actions'][actionName]
            # Execute the action
            return await action['method'](parameters)
        except Exception as e:
            logger.error(f"Error executing method {methodName}.{actionName}: {str(e)}")
            raise
    async def executeCompoundAction(self, compoundActionName: str, parameters: Dict[str, Any]) -> ActionResult:
        """Execute a compound action (method.action format)."""
        try:
            # Parse compound action name (e.g., "ai.process" -> method="ai", action="process")
            if '.' not in compoundActionName:
                raise ValueError(f"Invalid compound action name: {compoundActionName}. Expected format: method.action")
            methodName, actionName = compoundActionName.split('.', 1)
            # Execute using the existing method
            return await self.executeAction(methodName, actionName, parameters)
        except Exception as e:
            logger.error(f"Error executing compound action {compoundActionName}: {str(e)}")
            raise
    async def executeSingleAction(self, action: ActionItem, workflow: ChatWorkflow, taskStep: TaskStep,
                                  taskIndex: int = None, actionIndex: int = None, totalActions: int = None) -> ActionResult:
        """Execute a single action and return ActionResult with enhanced document processing.

        Aborts (raises) if the workflow was stopped; any other failure is
        recorded on the action and returned as an unsuccessful ActionResult.
        """
        try:
            # Check workflow status before executing action
            self._checkWorkflowStopped(workflow)
            # Use passed indices or fallback to '?'
            taskNum = taskIndex if taskIndex is not None else '?'
            actionNum = actionIndex if actionIndex is not None else '?'
            logger.info(f"=== TASK {taskNum} ACTION {actionNum}: {action.execMethod}.{action.execAction} ===")
            # Log input parameters
            inputDocs = action.execParameters.get('documentList', [])
            inputConnections = action.execParameters.get('connections', [])
            logger.info(f"Input documents: {inputDocs} (type: {type(inputDocs)})")
            if inputConnections:
                logger.info(f"Input connections: {inputConnections}")
            # Log all action parameters for debugging
            logger.info(f"All action parameters: {action.execParameters}")
            # Copy so the stored action parameters are not mutated
            enhancedParameters = action.execParameters.copy()
            if action.expectedDocumentFormats:
                enhancedParameters['expectedDocumentFormats'] = action.expectedDocumentFormats
                logger.info(f"Expected formats: {action.expectedDocumentFormats}")
            # Check workflow status again right before executing the action
            self._checkWorkflowStopped(workflow)
            result = await self.executeAction(
                methodName=action.execMethod,
                actionName=action.execAction,
                parameters=enhancedParameters
            )
            resultLabel = action.execResultLabel
            # Trace action result with full document metadata
            actionResultTrace = {
                "method": action.execMethod,
                "action": action.execAction,
                "success": result.success,
                "error": result.error,
                "resultLabel": resultLabel,
                "documentsCount": len(result.documents) if result.documents else 0
            }
            # Add full document metadata if documents exist
            if result.documents:
                actionResultTrace["documents"] = []
                for doc in result.documents:
                    docMetadata = {
                        "name": getattr(doc, 'documentName', 'Unknown'),
                        "mimeType": getattr(doc, 'mimeType', 'Unknown'),
                        "size": getattr(doc, 'size', 'Unknown'),
                        "created": getattr(doc, 'created', 'Unknown'),
                        "modified": getattr(doc, 'modified', 'Unknown'),
                        "typeGroup": getattr(doc, 'typeGroup', 'Unknown'),
                        "documentId": getattr(doc, 'documentId', 'Unknown'),
                        "reference": getattr(doc, 'reference', 'Unknown')
                    }
                    # Remove 'Unknown' values to keep it clean
                    docMetadata = {k: v for k, v in docMetadata.items() if v != 'Unknown'}
                    actionResultTrace["documents"].append(docMetadata)
            self._writeTraceLog("Action Result", actionResultTrace)
            # Process action result
            if result.success:
                action.setSuccess()
                # Extract result text from ALL documents using generation service
                action.result = self._extractResultText(result)
                # Preserve the action's execResultLabel for document routing
                # Action methods should NOT return resultLabel - this is managed by the action handler
                if not action.execResultLabel:
                    logger.warning(f"Action {action.execMethod}.{action.execAction} has no execResultLabel set")
                # Log action results
                logger.info("Action completed successfully")
                if result.documents:
                    logger.info(f"Output documents ({len(result.documents)}):")
                    for i, doc in enumerate(result.documents):
                        logger.info(f"  {i+1}. {doc.documentName}")
                else:
                    logger.info("Output: No documents created")
            else:
                action.setError(result.error or "Action execution failed")
                logger.error(f"Action failed: {result.error}")
                # Create database log entry for action failure
                self.services.interfaceDbChat.createLog({
                    "workflowId": workflow.id,
                    "message": f"❌ **Task {taskNum}**\n\n❌ **Action {actionNum}/{totalActions}** failed: {result.error}",
                    "type": "error"
                })
            # Log action summary
            logger.info(f"=== TASK {taskNum} ACTION {actionNum} COMPLETED ===")
            # Create action completion message with documents (generic)
            await self._createActionCompletionMessage(action, result, workflow, taskStep, taskIndex, actionIndex, totalActions)
            return ActionResult(
                success=result.success,
                documents=result.documents,  # Return original ActionDocument objects
                resultLabel=action.execResultLabel,  # Always use action's execResultLabel
                error=result.error or ""
            )
        except Exception as e:
            logger.error(f"Error executing single action: {str(e)}")
            action.setError(str(e))
            return ActionResult(
                success=False,
                documents=[],  # Empty documents for error case
                resultLabel=action.execResultLabel,
                error=str(e)
            )
    def _extractResultText(self, result: ActionResult) -> str:
        """Extract result text from ActionResult documents."""
        if not result.success or not result.documents:
            return ""
        # Extract text directly from ActionDocument objects
        resultParts = []
        for doc in result.documents:
            if hasattr(doc, 'documentData') and doc.documentData:
                resultParts.append(str(doc.documentData))
        # Join all document results with separators
        return "\n\n---\n\n".join(resultParts) if resultParts else ""
    async def _createActionCompletionMessage(self, action: ActionItem, result: ActionResult, workflow: ChatWorkflow,
                                             taskStep: TaskStep, taskIndex: int, actionIndex: int, totalActions: int):
        """Create action completion message with documents (generic)."""
        try:
            # Convert ActionDocument objects to ChatDocument objects for message creation
            createdDocuments = []
            if result.documents:
                createdDocuments = self.services.generation.createDocumentsFromActionResult(result, action, workflow, None)
            # Create action message using message creator (local import avoids a circular dependency)
            from modules.workflows.processing.core.messageCreator import MessageCreator
            messageCreator = MessageCreator(self.services)
            await messageCreator.createActionMessage(
                action=action,
                result=result,
                workflow=workflow,
                resultLabel=action.execResultLabel,
                createdDocuments=createdDocuments,
                taskStep=taskStep,
                taskIndex=taskIndex,
                actionIndex=actionIndex,
                totalActions=totalActions
            )
        except Exception as e:
            logger.error(f"Error creating action completion message: {str(e)}")
    def _writeTraceLog(self, contextText: str, data: Any) -> None:
        """Write trace data to configured trace file if in debug mode with improved JSON formatting."""
        try:
            import os
            import json
            from datetime import datetime, UTC
            # Only write if the logger is effectively in debug mode.
            # Fix: comparing `logger.level > logging.DEBUG` treated NOTSET (0)
            # as debug-enabled and ignored level inheritance; isEnabledFor
            # honors the effective level.
            if not logger.isEnabledFor(logging.DEBUG):
                return
            # Get log directory from configuration
            logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
            if not os.path.isabs(logDir):
                # If relative path, make it relative to the gateway directory
                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
                logDir = os.path.join(gatewayDir, logDir)
            # Ensure log directory exists
            os.makedirs(logDir, exist_ok=True)
            # Create trace file path
            traceFile = os.path.join(logDir, "log_trace.log")
            # Format the trace entry with better structure
            timestamp = datetime.fromtimestamp(self.services.utils.getUtcTimestamp(), UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
            # Create a structured trace entry
            traceEntry = f"[{timestamp}] {contextText}\n"
            traceEntry += "=" * 80 + "\n"
            # Add data if provided with improved formatting
            if data is not None:
                try:
                    if isinstance(data, (dict, list)):
                        # Format as pretty JSON with better settings
                        jsonStr = json.dumps(data, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                        traceEntry += f"JSON Data:\n{jsonStr}\n"
                    elif isinstance(data, str):
                        # For string data, try to parse as JSON first, then fall back to plain text
                        try:
                            parsed = json.loads(data)
                            jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                            traceEntry += f"JSON Data (parsed from string):\n{jsonStr}\n"
                        except (json.JSONDecodeError, TypeError):
                            # Not valid JSON, show as plain text with proper line breaks
                            formatted_data = data.replace('\\n', '\n')
                            traceEntry += f"Text Data:\n{formatted_data}\n"
                    else:
                        # For other types, convert to string and try to parse as JSON
                        dataStr = str(data)
                        try:
                            parsed = json.loads(dataStr)
                            jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                            traceEntry += f"JSON Data (parsed from object):\n{jsonStr}\n"
                        except (json.JSONDecodeError, TypeError):
                            # Not valid JSON, show as plain text with proper line breaks
                            formatted_data = dataStr.replace('\\n', '\n')
                            traceEntry += f"Object Data:\n{formatted_data}\n"
                except Exception:
                    # Fallback to simple string representation
                    traceEntry += f"Data (fallback): {str(data)}\n"
            else:
                traceEntry += "No data provided\n"
            traceEntry += "=" * 80 + "\n\n"
            # Write to trace file
            with open(traceFile, "a", encoding="utf-8") as f:
                f.write(traceEntry)
        except Exception:
            # Don't log trace errors to avoid recursion
            pass

View file

@ -0,0 +1,368 @@
# messageCreator.py
# Generic message creation for all workflow phases
import logging
from typing import Dict, Any, Optional, List
from modules.datamodels.datamodelChat import TaskPlan, TaskStep, ActionResult, ReviewResult
from modules.datamodels.datamodelChat import ChatWorkflow
logger = logging.getLogger(__name__)
class MessageCreator:
    """Handles creation of all workflow messages.

    Builds and persists user-facing chat messages for every workflow phase:
    task plan, task start/completion, per-action results, retries and errors.
    """
    def __init__(self, services):
        # Shared services container (database interface, utils, workflow helpers)
        self.services = services
    def _checkWorkflowStopped(self, workflow):
        """Check if workflow has been stopped by user and raise exception if so.

        Fix: the previous version raised the stop exception inside its own
        try-block, so its own `except Exception` caught it and - when the
        in-memory object was stale - silently swallowed the user's stop
        request. The status checks now happen outside the try, and the
        in-memory object is only consulted when the database lookup fails.
        """
        current_workflow = None
        try:
            # Get the current workflow status from the database to avoid stale data
            current_workflow = self.services.interfaceDbChat.getWorkflow(workflow.id)
        except Exception as e:
            # Database unavailable - fall back to the in-memory object below
            logger.warning(f"Could not check current workflow status from database: {str(e)}")
        if current_workflow is not None:
            if current_workflow.status == "stopped":
                logger.info("Workflow stopped by user, aborting message creation")
                raise Exception("Workflow was stopped by user")
        elif workflow and workflow.status == "stopped":
            logger.info("Workflow stopped by user (from in-memory object), aborting message creation")
            raise Exception("Workflow was stopped by user")
    async def createTaskPlanMessage(self, taskPlan: TaskPlan, workflow: ChatWorkflow):
        """Create a chat message containing the task plan with user-friendly messages."""
        try:
            # Check workflow status before creating message
            self._checkWorkflowStopped(workflow)
            # Build task plan summary
            taskSummary = "📋 **Task Plan**\n\n"
            # Get overall user message from task plan if available
            overallMessage = taskPlan.userMessage
            if overallMessage:
                taskSummary += f"{overallMessage}\n\n"
            # Add each task with its user message
            for task in taskPlan.tasks:
                if task.userMessage:
                    taskSummary += f"💬 {task.userMessage}\n"
                taskSummary += "\n"
            # Create workflow message
            messageData = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": taskSummary,
                "status": "step",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": self.services.utils.getUtcTimestamp(),
                "documentsLabel": "task_plan",
                "documents": [],
                # Add workflow context fields - use current workflow round instead of hardcoded 1
                "roundNumber": workflow.currentRound,  # Use current workflow round
                "taskNumber": 1,  # The plan precedes individual tasks; keep 1 so the UI does not filter the message
                "actionNumber": 0,
                # Add task progress status
                "taskProgress": "pending"
            }
            message = self.services.interfaceDbChat.createMessage(messageData)
            if message:
                workflow.messages.append(message)
                logger.info("Task plan message created successfully")
        except Exception as e:
            logger.error(f"Error creating task plan message: {str(e)}")
    async def createTaskStartMessage(self, taskStep: TaskStep, workflow: ChatWorkflow, taskIndex: int, totalTasks: int):
        """Create a task start message for the user."""
        try:
            # Check workflow status before creating message
            self._checkWorkflowStopped(workflow)
            # Create a task start message for the user
            taskProgress = f"{taskIndex}/{totalTasks}" if totalTasks is not None else str(taskIndex)
            taskStartMessage = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": f"🚀 **Task {taskProgress}**",
                "status": "step",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": self.services.utils.getUtcTimestamp(),
                "documentsLabel": f"task_{taskIndex}_start",
                "documents": [],
                # Add workflow context fields
                "roundNumber": workflow.currentRound,  # Use current workflow round
                "taskNumber": taskIndex,
                "actionNumber": 0,
                # Add task progress status
                "taskProgress": "running"
            }
            # Add user-friendly message if available
            if taskStep.userMessage:
                taskStartMessage["message"] += f"\n\n💬 {taskStep.userMessage}"
            message = self.services.interfaceDbChat.createMessage(taskStartMessage)
            if message:
                workflow.messages.append(message)
                logger.info(f"Task start message created for task {taskIndex}")
        except Exception as e:
            logger.error(f"Error creating task start message: {str(e)}")
    async def createActionMessage(self, action, result: ActionResult, workflow: ChatWorkflow, resultLabel: str = None,
                                  createdDocuments: List = None, taskStep: TaskStep = None,
                                  taskIndex: int = None, actionIndex: int = None, totalActions: int = None):
        """Create and store a message for the action result in the workflow with enhanced document processing.

        Returns the created message, or None on failure.
        """
        try:
            # Check workflow status before creating action message
            self._checkWorkflowStopped(workflow)
            if resultLabel is None:
                resultLabel = action.execResultLabel
            # Log delivered documents
            if createdDocuments:
                logger.info(f"Result label: {resultLabel} - {len(createdDocuments)} documents")
            else:
                logger.info(f"Result label: {resultLabel} - No documents")
            # Get current workflow context and stats
            workflowContext = self.services.workflow.getWorkflowContext()
            workflowStats = self.services.workflow.getWorkflowStats()
            # Create a more meaningful message that includes task context
            taskObjective = taskStep.objective if taskStep else 'Unknown task'
            # Extract round, task, and action numbers from resultLabel first, then fallback to workflow context
            currentRound = self._extractRoundNumberFromLabel(resultLabel) if resultLabel else workflowContext.get('currentRound', 0)
            currentTask = self._extractTaskNumberFromLabel(resultLabel) if resultLabel else (taskIndex if taskIndex is not None else workflowContext.get('currentTask', 0))
            currentAction = self._extractActionNumberFromLabel(resultLabel) if resultLabel else (actionIndex if actionIndex is not None else workflowContext.get('currentAction', 0))
            totalActions = totalActions if totalActions is not None else workflowStats.get('totalActions', 0)
            # Debug logging for round number extraction
            logger.info(f"Action message round number extraction: resultLabel='{resultLabel}', extractedRound={currentRound}, workflowRound={workflowContext.get('currentRound', 0)}")
            # Build a user-friendly message; on failure the error details are appended
            messageText = f"**Action {currentAction}/{totalActions} ({action.execMethod}.{action.execAction})**\n\n"
            messageText += f"{taskObjective}\n\n"
            if not result.success:
                # ⚠️ FAILURE MESSAGE - Show error details to user
                errorDetails = result.error if result.error else "Unknown error occurred"
                messageText += f"{errorDetails}\n\n"
            # Build concise summary to persist for history context
            doc_count = len(createdDocuments) if createdDocuments else 0
            trimmed_msg = (messageText or "").strip().replace("\n", " ")
            if len(trimmed_msg) > 160:
                trimmed_msg = trimmed_msg[:157] + "..."
            messageData = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": messageText,
                "status": "step",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": self.services.utils.getUtcTimestamp(),
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "documentsLabel": resultLabel,
                "documents": createdDocuments,
                # Add workflow context fields - extract from resultLabel to match document reference
                "roundNumber": currentRound,
                "taskNumber": currentTask,
                "actionNumber": currentAction,
                "actionProgress": "success" if result.success else "fail",
                "summary": f"{action.execMethod}.{action.execAction}: {doc_count} docs | msg='{trimmed_msg}'"
            }
            # Add debugging for error messages
            if not result.success:
                logger.info(f"Creating ERROR message: {messageText}")
                logger.info(f"Message data: {messageData}")
            message = self.services.interfaceDbChat.createMessage(messageData)
            if message:
                workflow.messages.append(message)
                logger.info(f"Message created: {action.execMethod}.{action.execAction}")
                return message
            else:
                logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}")
                return None
        except Exception as e:
            logger.error(f"Error creating action message: {str(e)}")
            return None
    async def createTaskCompletionMessage(self, taskStep: TaskStep, workflow: ChatWorkflow, taskIndex: int,
                                          totalTasks: int, reviewResult: ReviewResult):
        """Create a task completion message for the user."""
        try:
            # Check workflow status before creating message
            self._checkWorkflowStopped(workflow)
            # Create a task completion message for the user
            taskProgress = f"{taskIndex}/{totalTasks}" if totalTasks is not None else str(taskIndex)
            # Enhanced completion message with criteria details
            completionMessage = f"🎯 **Task {taskProgress}**\n\n{reviewResult.reason or 'Task completed successfully'}"
            # Add criteria status if available
            if hasattr(reviewResult, 'met_criteria') and reviewResult.met_criteria:
                for criterion in reviewResult.met_criteria:
                    completionMessage += f"\n{criterion}"
            if hasattr(reviewResult, 'quality_score'):
                completionMessage += f"\n📊 Score {reviewResult.quality_score}/10"
            taskCompletionMessage = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": completionMessage,
                "status": "step",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": self.services.utils.getUtcTimestamp(),
                "documentsLabel": f"task_{taskIndex}_completion",
                "documents": [],
                # Add workflow context fields
                "roundNumber": workflow.currentRound,  # Use current workflow round
                "taskNumber": taskIndex,
                "actionNumber": 0,
                # Add task progress status
                "taskProgress": "success"
            }
            message = self.services.interfaceDbChat.createMessage(taskCompletionMessage)
            if message:
                workflow.messages.append(message)
                logger.info(f"Task completion message created for task {taskIndex}")
        except Exception as e:
            logger.error(f"Error creating task completion message: {str(e)}")
    async def createRetryMessage(self, taskStep: TaskStep, workflow: ChatWorkflow, taskIndex: int, reviewResult: ReviewResult):
        """Create a retry message for the user."""
        try:
            # Check workflow status before creating message
            self._checkWorkflowStopped(workflow)
            # Create retry message for user
            retryMessage = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": f"🔄 **Task {taskIndex}** needs retry: {reviewResult.improvements}",
                "status": "step",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": self.services.utils.getUtcTimestamp(),
                "documentsLabel": f"task_{taskIndex}_retry",
                "documents": [],
                "roundNumber": workflow.currentRound,
                "taskNumber": taskIndex,
                "actionNumber": 0,
                "taskProgress": "retry"
            }
            message = self.services.interfaceDbChat.createMessage(retryMessage)
            if message:
                workflow.messages.append(message)
                logger.info(f"Retry message created for task {taskIndex}")
        except Exception as e:
            logger.error(f"Error creating retry message: {str(e)}")
    async def createErrorMessage(self, taskStep: TaskStep, workflow: ChatWorkflow, taskIndex: int, errorDetails: str):
        """Create an error message for the user."""
        try:
            # Check workflow status before creating message
            self._checkWorkflowStopped(workflow)
            # Create user-facing error message for task failure
            errorMessage = f"**Task {taskIndex}**\n\n'{taskStep.objective}' failed\n\n"
            # Add specific error details if available
            if errorDetails:
                errorMessage += f"{errorDetails}\n\n"
            # Create workflow message for user
            messageData = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": errorMessage,
                "status": "step",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": self.services.utils.getUtcTimestamp(),
                "actionId": None,
                "actionMethod": "task",
                "actionName": "task_error",
                "documentsLabel": None,
                "documents": [],
                # Add workflow context fields
                "roundNumber": workflow.currentRound,  # Use current workflow round
                "taskNumber": taskIndex,
                "actionNumber": 0,
                # Add task progress status
                "taskProgress": "fail"
            }
            message = self.services.interfaceDbChat.createMessage(messageData)
            if message:
                workflow.messages.append(message)
                logger.info(f"Error message created for task {taskIndex}")
        except Exception as e:
            logger.error(f"Error creating error message: {str(e)}")
    def _extractRoundNumberFromLabel(self, label: str) -> int:
        """Extract round number from a document label like 'round1_task1_action1_diagram_analysis'."""
        try:
            if not label or not isinstance(label, str):
                return 0
            # Parse label format: round{round}_task{task}_action{action}_{context}
            if label.startswith('round'):
                roundNumber = label.split('_')[0][5:]  # drop the 'round' prefix
                if roundNumber:
                    return int(roundNumber)
            return 0
        except Exception as e:
            logger.warning(f"Could not extract round number from label '{label}': {str(e)}")
            return 0
    def _extractTaskNumberFromLabel(self, label: str) -> int:
        """Extract task number from a document label like 'round1_task1_action1_diagram_analysis'."""
        try:
            if not label or not isinstance(label, str):
                return 0
            # Parse label format: round{round}_task{task}_action{action}_{context}
            if '_task' in label:
                # Fix: also handle labels that end right after the number
                # (the previous version required a trailing '_' and returned 0)
                taskNumber = label.split('_task')[1].split('_')[0]
                if taskNumber:
                    return int(taskNumber)
            return 0
        except Exception as e:
            logger.warning(f"Could not extract task number from label '{label}': {str(e)}")
            return 0
    def _extractActionNumberFromLabel(self, label: str) -> int:
        """Extract action number from a document label like 'round1_task1_action1_diagram_analysis'."""
        try:
            if not label or not isinstance(label, str):
                return 0
            # Parse label format: round{round}_task{task}_action{action}_{context}
            if '_action' in label:
                # Fix: also handle labels that end right after the number
                # (the previous version required a trailing '_' and returned 0)
                actionNumber = label.split('_action')[1].split('_')[0]
                if actionNumber:
                    return int(actionNumber)
            return 0
        except Exception as e:
            logger.warning(f"Could not extract action number from label '{label}': {str(e)}")
            return 0

View file

@ -0,0 +1,333 @@
# taskPlanner.py
# Task planning functionality for workflows
import json
import logging
from typing import Dict, Any
from modules.datamodels.datamodelChat import TaskStep, TaskContext, TaskPlan
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
from modules.workflows.processing.shared.promptGenerationTaskplan import (
generateTaskPlanningPrompt
)
logger = logging.getLogger(__name__)
class TaskPlanner:
"""Handles task planning for workflows"""
def __init__(self, services):
    """Store the shared services container (AI, database interface, utils, user context)."""
    self.services = services
def _checkWorkflowStopped(self, workflow):
"""Check if workflow has been stopped by user and raise exception if so"""
try:
# Get the current workflow status from the database to avoid stale data
current_workflow = self.services.interfaceDbChat.getWorkflow(workflow.id)
if current_workflow and current_workflow.status == "stopped":
logger.info("Workflow stopped by user, aborting task planning")
raise Exception("Workflow was stopped by user")
except Exception as e:
# If we can't get the current status due to other database issues, fall back to the in-memory object
logger.warning(f"Could not check current workflow status from database: {str(e)}")
if workflow and workflow.status == "stopped":
logger.info("Workflow stopped by user (from in-memory object), aborting task planning")
raise Exception("Workflow was stopped by user")
async def generateTaskPlan(self, userInput: str, workflow) -> TaskPlan:
    """Generate a high-level task plan for the workflow.

    Builds a task-planning prompt, asks the AI service for a JSON plan,
    validates the parsed plan, detects the user's language and returns a
    TaskPlan of TaskStep objects.

    Args:
        userInput: Raw user input; overridden by services.currentUserPrompt when set.
        workflow: The workflow object (id and stop-status are consulted).

    Raises:
        Exception: if the workflow was stopped, the AI returns no/invalid
            JSON, the plan fails validation, or no valid tasks can be built.
    """
    try:
        # Check workflow status before generating task plan
        self._checkWorkflowStopped(workflow)
        logger.info(f"=== STARTING TASK PLAN GENERATION ===")
        logger.info(f"Workflow ID: {workflow.id}")
        logger.info(f"User Input: {userInput}")
        # Use stored user prompt if available, otherwise use the input
        actualUserPrompt = self.services.currentUserPrompt if self.services and hasattr(self.services, 'currentUserPrompt') and self.services.currentUserPrompt else userInput
        logger.info(f"Actual User Prompt: {actualUserPrompt}")
        # Check workflow status before calling AI service
        self._checkWorkflowStopped(workflow)
        # Create proper context object for task planning
        # For task planning, we need to create a minimal TaskStep since TaskContext requires it
        planningTaskStep = TaskStep(
            id="planning",
            objective=actualUserPrompt,
            dependencies=[],
            success_criteria=[],
            estimated_complexity="medium"
        )
        # Planning happens before any task has run, so all history fields are empty
        taskPlanningContext = TaskContext(
            task_step=planningTaskStep,
            workflow=workflow,
            workflow_id=workflow.id,
            available_documents=None,
            available_connections=None,
            previous_results=[],
            previous_handover=None,
            improvements=[],
            retry_count=0,
            previous_action_results=[],
            previous_review_result=None,
            is_regeneration=False,
            failure_patterns=[],
            failed_actions=[],
            successful_actions=[],
            criteria_progress={
                'met_criteria': set(),
                'unmet_criteria': set(),
                'attempt_history': []
            }
        )
        # Build prompt bundle (template + placeholders) using new API
        bundle = generateTaskPlanningPrompt(self.services, taskPlanningContext)
        taskPlanningPromptTemplate = bundle.prompt
        placeholders = bundle.placeholders
        # Log task planning prompt sent to AI
        logger.info("=== TASK PLANNING PROMPT SENT TO AI ===")
        # Trace task planning prompt
        self._writeTraceLog("Task Plan Prompt", taskPlanningPromptTemplate)
        self._writeTraceLog("Task Plan Placeholders", placeholders)
        # Centralized AI call: Task planning (quality, detailed) with placeholders
        options = AiCallOptions(
            operationType=OperationType.GENERATE_PLAN,
            priority=Priority.QUALITY,
            compressPrompt=False,
            compressContext=False,
            processingMode=ProcessingMode.DETAILED,
            maxCost=0.10,
            maxProcessingTime=30
        )
        # NOTE: despite its name, 'prompt' holds the AI *response* text from here on
        prompt = await self.services.ai.callAi(
            prompt=taskPlanningPromptTemplate,
            placeholders=placeholders,
            options=options
        )
        # Check if AI response is valid
        if not prompt:
            raise ValueError("AI service returned no response for task planning")
        # Log task planning response received
        logger.info("=== TASK PLANNING AI RESPONSE RECEIVED ===")
        logger.info(f"Response length: {len(prompt) if prompt else 0}")
        # Trace task planning response
        self._writeTraceLog("Task Plan Response", prompt)
        # Parse task plan response: take the outermost {...} span of the response
        try:
            jsonStart = prompt.find('{')
            jsonEnd = prompt.rfind('}') + 1
            if jsonStart == -1 or jsonEnd == 0:
                raise ValueError("No JSON found in response")
            jsonStr = prompt[jsonStart:jsonEnd]
            taskPlanDict = json.loads(jsonStr)
            if 'tasks' not in taskPlanDict:
                raise ValueError("Task plan missing 'tasks' field")
        except Exception as e:
            # Parsing failure degrades to an empty plan, which fails validation below
            logger.error(f"Error parsing task plan response: {str(e)}")
            taskPlanDict = {'tasks': []}
        if not self._validateTaskPlan(taskPlanDict):
            logger.error("Generated task plan failed validation")
            logger.error(f"AI Response: {prompt}")
            logger.error(f"Parsed Task Plan: {json.dumps(taskPlanDict, indent=2)}")
            raise Exception("AI-generated task plan failed validation - AI is required for task planning")
        if not taskPlanDict.get('tasks'):
            raise ValueError("Task plan contains no tasks")
        # LANGUAGE DETECTION: Determine user language once for the entire workflow
        # Priority: 1. languageUserDetected from AI response, 2. service.user.language, 3. "en"
        detectedLanguage = taskPlanDict.get('languageUserDetected', '').strip()
        serviceUserLanguage = getattr(self.services.user, 'language', '') if self.services and self.services.user else ''
        if detectedLanguage and len(detectedLanguage) == 2:  # Valid language code like "en", "de", "fr"
            userLanguage = detectedLanguage
            logger.info(f"Using detected language from AI response: {userLanguage}")
        elif serviceUserLanguage and len(serviceUserLanguage) == 2:
            userLanguage = serviceUserLanguage
            logger.info(f"Using language from service user object: {userLanguage}")
        else:
            userLanguage = "en"
            logger.info(f"Using default language: {userLanguage}")
        # Set the detected language in the service for use throughout the workflow
        if self.services and self.services.user:
            self.services.user.language = userLanguage
            logger.info(f"Set workflow user language to: {userLanguage}")
        # Convert each task dict into a TaskStep, skipping malformed entries
        tasks = []
        for i, taskDict in enumerate(taskPlanDict.get('tasks', [])):
            if not isinstance(taskDict, dict):
                logger.warning(f"Skipping invalid task {i+1}: not a dictionary")
                continue
            # Map old 'description' field to new 'objective' field
            if 'description' in taskDict and 'objective' not in taskDict:
                taskDict['objective'] = taskDict.pop('description')
            try:
                task = TaskStep(**taskDict)
                tasks.append(task)
            except Exception as e:
                logger.warning(f"Skipping invalid task {i+1}: {str(e)}")
                continue
        if not tasks:
            raise ValueError("No valid tasks could be created from AI response")
        taskPlan = TaskPlan(
            overview=taskPlanDict.get('overview', ''),
            tasks=tasks,
            userMessage=taskPlanDict.get('userMessage', '')
        )
        logger.info(f"Task plan generated successfully with {len(tasks)} tasks")
        logger.info(f"Workflow user language set to: {userLanguage}")
        return taskPlan
    except Exception as e:
        logger.error(f"Error in generateTaskPlan: {str(e)}")
        raise
def _validateTaskPlan(self, taskPlan: Dict[str, Any]) -> bool:
"""Validate task plan structure"""
try:
if not isinstance(taskPlan, dict):
logger.error("Task plan is not a dictionary")
return False
if 'tasks' not in taskPlan or not isinstance(taskPlan['tasks'], list):
logger.error(f"Task plan missing 'tasks' field or not a list. Found: {type(taskPlan.get('tasks', 'MISSING'))}")
return False
# First pass: collect all task IDs to validate dependencies
taskIds = set()
for task in taskPlan['tasks']:
if not isinstance(task, dict):
logger.error(f"Task is not a dictionary: {type(task)}")
return False
if 'id' not in task:
logger.error(f"Task missing 'id' field: {task}")
return False
taskIds.add(task['id'])
# Second pass: validate each task
for i, task in enumerate(taskPlan['tasks']):
if not isinstance(task, dict):
logger.error(f"Task {i} is not a dictionary: {type(task)}")
return False
requiredFields = ['id', 'objective', 'success_criteria']
missingFields = [field for field in requiredFields if field not in task]
if missingFields:
logger.error(f"Task {i} missing required fields: {missingFields}")
return False
# Check for duplicate IDs (shouldn't happen after first pass, but safety check)
if task['id'] in taskIds and list(taskPlan['tasks']).count(task['id']) > 1:
logger.error(f"Task {i} has duplicate ID: {task['id']}")
return False
dependencies = task.get('dependencies', [])
if not isinstance(dependencies, list):
logger.error(f"Task {i} dependencies is not a list: {type(dependencies)}")
return False
for dep in dependencies:
if dep not in taskIds and dep != 'task_0':
logger.error(f"Task {i} has invalid dependency: {dep} (available: {list(taskIds) + ['task_0']})")
return False
logger.info(f"Task plan validation successful with {len(taskIds)} tasks")
return True
except Exception as e:
logger.error(f"Error validating task plan: {str(e)}")
return False
def _writeTraceLog(self, contextText: str, data: Any) -> None:
    """Write trace data to configured trace file if in debug mode with improved JSON formatting.

    Appends a timestamped, '='-delimited entry to ``log_trace.log`` inside the
    configured log directory. ``data`` is rendered as pretty-printed JSON when
    possible (dicts/lists directly; strings and other objects are parsed as
    JSON opportunistically) and as plain text otherwise. All errors are
    swallowed so tracing can never break the workflow.

    Args:
        contextText: Short label describing what is being traced.
        data: Arbitrary payload rendered beneath the header; may be None.
    """
    try:
        # Imported locally so tracing stays self-contained and costs nothing
        # on the early-return path below.
        import os
        import json
        from datetime import datetime, UTC
        # Only write if logger is in debug mode
        # NOTE(review): this checks the logger's own level, not its effective
        # level -- a logger left at NOTSET (0) inherits its level from the
        # parent yet would still pass this test; confirm that is intended.
        if logger.level > logging.DEBUG:
            return
        # Get log directory from configuration
        logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
        if not os.path.isabs(logDir):
            # If relative path, make it relative to the gateway directory
            # (four directory levels above this module's file).
            gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
            logDir = os.path.join(gatewayDir, logDir)
        # Ensure log directory exists
        os.makedirs(logDir, exist_ok=True)
        # Create trace file path
        traceFile = os.path.join(logDir, "log_trace.log")
        # Format the trace entry with better structure (millisecond precision:
        # microseconds truncated by slicing off the last three digits).
        timestamp = datetime.fromtimestamp(self.services.utils.getUtcTimestamp(), UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
        # Create a structured trace entry
        traceEntry = f"[{timestamp}] {contextText}\n"
        traceEntry += "=" * 80 + "\n"
        # Add data if provided with improved formatting
        if data is not None:
            try:
                if isinstance(data, (dict, list)):
                    # Format as pretty JSON with better settings
                    # (default=str stringifies anything json cannot serialise)
                    jsonStr = json.dumps(data, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                    traceEntry += f"JSON Data:\n{jsonStr}\n"
                elif isinstance(data, str):
                    # For string data, try to parse as JSON first, then fall back to plain text
                    try:
                        parsed = json.loads(data)
                        jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                        traceEntry += f"JSON Data (parsed from string):\n{jsonStr}\n"
                    except (json.JSONDecodeError, TypeError):
                        # Not valid JSON, show as plain text with proper line breaks
                        # (turns literal backslash-n sequences into real newlines)
                        formatted_data = data.replace('\\n', '\n')
                        traceEntry += f"Text Data:\n{formatted_data}\n"
                else:
                    # For other types, convert to string and try to parse as JSON
                    dataStr = str(data)
                    try:
                        parsed = json.loads(dataStr)
                        jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                        traceEntry += f"JSON Data (parsed from object):\n{jsonStr}\n"
                    except (json.JSONDecodeError, TypeError):
                        # Not valid JSON, show as plain text with proper line breaks
                        formatted_data = dataStr.replace('\\n', '\n')
                        traceEntry += f"Object Data:\n{formatted_data}\n"
            except Exception as e:
                # Fallback to simple string representation
                traceEntry += f"Data (fallback): {str(data)}\n"
        else:
            traceEntry += "No data provided\n"
        traceEntry += "=" * 80 + "\n\n"
        # Write to trace file
        with open(traceFile, "a", encoding="utf-8") as f:
            f.write(traceEntry)
    except Exception as e:
        # Don't log trace errors to avoid recursion
        pass

View file

@ -0,0 +1,111 @@
# validator.py
# Validation logic for workflows
import logging
from typing import Dict, Any, List
logger = logging.getLogger(__name__)
class WorkflowValidator:
    """Validates AI-generated workflow artefacts (task plans and action lists).

    All validation methods log the precise failure reason and return a plain
    boolean; they never raise to callers.
    """

    def __init__(self, services):
        # Shared service container; kept for parity with other workflow
        # components (currently unused by the validation routines themselves).
        self.services = services

    def validateTask(self, taskPlan: Dict[str, Any]) -> bool:
        """Validate task plan structure.

        Requires a dict with a 'tasks' list whose entries are dicts carrying a
        unique 'id', an 'objective' and 'success_criteria'; dependencies must
        reference known task IDs (or the synthetic root 'task_0').
        """
        try:
            if not isinstance(taskPlan, dict):
                logger.error("Task plan is not a dictionary")
                return False
            if 'tasks' not in taskPlan or not isinstance(taskPlan['tasks'], list):
                logger.error(f"Task plan missing 'tasks' field or not a list. Found: {type(taskPlan.get('tasks', 'MISSING'))}")
                return False
            # First pass: collect all task IDs (and catch duplicates) so that
            # dependencies can be validated against the complete ID set below.
            taskIds = set()
            for i, task in enumerate(taskPlan['tasks']):
                if not isinstance(task, dict):
                    logger.error(f"Task is not a dictionary: {type(task)}")
                    return False
                if 'id' not in task:
                    logger.error(f"Task missing 'id' field: {task}")
                    return False
                # BUGFIX: the previous duplicate check compared whole task dicts
                # against an ID string via list().count() and could never fire;
                # duplicates must be detected BEFORE insertion into the set.
                if task['id'] in taskIds:
                    logger.error(f"Task {i} has duplicate ID: {task['id']}")
                    return False
                taskIds.add(task['id'])
            # Second pass: validate each task's required fields and dependencies
            for i, task in enumerate(taskPlan['tasks']):
                requiredFields = ['id', 'objective', 'success_criteria']
                missingFields = [field for field in requiredFields if field not in task]
                if missingFields:
                    logger.error(f"Task {i} missing required fields: {missingFields}")
                    return False
                dependencies = task.get('dependencies', [])
                if not isinstance(dependencies, list):
                    logger.error(f"Task {i} dependencies is not a list: {type(dependencies)}")
                    return False
                for dep in dependencies:
                    # 'task_0' is an implicit root dependency that never appears in the plan itself
                    if dep not in taskIds and dep != 'task_0':
                        logger.error(f"Task {i} has invalid dependency: {dep} (available: {list(taskIds) + ['task_0']})")
                        return False
            logger.info(f"Task plan validation successful with {len(taskIds)} tasks")
            return True
        except Exception as e:
            logger.error(f"Error validating task plan: {str(e)}")
            return False

    def validateAction(self, actions: List[Dict[str, Any]], context) -> bool:
        """Validate action structure.

        Accepts either the new compound format ('action' of the form
        "method.action") or the old split format (separate 'method' and
        'action' fields). Every action needs a dict 'parameters' payload and a
        'resultLabel' starting with 'round'.
        """
        try:
            if not isinstance(actions, list):
                logger.error("Actions must be a list")
                return False
            if len(actions) == 0:
                logger.warning("No actions generated")
                return False
            for i, action in enumerate(actions):
                if not isinstance(action, dict):
                    logger.error(f"Action {i} must be a dictionary")
                    return False
                # Check for compound action format (new) or separate method/action format (old)
                if 'action' in action and '.' in str(action.get('action', '')):
                    # New compound action format: "method.action"
                    requiredFields = ['action', 'parameters', 'resultLabel']
                else:
                    # Old separate format: method + action fields
                    requiredFields = ['method', 'action', 'parameters', 'resultLabel']
                # NOTE(review): falsy values count as missing here, so an action
                # with an empty parameters dict ({}) is rejected -- confirm that
                # is the intended contract.
                missingFields = [field for field in requiredFields if field not in action or not action[field]]
                if missingFields:
                    logger.error(f"Action {i} missing required fields: {missingFields}")
                    return False
                resultLabel = action.get('resultLabel', '')
                if not resultLabel.startswith('round'):
                    logger.error(f"Action {i} result label must start with 'round': {resultLabel}")
                    return False
                parameters = action.get('parameters', {})
                if not isinstance(parameters, dict):
                    logger.error(f"Action {i} parameters must be a dictionary")
                    return False
            logger.info(f"Successfully validated {len(actions)} actions")
            return True
        except Exception as e:
            logger.error(f"Error validating actions: {str(e)}")
            return False

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1 @@
# Workflow mode implementations

View file

@ -0,0 +1,833 @@
# modeActionplan.py
# Actionplan mode implementation for workflows
import json
import logging
import uuid
from typing import List, Dict, Any
from modules.datamodels.datamodelChat import (
TaskStep, TaskContext, TaskResult, ActionItem, TaskStatus,
ActionResult, ReviewResult, ReviewContext
)
from modules.datamodels.datamodelChat import ChatWorkflow
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
from modules.workflows.processing.modes.modeBase import BaseMode
from modules.workflows.processing.shared.executionState import TaskExecutionState
from modules.workflows.processing.shared.promptGenerationActionsActionplan import (
generateActionDefinitionPrompt,
generateResultReviewPrompt
)
logger = logging.getLogger(__name__)
class ActionplanMode(BaseMode):
"""Actionplan mode implementation - batch planning and sequential execution"""
def __init__(self, services, workflow):
    # Delegate to BaseMode, which stores the shared service container and
    # the ChatWorkflow instance this mode operates on.
    super().__init__(services, workflow)
async def generateActionItems(self, taskStep: TaskStep, workflow: ChatWorkflow,
                              previousResults: List = None, enhancedContext: TaskContext = None) -> List[ActionItem]:
    """Generate the executable actions for a task step via one batch-planning AI call.

    Builds a TaskContext (reusing retry/criteria state when provided), sends
    the action-definition prompt through the central AI service, parses and
    validates the returned JSON action list, and persists each entry as an
    ActionItem.

    Args:
        taskStep: Task whose objective the actions must fulfil.
        workflow: Running workflow, used for stop checks and IDs.
        previousResults: Results from previously completed tasks, if any.
        enhancedContext: Optional TaskContext carrying retry information.

    Returns:
        List of persisted ActionItem objects; an EMPTY list on any failure
        (callers treat an empty list as a planning failure).
    """
    try:
        # Check workflow status before generating actions
        self._checkWorkflowStopped(workflow)
        retryInfo = f" (Retry #{enhancedContext.retry_count})" if enhancedContext and enhancedContext.retry_count > 0 else ""
        logger.info(f"Generating actions for task: {taskStep.objective}{retryInfo}")
        # Log criteria progress if this is a retry
        if enhancedContext and hasattr(enhancedContext, 'criteria_progress') and enhancedContext.criteria_progress is not None:
            progress = enhancedContext.criteria_progress
            logger.info(f"Retry attempt {enhancedContext.retry_count} - Criteria progress:")
            if progress.get('met_criteria'):
                logger.info(f" Met criteria: {', '.join(progress['met_criteria'])}")
            if progress.get('unmet_criteria'):
                logger.warning(f" Unmet criteria: {', '.join(progress['unmet_criteria'])}")
            # Show improvement trends across the two most recent attempts
            if progress.get('attempt_history'):
                recentAttempts = progress['attempt_history'][-2:]  # Last 2 attempts
                if len(recentAttempts) >= 2:
                    prevScore = recentAttempts[0].get('quality_score', 0)
                    currScore = recentAttempts[1].get('quality_score', 0)
                    if currScore > prevScore:
                        logger.info(f" Quality improving: {prevScore} -> {currScore}")
                    elif currScore < prevScore:
                        logger.warning(f" Quality declining: {prevScore} -> {currScore}")
                    else:
                        logger.info(f" Quality stable: {currScore}")
        # Enhanced retry context logging
        if enhancedContext and enhancedContext.retry_count > 0:
            logger.info("=== RETRY CONTEXT FOR ACTION GENERATION ===")
            logger.info(f"Retry Count: {enhancedContext.retry_count}")
            logger.debug(f"Previous Improvements: {enhancedContext.improvements}")
            logger.debug(f"Previous Review Result: {enhancedContext.previous_review_result}")
            logger.debug(f"Failure Patterns: {enhancedContext.failure_patterns}")
            logger.debug(f"Failed Actions: {enhancedContext.failed_actions}")
            logger.debug(f"Successful Actions: {enhancedContext.successful_actions}")
            logger.info("=== END RETRY CONTEXT ===")
        # Log that we're starting action generation
        logger.info("=== STARTING ACTION GENERATION ===")
        # Create proper context object for action definition
        if enhancedContext and isinstance(enhancedContext, TaskContext):
            # Reuse the provided TaskContext, normalising None fields to defaults
            actionContext = TaskContext(
                task_step=enhancedContext.task_step,
                workflow=enhancedContext.workflow,
                workflow_id=enhancedContext.workflow_id,
                available_documents=enhancedContext.available_documents,
                available_connections=enhancedContext.available_connections,
                previous_results=enhancedContext.previous_results or previousResults or [],
                previous_handover=enhancedContext.previous_handover,
                improvements=enhancedContext.improvements or [],
                retry_count=enhancedContext.retry_count or 0,
                previous_action_results=enhancedContext.previous_action_results or [],
                previous_review_result=enhancedContext.previous_review_result,
                is_regeneration=enhancedContext.is_regeneration or False,
                failure_patterns=enhancedContext.failure_patterns or [],
                failed_actions=enhancedContext.failed_actions or [],
                successful_actions=enhancedContext.successful_actions or [],
                criteria_progress=enhancedContext.criteria_progress
            )
        else:
            # Create new context from scratch
            actionContext = TaskContext(
                task_step=taskStep,
                workflow=workflow,
                workflow_id=workflow.id,
                available_documents=None,
                available_connections=None,
                previous_results=previousResults or [],
                previous_handover=None,
                improvements=[],
                retry_count=0,
                previous_action_results=[],
                previous_review_result=None,
                is_regeneration=False,
                failure_patterns=[],
                failed_actions=[],
                successful_actions=[],
                criteria_progress=None
            )
        # Check workflow status before calling AI service
        self._checkWorkflowStopped(workflow)
        # Build prompt bundle (template + placeholders)
        bundle = generateActionDefinitionPrompt(self.services, actionContext)
        actionPromptTemplate = bundle.prompt
        placeholders = bundle.placeholders
        # Trace action planning prompt
        self._writeTraceLog("Action Plan Prompt", actionPromptTemplate)
        self._writeTraceLog("Action Plan Placeholders", placeholders)
        # Centralized AI call: Action planning (quality, detailed) with placeholders
        options = AiCallOptions(
            operationType=OperationType.GENERATE_PLAN,
            priority=Priority.QUALITY,
            compressPrompt=False,
            compressContext=False,
            processingMode=ProcessingMode.DETAILED,
            maxCost=0.10,
            maxProcessingTime=30
        )
        # RENAMED for clarity: this variable previously reused the name
        # 'prompt' although it holds the AI *response*.
        response = await self.services.ai.callAi(prompt=actionPromptTemplate, placeholders=placeholders, options=options)
        # Check if AI response is valid
        if not response:
            raise ValueError("AI service returned no response")
        # Log action response received
        logger.info("=== ACTION PLAN AI RESPONSE RECEIVED ===")
        logger.info(f"Response length: {len(response) if response else 0}")
        # Trace action planning response
        self._writeTraceLog("Action Plan Response", response)
        # Parse action response: take the outermost JSON object in the text
        jsonStart = response.find('{')
        jsonEnd = response.rfind('}') + 1
        if jsonStart == -1 or jsonEnd == 0:
            raise ValueError("No JSON found in response")
        jsonStr = response[jsonStart:jsonEnd]
        try:
            actionData = json.loads(jsonStr)
        except Exception as e:
            logger.error(f"Error parsing action response JSON: {str(e)}")
            # Fall through with an empty dict; the 'actions' check below raises
            actionData = {}
        if 'actions' not in actionData:
            raise ValueError("Action response missing 'actions' field")
        actions = actionData['actions']
        if not actions:
            raise ValueError("Action response contains empty actions list")
        if not isinstance(actions, list):
            raise ValueError(f"Action response 'actions' field is not a list: {type(actions)}")
        if not self.validator.validateAction(actions, actionContext):
            logger.error("Generated actions failed validation")
            raise Exception("AI-generated actions failed validation - AI is required for action generation")
        # Convert to ActionItem objects
        taskActions = []
        for i, a in enumerate(actions):
            if not isinstance(a, dict):
                logger.warning(f"Skipping invalid action {i+1}: not a dictionary")
                continue
            # Handle compound action format (new) or separate method/action format (old)
            action_name = a.get('action', 'unknown')
            if '.' in action_name:
                # New compound action format: "method.action"
                method_name, action_name = action_name.split('.', 1)
            else:
                # Old separate format: method + action fields
                method_name = a.get('method', 'unknown')
            taskAction = self._createActionItem({
                "execMethod": method_name,
                "execAction": action_name,
                "execParameters": a.get('parameters', {}),
                "execResultLabel": a.get('resultLabel', ''),
                "expectedDocumentFormats": a.get('expectedDocumentFormats', None),
                "status": TaskStatus.PENDING,
                # Extract user-friendly message if available
                "userMessage": a.get('userMessage', None)
            })
            if taskAction:
                taskActions.append(taskAction)
            else:
                logger.warning(f"Skipping invalid action {i+1}: failed to create ActionItem")
        # taskActions only ever receives truthy items, so no second
        # filtering pass is needed (the old 'validActions' re-filter was redundant).
        if not taskActions:
            raise ValueError("No valid actions could be created from AI response")
        return taskActions
    except Exception as e:
        logger.error(f"Error in generateActionItems: {str(e)}")
        return []
async def executeTask(self, taskStep: TaskStep, workflow: ChatWorkflow, context: TaskContext,
                      taskIndex: int = None, totalTasks: int = None) -> TaskResult:
    """Execute all actions for a task step, with review-driven retries (Actionplan mode).

    For each attempt (up to the execution state's retry limit): plan the
    actions, execute them sequentially, then ask the AI reviewer whether the
    task's success criteria were met. On 'retry' the context is enriched with
    failure/criteria information and planning starts over; otherwise a final
    TaskResult is returned.

    Args:
        taskStep: Task to execute.
        workflow: Running workflow (progress counters and messages are updated on it).
        context: TaskContext carried and mutated across retries.
        taskIndex: 1-based task number for progress reporting, if known.
        totalTasks: Total number of tasks for progress reporting, if known.

    Returns:
        TaskResult with status COMPLETED on success, FAILED otherwise.
    """
    logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===")
    # Update workflow object before executing task
    if taskIndex is not None:
        self._updateWorkflowBeforeExecutingTask(taskIndex)
    # Update workflow context for this task
    if taskIndex is not None:
        self.services.workflow.setWorkflowContext(task_number=taskIndex)
    # Create task start message
    await self.messageCreator.createTaskStartMessage(taskStep, workflow, taskIndex, totalTasks)
    state = TaskExecutionState(taskStep)
    retryContext = context
    maxRetries = state.max_retries
    for attempt in range(maxRetries):
        logger.info(f"Task execution attempt {attempt+1}/{maxRetries}")
        # Check workflow status before starting task execution
        self._checkWorkflowStopped(workflow)
        # Update retry context with current attempt information
        if retryContext:
            retryContext.retry_count = attempt + 1
        actions = await self.generateActionItems(taskStep, workflow,
                                                 previousResults=retryContext.previous_results,
                                                 enhancedContext=retryContext)
        # Log total actions count for this task
        totalActions = len(actions) if actions else 0
        logger.info(f"Task {taskIndex or '?'} has {totalActions} actions")
        # Update workflow object after action planning
        self._updateWorkflowAfterActionPlanning(totalActions)
        self._setWorkflowTotals(totalActions=totalActions)
        if not actions:
            # Planning failed -> fall through to the terminal failure below
            logger.error("No actions defined for task step, aborting task execution")
            break
        actionResults = []
        for actionIdx, action in enumerate(actions):
            # Check workflow status before each action execution
            self._checkWorkflowStopped(workflow)
            # Update workflow object before executing action
            actionNumber = actionIdx + 1
            self._updateWorkflowBeforeExecutingAction(actionNumber)
            # Update workflow context for this action
            self.services.workflow.setWorkflowContext(action_number=actionNumber)
            # Log action start
            logger.info(f"Task {taskIndex} - Starting action {actionNumber}/{totalActions}")
            # Create action start message
            actionStartMessage = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": f"⚡ **Action {actionNumber}/{totalActions}** (Method {action.execMethod}.{action.execAction})",
                "status": "step",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": self.services.utils.getUtcTimestamp(),
                "documentsLabel": f"action_{actionNumber}_start",
                "documents": [],
                "actionProgress": "running",
                "roundNumber": workflow.currentRound,
                "taskNumber": taskIndex,
                "actionNumber": actionNumber
            }
            # Add user-friendly message if available
            if action.userMessage:
                actionStartMessage["message"] += f"\n\n💬 {action.userMessage}"
            message = self.services.interfaceDbChat.createMessage(actionStartMessage)
            if message:
                workflow.messages.append(message)
                logger.info(f"Action start message created for action {actionNumber}")
            # Execute single action
            result = await self.actionExecutor.executeSingleAction(action, workflow, taskStep,
                                                                   taskIndex, actionNumber, totalActions)
            actionResults.append(result)
            if result.success:
                state.addSuccessfulAction(result)
            else:
                state.addFailedAction(result)
        # Check workflow status before review
        self._checkWorkflowStopped(workflow)
        reviewResult = await self._reviewTaskCompletion(taskStep, actions, actionResults, workflow)
        success = reviewResult.status == 'success'
        feedback = reviewResult.reason
        if success:
            logger.info(f"=== TASK {taskIndex or '?'} COMPLETED SUCCESSFULLY: {taskStep.objective} ===")
            # Create task completion message
            await self.messageCreator.createTaskCompletionMessage(taskStep, workflow, taskIndex, totalTasks, reviewResult)
            return TaskResult(
                taskId=taskStep.id,
                status=TaskStatus.COMPLETED,
                success=True,
                feedback=feedback,
                error=None
            )
        elif reviewResult.status == 'retry' and state.canRetry():
            logger.warning(f"Task step '{taskStep.objective}' requires retry: {reviewResult.improvements}")
            # Enhanced logging of criteria status
            if reviewResult.met_criteria:
                logger.info(f"Met criteria: {', '.join(reviewResult.met_criteria)}")
            if reviewResult.unmet_criteria:
                logger.warning(f"Unmet criteria: {', '.join(reviewResult.unmet_criteria)}")
            state.incrementRetryCount()
            # Update retry context with retry information and criteria tracking
            if retryContext:
                retryContext.retry_count = state.retry_count
                retryContext.improvements = reviewResult.improvements
                retryContext.previous_action_results = actionResults
                retryContext.previous_review_result = reviewResult
                retryContext.is_regeneration = True
                retryContext.failure_patterns = state.getFailurePatterns()
                retryContext.failed_actions = state.failed_actions
                retryContext.successful_actions = state.successful_actions
                # BUGFIX: TaskContext always defines this attribute (it is
                # created with criteria_progress=None), so the old
                # `hasattr` guard never initialised the tracker and the
                # .update()/append() calls below crashed on None.
                if getattr(retryContext, 'criteria_progress', None) is None:
                    retryContext.criteria_progress = {
                        'met_criteria': set(),
                        'unmet_criteria': set(),
                        'attempt_history': []
                    }
                # Update criteria progress
                if reviewResult.met_criteria:
                    retryContext.criteria_progress['met_criteria'].update(reviewResult.met_criteria)
                if reviewResult.unmet_criteria:
                    retryContext.criteria_progress['unmet_criteria'].update(reviewResult.unmet_criteria)
                # Record this attempt's criteria status
                attemptRecord = {
                    'attempt': state.retry_count,
                    'met_criteria': reviewResult.met_criteria or [],
                    'unmet_criteria': reviewResult.unmet_criteria or [],
                    'quality_score': reviewResult.quality_score,
                    'improvements': reviewResult.improvements or []
                }
                retryContext.criteria_progress['attempt_history'].append(attemptRecord)
            # Create retry message
            await self.messageCreator.createRetryMessage(taskStep, workflow, taskIndex, reviewResult)
            continue
        else:
            logger.error(f"=== TASK {taskIndex or '?'} FAILED: {taskStep.objective} after {attempt+1} attempts ===")
            # Create error message
            await self.messageCreator.createErrorMessage(taskStep, workflow, taskIndex, reviewResult.reason)
            return TaskResult(
                taskId=taskStep.id,
                status=TaskStatus.FAILED,
                success=False,
                feedback=feedback,
                error=reviewResult.reason if reviewResult and hasattr(reviewResult, 'reason') else "Task failed after retry attempts"
            )
    logger.error(f"=== TASK {taskIndex or '?'} FAILED AFTER ALL RETRIES: {taskStep.objective} ===")
    # Create final error message
    await self.messageCreator.createErrorMessage(taskStep, workflow, taskIndex, "Task failed after all retries")
    return TaskResult(
        taskId=taskStep.id,
        status=TaskStatus.FAILED,
        success=False,
        feedback="Task failed after all retries.",
        error="Task failed after all retries."
    )
async def _reviewTaskCompletion(self, taskStep: TaskStep, taskActions: List[ActionItem],
                                actionResults: List[ActionResult], workflow: ChatWorkflow) -> ReviewResult:
    """Ask the AI reviewer whether the executed actions satisfied the task's success criteria.

    Builds a ReviewContext summarising the executed actions, their results and
    attached documents, sends the review prompt through the central AI service
    and parses the JSON verdict into a ReviewResult.

    Returns:
        ReviewResult whose status is 'success', 'retry' or 'failed'. Any
        exception (including an unparseable AI response) is converted into a
        'failed' ReviewResult rather than propagated.
    """
    try:
        # Check workflow status before reviewing task completion
        self._checkWorkflowStopped(workflow)
        logger.info("=== STARTING TASK COMPLETION REVIEW ===")
        logger.info(f"Task: {taskStep.objective}")
        logger.info(f"Actions executed: {len(taskActions) if taskActions else 0}")
        logger.info(f"Action results: {len(actionResults) if actionResults else 0}")
        # Create proper context object for result review
        reviewContext = ReviewContext(
            task_step=taskStep,
            task_actions=taskActions,
            action_results=actionResults,
            step_result={
                'successful_actions': sum(1 for result in actionResults if result.success),
                'total_actions': len(actionResults),
                'results': [self._extractResultText(result) for result in actionResults if result.success],
                'errors': [result.error for result in actionResults if not result.success],
                'documents': [
                    {
                        'action_index': i,
                        'documents_count': len(result.documents) if result.documents else 0,
                        'documents': result.documents if result.documents else []
                    }
                    for i, result in enumerate(actionResults)
                ]
            },
            workflow_id=workflow.id,
            previous_results=[]
        )
        # Check workflow status before calling AI service
        self._checkWorkflowStopped(workflow)
        # Build prompt bundle for result review
        bundle = generateResultReviewPrompt(reviewContext)
        promptTemplate = bundle.prompt
        placeholders = bundle.placeholders
        # Log result review prompt sent to AI
        logger.info("=== RESULT REVIEW PROMPT SENT TO AI ===")
        logger.info(f"Task: {taskStep.objective}")
        logger.info(f"Action Results Count: {len(reviewContext.action_results) if reviewContext.action_results else 0}")
        logger.info(f"Task Actions Count: {len(reviewContext.task_actions) if reviewContext.task_actions else 0}")
        # Trace result review prompt
        self._writeTraceLog("Result Review Prompt", promptTemplate)
        self._writeTraceLog("Result Review Placeholders", placeholders)
        # Centralized AI call: Result validation (balanced analysis) with placeholders
        options = AiCallOptions(
            operationType=OperationType.ANALYSE_CONTENT,
            priority=Priority.BALANCED,
            compressPrompt=True,
            compressContext=False,
            processingMode=ProcessingMode.ADVANCED,
            maxCost=0.05,
            maxProcessingTime=30
        )
        response = await self.services.ai.callAi(prompt=promptTemplate, placeholders=placeholders, options=options)
        # Log result review response received
        logger.info("=== RESULT REVIEW AI RESPONSE RECEIVED ===")
        logger.info(f"Response length: {len(response) if response else 0}")
        # Trace result review response
        self._writeTraceLog("Result Review Response", response)
        # Parse review response: take the outermost JSON object in the text
        jsonStart = response.find('{')
        jsonEnd = response.rfind('}') + 1
        if jsonStart == -1 or jsonEnd == 0:
            raise ValueError("No JSON found in review response")
        jsonStr = response[jsonStart:jsonEnd]
        try:
            review = json.loads(jsonStr)
        except Exception as e:
            logger.error(f"Error parsing review response JSON: {str(e)}")
            review = {}
        if 'status' not in review:
            # An empty/unparseable review lands here and becomes a 'failed'
            # ReviewResult via the outer exception handler.
            raise ValueError("Review response missing 'status' field")
        # CLEANUP: the setdefault() calls previously here were dead code --
        # 'status' is guaranteed present after the check above, and the
        # .get() defaults below cover 'reason' and 'quality_score'.
        # Ensure improvements is a list
        improvements = review.get('improvements', [])
        if isinstance(improvements, str):
            # Split string into list if it's a single improvement
            improvements = [improvements.strip()] if improvements.strip() else []
        elif not isinstance(improvements, list):
            improvements = []
        # Ensure all list fields are properly typed
        metCriteria = review.get('met_criteria', [])
        if not isinstance(metCriteria, list):
            metCriteria = []
        unmetCriteria = review.get('unmet_criteria', [])
        if not isinstance(unmetCriteria, list):
            unmetCriteria = []
        reviewResult = ReviewResult(
            status=review.get('status', 'unknown'),
            reason=review.get('reason', 'No reason provided'),
            improvements=improvements,
            quality_score=review.get('quality_score', 5),
            missing_outputs=[],
            met_criteria=metCriteria,
            unmet_criteria=unmetCriteria,
            confidence=review.get('confidence', 0.5),
            # Extract user-friendly message if available
            userMessage=review.get('userMessage', None)
        )
        # Enhanced validation logging
        logger.info(f"VALIDATION RESULT - Task: '{taskStep.objective}' - Status: {reviewResult.status.upper()}, Quality: {reviewResult.quality_score}/10")
        if reviewResult.status == 'success':
            logger.info("VALIDATION SUCCESS - Task completed successfully")
            if reviewResult.met_criteria:
                logger.info(f"Met criteria: {', '.join(reviewResult.met_criteria)}")
        elif reviewResult.status == 'retry':
            logger.warning(f"VALIDATION RETRY - Task requires retry: {reviewResult.improvements}")
            if reviewResult.unmet_criteria:
                logger.warning(f"Unmet criteria: {', '.join(reviewResult.unmet_criteria)}")
        else:
            logger.error(f"VALIDATION FAILED - Task failed: {reviewResult.reason}")
        logger.info("=== TASK COMPLETION REVIEW FINISHED ===")
        logger.info(f"Final Status: {reviewResult.status}")
        logger.info(f"Quality Score: {reviewResult.quality_score}/10")
        logger.info(f"Improvements: {reviewResult.improvements}")
        logger.info("=== END REVIEW ===")
        return reviewResult
    except Exception as e:
        logger.error(f"Error in reviewTaskCompletion: {str(e)}")
        return ReviewResult(
            status='failed',
            reason=str(e),
            quality_score=0
        )
def _createActionItem(self, actionData: Dict[str, Any]) -> ActionItem:
    """Persist a new task action record and return it as an ActionItem model.

    Fills in defaults (generated id, PENDING status, empty parameters),
    requires execMethod/execAction, writes the record through the chat DB
    interface, and maps the stored row back onto the ActionItem datamodel.

    Returns:
        The created ActionItem, or None when required fields are missing or
        the database write fails (failures are logged, never raised).

    NOTE(review): a second `_createActionItem` defined later in this class
    shadows this one at class-creation time — likely a merge artefact; the
    duplicate should be removed.
    """
    try:
        # Ensure ID is present
        if "id" not in actionData or not actionData["id"]:
            actionData["id"] = f"action_{uuid.uuid4()}"
        # Ensure required fields
        if "status" not in actionData:
            actionData["status"] = TaskStatus.PENDING
        if "execMethod" not in actionData:
            logger.error("execMethod is required for task action")
            return None
        if "execAction" not in actionData:
            logger.error("execAction is required for task action")
            return None
        if "execParameters" not in actionData:
            actionData["execParameters"] = {}
        # Split the payload into persistable simple fields and nested objects;
        # only the simple fields are stored here (nested objects are unused).
        simpleFields, _objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
        # Create action in database
        createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
        # Convert to ActionItem model
        return ActionItem(
            id=createdAction["id"],
            execMethod=createdAction["execMethod"],
            execAction=createdAction["execAction"],
            execParameters=createdAction.get("execParameters", {}),
            execResultLabel=createdAction.get("execResultLabel"),
            expectedDocumentFormats=createdAction.get("expectedDocumentFormats"),
            status=createdAction.get("status", TaskStatus.PENDING),
            error=createdAction.get("error"),
            retryCount=createdAction.get("retryCount", 0),
            retryMax=createdAction.get("retryMax", 3),
            processingTime=createdAction.get("processingTime"),
            timestamp=float(createdAction.get("timestamp", self.services.utils.getUtcTimestamp())),
            result=createdAction.get("result"),
            resultDocuments=createdAction.get("resultDocuments", []),
            userMessage=createdAction.get("userMessage")
        )
    except Exception as e:
        logger.error(f"Error creating task action: {str(e)}")
        return None
def _extractResultText(self, result: ActionResult) -> str:
    """Concatenate the textual payload of every document on a successful action result.

    Returns an empty string for failed results or results without documents;
    otherwise joins each document's ``documentData`` (stringified) with a
    ``---`` separator.
    """
    if not result.success or not result.documents:
        return ""
    # Pull the raw payload off each attached document, skipping empty ones
    texts = []
    for document in result.documents:
        payload = getattr(document, 'documentData', None)
        if payload:
            texts.append(str(payload))
    return "\n\n---\n\n".join(texts) if texts else ""
def _updateWorkflowBeforeExecutingTask(self, taskNumber: int):
    """Reset the workflow's per-task progress counters before a task starts."""
    try:
        updateData = {
            "currentTask": taskNumber,
            "currentAction": 0,
            "totalActions": 0
        }
        # Mirror the new counters on the in-memory workflow object ...
        self.workflow.currentTask = taskNumber
        self.workflow.currentAction = 0
        self.workflow.totalActions = 0
        # ... then persist them through the chat DB interface
        self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
        logger.info(f"Updated workflow {self.workflow.id} before executing task {taskNumber}: {updateData}")
    except Exception as exc:
        # Progress tracking must never break task execution
        logger.error(f"Error updating workflow before executing task: {str(exc)}")
def _updateWorkflowAfterActionPlanning(self, totalActions: int):
    """Record how many actions were planned for the current task."""
    try:
        updateData = {"totalActions": totalActions}
        # Keep the in-memory workflow object in sync, then persist
        self.workflow.totalActions = totalActions
        self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
        logger.info(f"Updated workflow {self.workflow.id} after action planning: {updateData}")
    except Exception as exc:
        # Progress tracking must never break task execution
        logger.error(f"Error updating workflow after action planning: {str(exc)}")
def _updateWorkflowBeforeExecutingAction(self, actionNumber: int):
    """Advance the workflow's current-action counter before an action runs."""
    try:
        updateData = {"currentAction": actionNumber}
        # Keep the in-memory workflow object in sync, then persist
        self.workflow.currentAction = actionNumber
        self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
        logger.info(f"Updated workflow {self.workflow.id} before executing action {actionNumber}: {updateData}")
    except Exception as exc:
        # Progress tracking must never break action execution
        logger.error(f"Error updating workflow before executing action: {str(exc)}")
def _setWorkflowTotals(self, totalTasks: int = None, totalActions: int = None):
"""Set total counts for workflow progress tracking and update database"""
try:
updateData = {}
if totalTasks is not None:
self.workflow.totalTasks = totalTasks
updateData["totalTasks"] = totalTasks
if totalActions is not None:
self.workflow.totalActions = totalActions
updateData["totalActions"] = totalActions
# Update workflow object in database if we have changes
if updateData:
self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
logger.info(f"Updated workflow {self.workflow.id} totals in database: {updateData}")
logger.debug(f"Updated workflow totals: Tasks {self.workflow.totalTasks if hasattr(self.workflow, 'totalTasks') else 'N/A'}, Actions {self.workflow.totalActions if hasattr(self.workflow, 'totalActions') else 'N/A'}")
except Exception as e:
logger.error(f"Error setting workflow totals: {str(e)}")
def _createActionItem(self, actionData: Dict[str, Any]) -> ActionItem:
"""Creates a new task action"""
try:
import uuid
# Ensure ID is present
if "id" not in actionData or not actionData["id"]:
actionData["id"] = f"action_{uuid.uuid4()}"
# Ensure required fields
if "status" not in actionData:
actionData["status"] = TaskStatus.PENDING
if "execMethod" not in actionData:
logger.error("execMethod is required for task action")
return None
if "execAction" not in actionData:
logger.error("execAction is required for task action")
return None
if "execParameters" not in actionData:
actionData["execParameters"] = {}
# Use generic field separation based on ActionItem model
simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
# Create action in database
createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
# Convert to ActionItem model
return ActionItem(
id=createdAction["id"],
execMethod=createdAction["execMethod"],
execAction=createdAction["execAction"],
execParameters=createdAction.get("execParameters", {}),
execResultLabel=createdAction.get("execResultLabel"),
expectedDocumentFormats=createdAction.get("expectedDocumentFormats"),
status=createdAction.get("status", TaskStatus.PENDING),
error=createdAction.get("error"),
retryCount=createdAction.get("retryCount", 0),
retryMax=createdAction.get("retryMax", 3),
processingTime=createdAction.get("processingTime"),
timestamp=float(createdAction.get("timestamp", self.services.utils.getUtcTimestamp())),
result=createdAction.get("result"),
resultDocuments=createdAction.get("resultDocuments", []),
userMessage=createdAction.get("userMessage")
)
except Exception as e:
logger.error(f"Error creating task action: {str(e)}")
return None
def _writeTraceLog(self, contextText: str, data: Any) -> None:
"""Write trace data to configured trace file if in debug mode with improved JSON formatting"""
try:
import os
import json
from datetime import datetime, UTC
# Only write if logger is in debug mode
if logger.level > logging.DEBUG:
return
# Get log directory from configuration
logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
if not os.path.isabs(logDir):
# If relative path, make it relative to the gateway directory
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
logDir = os.path.join(gatewayDir, logDir)
# Ensure log directory exists
os.makedirs(logDir, exist_ok=True)
# Create trace file path
traceFile = os.path.join(logDir, "log_trace.log")
# Format the trace entry with better structure
timestamp = datetime.fromtimestamp(self.services.utils.getUtcTimestamp(), UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
# Create a structured trace entry
traceEntry = f"[{timestamp}] {contextText}\n"
traceEntry += "=" * 80 + "\n"
# Add data if provided with improved formatting
if data is not None:
try:
if isinstance(data, (dict, list)):
# Format as pretty JSON with better settings
jsonStr = json.dumps(data, indent=2, default=str, ensure_ascii=False, sort_keys=False)
traceEntry += f"JSON Data:\n{jsonStr}\n"
elif isinstance(data, str):
# For string data, try to parse as JSON first, then fall back to plain text
try:
parsed = json.loads(data)
jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
traceEntry += f"JSON Data (parsed from string):\n{jsonStr}\n"
except (json.JSONDecodeError, TypeError):
# Not valid JSON, show as plain text with proper line breaks
formatted_data = data.replace('\\n', '\n')
traceEntry += f"Text Data:\n{formatted_data}\n"
else:
# For other types, convert to string and try to parse as JSON
dataStr = str(data)
try:
parsed = json.loads(dataStr)
jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
traceEntry += f"JSON Data (parsed from object):\n{jsonStr}\n"
except (json.JSONDecodeError, TypeError):
# Not valid JSON, show as plain text with proper line breaks
formatted_data = dataStr.replace('\\n', '\n')
traceEntry += f"Object Data:\n{formatted_data}\n"
except Exception as e:
# Fallback to simple string representation
traceEntry += f"Data (fallback): {str(data)}\n"
else:
traceEntry += "No data provided\n"
traceEntry += "=" * 80 + "\n\n"
# Write to trace file
with open(traceFile, "a", encoding="utf-8") as f:
f.write(traceEntry)
except Exception as e:
# Don't log trace errors to avoid recursion
pass

View file

@ -0,0 +1,63 @@
# modeBase.py
# Abstract base class for workflow modes
from abc import ABC, abstractmethod
import logging
from typing import List, Dict, Any
from modules.datamodels.datamodelChat import TaskStep, TaskContext, TaskResult, ActionItem
from modules.datamodels.datamodelChat import ChatWorkflow
from modules.workflows.processing.core.taskPlanner import TaskPlanner
from modules.workflows.processing.core.actionExecutor import ActionExecutor
from modules.workflows.processing.core.messageCreator import MessageCreator
from modules.workflows.processing.core.validator import WorkflowValidator
logger = logging.getLogger(__name__)
class BaseMode(ABC):
    """Common scaffolding shared by all workflow execution modes.

    Concrete modes implement executeTask/generateActionItems; task
    planning and task-plan messaging are delegated to the shared helper
    components constructed here.
    """

    def __init__(self, services, workflow):
        self.services = services
        self.workflow = workflow
        # Shared helper components used by every concrete mode.
        self.taskPlanner = TaskPlanner(services)
        self.actionExecutor = ActionExecutor(services)
        self.messageCreator = MessageCreator(services)
        self.validator = WorkflowValidator(services)

    def _checkWorkflowStopped(self, workflow):
        """Raise if the user has stopped this workflow.

        Prefers the freshest status from the database to avoid acting on
        stale data; when that lookup fails, falls back to the in-memory
        workflow object.
        """
        stopSignal = "Workflow was stopped by user"
        try:
            # Re-read the status from the database to avoid stale data.
            freshWorkflow = self.services.interfaceDbChat.getWorkflow(workflow.id)
        except Exception as err:
            # Database unavailable: fall back to the in-memory object.
            logger.warning(f"Could not check current workflow status from database: {str(err)}")
            if workflow and workflow.status == "stopped":
                logger.info("Workflow stopped by user (from in-memory object), aborting execution")
                raise Exception(stopSignal)
        else:
            if freshWorkflow and freshWorkflow.status == "stopped":
                logger.info("Workflow stopped by user, aborting execution")
                raise Exception(stopSignal)

    @abstractmethod
    async def executeTask(self, taskStep: TaskStep, workflow: ChatWorkflow, context: TaskContext,
                          taskIndex: int = None, totalTasks: int = None) -> TaskResult:
        """Execute a task step - must be implemented by concrete modes"""
        pass

    @abstractmethod
    async def generateActionItems(self, taskStep: TaskStep, workflow: ChatWorkflow,
                                  previousResults: List = None, enhancedContext: TaskContext = None) -> List[ActionItem]:
        """Generate actions for a task step - must be implemented by concrete modes"""
        pass

    async def generateTaskPlan(self, userInput: str, workflow: ChatWorkflow):
        """Delegate plan generation to the shared TaskPlanner."""
        return await self.taskPlanner.generateTaskPlan(userInput, workflow)

    async def createTaskPlanMessage(self, taskPlan, workflow: ChatWorkflow):
        """Delegate task-plan messaging to the shared MessageCreator."""
        return await self.messageCreator.createTaskPlanMessage(taskPlan, workflow)

View file

@ -0,0 +1,938 @@
# modeReact.py
# React mode implementation for workflows
import json
import logging
import re
import time
from datetime import datetime, timezone
from typing import List, Dict, Any
from modules.datamodels.datamodelChat import (
TaskStep, TaskContext, TaskResult, ActionItem, TaskStatus,
ActionResult
)
from modules.datamodels.datamodelChat import ChatWorkflow
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
from modules.workflows.processing.modes.modeBase import BaseMode
from modules.workflows.processing.shared.executionState import TaskExecutionState, shouldContinue
from modules.workflows.processing.shared.promptGenerationActionsReact import (
generateReactPlanSelectionPrompt,
generateReactParametersPrompt,
generateReactRefinementPrompt
)
from modules.workflows.processing.shared.placeholderFactory import extractReviewContent
from modules.workflows.processing.adaptive import IntentAnalyzer, ContentValidator, LearningEngine, ProgressTracker
logger = logging.getLogger(__name__)
class ReactMode(BaseMode):
"""React mode implementation - iterative plan-act-observe-refine loop"""
def __init__(self, services, workflow):
super().__init__(services, workflow)
# Initialize adaptive components
self.intentAnalyzer = IntentAnalyzer()
self.contentValidator = ContentValidator()
self.learningEngine = LearningEngine()
self.progressTracker = ProgressTracker()
self.currentIntent = None
# Placeholder service no longer used; prompts are generated directly
async def generateActionItems(self, taskStep: TaskStep, workflow: ChatWorkflow,
previousResults: List = None, enhancedContext: TaskContext = None) -> List[ActionItem]:
"""React mode doesn't use batch action generation - actions are generated iteratively"""
# React mode generates actions one at a time in the execution loop
return []
    async def executeTask(self, taskStep: TaskStep, workflow: ChatWorkflow, context: TaskContext,
                          taskIndex: int = None, totalTasks: int = None) -> TaskResult:
        """Execute task using React mode - iterative plan-act-observe-refine loop.

        Each iteration: select one action (_planSelect), execute it
        (_actExecute), condense the outcome (_observeBuild) and let the AI
        decide whether to continue (_refineDecide). Adaptive components
        (intent analysis, content validation, learning engine, progress
        tracker) feed extra signals into the stopping decision. Runs at most
        workflow.maxSteps iterations (default 5); any step error aborts the
        loop but the task is still reported as COMPLETED/success below.
        """
        logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===")
        # Analyze user intent from the combination of the original user
        # prompt (when services exposes it) and the current task objective.
        original_prompt = self.services.currentUserPrompt if self.services and hasattr(self.services, 'currentUserPrompt') else taskStep.objective
        combined_context = f"Original request: {original_prompt}\n\nCurrent task: {taskStep.objective}"
        self.currentIntent = self.intentAnalyzer.analyzeUserIntent(combined_context, context)
        logger.info(f"Intent analysis (original + task): {self.currentIntent}")
        # Reset progress tracking for the new task.
        self.progressTracker.reset()
        # Update workflow object (DB + UI counters) before executing the task.
        if taskIndex is not None:
            self._updateWorkflowBeforeExecutingTask(taskIndex)
        # Update workflow context for this task.
        if taskIndex is not None:
            self.services.workflow.setWorkflowContext(task_number=taskIndex)
        # Announce the task to the user.
        await self.messageCreator.createTaskStartMessage(taskStep, workflow, taskIndex, totalTasks)
        state = TaskExecutionState(taskStep)
        # React mode bounds iterations with max_steps instead of max_retries.
        state.max_steps = max(1, int(getattr(workflow, 'maxSteps', 5)))
        logger.info(f"Using React mode execution with max_steps: {state.max_steps}")
        step = 1
        lastReviewDict = None
        while step <= state.max_steps:
            self._checkWorkflowStopped(workflow)
            # Expose the current step as workflow[currentAction] for the UI.
            self._updateWorkflowBeforeExecutingAction(step)
            self.services.workflow.setWorkflowContext(action_number=step)
            try:
                t0 = time.time()
                selection = await self._planSelect(context)
                logger.info(f"React step {step}: Selected action: {selection}")
                # Act: parameterize and execute the selected action.
                # (User-facing intention/completion messages are created by
                # the standard message creator inside _actExecute.)
                result = await self._actExecute(context, selection, taskStep, workflow, step)
                observation = self._observeBuild(result)
                # Attach deterministic label for clarity.
                observation['resultLabel'] = result.resultLabel
                # Validate delivered content against the analyzed intent.
                if self.currentIntent and result.documents:
                    validationResult = self.contentValidator.validateContent(result.documents, self.currentIntent)
                    observation['contentValidation'] = validationResult
                    logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {validationResult['qualityScore']:.2f})")
                    # Feed the outcome into the learning engine and tracker.
                    feedback = self._collectFeedback(result, validationResult, self.currentIntent)
                    self.learningEngine.learnFromFeedback(feedback, context, self.currentIntent)
                    self.progressTracker.updateProgress(result, validationResult, self.currentIntent)
                decision = await self._refineDecide(context, observation)
                # Store refinement decision in context for the next iteration.
                if not hasattr(context, 'previous_review_result') or context.previous_review_result is None:
                    context.previous_review_result = []
                if decision:  # Only append if decision is not None
                    context.previous_review_result.append(decision)
                # Carry the review reason forward as an "improvement" hint.
                if decision and decision.get('reason'):
                    if not hasattr(context, 'improvements'):
                        context.improvements = []
                    context.improvements.append(f"Step {step}: {decision.get('reason')}")
                # Telemetry: simple duration per step.
                duration = time.time() - t0
                self.services.interfaceDbChat.createLog({
                    "workflowId": workflow.id,
                    "message": f"react_step_duration_sec={duration:.3f}",
                    "type": "info"
                })
                lastReviewDict = decision
            except Exception as e:
                logger.error(f"React step {step} error: {e}")
                break
            # Combine adaptive progress tracking with the AI review decision;
            # stop as soon as either signal says to stop.
            progressState = self.progressTracker.getCurrentProgress()
            continueByProgress = self.progressTracker.shouldContinue(progressState, observation.get('contentValidation', {}))
            continueByReview = shouldContinue(observation, lastReviewDict, step, state.max_steps)
            if not continueByProgress or not continueByReview:
                logger.info(f"Stopping at step {step}: progress={continueByProgress}, review={continueByReview}")
                break
            step += 1
        # Summarize task result for react mode.
        # NOTE(review): success stays True on every path, including the
        # error break above — confirm whether failed steps should surface.
        status = TaskStatus.COMPLETED
        success = True
        feedback = lastReviewDict.get('reason') if lastReviewDict and isinstance(lastReviewDict, dict) else 'Completed'
        if lastReviewDict and isinstance(lastReviewDict, dict) and lastReviewDict.get('decision') == 'stop':
            success = True
        # Create task completion message (ad-hoc ReviewResult shim carries
        # the feedback text and a fixed quality score of 8).
        await self.messageCreator.createTaskCompletionMessage(taskStep, workflow, taskIndex, totalTasks,
                                                             type('ReviewResult', (), {'reason': feedback, 'met_criteria': [], 'quality_score': 8})())
        return TaskResult(
            taskId=taskStep.id,
            status=status,
            success=success,
            feedback=feedback,
            error=None if success else feedback
        )
async def _planSelect(self, context: TaskContext) -> Dict[str, Any]:
"""Plan: select exactly one action. Returns {"action": {method, name}}"""
bundle = generateReactPlanSelectionPrompt(self.services, context)
promptTemplate = bundle.prompt
placeholders = bundle.placeholders
self._writeTraceLog("React Plan Selection Prompt", promptTemplate)
self._writeTraceLog("React Plan Selection Placeholders", placeholders)
# Centralized AI call for plan selection (use plan generation quality)
options = AiCallOptions(
operationType=OperationType.GENERATE_PLAN,
priority=Priority.QUALITY,
compressPrompt=False,
compressContext=False,
processingMode=ProcessingMode.DETAILED,
maxCost=0.10,
maxProcessingTime=30
)
response = await self.services.ai.callAi(
prompt=promptTemplate,
placeholders=placeholders,
options=options
)
self._writeTraceLog("React Plan Selection Response", response)
jsonStart = response.find('{') if response else -1
jsonEnd = response.rfind('}') + 1 if response else 0
if jsonStart == -1 or jsonEnd == 0:
raise ValueError("No JSON in selection response")
selection = json.loads(response[jsonStart:jsonEnd])
if 'action' not in selection or not isinstance(selection['action'], str):
raise ValueError("Selection missing 'action' as string")
# Enforce spec: Stage 1 must NOT include 'parameters'
if 'parameters' in selection:
# Remove to avoid accidental carryover
try:
del selection['parameters']
except Exception:
selection['parameters'] = None
return selection
async def _actExecute(self, context: TaskContext, selection: Dict[str, Any], taskStep: TaskStep,
workflow: ChatWorkflow, stepIndex: int) -> ActionResult:
"""Act: request minimal parameters then execute selected action"""
compoundActionName = selection.get('action', '')
# Parse compound action name (e.g., "ai.webResearch" -> method="ai", action="webResearch")
if '.' not in compoundActionName:
raise ValueError(f"Invalid compound action name: {compoundActionName}. Expected format: method.action")
methodName, actionName = compoundActionName.split('.', 1)
# Always request parameters in Stage 2 (spec: Stage 1 must not provide them)
logger.info("Requesting parameters in Stage 2 based on Stage 1 outputs")
# Create a permissive Stage 2 context to avoid TaskContext attribute restrictions
from types import SimpleNamespace
stage2Context = SimpleNamespace()
# Copy essential fields from original context for fallbacks (snake_case for placeholderFactory compatibility)
stage2Context.task_step = getattr(context, 'task_step', None)
stage2Context.workflow_id = getattr(context, 'workflow_id', None)
# Set Stage 1 data directly on the permissive context (snake_case for promptGenerationActionsReact compatibility)
if isinstance(selection, dict):
stage2Context.action_objective = selection.get('actionObjective', '')
stage2Context.parameters_context = selection.get('parametersContext', '')
stage2Context.learnings = selection.get('learnings', [])
else:
stage2Context.action_objective = ''
stage2Context.parameters_context = ''
stage2Context.learnings = []
# Build and send the Stage 2 parameters prompt (always)
bundle = generateReactParametersPrompt(self.services, stage2Context, compoundActionName)
promptTemplate = bundle.prompt
placeholders = bundle.placeholders
self._writeTraceLog("React Parameters Prompt", promptTemplate)
self._writeTraceLog("React Parameters Placeholders", placeholders)
# Centralized AI call for parameter suggestion (balanced analysis)
options = AiCallOptions(
operationType=OperationType.ANALYSE_CONTENT,
priority=Priority.BALANCED,
compressPrompt=True,
compressContext=False,
processingMode=ProcessingMode.ADVANCED,
maxCost=0.05,
maxProcessingTime=30,
temperature=0.3, # Slightly higher temperature for better instruction following
# maxTokens not set - use model's maximum for big JSON responses
resultFormat="json" # Explicitly request JSON format
)
paramsResp = await self.services.ai.callAi(
prompt=promptTemplate,
placeholders=placeholders,
options=options
)
# Parse JSON response
js = paramsResp[paramsResp.find('{'):paramsResp.rfind('}')+1] if paramsResp else '{}'
try:
paramObj = json.loads(js)
parameters = paramObj.get('parameters', {}) if isinstance(paramObj, dict) else {}
except Exception as e:
logger.error(f"Failed to parse AI parameters response as JSON: {str(e)}")
logger.error(f"Response was: {paramsResp}")
parameters = {}
# Merge Stage 1 resource selections into Stage 2 parameters (only if action expects them)
try:
requiredDocs = selection.get('requiredInputDocuments')
if requiredDocs:
# Ensure list
if isinstance(requiredDocs, list):
# Only attach if target action defines 'documentList'
methodName, actionName = compoundActionName.split('.', 1)
from modules.workflows.processing.shared.methodDiscovery import getActionParameterList, methods as _methods
expectedParams = getActionParameterList(methodName, actionName, _methods)
if 'documentList' in expectedParams:
parameters['documentList'] = requiredDocs
requiredConn = selection.get('requiredConnection')
if requiredConn:
# Only attach if target action defines 'connectionReference'
methodName, actionName = compoundActionName.split('.', 1)
from modules.workflows.processing.shared.methodDiscovery import getActionParameterList, methods as _methods
expectedParams = getActionParameterList(methodName, actionName, _methods)
if 'connectionReference' in expectedParams:
parameters['connectionReference'] = requiredConn
except Exception:
pass
# Apply minimal defaults in-code (language)
if 'language' not in parameters and hasattr(self.services, 'user') and getattr(self.services.user, 'language', None):
parameters['language'] = self.services.user.language
# Write merged parameters to trace BEFORE continuing
try:
mergedParamObj = {
"schema": (paramObj.get('schema') if isinstance(paramObj, dict) else 'parameters_v1'),
"parameters": parameters
}
self._writeTraceLog("React Parameters Response", mergedParamObj)
except Exception:
pass
# Build a synthetic ActionItem for execution routing and labels
currentRound = getattr(self.workflow, 'currentRound', 0)
currentTask = getattr(self.workflow, 'currentTask', 0)
resultLabel = f"round{currentRound}_task{currentTask}_action{stepIndex}_results"
taskAction = self._createActionItem({
"execMethod": methodName,
"execAction": actionName,
"execParameters": parameters,
"execResultLabel": resultLabel,
"status": TaskStatus.PENDING
})
# Execute using existing single action flow (message creation is handled internally)
result = await self.actionExecutor.executeSingleAction(taskAction, workflow, taskStep, currentTask, stepIndex, 1)
return result
def _observeBuild(self, actionResult: ActionResult) -> Dict[str, Any]:
"""Observe: build compact observation object from ActionResult with full document metadata"""
previews = []
notes = []
if actionResult and actionResult.documents:
# Process all documents and show full metadata
for doc in actionResult.documents:
# Extract all available metadata without content
docMetadata = {
"name": getattr(doc, 'fileName', None) or getattr(doc, 'documentName', 'Unknown'),
"mimeType": getattr(doc, 'mimeType', 'Unknown'),
"size": getattr(doc, 'size', 'Unknown'),
"created": getattr(doc, 'created', 'Unknown'),
"modified": getattr(doc, 'modified', 'Unknown'),
"typeGroup": getattr(doc, 'typeGroup', 'Unknown'),
"documentId": getattr(doc, 'documentId', 'Unknown'),
"reference": getattr(doc, 'reference', 'Unknown')
}
# Remove 'Unknown' values to keep it clean
docMetadata = {k: v for k, v in docMetadata.items() if v != 'Unknown'}
# Add content size indicator instead of actual content
if hasattr(doc, 'documentData') and doc.documentData:
if isinstance(doc.documentData, dict) and 'content' in doc.documentData:
contentLength = len(str(doc.documentData['content']))
docMetadata['contentSize'] = f"{contentLength} characters"
else:
contentLength = len(str(doc.documentData))
docMetadata['contentSize'] = f"{contentLength} characters"
# Extract comment if available
if hasattr(doc, 'documentData') and doc.documentData:
data = getattr(doc, 'documentData', None)
if isinstance(data, dict):
comment = data.get("comment", "")
if comment:
notes.append(f"Document '{docMetadata.get('name', 'Unknown')}': {comment}")
previews.append(docMetadata)
observation = {
"success": bool(actionResult.success),
"resultLabel": actionResult.resultLabel or "",
"documentsCount": len(actionResult.documents) if actionResult.documents else 0,
"previews": previews,
"notes": notes
}
# NEW: Add content analysis if intent is available
if self.currentIntent and actionResult.documents:
contentAnalysis = self._analyzeContent(actionResult.documents)
observation['contentAnalysis'] = contentAnalysis
return observation
def _analyzeContent(self, documents: List[Any]) -> Dict[str, Any]:
"""Analyzes content of documents for adaptive learning"""
try:
if not documents:
return {"contentType": "none", "contentSnippet": "", "intentMatch": False}
# Extract content from first document
firstDoc = documents[0]
content = ""
if hasattr(firstDoc, 'documentData'):
data = firstDoc.documentData
if isinstance(data, dict) and 'content' in data:
content = str(data['content'])
else:
content = str(data)
# Classify content type
contentType = self._classifyContent(content)
# Create content snippet
contentSnippet = content[:200] + "..." if len(content) > 200 else content
# Assess intent match
intentMatch = self._assessIntentMatch(content, self.currentIntent)
return {
"contentType": contentType,
"contentSnippet": contentSnippet,
"intentMatch": intentMatch
}
except Exception as e:
logger.error(f"Error analyzing content: {str(e)}")
return {"contentType": "error", "contentSnippet": "", "intentMatch": False}
def _classifyContent(self, content: str) -> str:
"""Classifies the type of content"""
if not content:
return "empty"
# Check for code
codeIndicators = ['def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ']
if any(indicator in content.lower() for indicator in codeIndicators):
return "code"
# Check for numbers
if re.search(r'\b\d+\b', content):
return "numbers"
# Check for structured content
if any(indicator in content for indicator in ['\n', '\t', '|', '-', '*', '1.', '2.']):
return "structured"
# Default to text
return "text"
def _assessIntentMatch(self, content: str, intent: Dict[str, Any]) -> bool:
"""Assesses if content matches the user intent"""
if not intent:
return False
dataType = intent.get("dataType", "unknown")
if dataType == "numbers":
# Check if content contains actual numbers, not code
hasNumbers = bool(re.search(r'\b\d+\b', content))
isNotCode = not any(keyword in content.lower() for keyword in ['def ', 'function', 'import '])
return hasNumbers and isNotCode
elif dataType == "text":
# Check if content is readable text
words = re.findall(r'\b\w+\b', content)
return len(words) > 5
elif dataType == "documents":
# Check if content is suitable for document creation
hasStructure = any(indicator in content for indicator in ['\n', '\t', '|', '-', '*'])
hasContent = len(content.strip()) > 50
return hasStructure and hasContent
return True # Default to match for unknown types
def _collectFeedback(self, result: Any, validation: Dict[str, Any], intent: Dict[str, Any]) -> Dict[str, Any]:
"""Collects comprehensive feedback from action execution"""
try:
# Extract content summary
contentDelivered = ""
if result.documents:
firstDoc = result.documents[0]
if hasattr(firstDoc, 'documentData'):
data = firstDoc.documentData
if isinstance(data, dict) and 'content' in data:
content = str(data['content'])
contentDelivered = content[:100] + "..." if len(content) > 100 else content
else:
contentDelivered = str(data)[:100] + "..." if len(str(data)) > 100 else str(data)
return {
"actionAttempted": result.resultLabel or "unknown",
"parametersUsed": {}, # Would be extracted from action context
"contentDelivered": contentDelivered,
"intentMatchScore": validation.get('qualityScore', 0),
"qualityScore": validation.get('qualityScore', 0),
"issuesFound": validation.get('improvementSuggestions', []),
"learningOpportunities": validation.get('improvementSuggestions', []),
"userSatisfaction": None, # Would be collected from user feedback
"timestamp": datetime.now(timezone.utc).timestamp()
}
except Exception as e:
logger.error(f"Error collecting feedback: {str(e)}")
return {
"actionAttempted": "unknown",
"parametersUsed": {},
"contentDelivered": "",
"intentMatchScore": 0,
"qualityScore": 0,
"issuesFound": [],
"learningOpportunities": [],
"userSatisfaction": None,
"timestamp": datetime.now(timezone.utc).timestamp()
}
async def _refineDecide(self, context: TaskContext, observation: Dict[str, Any]) -> Dict[str, Any]:
"""Refine: decide continue or stop, with reason"""
# Create proper ReviewContext for extractReviewContent
from modules.datamodels.datamodelChat import ReviewContext
reviewContext = ReviewContext(
task_step=context.task_step,
task_actions=[],
action_results=[], # React mode doesn't have action results in this context
step_result={'observation': observation},
workflow_id=context.workflow_id,
previous_results=[]
)
baseReviewContent = extractReviewContent(reviewContext)
placeholders = {"REVIEW_CONTENT": baseReviewContent}
# NEW: Add content validation to review content
enhancedReviewContent = placeholders.get("REVIEW_CONTENT", "")
if 'contentValidation' in observation:
validation = observation['contentValidation']
enhancedReviewContent += f"\n\nCONTENT VALIDATION:\n"
enhancedReviewContent += f"Overall Success: {validation['overallSuccess']}\n"
enhancedReviewContent += f"Quality Score: {validation['qualityScore']:.2f}\n"
if validation['improvementSuggestions']:
enhancedReviewContent += f"Improvement Suggestions: {', '.join(validation['improvementSuggestions'])}\n"
# NEW: Add content analysis to review content
if 'contentAnalysis' in observation:
analysis = observation['contentAnalysis']
enhancedReviewContent += f"\nCONTENT ANALYSIS:\n"
enhancedReviewContent += f"Content Type: {analysis['contentType']}\n"
enhancedReviewContent += f"Intent Match: {analysis['intentMatch']}\n"
if analysis['contentSnippet']:
enhancedReviewContent += f"Content Preview: {analysis['contentSnippet']}\n"
# NEW: Add progress state to review content
progressState = self.progressTracker.getCurrentProgress()
enhancedReviewContent += f"\nPROGRESS STATE:\n"
enhancedReviewContent += f"Completed Objectives: {len(progressState['completedObjectives'])}\n"
enhancedReviewContent += f"Partial Achievements: {len(progressState['partialAchievements'])}\n"
enhancedReviewContent += f"Failed Attempts: {len(progressState['failedAttempts'])}\n"
enhancedReviewContent += f"Current Phase: {progressState['currentPhase']}\n"
if progressState['nextActionsSuggested']:
enhancedReviewContent += f"Next Action Suggestions: {', '.join(progressState['nextActionsSuggested'])}\n"
# Update placeholders with enhanced review content
placeholders["REVIEW_CONTENT"] = enhancedReviewContent
bundle = generateReactRefinementPrompt(self.services, context, enhancedReviewContent)
promptTemplate = bundle.prompt
placeholders = bundle.placeholders
self._writeTraceLog("React Refinement Prompt", promptTemplate)
self._writeTraceLog("React Refinement Placeholders", placeholders)
# Centralized AI call for refinement decision (balanced analysis)
options = AiCallOptions(
operationType=OperationType.ANALYSE_CONTENT,
priority=Priority.BALANCED,
compressPrompt=True,
compressContext=False,
processingMode=ProcessingMode.ADVANCED,
maxCost=0.05,
maxProcessingTime=30
)
resp = await self.services.ai.callAi(
prompt=promptTemplate,
placeholders=placeholders,
options=options
)
self._writeTraceLog("React Refinement Response", resp)
js = resp[resp.find('{'):resp.rfind('}')+1] if resp else '{}'
try:
decision = json.loads(js)
except Exception:
decision = {"decision": "continue", "reason": "default"}
return decision
    async def _createReactActionMessage(self, workflow: ChatWorkflow, selection: Dict[str, Any],
                                        step: int, maxSteps: int, taskIndex: int, messageType: str,
                                        result: ActionResult = None, observation: Dict[str, Any] = None):
        """Create user-friendly messages for React workflow actions.

        messageType selects the variant: "before" announces the upcoming
        action, "after" reports its outcome; any other value is a no-op.
        Errors are logged and swallowed.

        NOTE(review): _planSelect validates selection['action'] as a STRING
        ("method.action"), but this method reads it as a dict via .get() —
        with a Stage 1 selection, method/actionName would come back empty.
        Confirm the intended selection shape before relying on this path.
        """
        try:
            action = selection.get('action', {})
            method = action.get('method', '')
            actionName = action.get('name', '')
            # Get user language (defaults to English without a user object).
            userLanguage = self.services.user.language if self.services and self.services.user else 'en'
            if messageType == "before":
                # Message BEFORE action execution
                userMessage = await self._generateActionIntentionMessage(method, actionName, userLanguage)
                messageContent = f"🔄 **Step {step}/{maxSteps}**\n\n{userMessage}"
                status = "step"
                actionProgress = "pending"
                documentsLabel = f"action_{step}_intention"
            elif messageType == "after":
                # Message AFTER action execution
                userMessage = await self._generateActionResultMessage(method, actionName, result, observation, userLanguage)
                # NOTE(review): both branches yield an empty icon — a
                # success/failure emoji appears to have been lost; confirm.
                successIcon = "" if result and result.success else ""
                messageContent = f"{successIcon} **Step {step}/{maxSteps} Complete**\n\n{userMessage}"
                status = "step"
                actionProgress = "success" if result and result.success else "fail"
                documentsLabel = observation.get('resultLabel') if observation else f"action_{step}_result"
            else:
                return
            # Persist the chat message and mirror it on the in-memory workflow.
            messageData = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": messageContent,
                "status": status,
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": self.services.utils.getUtcTimestamp(),
                "documentsLabel": documentsLabel,
                "documents": [],
                "roundNumber": workflow.currentRound,
                "taskNumber": taskIndex,
                "actionNumber": step,
                "actionProgress": actionProgress
            }
            message = self.services.interfaceDbChat.createMessage(messageData)
            if message:
                workflow.messages.append(message)
        except Exception as e:
            logger.error(f"Error creating React action message: {str(e)}")
async def _generateActionIntentionMessage(self, method: str, actionName: str, userLanguage: str):
    """Produce a short, user-facing sentence describing the upcoming action.

    Asks the AI service for a localized, non-technical one-liner explaining
    what ``method.actionName`` is about to do. Falls back to a generic English
    string whenever the AI call fails or returns an empty response.
    """
    fallback = f"Executing {method}.{actionName} action..."
    try:
        # Keep the prompt minimal: we only need a one-liner for the chat UI.
        prompt = f"""Generate a brief, user-friendly message explaining what the {method}.{actionName} action will do.
User language: {userLanguage}
Return only the user-friendly message, no technical details."""
        callOptions = AiCallOptions(
            operationType=OperationType.GENERATE_CONTENT,
            priority=Priority.SPEED,
            compressPrompt=True,
            maxCost=0.01,
            maxProcessingTime=5
        )
        response = await self.services.ai.callAi(prompt=prompt, options=callOptions)
        if not response:
            return fallback
        return response.strip()
    except Exception as e:
        logger.error(f"Error generating action intention message: {str(e)}")
        return fallback
async def _generateActionResultMessage(self, method: str, actionName: str, result: ActionResult,
                                       observation: Dict[str, Any], userLanguage: str):
    """Produce a short, user-facing summary of a finished action.

    Builds a small context block (success flag plus produced-document count)
    and asks the AI service for a localized one-line summary. Falls back to a
    generic English completion string on any failure.
    """
    fallback = f"{method}.{actionName} action completed"
    try:
        # Prefer the concrete result's documents over the observation payload.
        resultContext = ""
        if result and result.documents:
            resultContext = f"Generated {len(result.documents)} document(s)"
        elif observation and observation.get('documentsCount', 0) > 0:
            resultContext = f"Generated {observation.get('documentsCount', 0)} document(s)"
        prompt = f"""Generate a brief, user-friendly message explaining the result of the {method}.{actionName} action.
User language: {userLanguage}
Success: {result.success if result else 'Unknown'}
Result context: {resultContext}
Return only the user-friendly message, no technical details."""
        callOptions = AiCallOptions(
            operationType=OperationType.GENERATE_CONTENT,
            priority=Priority.SPEED,
            compressPrompt=True,
            maxCost=0.01,
            maxProcessingTime=5
        )
        response = await self.services.ai.callAi(prompt=prompt, options=callOptions)
        if not response:
            return fallback
        return response.strip()
    except Exception as e:
        logger.error(f"Error generating action result message: {str(e)}")
        return fallback
def _createActionItem(self, actionData: Dict[str, Any]) -> ActionItem:
    """Creates a new task action for React mode.

    NOTE(review): an identical method definition appears again later in this
    class; Python keeps only the last definition of a name, so this earlier
    copy is dead code — one of the two should be removed.

    Fills in defaults (id, status, execParameters), validates that
    execMethod/execAction are present, persists the record via the chat DB
    interface and returns it as an ActionItem model. Returns None on
    validation failure or any error.
    """
    try:
        import uuid
        # Ensure ID is present
        if "id" not in actionData or not actionData["id"]:
            actionData["id"] = f"action_{uuid.uuid4()}"
        # Ensure required fields
        if "status" not in actionData:
            actionData["status"] = TaskStatus.PENDING
        if "execMethod" not in actionData:
            logger.error("execMethod is required for task action")
            return None
        if "execAction" not in actionData:
            logger.error("execAction is required for task action")
            return None
        if "execParameters" not in actionData:
            actionData["execParameters"] = {}
        # Use generic field separation based on ActionItem model
        # NOTE(review): objectFields is never used afterwards — only simple
        # fields are persisted; confirm object fields are dropped on purpose.
        simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
        # Create action in database
        createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
        # Convert to ActionItem model
        return ActionItem(
            id=createdAction["id"],
            execMethod=createdAction["execMethod"],
            execAction=createdAction["execAction"],
            execParameters=createdAction.get("execParameters", {}),
            execResultLabel=createdAction.get("execResultLabel"),
            expectedDocumentFormats=createdAction.get("expectedDocumentFormats"),
            status=createdAction.get("status", TaskStatus.PENDING),
            error=createdAction.get("error"),
            retryCount=createdAction.get("retryCount", 0),
            retryMax=createdAction.get("retryMax", 3),
            processingTime=createdAction.get("processingTime"),
            timestamp=float(createdAction.get("timestamp", self.services.utils.getUtcTimestamp())),
            result=createdAction.get("result"),
            resultDocuments=createdAction.get("resultDocuments", []),
            userMessage=createdAction.get("userMessage")
        )
    except Exception as e:
        logger.error(f"Error creating task action: {str(e)}")
        return None
def _updateWorkflowBeforeExecutingTask(self, taskNumber: int):
    """Reset per-task progress counters on the workflow before a task starts.

    Records the task number and zeroes the action counters, both on the
    in-memory workflow object and in the database. Errors are logged only.
    """
    try:
        updateData = {
            "currentTask": taskNumber,
            "currentAction": 0,
            "totalActions": 0
        }
        # Keep the in-memory object in sync with what gets persisted.
        for fieldName, fieldValue in updateData.items():
            setattr(self.workflow, fieldName, fieldValue)
        self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
        logger.info(f"Updated workflow {self.workflow.id} before executing task {taskNumber}: {updateData}")
    except Exception as e:
        logger.error(f"Error updating workflow before executing task: {str(e)}")
def _updateWorkflowBeforeExecutingAction(self, actionNumber: int):
    """Record the action number about to be executed on the workflow.

    Mirrors the value on the in-memory workflow object and persists it to
    the database. Errors are logged only.
    """
    try:
        updateData = {"currentAction": actionNumber}
        # In-memory first, then persist.
        self.workflow.currentAction = actionNumber
        self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
        logger.info(f"Updated workflow {self.workflow.id} before executing action {actionNumber}: {updateData}")
    except Exception as e:
        logger.error(f"Error updating workflow before executing action: {str(e)}")
def _createActionItem(self, actionData: Dict[str, Any]) -> ActionItem:
    """Create and persist a new task action for React mode.

    Normalizes the incoming action data (id, status, execParameters),
    validates the required execution fields, stores the simple fields via the
    chat DB interface and maps the created record onto an ActionItem model.
    Returns None when required fields are missing or persistence fails.

    NOTE(review): this method is defined twice in the class with identical
    bodies; this later definition is the one Python keeps.
    """
    try:
        import uuid
        # Normalize: guarantee an id, a status and a parameters dict.
        if not actionData.get("id"):
            actionData["id"] = f"action_{uuid.uuid4()}"
        actionData.setdefault("status", TaskStatus.PENDING)
        # Hard requirements for an executable action.
        for requiredField in ("execMethod", "execAction"):
            if requiredField not in actionData:
                logger.error(f"{requiredField} is required for task action")
                return None
        actionData.setdefault("execParameters", {})
        # Split simple vs. object fields according to the ActionItem model.
        simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)
        createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)
        # Map the stored record back onto the ActionItem model.
        return ActionItem(
            id=createdAction["id"],
            execMethod=createdAction["execMethod"],
            execAction=createdAction["execAction"],
            execParameters=createdAction.get("execParameters", {}),
            execResultLabel=createdAction.get("execResultLabel"),
            expectedDocumentFormats=createdAction.get("expectedDocumentFormats"),
            status=createdAction.get("status", TaskStatus.PENDING),
            error=createdAction.get("error"),
            retryCount=createdAction.get("retryCount", 0),
            retryMax=createdAction.get("retryMax", 3),
            processingTime=createdAction.get("processingTime"),
            timestamp=float(createdAction.get("timestamp", self.services.utils.getUtcTimestamp())),
            result=createdAction.get("result"),
            resultDocuments=createdAction.get("resultDocuments", []),
            userMessage=createdAction.get("userMessage")
        )
    except Exception as e:
        logger.error(f"Error creating task action: {str(e)}")
        return None
def _writeTraceLog(self, contextText: str, data: Any) -> None:
    """Append a structured trace entry to the trace log file when debugging.

    Writes only when the module logger is effectively debug-enabled. Each
    entry carries a millisecond UTC timestamp, the context text, and a
    pretty-printed rendering of ``data`` (JSON where possible, plain text
    otherwise). All errors are swallowed on purpose so tracing can never
    break execution or recurse through the logging system.

    Parameters:
        contextText: Short label describing where the trace originates.
        data: Arbitrary payload; dicts/lists are dumped as JSON, strings and
            other objects are parsed as JSON when possible.
    """
    try:
        import os
        import json
        from datetime import datetime, UTC
        # BUGFIX: use the effective level. The previous check
        # (logger.level > logging.DEBUG) treated a NOTSET (level 0) logger
        # as debug-enabled even when its inherited level was higher.
        if not logger.isEnabledFor(logging.DEBUG):
            return
        # Resolve the log directory; relative paths are anchored at the
        # gateway directory (four levels above this file).
        logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
        if not os.path.isabs(logDir):
            gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
            logDir = os.path.join(gatewayDir, logDir)
        os.makedirs(logDir, exist_ok=True)
        traceFile = os.path.join(logDir, "log_trace.log")
        # Millisecond-precision UTC timestamp header.
        timestamp = datetime.fromtimestamp(self.services.utils.getUtcTimestamp(), UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
        traceEntry = f"[{timestamp}] {contextText}\n"
        traceEntry += "=" * 80 + "\n"

        def _renderJson(obj: Any) -> str:
            # Shared pretty-printer settings for all JSON renderings.
            return json.dumps(obj, indent=2, default=str, ensure_ascii=False, sort_keys=False)

        if data is not None:
            try:
                if isinstance(data, (dict, list)):
                    traceEntry += f"JSON Data:\n{_renderJson(data)}\n"
                elif isinstance(data, str):
                    # Strings may carry serialized JSON; try to parse first.
                    try:
                        traceEntry += f"JSON Data (parsed from string):\n{_renderJson(json.loads(data))}\n"
                    except (json.JSONDecodeError, TypeError):
                        # Plain text: expand escaped newlines for readability.
                        formatted_data = data.replace('\\n', '\n')
                        traceEntry += f"Text Data:\n{formatted_data}\n"
                else:
                    # Other types: stringify, then attempt a JSON parse.
                    dataStr = str(data)
                    try:
                        traceEntry += f"JSON Data (parsed from object):\n{_renderJson(json.loads(dataStr))}\n"
                    except (json.JSONDecodeError, TypeError):
                        formatted_data = dataStr.replace('\\n', '\n')
                        traceEntry += f"Object Data:\n{formatted_data}\n"
            except Exception:
                # Formatting failed entirely; fall back to a plain string.
                traceEntry += f"Data (fallback): {str(data)}\n"
        else:
            traceEntry += "No data provided\n"
        traceEntry += "=" * 80 + "\n\n"
        with open(traceFile, "a", encoding="utf-8") as f:
            f.write(traceEntry)
    except Exception:
        # Intentionally silent: logging here could recurse into tracing.
        pass

File diff suppressed because it is too large Load diff

View file

@ -1,418 +0,0 @@
"""
Placeholder-based prompt factory for dynamic AI calls.
This module provides prompt templates with placeholders that can be filled dynamically.
"""
import json
from typing import Dict, Any
from modules.workflows.processing.promptFactory import (
_getAvailableDocuments,
_getPreviousRoundContext,
getMethodsList,
getEnhancedDocumentContext,
_getConnectionReferenceList,
methods
)
def createTaskPlanningPromptTemplate() -> str:
    """Create task planning prompt template with placeholders.

    Returns:
        str: Prompt template for the task-planning AI call. Placeholders of
        the form ``{{KEY:NAME}}`` (USER_PROMPT, AVAILABLE_DOCUMENTS,
        WORKFLOW_HISTORY) must be substituted by the caller before use.
        The template instructs the model to answer with a JSON object only.
    """
    return """You are a task planning AI that analyzes user requests and creates structured, self-contained task plans with user-friendly feedback messages.
USER REQUEST: {{KEY:USER_PROMPT}}
AVAILABLE DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS}}
PREVIOUS WORKFLOW ROUNDS CONTEXT:
{{KEY:WORKFLOW_HISTORY}}
INSTRUCTIONS:
1. Analyze the user request, available documents, and previous workflow rounds context
2. If the user request appears to be a follow-up (like "try again", "versuche es nochmals", "retry", etc.),
use the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what the user wants to retry or continue
3. Group related topics and sequential steps into single, comprehensive tasks
4. Focus on business outcomes, not technical operations
5. Make each task self-contained: clearly state what to do and what outputs are expected
6. Ensure proper handover between tasks (later actions will use your task outputs)
7. Detect the language of the user request and include it in languageUserDetected
8. Generate user-friendly messages for each task in the user's request language
9. Return a JSON object with the exact structure shown below
TASK GROUPING PRINCIPLES:
- COMBINE RELATED TOPICS: Group related subjects, sequential steps, or workflow-structured activities into single tasks
- SEQUENTIAL WORKFLOWS: If the user says "first do this, then that, then that" create ONE task that handles the entire sequence
- SIMILAR CONTENT: If multiple items deal with the same subject matter combine into ONE comprehensive task
- ONLY SPLIT WHEN DIFFERENT: Create separate tasks ONLY when the user explicitly wants different, independent things
EXAMPLES OF GOOD TASK GROUPING:
COMBINE INTO ONE TASK:
- "Analyze the documents, extract key insights, and create a summary report" ONE task: "Analyze documents and create comprehensive summary report"
- "First check my emails, then respond to urgent ones, then organize my inbox" ONE task: "Process and organize email inbox with priority responses"
- "Review the budget, analyze spending patterns, and suggest cost-cutting measures" ONE task: "Comprehensive budget analysis with optimization recommendations"
- "Create a business strategy, develop marketing plan, and prepare presentation" ONE task: "Develop complete business strategy with marketing plan and presentation"
SPLIT INTO MULTIPLE TASKS:
- "Create a business strategy for Q4" AND "Check my emails for messages from my assistant" TWO separate tasks (different subjects)
- "Analyze customer feedback" AND "Prepare quarterly financial report" TWO separate tasks (different business areas)
- "Review project timeline" AND "Update employee handbook" TWO separate tasks (unrelated activities)
TASK PLANNING PRINCIPLES:
- Break down complex requests into logical, sequential steps
- Focus on business value and outcomes
- Keep tasks at a meaningful level of abstraction (not implementation details)
- Each task should produce results that can be used by subsequent tasks
- Ensure clear dependencies and handovers between tasks
- Provide clear, actionable user messages in the user's request language
- Group related activities to minimize task fragmentation
- Only create multiple tasks when dealing with truly different, independent objectives
- Make task objectives action-oriented and specific (include scope, data sources to consider, and output intent at high level)
- Write success_criteria as measurable acceptance criteria focusing on outputs (what artifacts or insights will exist and how they are validated)
FOLLOW-UP PROMPT HANDLING:
- If the user request is a follow-up (e.g., "try again", "versuche es nochmals", "retry", "continue", "proceed"),
analyze the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what failed or was incomplete
- Use the previous round's user requests and task outcomes to determine what the user wants to retry
- If previous rounds failed due to missing documents, and documents are now available,
create tasks that use the newly available documents to accomplish the original request
- Maintain the same business objective from previous rounds but adapt to current available resources
SPECIFIC SCENARIO HANDLING:
- If previous round failed with "documents missing" error and current round has documents available,
the user likely wants to retry the same operation with the newly provided documents
- Example: Previous round "speichere mir die 3 dokumente im sharepoint unter xxx" failed due to missing documents,
current round "versuche es nochmals" with documents should retry the SharePoint save operation
- Always check if the current request is a retry by looking for retry keywords and previous round context
REQUIRED JSON STRUCTURE:
{{
"overview": "Brief description of the overall plan",
"languageUserDetected": "en", // Language code detected from user request (en, de, fr, it, es, etc.)
"userMessage": "User-friendly message explaining the task plan in user's request language",
"tasks": [
{{
"id": "task_1",
"objective": "Clear business objective this task accomplishes (combining related activities)",
"dependencies": ["task_0"], // IDs of tasks that must complete first
"success_criteria": ["criteria1", "criteria2"],
"estimated_complexity": "low|medium|high",
"userMessage": "User-friendly message explaining what this task will accomplish in user's request language"
}}
]
}}
EXAMPLES OF GOOD TASK OBJECTIVES (COMBINING RELATED ACTIVITIES):
- "Analyze documents and extract key insights for business communication"
- "Create professional business communication incorporating analyzed information"
- "Execute business communication using specified channels and document outcomes"
- "Develop comprehensive business strategy with implementation roadmap and success metrics"
EXAMPLES OF WELL-FORMED SUCCESS CRITERIA (OUTPUT-FOCUSED):
- "Deliver a prioritized list of 1020 candidates with justification"
- "Provide a structured JSON with fields: company, ticker, rationale, metrics"
- "Produce a presentation outline with 5 sections and bullet points per section"
- "Include data sources and date stamped references for traceability"
EXAMPLES OF GOOD SUCCESS CRITERIA:
- "Key insights extracted and ready for business use"
- "Professional communication created with clear business value"
- "Business communication successfully delivered and documented"
- "All outcomes properly documented and accessible"
EXAMPLES OF BAD TASK OBJECTIVES:
- "Read the PDF file" (too granular - should be "Analyze document content")
- "Convert data to CSV" (implementation detail - should be "Structure data for analysis")
- "Send email" (too specific - should be "Deliver business communication")
LANGUAGE DETECTION:
- Analyze the user request text to identify the language
- Use standard language codes: en (English), de (German), fr (French), it (Italian), es (Spanish), etc.
- If the language cannot be determined, use "en" as default
- Include the detected language in the languageUserDetected field
NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
def createActionDefinitionPromptTemplate() -> str:
    """Create action definition prompt template with placeholders.

    Returns:
        str: Prompt template for the action-planning AI call. Placeholders of
        the form ``{{KEY:NAME}}`` (USER_PROMPT, AVAILABLE_DOCUMENTS,
        WORKFLOW_HISTORY, AVAILABLE_METHODS, USER_LANGUAGE) must be
        substituted by the caller. Single-brace tokens such as
        ``{current_round}`` are part of the result-label naming convention
        the AI is asked to follow, not format placeholders.
    """
    return """You are an action planning AI that generates specific, executable actions for task steps.
TASK OBJECTIVE: {{KEY:USER_PROMPT}}
AVAILABLE DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS}}
WORKFLOW HISTORY: {{KEY:WORKFLOW_HISTORY}}
AVAILABLE METHODS: {{KEY:AVAILABLE_METHODS}}
USER LANGUAGE: {{KEY:USER_LANGUAGE}}
INSTRUCTIONS:
- Generate actions to accomplish this task step using available documents, connections, and previous results
- Use docItem for single documents and docList for groups of documents as shown in AVAILABLE DOCUMENTS
- If there are no documents available, do not create document extraction actions. Select methods strictly based on the task objective; choose web actions when external information is required. Otherwise, generate a status/information report requesting needed inputs.
- Always pass documentList as a LIST of references (docItem and/or docList) - this list CANNOT be empty for document extraction actions
- For referencing documents from previous actions, use the format "round{current_round}_task{current_task}_action{action_number}_{descriptive_label}"
- Each action must be self-contained and executable with the provided parameters
- For document extraction, ensure prompts are specific and detailed
- Include validation steps in extraction prompts where relevant
- If this is a retry, learn from previous failures and improve the approach
- Address specific issues mentioned in previous review feedback
- When specifying expectedDocumentFormats, ensure AI prompts explicitly request pure data without markdown formatting
- Generate user-friendly messages for each action in the user's language
PARAMETER COMPLETENESS REQUIREMENTS:
- Every parameter must contain all information needed to execute without implicit context
- Use explicit, concrete values (units, languages, formats, limits, date ranges, IDs) when applicable
- For search-like parameters (if any method requires a query), derive the query from the task objective AND ALL success criteria dimensions. Include:
- Key entities and domain terms from the objective
- All distinct facets from success_criteria (e.g., valuation AND AI potential AND know-how needs)
- Geography/localization (e.g., Schweiz/Suisse/Switzerland; use multilingual synonyms when helpful)
- Time horizon or recency if relevant
- Boolean operators and synonyms to increase precision (use AND/OR, quotes, parentheses)
- Avoid single-topic or generic queries focused only on one facet (e.g., pure valuation metrics)
- When facets are truly distinct, create 13 focused actions with precise queries rather than one vague catch-all
- Document list parameters must reference only existing labels or prior action outputs; do not reference future outputs
DOCUMENT ROUTING GUIDANCE:
- Each action should produce documents with a clear resultLabel for routing
- Use consistent naming: "round{current_round}_task{current_task}_action{action_number}_{descriptive_label}"
- Ensure document flow: Action A produces documents that Action B can consume
- Document labels should be descriptive of content, not just "results" or "output"
- Consider what subsequent actions will need and structure outputs accordingly
REQUIRED JSON STRUCTURE:
{{
"actions": [
{{
"method": "method_name",
"action": "action_name",
"parameters": {{}},
"resultLabel": "round{current_round}_task{current_task}_action{action_number}_{descriptive_label}",
"description": "Brief description of what this action accomplishes",
"userMessage": "User-friendly message explaining what this action will do in user's language"
}}
]
}}
IMPORTANT NOTES:
- Respond with ONLY the JSON object. Do not include any explanatory text.
- Before creating any document extraction action, verify that AVAILABLE DOCUMENTS contains actual document references.
- Always include a user-friendly userMessage for each action in the user's language.
- The examples above show German user messages as reference - adapt the language to match the USER LANGUAGE specified above."""
def createActionSelectionPromptTemplate() -> str:
    """Create action selection prompt template with placeholders.

    Returns:
        str: Prompt template asking the AI to pick exactly one action from
        the tool catalog. Placeholders ``{{KEY:NAME}}`` (USER_PROMPT,
        AVAILABLE_DOCUMENTS, USER_LANGUAGE, AVAILABLE_METHODS) must be
        substituted by the caller; the model must answer with JSON only.
    """
    return """Select exactly one action to advance the task.
OBJECTIVE: {{KEY:USER_PROMPT}}
AVAILABLE DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS}}
USER LANGUAGE: {{KEY:USER_LANGUAGE}}
MINIMAL TOOL CATALOG (method -> action -> [parameterNames]):
{{KEY:AVAILABLE_METHODS}}
BUSINESS RULES:
- Pick exactly one action per step.
- Derive choice from objective and success criteria.
- Prefer user language.
- Keep it minimal; avoid provider specifics.
RESPONSE FORMAT (JSON only):
{{"action":{{"method":"web","name":"search"}}}}"""
def createActionParameterPromptTemplate() -> str:
    """Create action parameter prompt template with placeholders.

    Returns:
        str: Prompt template asking the AI to emit only the parameters object
        for a previously selected action. Placeholders ``{{KEY:NAME}}``
        (SELECTED_ACTION, ACTION_SIGNATURE, USER_PROMPT, AVAILABLE_DOCUMENTS,
        USER_LANGUAGE) must be substituted by the caller.
    """
    return """Provide only the required parameters for this action.
SELECTED ACTION: {{KEY:SELECTED_ACTION}}
ACTION SIGNATURE: {{KEY:ACTION_SIGNATURE}}
OBJECTIVE: {{KEY:USER_PROMPT}}
AVAILABLE DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS}}
USER LANGUAGE: {{KEY:USER_LANGUAGE}}
RULES:
- Return only the parameters object.
- Include user language if relevant.
- Reference documents only by exact labels available.
- Avoid unnecessary fields; host applies defaults.
- Use the ACTION SIGNATURE above to understand what parameters are required.
- Convert the objective into appropriate parameter values as needed.
RESPONSE FORMAT (JSON only):
{{"parameters":{{}}}}"""
def createRefinementPromptTemplate() -> str:
    """Create refinement prompt template with placeholders.

    Returns:
        str: Prompt template asking the AI to decide between "continue" and
        "stop" after observing an action. Placeholders ``{{KEY:NAME}}``
        (USER_PROMPT, REVIEW_CONTENT) must be substituted by the caller.
    """
    return """Decide next step based on observation.
OBJECTIVE: {{KEY:USER_PROMPT}}
OBSERVATION:
{{KEY:REVIEW_CONTENT}}
RULES:
- If criteria are met or no further action helps, decide stop.
- Else decide continue.
RESPONSE FORMAT (JSON only):
{{"decision":"continue","reason":"Need more data"}}"""
def createResultReviewPromptTemplate() -> str:
    """Create result review prompt template with placeholders.

    Returns:
        str: Prompt template for the result-validation AI call. Placeholders
        ``{{KEY:NAME}}`` (USER_PROMPT, REVIEW_CONTENT) must be substituted by
        the caller; the model must answer with a JSON verdict
        (success/retry/failed) only.
    """
    return """You are a result validation AI that reviews task execution outcomes and determines success, retry needs, or failure.
TASK OBJECTIVE: {{KEY:USER_PROMPT}}
EXECUTION RESULTS:
{{KEY:REVIEW_CONTENT}}
VALIDATION CRITERIA:
- Review each action's success/failure status
- Check if required documents were produced
- Validate document quality and completeness
- Assess if success criteria were met
- Identify any missing or incomplete outputs
- Determine if retry would help or if task should be marked as failed
REQUIRED JSON STRUCTURE:
{{
"status": "success|retry|failed",
"reason": "Detailed explanation of the validation decision",
"improvements": ["specific improvement 1", "specific improvement 2"],
"quality_score": 8, // 1-10 scale
"met_criteria": ["criteria1", "criteria2"],
"unmet_criteria": ["criteria3", "criteria4"],
"confidence": 0.85, // 0.0-1.0 scale
"userMessage": "User-friendly message explaining the validation result"
}}
VALIDATION PRINCIPLES:
- Be thorough but fair in assessment
- Focus on business value and outcomes
- Consider both technical execution and business results
- Provide specific, actionable improvement suggestions
- Use quality scores to track progress across retries
- Clearly identify which success criteria were met vs. unmet
- Set appropriate confidence levels based on evidence quality
NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
# Helper functions to extract content for placeholders
def extractUserPrompt(context) -> str:
"""Extract user prompt from context."""
if hasattr(context, 'task_step') and context.task_step:
return context.task_step.objective or 'No request specified'
return 'No request specified'
def extractAvailableDocuments(context) -> str:
    """Return the formatted document list for the context's workflow.

    Delegates to ``_getAvailableDocuments`` when a truthy workflow is
    present; otherwise returns the 'No documents available' fallback.
    """
    workflow = getattr(context, 'workflow', None)
    if not workflow:
        return "No documents available"
    return _getAvailableDocuments(workflow)
def extractWorkflowHistory(service, context) -> str:
    """Return the previous-round context for the context's workflow.

    Delegates to ``_getPreviousRoundContext``; when there is no workflow or
    the helper returns nothing, a first-round fallback string is returned.
    """
    fallback = "No previous workflow rounds - this is the first round."
    workflow = getattr(context, 'workflow', None)
    if not workflow:
        return fallback
    return _getPreviousRoundContext(service, workflow) or fallback
def extractAvailableMethods(service) -> str:
    """Build a JSON catalog of available methods/actions for action planning.

    Groups the signatures returned by ``getMethodsList`` per method and
    action, then reads each action's docstring on the live method instance
    and collects the "name: description" lines of its "Parameters:" section.

    Returns:
        str: Pretty-printed JSON of the form
        ``{method: {action: ["name: description", ...]}}``.

    Note: docstring parsing is heuristic — it stops at Returns:/Raises:/
    Note:/Example(s): headers and assumes one "name: description" pair per
    line. Signatures without parentheses yield an empty parameter list.
    (Removed dead code: the parenthesized parameter substring was extracted
    from the signature but never used, as were the original line text and
    the enumerate index.)
    """
    methodList = getMethodsList(service)
    # Group "method.action(...)" signatures by method name.
    method_actions = {}
    for sig in methodList:
        if '.' in sig:
            method, rest = sig.split('.', 1)
            action = rest.split('(')[0]
            method_actions.setdefault(method, []).append((action, sig))
    # Create a structured JSON format for better AI parsing.
    available_methods_json = {}
    for method, actions in method_actions.items():
        available_methods_json[method] = {}
        # The live instance is needed to read each action's docstring.
        method_instance = methods.get(method, {}).get('instance') if methods else None
        for action, sig in actions:
            if '(' in sig and ')' in sig:
                parameters = []
                # Parse the "Parameters:" section of the action's docstring.
                if method_instance and hasattr(method_instance, action):
                    func = getattr(method_instance, action)
                    if hasattr(func, '__doc__') and func.__doc__:
                        in_parameters = False
                        for line in func.__doc__.split('\n'):
                            line = line.strip()
                            if line.startswith('Parameters:'):
                                in_parameters = True
                            elif line.startswith(('Returns:', 'Raises:', 'Note:', 'Example:', 'Examples:')):
                                in_parameters = False
                            elif in_parameters and line and not line.startswith('-') and not line.startswith('*'):
                                # Parameter lines look like "name: description".
                                if ':' in line:
                                    param_name = line.split(':')[0].strip()
                                    param_desc = line.split(':', 1)[1].strip()
                                    parameters.append(f"{param_name}: {param_desc}")
                available_methods_json[method][action] = parameters
            else:
                available_methods_json[method][action] = []
    return json.dumps(available_methods_json, indent=2, ensure_ascii=False)
def extractUserLanguage(service) -> str:
    """Return the configured user language, defaulting to 'en'.

    Falls back to 'en' when the service or its user object is missing.
    """
    if not service or not service.user:
        return 'en'
    return service.user.language
def extractReviewContent(context) -> str:
    """Render review content for result validation.

    Prefers a textual summary of ``context.action_results`` (success flag,
    error and produced documents per result); falls back to the JSON-encoded
    ``context.observation``; returns a fixed message when neither is present.
    """
    results = getattr(context, 'action_results', None)
    if results:
        parts = []
        for index, result in enumerate(results, start=1):
            parts.append(f"\nRESULT {index}:\n")
            parts.append(f" Success: {result.success}\n")
            if result.error:
                parts.append(f" Error: {result.error}\n")
            if result.documents:
                parts.append(f" Documents: {len(result.documents)} document(s)\n")
                for doc in result.documents:
                    doc_name = getattr(doc, 'documentName', 'Unknown')
                    doc_mime = getattr(doc, 'mimeType', 'Unknown')
                    parts.append(f" - {doc_name} ({doc_mime})\n")
            else:
                parts.append(" Documents: None\n")
        return "".join(parts)
    observation = getattr(context, 'observation', None)
    if observation:
        return json.dumps(observation, ensure_ascii=False)
    return "No review content available"

View file

@ -0,0 +1 @@
# Shared workflow utilities

View file

@ -4,8 +4,8 @@
import logging import logging
from typing import List from typing import List
from datetime import datetime, UTC from datetime import datetime, UTC
from modules.datamodels.datamodelWorkflow import TaskStep from modules.datamodels.datamodelChat import TaskStep
from modules.datamodels.datamodelWorkflow import ActionResult from modules.datamodels.datamodelChat import ActionResult
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -58,7 +58,7 @@ class TaskExecutionState:
patterns.append("permission_issues") patterns.append("permission_issues")
return list(set(patterns)) return list(set(patterns))
def should_continue(observation, review=None, current_step: int = 0, max_steps: int = 5) -> bool: def shouldContinue(observation, review=None, current_step: int = 0, max_steps: int = 5) -> bool:
"""Helper to decide if the iterative loop should continue """Helper to decide if the iterative loop should continue
- Stop if review indicates 'stop' or success criteria are met - Stop if review indicates 'stop' or success criteria are met
- Stop on failure with no retry path - Stop on failure with no retry path

View file

@ -0,0 +1,131 @@
# methodDiscovery.py
# Method discovery and management for workflow execution
import json
import logging
import importlib
import pkgutil
import inspect
from typing import Any, Dict, List
from modules.datamodels.datamodelChat import TaskContext, ReviewContext, DocumentExchange
from modules.workflows.methods.methodBase import MethodBase
# Set up logger
logger = logging.getLogger(__name__)
# Global methods catalog - moved from serviceCenter
methods = {}
def discoverMethods(serviceCenter):
    """Dynamically discover all method classes and their actions.

    Scans every non-package module named ``method*`` inside
    ``modules.workflows.methods``, instantiates each MethodBase subclass
    found there and registers it in the global ``methods`` catalog under
    both its full class name and a lowercase short alias (class name with
    'Method' removed). Per-module discovery errors are logged and do not
    abort the scan.
    """
    try:
        methodsPackage = importlib.import_module('modules.workflows.methods')
        for _, name, isPkg in pkgutil.iter_modules(methodsPackage.__path__):
            # Only plain modules following the method* naming convention.
            if isPkg or not name.startswith('method'):
                continue
            try:
                module = importlib.import_module(f'modules.workflows.methods.{name}')
                for itemName, item in inspect.getmembers(module):
                    isMethodClass = (inspect.isclass(item)
                                     and issubclass(item, MethodBase)
                                     and item is not MethodBase)
                    if not isMethodClass:
                        continue
                    methodInstance = item(serviceCenter)
                    # MethodBase.actions resolves @action-decorated callables.
                    actions = methodInstance.actions
                    methodInfo = {
                        'instance': methodInstance,
                        'actions': actions,
                        'description': item.__doc__ or f"Method {itemName}"
                    }
                    # Register under full class name and short alias.
                    methods[itemName] = methodInfo
                    shortName = itemName.replace('Method', '').lower()
                    methods[shortName] = methodInfo
                    logger.info(f"Discovered method {itemName} (short: {shortName}) with {len(actions)} actions")
            except Exception as e:
                logger.error(f"Error discovering method {name}: {str(e)}")
                continue
        logger.info(f"Discovered {len(methods)} method entries total")
    except Exception as e:
        logger.error(f"Error discovering methods: {str(e)}")
def getMethodsList(serviceCenter):
    """Return a markdown-style listing of every catalog entry with the
    signature and description of each of its actions.

    Triggers lazy discovery when the catalog is empty. Note that the
    catalog holds both full-name and short-alias entries, so each method
    appears twice in the listing.
    """
    if not methods:
        discoverMethods(serviceCenter)

    def _renderParam(pName, pInfo):
        # Required params show only name: type; optional ones append a default.
        if pInfo['required']:
            return f"{pName}: {pInfo['type']}"
        default = pInfo['default']
        suffix = f" = {default}" if default is not None else " = None"
        return f"{pName}: {pInfo['type']}{suffix}"

    sections = []
    for entryName, entryInfo in methods.items():
        actionLines = []
        for actName, actInfo in entryInfo['actions'].items():
            rendered = [_renderParam(p, pi) for p, pi in actInfo['parameters'].items()]
            signature = f"({', '.join(rendered)})"
            actionLines.append(f"- {actName}{signature}: {actInfo['description']}")
        sections.append(f"**{entryName}**: {entryInfo['description']}\n" + "\n".join(actionLines))
    return "\n\n".join(sections)
def getActionParameterList(methodName: str, actionName: str, methods: Dict[str, Any]) -> str:
    """Build a bullet list of an action's parameters for AI parameter
    generation (list only, no headings).

    Parameter names/types/descriptions are parsed out of the action's
    docstring via the method instance's _extractParameterDetails helper.
    Returns "" when the method or action is unknown, or on any error.
    """
    try:
        if not methods or methodName not in methods:
            return ""
        instance = methods[methodName]['instance']
        actionCatalog = instance.actions
        if actionName not in actionCatalog:
            return ""
        details = actionCatalog[actionName]
        # The stored 'description' is the action docstring; mine it for params.
        descriptions, typeMap = instance._extractParameterDetails(details.get('description', ''))
        bullets = []
        for pName, pDesc in descriptions.items():
            pType = typeMap.get(pName, 'Any')
            bullets.append(f"- {pName} ({pType}): {pDesc}" if pDesc else f"- {pName} ({pType})")
        # Return list only, without leading headings or trailing text
        return "\n".join(bullets)
    except Exception as e:
        logger.error(f"Error getting action parameter signature for {methodName}.{actionName}: {str(e)}")
        return ""

View file

@ -0,0 +1,411 @@
"""
Placeholder Factory
Centralized placeholder extraction functions for all workflow modes.
Each function corresponds to a {{KEY:PLACEHOLDER_NAME}} in prompt templates.
NAMING CONVENTION:
- All functions follow pattern: extract{PlaceholderName}()
- Placeholder names are in UPPER_CASE with underscores
- Function names are in camelCase
MAPPING TABLE (keys function) with usage [taskplan | actionplan | react]:
{{KEY:USER_PROMPT}} -> extractUserPrompt() [taskplan, actionplan, react]
{{KEY:USER_LANGUAGE}} -> extractUserLanguage() [actionplan, react]
{{KEY:WORKFLOW_HISTORY}} -> extractWorkflowHistory() [taskplan, actionplan, react]
{{KEY:AVAILABLE_CONNECTIONS_INDEX}} -> extractAvailableConnectionsIndex() [actionplan, react]
{{KEY:AVAILABLE_CONNECTIONS_SUMMARY}} -> extractAvailableConnectionsSummary() []
{{KEY:AVAILABLE_DOCUMENTS_SUMMARY}} -> extractAvailableDocumentsSummary() [taskplan, actionplan, react]
{{KEY:AVAILABLE_DOCUMENTS_INDEX}} -> extractAvailableDocumentsIndex() [react]
{{KEY:AVAILABLE_METHODS}} -> extractAvailableMethods() [actionplan, react]
{{KEY:REVIEW_CONTENT}} -> extractReviewContent() [actionplan, react]
{{KEY:PREVIOUS_ACTION_RESULTS}} -> extractPreviousActionResults() [react]
{{KEY:LEARNINGS_AND_IMPROVEMENTS}} -> extractLearningsAndImprovements() [react]
{{KEY:LATEST_REFINEMENT_FEEDBACK}} -> extractLatestRefinementFeedback() [react]
Following placeholders are populated directly by prompt builders with according context in promptGenerationActionsReact module:
- ACTION_OBJECTIVE,
- SELECTED_ACTION,
- ACTION_SIGNATURE
"""
import json
import logging
from typing import Dict, Any, List
from modules.datamodels.datamodelChat import ChatDocument
logger = logging.getLogger(__name__)
from modules.workflows.processing.shared.methodDiscovery import (methods, discoverMethods)
def extractUserPrompt(context: Any) -> str:
    """Resolve the user's request text. Maps to {{KEY:USER_PROMPT}}.

    Prefers the cleaned intent on context.services.currentUserPrompt when
    reachable; otherwise falls back to the task_step objective, then to a
    fixed placeholder string.
    """
    fallback = 'No request specified'
    try:
        svc = getattr(context, 'services', None)
        cleaned = getattr(svc, 'currentUserPrompt', None) if svc else None
        if cleaned:
            return cleaned
    except Exception:
        pass
    step = getattr(context, 'task_step', None)
    if step:
        return step.objective or fallback
    return fallback
def extractWorkflowHistory(service: Any, context: Any) -> str:
    """Build the enriched workflow history block. Maps to {{KEY:WORKFLOW_HISTORY}}.

    Looks for a workflow first on the context, then on the service, and
    renders it reverse-chronologically via getPreviousRoundContext(). A
    fixed message is returned when no workflow (or no renderable history)
    is available.
    """
    empty = "No previous workflow rounds available"
    try:
        wf = getattr(context, 'workflow', None) or getattr(service, 'workflow', None)
    except Exception:
        wf = None
    if not wf:
        return empty
    return getPreviousRoundContext(service, wf) or empty
def extractAvailableMethods(service: Any) -> str:
    """Render the discovered action catalog as JSON. Maps to {{KEY:AVAILABLE_METHODS}}.

    Produces a flat {"method.action": "description"} mapping (compound
    names) for easy AI parsing. Triggers lazy discovery on first use and
    falls back to an empty JSON object on error.
    """
    try:
        if not methods:
            discoverMethods(service)
        catalog = {}
        for entryName, entryInfo in methods.items():
            # MethodAi -> ai, MethodDocument -> document, etc.
            prefix = entryName.replace('Method', '').lower()
            for actName, actInfo in entryInfo['actions'].items():
                description = actInfo.get('description', f"Execute {actName} action")
                catalog[f"{prefix}.{actName}"] = description
        return json.dumps(catalog, indent=2, ensure_ascii=False)
    except Exception as e:
        logger.error(f"Error extracting available methods: {str(e)}")
        return json.dumps({}, indent=2, ensure_ascii=False)
def extractUserLanguage(service: Any) -> str:
    """Return the user's language code. Maps to {{KEY:USER_LANGUAGE}}.

    Fix: the original `service.user.language` raised AttributeError when the
    service object had no `user` attribute (or the user had no `language`),
    unlike every other extractor in this module, which degrades gracefully.
    Now any missing link in service -> user -> language yields 'en'.
    """
    user = getattr(service, 'user', None)
    if not user:
        return 'en'
    # Preserve original behavior for present-but-None language values
    return getattr(user, 'language', 'en')
def _computeMessageSummary(msg) -> str:
"""Create a concise summary for a ChatMessage with documents only.
Fields: documentCount, roundNumber, documentsLabel, document names, message (full), success flag.
"""
try:
docs = getattr(msg, 'documents', []) or []
if not docs:
return "" # Only summarize messages that contain documents
document_count = len(docs)
round_number = getattr(msg, 'roundNumber', None) or 0
label = getattr(msg, 'documentsLabel', None) or ""
# Collect ALL document names (supports ChatDocument objects and dicts)
doc_names = []
for d in docs:
name = None
try:
if isinstance(d, dict):
# For dict objects, try multiple possible field names
name = d.get('fileName') or d.get('documentName') or d.get('name') or d.get('filename')
else:
# For ChatDocument objects, use fileName field
name = getattr(d, 'fileName', None) or getattr(d, 'documentName', None) or getattr(d, 'name', None) or getattr(d, 'filename', None)
except Exception:
name = None
doc_names.append(name or "(unnamed)")
# Format document names in brackets
if doc_names:
names_part = f"({', '.join(doc_names)})"
else:
names_part = "(no documents)"
# Don't truncate the message - show full content
user_message = (getattr(msg, 'message', '') or '').strip().replace("\n", " ")
# Read success from ChatMessage.success field
success_flag = getattr(msg, 'success', None)
success_text = "success=True" if success_flag is True else ("success=False" if success_flag is False else "success=Unknown")
label_part = f" label='{label}'" if label else ""
# Add learning/feedback if available
learning_part = ""
if hasattr(msg, 'summary') and msg.summary and 'learnings' in msg.summary.lower():
learning_part = " | learnings available"
return f"Round {round_number}: {document_count} docs {names_part}{label_part} | {success_text}{learning_part} | msg='{user_message}'"
except Exception:
return ""
def getMessageSummary(msg) -> str:
    """Return msg.summary if already set; otherwise compute it, cache it
    in-memory on the message, and return it (documents-only summaries)."""
    try:
        cached = getattr(msg, 'summary', None)
        if cached:
            return cached
        fresh = _computeMessageSummary(msg)
        if fresh:
            try:
                # In-memory cache only; persisting is the caller's choice.
                msg.summary = fresh
            except Exception:
                # Read-only message objects are tolerated; just skip caching.
                pass
        return fresh
    except Exception:
        return ""
def getPreviousRoundContext(services, workflow: Any) -> str:
    """Get enriched context:
    - Reverse-chronological ordering
    - Current round first (newest to oldest), then older rounds
    - Only messages with documents summarized (getMessageSummary returns ""
      for document-less messages, which are then skipped)
    - Include available documents snapshot at end
    """
    try:
        if not workflow:
            return "No previous round context available"
        lines: List[str] = []
        # Reverse-chronological, current round first
        try:
            msgs = getattr(workflow, 'messages', []) or []
            current_round = getattr(workflow, 'currentRound', None)
            current_round_msgs: List[Any] = []
            previous_round_msgs: List[Any] = []
            # Partition messages into current-round vs everything else
            for m in msgs:
                if current_round is not None and getattr(m, 'roundNumber', None) == current_round:
                    current_round_msgs.append(m)
                else:
                    previous_round_msgs.append(m)
            # Newest first within each partition (reversed insertion order)
            for m in reversed(current_round_msgs):
                s = getMessageSummary(m)
                if s:
                    lines.append(f"- {s}")
            for m in reversed(previous_round_msgs):
                s = getMessageSummary(m)
                if s:
                    lines.append(f"- {s}")
        except Exception:
            pass
        # Include available documents snapshot at end
        try:
            if hasattr(services, 'workflow'):
                docs_index = services.workflow.getAvailableDocuments(workflow)
                if docs_index and docs_index != "No documents available":
                    doc_count = docs_index.count("docItem:")  # Only count actual documents, not document list labels
                    lines.append(f"Available documents: {doc_count}")
        except Exception:
            pass
        if not lines:
            return "No previous round context available"
        return "\n".join(lines)
    except Exception as e:
        logger.error(f"Error getting previous round context: {str(e)}")
        return "Error retrieving previous round context"
def extractReviewContent(context: Any) -> str:
    """Extract review content for result validation. Maps to {{KEY:REVIEW_CONTENT}}.

    Sources, in priority order:
      1. context.action_results - structured results, documents reduced to metadata
      2. context.observation     - dict (previews' snippets replaced) or raw JSON
      3. context.step_result['observation'] - same handling as (2)
    Returns a fixed fallback string when none apply or on error.
    """
    try:
        if hasattr(context, 'action_results') and context.action_results:
            # Build result summary
            result_summary = ""
            for i, result in enumerate(context.action_results):
                result_summary += f"\nRESULT {i+1}:\n"
                result_summary += f" Success: {result.success}\n"
                if result.error:
                    result_summary += f" Error: {result.error}\n"
                if result.documents:
                    result_summary += f" Documents: {len(result.documents)} document(s)\n"
                    for doc in result.documents:
                        # Extract all available metadata without content
                        doc_metadata = {
                            "name": getattr(doc, 'fileName', None) or getattr(doc, 'documentName', 'Unknown'),
                            "mimeType": getattr(doc, 'mimeType', 'Unknown'),
                            "size": getattr(doc, 'size', 'Unknown'),
                            "created": getattr(doc, 'created', 'Unknown'),
                            "modified": getattr(doc, 'modified', 'Unknown'),
                            "typeGroup": getattr(doc, 'typeGroup', 'Unknown'),
                            "documentId": getattr(doc, 'documentId', 'Unknown'),
                            "reference": getattr(doc, 'reference', 'Unknown')
                        }
                        # Remove 'Unknown' values to keep it clean
                        doc_metadata = {k: v for k, v in doc_metadata.items() if v != 'Unknown'}
                        result_summary += f" - {json.dumps(doc_metadata, indent=6, ensure_ascii=False)}\n"
                else:
                    result_summary += f" Documents: None\n"
            return result_summary
        elif hasattr(context, 'observation') and context.observation:
            # For observation data, show full content but handle documents specially
            if isinstance(context.observation, dict):
                # Create a copy to modify
                # NOTE(review): dict.copy() is shallow, so replacing a preview's
                # snippet below mutates the caller's preview dicts - confirm intended.
                obs_copy = context.observation.copy()
                # If there are previews with documents, show only metadata
                if 'previews' in obs_copy and isinstance(obs_copy['previews'], list):
                    for preview in obs_copy['previews']:
                        if isinstance(preview, dict) and 'snippet' in preview:
                            # Replace snippet with metadata indicator
                            preview['snippet'] = f"[Content: {len(preview.get('snippet', ''))} characters]"
                return json.dumps(obs_copy, indent=2, ensure_ascii=False)
            else:
                return json.dumps(context.observation, ensure_ascii=False)
        elif hasattr(context, 'step_result') and context.step_result and 'observation' in context.step_result:
            # For observation data in step_result, show full content but handle documents specially
            observation = context.step_result['observation']
            if isinstance(observation, dict):
                # Create a copy to modify (shallow - see note above on mutation)
                obs_copy = observation.copy()
                # If there are previews with documents, show only metadata
                if 'previews' in obs_copy and isinstance(obs_copy['previews'], list):
                    for preview in obs_copy['previews']:
                        if isinstance(preview, dict) and 'snippet' in preview:
                            # Replace snippet with metadata indicator
                            preview['snippet'] = f"[Content: {len(preview.get('snippet', ''))} characters]"
                return json.dumps(obs_copy, indent=2, ensure_ascii=False)
            else:
                return json.dumps(observation, ensure_ascii=False)
        else:
            return "No review content available"
    except Exception as e:
        logger.error(f"Error extracting review content: {str(e)}")
        return "No review content available"
def extractPreviousActionResults(context: Any) -> str:
    """Summarize up to the last five action results for learning context.
    Maps to {{KEY:PREVIOUS_ACTION_RESULTS}}."""
    default = "No previous actions executed yet"
    try:
        history = getattr(context, 'previous_action_results', None)
        if not history:
            return default
        lines = []
        for index, item in enumerate(history[-5:], 1):  # Last 5 results
            # Only items exposing both a label and a status are reportable.
            if not (hasattr(item, 'resultLabel') and hasattr(item, 'status')):
                continue
            verdict = "SUCCESS" if item.status == "completed" else "FAILED"
            lines.append(f"Action {index}: {item.resultLabel} - {verdict}")
            if getattr(item, 'error', None):
                lines.append(f" Error: {item.error}")
        return "\n".join(lines) if lines else default
    except Exception as e:
        logger.error(f"Error extracting previous action results: {str(e)}")
        return default
def extractLearningsAndImprovements(context: Any) -> str:
    """Collect recent learnings from previous actions (improvements, failure
    patterns, successful approaches - at most 3 of each, newest last).
    Maps to {{KEY:LEARNINGS_AND_IMPROVEMENTS}}."""
    try:
        # (context attribute, section heading) pairs, rendered in this order.
        sections = (
            ('improvements', "IMPROVEMENTS:"),
            ('failure_patterns', "FAILURE PATTERNS TO AVOID:"),
            ('successful_actions', "SUCCESSFUL APPROACHES:"),
        )
        lines = []
        for attr, heading in sections:
            values = getattr(context, attr, None)
            if values and isinstance(values, list):
                lines.append(heading)
                lines.extend(f"- {value}" for value in values[-3:])  # Last 3 entries
        return "\n".join(lines) if lines else "No learnings available yet"
    except Exception as e:
        logger.error(f"Error extracting learnings and improvements: {str(e)}")
        return "No learnings available yet"
def extractLatestRefinementFeedback(context: Any) -> str:
    """Render the most recent refinement decision (decision, reason, and any
    feedback/suggestions). Maps to {{KEY:LATEST_REFINEMENT_FEEDBACK}}."""
    unavailable = "No previous refinement feedback available"
    try:
        reviews = getattr(context, 'previous_review_result', None)
        if not reviews or not isinstance(reviews, list):
            return unavailable
        latest = reviews[-1]  # Most recent refinement decision
        if not isinstance(latest, dict):
            return unavailable
        parts = [
            f"Latest Decision: {latest.get('decision', 'unknown')}",
            f"Reason: {latest.get('reason', 'No reason provided')}",
        ]
        # Optional extras, only when present in the decision payload.
        if 'feedback' in latest:
            parts.append(f"Feedback: {latest['feedback']}")
        if 'suggestions' in latest:
            parts.append(f"Suggestions: {latest['suggestions']}")
        return "\n".join(parts)
    except Exception as e:
        logger.error(f"Error extracting latest refinement feedback: {str(e)}")
        return unavailable
def extractAvailableDocumentsSummary(service: Any, context: Any) -> str:
    """Count-only snapshot of documents available from previous tasks.
    Maps to {{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}."""
    try:
        index = service.workflow.getAvailableDocuments(context.workflow)
        if not index or index == "No documents available":
            return "No documents available"
        # Both list-level and item-level references count toward the total.
        total = index.count("docList:") + index.count("docItem:")
        return f"{total} documents available from previous tasks"
    except Exception as e:
        logger.error(f"Error getting document summary: {str(e)}")
        return "No documents available"
def extractAvailableDocumentsIndex(service: Any, context: Any) -> str:
    """Detailed index of available documents (exact references) for
    parameter generation. Maps to {{KEY:AVAILABLE_DOCUMENTS_INDEX}}."""
    try:
        workflowService = service.workflow
        return workflowService.getAvailableDocuments(context.workflow)
    except Exception as e:
        logger.error(f"Error getting document index: {str(e)}")
        return "No documents available"
def extractAvailableConnectionsSummary(service: Any) -> str:
    """Count-only snapshot of available connections.
    Maps to {{KEY:AVAILABLE_CONNECTIONS_SUMMARY}}."""
    try:
        refs = service.workflow.getConnectionReferenceList()
        return f"{len(refs)} connections available" if refs else "No connections available"
    except Exception as e:
        logger.error(f"Error getting connection summary: {str(e)}")
        return "No connections available"
def extractAvailableConnectionsIndex(service: Any) -> str:
    """Bullet list of exact connection references for parameter generation.
    Maps to {{KEY:AVAILABLE_CONNECTIONS_INDEX}}."""
    try:
        refs = service.workflow.getConnectionReferenceList()
        if not refs:
            return "No connections available"
        return "\n".join(f"- {ref}" for ref in refs)
    except Exception as e:
        logger.error(f"Error getting connection index: {str(e)}")
        return "No connections available"

View file

@ -0,0 +1,236 @@
"""
Actionplan Mode Prompt Generation
Handles prompt templates and extraction functions for actionplan mode action handling.
"""
import json
import logging
from typing import Dict, Any, List
from modules.datamodels.datamodelChat import PromptBundle, PromptPlaceholder
from modules.workflows.processing.shared.placeholderFactory import (
extractUserPrompt,
extractAvailableDocumentsSummary,
extractWorkflowHistory,
extractAvailableMethods,
extractUserLanguage,
extractAvailableConnectionsIndex,
extractReviewContent,
)
logger = logging.getLogger(__name__)
def generateActionDefinitionPrompt(services, context: Any) -> PromptBundle:
    """Build the actionplan-mode action-definition prompt.

    Defines placeholders first, then the template; returns a PromptBundle.

    NOTE(review): the template's Parameter Guidelines reference
    AVAILABLE_DOCUMENTS_INDEX, but only AVAILABLE_DOCUMENTS_SUMMARY is
    supplied as a placeholder here - confirm whether the index should also
    be provided (react mode does provide it).
    """
    placeholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
        PromptPlaceholder(label="AVAILABLE_DOCUMENTS_SUMMARY", content=extractAvailableDocumentsSummary(services, context), summaryAllowed=True),
        PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False),
        PromptPlaceholder(label="WORKFLOW_HISTORY", content=extractWorkflowHistory(services, context), summaryAllowed=True),
        PromptPlaceholder(label="AVAILABLE_METHODS", content=extractAvailableMethods(services), summaryAllowed=False),
        PromptPlaceholder(label="USER_LANGUAGE", content=extractUserLanguage(services), summaryAllowed=False),
    ]
    template = """# Action Definition
Generate the next action to advance toward completing the task objective.
## 📋 Context
### Task Objective
{{KEY:USER_PROMPT}}
### Available Documents
{{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}
### Available Connections
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}
### User Language
{{KEY:USER_LANGUAGE}}
### Workflow History
{{KEY:WORKFLOW_HISTORY}}
### Available Methods
{{KEY:AVAILABLE_METHODS}}
## ⚠️ RULES
### Action Names
- **Use EXACT compound action names** from AVAILABLE_METHODS (e.g., "ai.process", "document.extract", "web.search")
- **DO NOT create** new action names - only use those listed in AVAILABLE_METHODS
- **DO NOT separate** method and action names - use the full compound name
### Parameter Guidelines
- **Use exact document references** from AVAILABLE_DOCUMENTS_INDEX
- **Use exact connection references** from AVAILABLE_CONNECTIONS_INDEX
- **Include user language** if relevant
- **Avoid unnecessary fields** - host applies defaults
## 📊 Required JSON Structure
```json
{
"actions": [
{
"action": "method.action_name",
"parameters": {},
"resultLabel": "round{current_round}_task{current_task}_action{action_number}_{descriptive_label}",
"description": "What this action accomplishes",
"userMessage": "User-friendly message in {{KEY:USER_LANGUAGE}}"
}
]
}
```
## ✅ Correct Example
```json
{
"actions": [
{
"action": "document.extract",
"parameters": {"documentList": ["docList:msg_123:results"]},
"resultLabel": "round1_task1_action1_extract_results",
"description": "Extract data from documents",
"userMessage": "Extracting data from documents"
}
]
}
```
## 🎯 Action Planning Guidelines
### Method Selection
- **Choose appropriate method** based on task requirements
- **Consider available resources** (documents, connections)
- **Match method capabilities** to task objectives
### Parameter Design
- **Use ACTION SIGNATURE** to understand required parameters
- **Convert objective** into appropriate parameter values
- **Include all required parameters** for the action
### Result Labeling
- **Use descriptive labels** that explain what the action produces
- **Follow naming convention**: `round{round}_task{task}_action{action}_{label}`
- **Make labels meaningful** for future reference
### User Messages
- **Write in user language** ({{KEY:USER_LANGUAGE}})
- **Explain what's happening** in user-friendly terms
- **Keep messages concise** but informative
## 🚀 Response Format
Return ONLY the JSON object."""
    return PromptBundle(prompt=template, placeholders=placeholders)
def generateResultReviewPrompt(context: Any) -> PromptBundle:
    """Build the actionplan-mode result-review/validation prompt.

    Defines placeholders first (user prompt + execution results), then the
    template; returns a PromptBundle. The model must answer with a single
    JSON object carrying a success|retry|failed status.
    """
    placeholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
        PromptPlaceholder(label="REVIEW_CONTENT", content=extractReviewContent(context), summaryAllowed=True),
    ]
    template = """# Result Review & Validation
Review task execution outcomes and determine success, retry needs, or failure.
## 📋 Context
### Task Objective
{{KEY:USER_PROMPT}}
### Execution Results
{{KEY:REVIEW_CONTENT}}
## 🔍 Validation Criteria
### Action Assessment
- **Review each action's success/failure status**
- **Check if required documents were produced**
- **Validate document quality and completeness**
- **Assess if success criteria were met**
- **Identify any missing or incomplete outputs**
### Decision Making
- **Determine if retry would help** or if task should be marked as failed
- **Consider business value** and user satisfaction
- **Evaluate technical execution** and results quality
## 📊 Required JSON Structure
```json
{
"status": "success|retry|failed",
"reason": "Detailed explanation of the validation decision",
"improvements": ["specific improvement 1", "specific improvement 2"],
"quality_score": 8,
"met_criteria": ["criteria1", "criteria2"],
"unmet_criteria": ["criteria3", "criteria4"],
"confidence": 0.85,
"userMessage": "User-friendly message explaining the validation result"
}
```
## 🎯 Validation Principles
### Assessment Approach
- **Be thorough but fair** in assessment
- **Focus on business value** and outcomes
- **Consider both technical execution** and business results
- **Provide specific, actionable** improvement suggestions
### Quality Scoring
- **Use quality scores** to track progress across retries
- **Scale 1-10**: 1 = Poor, 5 = Average, 10 = Excellent
- **Consider completeness, accuracy, and usefulness**
### Criteria Evaluation
- **Clearly identify** which success criteria were met vs. unmet
- **List specific criteria** that were achieved
- **Note missing requirements** that need attention
### Confidence Levels
- **Set appropriate confidence levels** based on evidence quality
- **Scale 0.0-1.0**: 0.0 = No confidence, 1.0 = Complete confidence
- **Consider data quality** and result reliability
## 📝 Status Definitions
### Success
- **All objectives met** - User got what they asked for
- **Quality standards met** - Results are complete and accurate
- **No retry needed** - Task is fully complete
### Retry
- **Partial success** - Some but not all objectives met
- **Improvement possible** - Retry could lead to better results
- **Technical issues** - Action failures that can be resolved
### Failed
- **No progress made** - Objectives not achieved
- **Technical limitations** - Cannot be resolved with retry
- **Resource constraints** - Missing required inputs
## 💡 Improvement Suggestions
### Actionable Improvements
- **Be specific** - Don't just say "improve quality"
- **Focus on process** - How to do better next time
- **Consider resources** - What additional inputs might help
- **Technical fixes** - Address specific technical issues
### Examples
- "Use more specific document references from AVAILABLE_DOCUMENTS_INDEX"
- "Include user language parameter for better localization"
- "Break down complex objective into smaller, focused actions"
- "Verify document references before processing"
## 🚀 Response Format
Return ONLY the JSON object. Do not include any explanatory text."""
    return PromptBundle(prompt=template, placeholders=placeholders)

View file

@ -0,0 +1,237 @@
"""
React Mode Prompt Generation
Handles prompt templates for react mode action handling.
"""
from typing import Any, List
from modules.datamodels.datamodelChat import PromptBundle, PromptPlaceholder
from modules.workflows.processing.shared.placeholderFactory import (
extractUserPrompt,
extractUserLanguage,
extractAvailableMethods,
extractAvailableDocumentsSummary,
extractAvailableDocumentsIndex,
extractAvailableConnectionsIndex,
extractPreviousActionResults,
extractLearningsAndImprovements,
extractLatestRefinementFeedback,
extractWorkflowHistory,
)
from modules.workflows.processing.shared.methodDiscovery import methods, getActionParameterList
def generateReactPlanSelectionPrompt(services, context: Any) -> PromptBundle:
    """Build the react-mode Stage 1 (plan selection) prompt.

    Defines placeholders first, then the template; returns a PromptBundle.
    Stage 1 picks exactly one compound action plus a parametersContext that
    Stage 2 (generateReactParametersPrompt) consumes; it deliberately does
    NOT emit a parameters object itself.
    """
    placeholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
        PromptPlaceholder(label="AVAILABLE_DOCUMENTS_SUMMARY", content=extractAvailableDocumentsSummary(services, context), summaryAllowed=True),
        PromptPlaceholder(label="AVAILABLE_METHODS", content=extractAvailableMethods(services), summaryAllowed=False),
        # Provide enriched history context for Stage 1 to craft parametersContext
        PromptPlaceholder(label="WORKFLOW_HISTORY", content=extractWorkflowHistory(services, context), summaryAllowed=True),
        # Provide deterministic indexes so the planner can choose exact labels
        PromptPlaceholder(label="AVAILABLE_DOCUMENTS_INDEX", content=extractAvailableDocumentsIndex(services, context), summaryAllowed=True),
        PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False),
    ]
    template = """Select exactly one action to advance the task.
OBJECTIVE:
{{KEY:USER_PROMPT}}
AVAILABLE_DOCUMENTS_SUMMARY:
{{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}
AVAILABLE_METHODS:
{{KEY:AVAILABLE_METHODS}}
WORKFLOW_HISTORY (reverse-chronological, enriched):
{{KEY:WORKFLOW_HISTORY}}
AVAILABLE_DOCUMENTS_INDEX:
{{KEY:AVAILABLE_DOCUMENTS_INDEX}}
AVAILABLE_CONNECTIONS_INDEX:
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}
REPLY: Return ONLY a JSON object with the following structure (no comments, no extra text):
{{
"action": "method.action_name",
"actionObjective": "...",
"learnings": ["..."],
"requiredInputDocuments": ["docList:..."],
"requiredConnection": "connection:..." | null,
"parametersContext": "concise text that Stage 2 will use to set business parameters"
}}
EXAMPLE how to assign references from AVAILABLE_DOCUMENTS_INDEX and AVAILABLE_CONNECTIONS_INDEX:
"requiredInputDocuments": ["docList:msg_47a7a578-e8f2-4ba8-ac66-0dbff40605e0:round8_task1_action1_results","docItem:5d8b7aee-b546-4487-b6a8-835c86f7b186:AI_Generated_Document_20251006-104256.docx"],
"requiredConnection": "connection:msft:p.motsch@valueon.ch:1ae8b8e5-128b-49b8-b1cb-7c632669eeae",
RULES:
1. Use EXACT action names from AVAILABLE_METHODS
2. Do NOT output a "parameters" object
3. parametersContext must be short and sufficient for Stage 2
4. Return ONLY JSON - no markdown, no explanations
5. For requiredInputDocuments, use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
6. For requiredConnection, use ONLY an exact label from AVAILABLE_CONNECTIONS_INDEX
"""
    return PromptBundle(prompt=template, placeholders=placeholders)
def generateReactParametersPrompt(services, context: Any, compoundActionName: str) -> PromptBundle:
    """Build the react-mode Stage 2 (parameter generation) prompt.

    Define placeholders first, then the template; return PromptBundle.
    Minimal Stage 2 (no fallback): consumes actionObjective, selectedAction,
    parametersContext only. Excludes documents/connections/history entirely
    (those are resolved from Stage 1's requiredInputDocuments/requiredConnection).
    """
    # derive method/action and parameter list
    methodName, actionName = (compoundActionName.split('.', 1) if '.' in compoundActionName else (compoundActionName, ''))
    actionParameterList = getActionParameterList(methodName, actionName, methods)
    def _formatBusinessParameters(params) -> str:
        """Render only business parameters, dropping infrastructure params
        (documentList/connectionReference) that Stage 2 must not set."""
        excluded = {"documentList", "connectionReference"}
        # Case 1: params is a list of dicts or objects with 'name'
        if isinstance(params, (list, tuple)):
            entries = []
            for p in params:
                try:
                    if isinstance(p, dict):
                        name = p.get("name")
                        if not name or name in excluded:
                            continue
                        ptype = p.get("type") or p.get("dataType") or ""
                        req = p.get("required")
                        reqTxt = "required" if (req is True or str(req).lower() == "true") else "optional"
                        desc = p.get("description") or p.get("desc") or ""
                        entry = f"- {name} ({ptype}, {reqTxt})" + (f": {desc}" if desc else "")
                        entries.append(entry)
                    else:
                        # Try attribute access
                        name = getattr(p, "name", None)
                        if not name or name in excluded:
                            continue
                        ptype = getattr(p, "type", "") or getattr(p, "dataType", "")
                        req = getattr(p, "required", False)
                        reqTxt = "required" if (req is True or str(req).lower() == "true") else "optional"
                        desc = getattr(p, "description", None) or getattr(p, "desc", None) or ""
                        entry = f"- {name} ({ptype}, {reqTxt})" + (f": {desc}" if desc else "")
                        entries.append(entry)
                except Exception:
                    continue
            return "\n".join(entries)
        # Case 2: params is a string description: filter out lines mentioning excluded names
        if isinstance(params, str):
            lines = [ln for ln in params.splitlines() if not any(ex in ln for ex in excluded)]
            return "\n".join(lines).strip()
        # Fallback: plain string
        try:
            return str(params)
        except Exception:
            return ""
    actionParametersText = _formatBusinessParameters(actionParameterList)
    # determine action objective if available, else fall back to user prompt
    if hasattr(context, 'action_objective') and context.action_objective:
        actionObjective = context.action_objective
    elif hasattr(context, 'task_step') and context.task_step and getattr(context.task_step, 'objective', None):
        actionObjective = context.task_step.objective
    else:
        actionObjective = extractUserPrompt(context)
    # Minimal Stage 2 (no fallback)
    parametersContext = getattr(context, 'parameters_context', None)
    learningsText = ""
    try:
        # If Stage 1 learnings were attached to context, pass them textually
        if hasattr(context, 'learnings') and context.learnings:
            if isinstance(context.learnings, (list, tuple)):
                learningsText = "\n".join(f"- {str(x)}" for x in context.learnings)
            else:
                learningsText = str(context.learnings)
    except Exception:
        learningsText = ""
    placeholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="ACTION_OBJECTIVE", content=actionObjective, summaryAllowed=False),
        PromptPlaceholder(label="SELECTED_ACTION", content=compoundActionName, summaryAllowed=False),
        PromptPlaceholder(label="PARAMETERS_CONTEXT", content=(parametersContext or ""), summaryAllowed=True),
        PromptPlaceholder(label="ACTION_PARAMETERS", content=actionParametersText, summaryAllowed=False),
        PromptPlaceholder(label="LEARNINGS", content=learningsText, summaryAllowed=True),
    ]
    template = """You are a parameter generator. Set the parameters for this specific action.
CONTEXT AND OBJECTIVE:
-----------------
{{KEY:ACTION_OBJECTIVE}}
-----------------
SELECTED_ACTION:
{{KEY:SELECTED_ACTION}}
REPLY (ONLY JSON):
{{
"schema": "parameters_v1",
"parameters": {{
"paramName": "value"
}}
}}
CONTEXT FOR PARAMETER VALUES:
-----------------
{{KEY:PARAMETERS_CONTEXT}}
-----------------
LEARNINGS (from prior attempts, if any):
{{KEY:LEARNINGS}}
REQUIRED PARAMETERS FOR THIS ACTION (use these exact parameter names):
{{KEY:ACTION_PARAMETERS}}
INSTRUCTIONS:
- Use ONLY the parameter names listed in section REQUIRED PARAMETERS FOR THIS ACTION
- Fill in appropriate values based on the context and objective
- Do NOT invent new parameters
- Do NOT include: documentList, connectionReference, history, documents, connections
RULES:
- Return ONLY JSON (no markdown, no prose)
- Use ONLY the exact parameter names listed in REQUIRED PARAMETERS FOR THIS ACTION
- Do NOT add any parameters not listed above
- Do NOT add nested objects or custom fields
"""
    return PromptBundle(prompt=template, placeholders=placeholders)
def generateReactRefinementPrompt(services, context: Any, reviewContent: str) -> PromptBundle:
    """Build the ReAct refinement prompt bundle.

    Packs the user's objective and the latest observation into placeholders
    and pairs them with a template that asks the model for a bare-JSON
    continue/stop decision.

    Args:
        services: Shared service container (unused here, kept for interface parity).
        context: Workflow context from which the user prompt is extracted.
        reviewContent: The observation text produced by the review step.

    Returns:
        PromptBundle combining the decision template with its placeholders.
    """
    objectivePlaceholder = PromptPlaceholder(
        label="USER_PROMPT",
        content=extractUserPrompt(context),
        summaryAllowed=False,
    )
    observationPlaceholder = PromptPlaceholder(
        label="REVIEW_CONTENT",
        content=reviewContent,
        summaryAllowed=True,
    )
    template = """Decide the next step based on the observation.
OBJECTIVE:
{{KEY:USER_PROMPT}}
OBSERVATION:
{{KEY:REVIEW_CONTENT}}
REPLY: Return only a JSON object with your decision:
{{
"decision": "continue|stop",
"reason": "brief explanation"
}}
RULES:
1. Use "continue" if objective NOT fulfilled
2. Use "stop" if objective fulfilled
3. Return ONLY JSON - no other text
4. Do NOT use markdown code blocks
5. Do NOT add explanations
"""
    return PromptBundle(prompt=template, placeholders=[objectivePlaceholder, observationPlaceholder])

View file

@ -0,0 +1,121 @@
"""
Task Planning Prompt Generation
Handles prompt templates and extraction functions for task planning phase.
"""
import json
import logging
from typing import Dict, Any, List
from modules.datamodels.datamodelChat import PromptBundle, PromptPlaceholder
from modules.workflows.processing.shared.placeholderFactory import (
extractUserPrompt,
extractAvailableDocumentsSummary,
extractWorkflowHistory,
)
logger = logging.getLogger(__name__)
def generateTaskPlanningPrompt(services, context: Any) -> PromptBundle:
    """Assemble the task-planning prompt bundle.

    Gathers the user request, a summary of available documents, and the
    workflow history into placeholders, then returns them together with the
    planning template as a PromptBundle.

    Args:
        services: Shared service container used by the extraction helpers.
        context: Workflow context the placeholder content is derived from.

    Returns:
        PromptBundle with the task-planning template and its placeholders.
    """
    # (label, content, summaryAllowed) specs — the raw user prompt must never
    # be summarized, while documents and history may be condensed.
    placeholderSpecs = (
        ("USER_PROMPT", extractUserPrompt(context), False),
        ("AVAILABLE_DOCUMENTS_SUMMARY", extractAvailableDocumentsSummary(services, context), True),
        ("WORKFLOW_HISTORY", extractWorkflowHistory(services, context), True),
    )
    bundlePlaceholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label=lbl, content=body, summaryAllowed=canSummarize)
        for lbl, body, canSummarize in placeholderSpecs
    ]
    template = """# Task Planning
Break down user requests into logical, executable task steps.
## 📋 Context
### User Request
{{KEY:USER_PROMPT}}
### Available Documents
{{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}
### Previous Workflow Rounds
{{KEY:WORKFLOW_HISTORY}}
## 📝 Task Planning Rules
### Strategic Task Grouping
- **GROUP RELATED ACTIONS** - Combine all actions for the same business topic into ONE task
- **ONE TOPIC PER TASK** - Each task should handle one complete business objective
- **HIGH-LEVEL FOCUS** - Plan strategic outcomes, not implementation steps
- **AVOID MICRO-TASKS** - Don't create separate tasks for each small action
### Task Grouping Examples
- **Research + Analysis + Report** ONE task: "Web research report"
- **Data Collection + Processing + Visualization** ONE task: "Collect and present data"
- **Different topics** (email + flowers) SEPARATE tasks: "Send formal email..." + "Order flowers from Fleurop for delivery to 123 Main St, include card message"
### Retry Handling
- **If retry request**: Analyze previous rounds to understand what failed
- **Learn from mistakes**: Improve the plan based on previous failures
## 📊 Required JSON Structure
```json
{
  "overview": "Brief description of the overall plan",
  "languageUserDetected": "en",
  "userMessage": "User-friendly message explaining the task plan",
  "tasks": [
    {
      "id": "task_1",
      "objective": "Clear business objective focusing on what to deliver",
      "dependencies": ["task_0"],
      "success_criteria": ["measurable criteria 1", "measurable criteria 2"],
      "estimated_complexity": "low|medium|high",
      "userMessage": "What this task will accomplish"
    }
  ]
}
```
## 🎯 Task Structure Guidelines
### Task ID Format
- Use sequential numbering: `task_1`, `task_2`, `task_3`
- Keep IDs simple and clear
### Objective Writing
- **Be VERY SPECIFIC** - Include exact details needed for action planning
- **Include all requirements** - recipient, attachments, format, recipients, etc.
- **State the complete deliverable** - What exactly will be produced
- **Include context and constraints** - When, where, how, with what
- **Make it actionable** - Clear enough to plan specific actions
### Specific Objective Examples
- **Good**: "Send formal email to ceo and board of directors with annual report as attachment"
- **Bad**: "Handle email communication"
- **Good**: "Order flowers from Fleurop for delivery to 123 Main St, include card message 'Happy Birthday', deliver on March 15th"
- **Bad**: "Order flowers"
### Action Planning Requirements
- **Include all necessary details** - The objective must contain everything needed to plan actions
- **Specify recipients and destinations** - Who should receive what
- **Include file names and formats** - What documents to use/create
- **State timing and deadlines** - When things need to be done
- **Include context and constraints** - Any special requirements or limitations
### Success Criteria
- **Make them measurable** - specific, quantifiable outcomes
- **Focus on deliverables** - what the user will receive
- **Keep criteria realistic** - achievable within the task scope
- **Include all related actions** - success means completing the entire business objective
- **Be specific about requirements** - Include exact details like recipients, formats, deadlines
- **State clear completion criteria** - How to know the task is fully done
### Complexity Estimation
- **Low**: Simple, single-action tasks (1-2 actions)
- **Medium**: Multi-action tasks for one topic (3-5 actions)
- **High**: Complex strategic tasks (6+ actions)
## 🚀 Response Format
Return ONLY the JSON object."""
    return PromptBundle(prompt=template, placeholders=bundlePlaceholders)

View file

@ -0,0 +1,216 @@
"""
Security utilities for AI prompt construction.
Provides secure content escaping to prevent prompt injection attacks.
"""
import re
import json
import logging
from typing import Any, Union, List, Dict
logger = logging.getLogger(__name__)
def _escapeForAiPrompt(content: str) -> str:
"""
Securely escape content for AI prompts to prevent injection attacks.
This function:
1. Escapes all special characters that could break prompt structure
2. Wraps content in secure delimiters
3. Handles multi-line content safely
4. Prevents quote injection and context breaking
Args:
content: The content to escape
Returns:
Safely escaped content wrapped in secure delimiters
"""
if not content:
return ""
# Convert to string if not already
content_str = str(content)
# Remove or escape dangerous characters that could break prompt structure
# This includes quotes, backslashes, and other special characters
escaped = content_str
# Escape backslashes first (order matters)
escaped = escaped.replace('\\', '\\\\')
# Escape quotes and other special characters
escaped = escaped.replace('"', '\\"')
escaped = escaped.replace("'", "\\'")
escaped = escaped.replace('\n', '\\n')
escaped = escaped.replace('\r', '\\r')
escaped = escaped.replace('\t', '\\t')
# Remove or escape other potentially dangerous characters
# Remove control characters except newlines (already handled above)
escaped = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', escaped)
# Wrap in secure delimiters with clear boundaries
# Using a unique delimiter pattern that's unlikely to appear in user content
secure_delimiter_start = "===USER_CONTENT_START==="
secure_delimiter_end = "===USER_CONTENT_END==="
return f"{secure_delimiter_start}\n{escaped}\n{secure_delimiter_end}"
def _escapeForJsonPrompt(content: Any) -> str:
"""
Securely escape content for JSON-based AI prompts.
Args:
content: The content to escape (can be any type)
Returns:
Safely escaped JSON string
"""
try:
# Convert to JSON string with proper escaping
json_str = json.dumps(content, ensure_ascii=False, separators=(',', ':'))
return json_str
except Exception as e:
logger.warning(f"Failed to escape content as JSON: {str(e)}")
# Fallback to string escaping
return _escapeForAiPrompt(str(content))
def _escapeForListPrompt(items: List[Any]) -> str:
"""
Securely escape a list of items for AI prompts.
Args:
items: List of items to escape
Returns:
Safely escaped list representation
"""
if not items:
return "[]"
try:
escaped_items = []
for item in items:
if isinstance(item, (dict, list)):
escaped_items.append(_escapeForJsonPrompt(item))
else:
escaped_items.append(_escapeForAiPrompt(str(item)))
return f"[{', '.join(escaped_items)}]"
except Exception as e:
logger.warning(f"Failed to escape list content: {str(e)}")
return "[]"
def securePromptContent(content: Any, content_type: str = "text") -> str:
    """Escape content for AI prompt insertion, dispatching on content type.

    Args:
        content: The content to escape; None yields "".
        content_type: One of "text" (default), "json", "list", "user_prompt",
            or "document_content". User-controlled types get an extra warning
            prefix so the model treats them as data, not instructions.

    Returns:
        Safely escaped content, or a bracketed error marker on failure.
    """
    if content is None:
        return ""
    try:
        if content_type == "json":
            return _escapeForJsonPrompt(content)
        if content_type == "list":
            if isinstance(content, list):
                return _escapeForListPrompt(content)
            return _escapeForAiPrompt(str(content))
        if content_type in ("user_prompt", "document_content"):
            # Extra caution for user-controlled content: flag it explicitly.
            return f"⚠️ USER_CONTROLLED_CONTENT: {_escapeForAiPrompt(str(content))}"
        # "text" and any unknown type fall back to plain text escaping.
        return _escapeForAiPrompt(str(content))
    except Exception as e:
        logger.error(f"Error escaping content for AI prompt: {str(e)}")
        return "[ERROR: Content could not be safely escaped]"
def buildSecurePrompt(template: str, **kwargs) -> str:
    """Build a prompt by escaping values and substituting them into a template.

    The escaping mode per value is inferred from the keyword name: ``*_json``
    → JSON escaping, ``*_list`` → list escaping, well-known user-content keys
    → user-prompt escaping, everything else → text escaping.

    Args:
        template: Prompt template with {key} placeholders.
        **kwargs: Values to escape and substitute.

    Returns:
        The formatted prompt, or the untouched template if escaping or
        formatting fails.
    """
    try:
        safeValues = {}
        for name, raw in kwargs.items():
            if name.endswith('_json'):
                kind = "json"
            elif name.endswith('_list'):
                kind = "list"
            elif name in ('user_prompt', 'context', 'document_content', 'user_input'):
                kind = "user_prompt"
            else:
                kind = "text"
            safeValues[name] = securePromptContent(raw, kind)
        return template.format(**safeValues)
    except Exception as e:
        logger.error(f"Error building secure prompt: {str(e)}")
        # Best-effort: hand back the raw template rather than failing the caller.
        return template
def validatePromptSecurity(prompt: str) -> Dict[str, Any]:
    """Validate that a prompt is secure and doesn't contain injection patterns.

    Args:
        prompt: The prompt to validate.

    Returns:
        Dictionary with:
            is_secure (bool): True when no issues were found.
            issues (list[str]): Human-readable descriptions of each issue.
            prompt_length (int): Length of the prompt in characters.
            has_user_content_delimiters (bool): True when the secure start
                delimiter is present.
    """
    issues = []
    # Check for unescaped quotes that could break JSON.
    # Bug fix: the old guard (`'\\"' not in prompt`) skipped this check
    # entirely whenever the prompt contained at least one escaped quote,
    # hiding any remaining unescaped ones. The lookbehind regex alone is
    # the correct test: it matches only quotes NOT preceded by a backslash.
    if re.search(r'(?<!\\)"', prompt):
        issues.append("Unescaped quotes detected")
    # Check for common prompt-injection phrasings and role markers.
    injection_patterns = [
        r'ignore\s+previous\s+instructions',
        r'forget\s+everything',
        r'you\s+are\s+now',
        r'system\s*:',
        r'assistant\s*:',
        r'user\s*:',
        r'<\|.*\|>',  # Special tokens
    ]
    for pattern in injection_patterns:
        if re.search(pattern, prompt, re.IGNORECASE):
            issues.append(f"Potential injection pattern detected: {pattern}")
    # Check for proper content delimiters: a prompt that talks about user
    # content but lacks the delimiters may be embedding it unwrapped.
    if "===USER_CONTENT_START===" not in prompt and "===USER_CONTENT_END===" not in prompt:
        if any(keyword in prompt.lower() for keyword in ['context', 'user', 'input', 'prompt']):
            issues.append("User content may not be properly delimited")
    return {
        "is_secure": len(issues) == 0,
        "issues": issues,
        "prompt_length": len(prompt),
        "has_user_content_delimiters": "===USER_CONTENT_START===" in prompt
    }

View file

@ -0,0 +1,335 @@
# workflowProcessor.py
# Main workflow processor with delegation pattern
import logging
from typing import Dict, Any, Optional, List
from modules.datamodels.datamodelChat import TaskStep, TaskContext, TaskPlan, TaskResult, ReviewResult
from modules.datamodels.datamodelChat import ChatWorkflow
from modules.workflows.processing.modes.modeBase import BaseMode
from modules.workflows.processing.modes.modeActionplan import ActionplanMode
from modules.workflows.processing.modes.modeReact import ReactMode
logger = logging.getLogger(__name__)
class WorkflowStoppedException(Exception):
    """Raised to abort processing once the user has stopped the workflow."""
class WorkflowProcessor:
    """Main workflow processor that delegates to appropriate mode implementations.

    Selects a mode implementation (ReactMode or ActionplanMode) from the
    workflow's ``workflowMode`` and forwards planning/execution calls to it.
    Also mirrors task/action progress counters between the in-memory workflow
    object and the database, and provides debug trace-log helpers.
    """

    def __init__(self, services, workflow=None):
        """Store services/workflow and instantiate the matching mode.

        Args:
            services: Shared service container (DB interfaces, utils, ...).
            workflow: Optional ChatWorkflow; "Actionplan" mode is used when absent.
        """
        self.services = services
        self.workflow = workflow
        self.mode = self._createMode(workflow.workflowMode if workflow else "Actionplan")

    def _createMode(self, workflowMode: str) -> BaseMode:
        """Create the appropriate mode implementation based on workflow mode."""
        if workflowMode == "React":
            return ReactMode(self.services, self.workflow)
        else:
            # Any non-"React" mode falls back to the Actionplan implementation.
            return ActionplanMode(self.services, self.workflow)

    def _checkWorkflowStopped(self, workflow):
        """Raise WorkflowStoppedException if the user has stopped the workflow.

        Reads the current status from the database to avoid acting on stale
        in-memory data; falls back to the in-memory object on DB errors.
        """
        try:
            # Get the current workflow status from the database to avoid stale data
            current_workflow = self.services.interfaceDbChat.getWorkflow(workflow.id)
            if current_workflow and current_workflow.status == "stopped":
                logger.info("Workflow stopped by user, aborting processing")
                raise WorkflowStoppedException("Workflow was stopped by user")
        except WorkflowStoppedException:
            # Bug fix: the stop signal raised above was previously caught by
            # the broad handler below and could be silently swallowed when the
            # in-memory status was stale; re-raise it explicitly.
            raise
        except Exception as e:
            # If we can't read the current status due to database issues,
            # fall back to the in-memory object.
            logger.warning(f"Could not check current workflow status from database: {str(e)}")
            if workflow and workflow.status == "stopped":
                logger.info("Workflow stopped by user (from in-memory object), aborting processing")
                raise WorkflowStoppedException("Workflow was stopped by user")

    async def generateTaskPlan(self, userInput: str, workflow: ChatWorkflow) -> TaskPlan:
        """Generate a high-level task plan for the workflow.

        Delegates planning and the task-plan chat message to the active mode.

        Raises:
            WorkflowStoppedException: If the workflow was stopped by the user.
            Exception: Any underlying planning error (logged and re-raised).
        """
        try:
            # Check workflow status before generating task plan
            self._checkWorkflowStopped(workflow)
            logger.info(f"=== STARTING TASK PLAN GENERATION ===")
            logger.info(f"Workflow ID: {workflow.id}")
            logger.info(f"User Input: {userInput}")
            logger.info(f"Workflow Mode: {workflow.workflowMode}")
            # Delegate to the appropriate mode
            taskPlan = await self.mode.generateTaskPlan(userInput, workflow)
            # Create task plan message
            await self.mode.createTaskPlanMessage(taskPlan, workflow)
            return taskPlan
        except Exception as e:
            logger.error(f"Error in generateTaskPlan: {str(e)}")
            raise

    async def executeTask(self, taskStep: TaskStep, workflow: ChatWorkflow, context: TaskContext,
                          taskIndex: int = None, totalTasks: int = None) -> TaskResult:
        """Execute a task step using the appropriate mode.

        Args:
            taskStep: The task to execute.
            workflow: The owning workflow.
            context: Accumulated task context.
            taskIndex: Optional 0-based position of the task (for progress display).
            totalTasks: Optional total number of tasks (for progress display).
        """
        try:
            # Check workflow status before executing task
            self._checkWorkflowStopped(workflow)
            logger.info(f"=== STARTING TASK EXECUTION ===")
            logger.info(f"Task: {taskStep.objective}")
            logger.info(f"Mode: {workflow.workflowMode}")
            # Delegate to the appropriate mode
            return await self.mode.executeTask(taskStep, workflow, context, taskIndex, totalTasks)
        except Exception as e:
            logger.error(f"Error in executeTask: {str(e)}")
            raise

    async def generateActionItems(self, taskStep: TaskStep, workflow: ChatWorkflow,
                                  previousResults: List = None, enhancedContext: TaskContext = None) -> List:
        """Generate the action list for a task step using the appropriate mode."""
        try:
            # Check workflow status before generating actions
            self._checkWorkflowStopped(workflow)
            logger.info(f"=== STARTING ACTION GENERATION ===")
            logger.info(f"Task: {taskStep.objective}")
            logger.info(f"Mode: {workflow.workflowMode}")
            # Delegate to the appropriate mode
            return await self.mode.generateActionItems(taskStep, workflow, previousResults, enhancedContext)
        except Exception as e:
            logger.error(f"Error in generateActionItems: {str(e)}")
            raise

    def updateWorkflowAfterTaskPlanCreated(self, totalTasks: int):
        """Reset progress counters and record the task total after planning."""
        try:
            updateData = {
                "totalTasks": totalTasks,
                "currentTask": 0,
                "currentAction": 0,
                "totalActions": 0
            }
            # Keep the in-memory object and the database in sync.
            self.workflow.totalTasks = totalTasks
            self.workflow.currentTask = 0
            self.workflow.currentAction = 0
            self.workflow.totalActions = 0
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} after task plan creation: {updateData}")
        except Exception as e:
            logger.error(f"Error updating workflow after task plan creation: {str(e)}")

    def updateWorkflowBeforeExecutingTask(self, taskNumber: int):
        """Advance the task counter and reset action counters before a task runs."""
        try:
            updateData = {
                "currentTask": taskNumber,
                "currentAction": 0,
                "totalActions": 0
            }
            # Keep the in-memory object and the database in sync.
            self.workflow.currentTask = taskNumber
            self.workflow.currentAction = 0
            self.workflow.totalActions = 0
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} before executing task {taskNumber}: {updateData}")
        except Exception as e:
            logger.error(f"Error updating workflow before executing task: {str(e)}")

    def updateWorkflowAfterActionPlanning(self, totalActions: int):
        """Record the number of planned actions for the current task."""
        try:
            updateData = {
                "totalActions": totalActions
            }
            # Keep the in-memory object and the database in sync.
            self.workflow.totalActions = totalActions
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} after action planning: {updateData}")
        except Exception as e:
            logger.error(f"Error updating workflow after action planning: {str(e)}")

    def updateWorkflowBeforeExecutingAction(self, actionNumber: int):
        """Advance the action counter before an action runs."""
        try:
            updateData = {
                "currentAction": actionNumber
            }
            # Keep the in-memory object and the database in sync.
            self.workflow.currentAction = actionNumber
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} before executing action {actionNumber}: {updateData}")
        except Exception as e:
            logger.error(f"Error updating workflow before executing action: {str(e)}")

    def setWorkflowTotals(self, totalTasks: int = None, totalActions: int = None):
        """Set total counts for workflow progress tracking and update database.

        Only the totals that are explicitly passed (non-None) are updated.
        """
        try:
            updateData = {}
            if totalTasks is not None:
                self.workflow.totalTasks = totalTasks
                updateData["totalTasks"] = totalTasks
            if totalActions is not None:
                self.workflow.totalActions = totalActions
                updateData["totalActions"] = totalActions
            # Persist only when something actually changed.
            if updateData:
                self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
                logger.info(f"Updated workflow {self.workflow.id} totals in database: {updateData}")
            logger.debug(f"Updated workflow totals: Tasks {self.workflow.totalTasks if hasattr(self.workflow, 'totalTasks') else 'N/A'}, Actions {self.workflow.totalActions if hasattr(self.workflow, 'totalActions') else 'N/A'}")
        except Exception as e:
            logger.error(f"Error setting workflow totals: {str(e)}")

    def resetWorkflowForNewSession(self):
        """Zero out all task/action progress counters for a new session."""
        try:
            updateData = {
                "currentTask": 0,
                "currentAction": 0,
                "totalTasks": 0,
                "totalActions": 0
            }
            # Keep the in-memory object and the database in sync.
            self.workflow.currentTask = 0
            self.workflow.currentAction = 0
            self.workflow.totalTasks = 0
            self.workflow.totalActions = 0
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Reset workflow {self.workflow.id} for new session: {updateData}")
        except Exception as e:
            logger.error(f"Error resetting workflow for new session: {str(e)}")

    def _traceFilePath(self) -> str:
        """Resolve the trace-log file path from the APP_LOGGING_LOG_DIR config.

        Relative paths are anchored at the gateway directory (three levels
        above this module). Shared by writeTraceLog and clearTraceLog.
        """
        import os
        logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
        if not os.path.isabs(logDir):
            # If relative path, make it relative to the gateway directory
            gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
            logDir = os.path.join(gatewayDir, logDir)
        return os.path.join(logDir, "log_trace.log")

    def writeTraceLog(self, contextText: str, data: Any) -> None:
        """Append a trace entry to the trace file when DEBUG logging is enabled.

        Dicts/lists are pretty-printed as JSON; other data is stringified.
        Errors are deliberately swallowed to avoid logging recursion.
        """
        try:
            import os
            import json
            from datetime import datetime, UTC
            # Only write when DEBUG is effectively enabled. Bug fix: the old
            # check (logger.level > logging.DEBUG) never fired for loggers
            # left at NOTSET (level 0, inherited), so traces were written
            # even outside debug mode; isEnabledFor honors the effective level.
            if not logger.isEnabledFor(logging.DEBUG):
                return
            traceFile = self._traceFilePath()
            # Ensure the log directory exists before appending.
            os.makedirs(os.path.dirname(traceFile), exist_ok=True)
            # Format the trace entry with a millisecond-precision UTC timestamp.
            timestamp = datetime.fromtimestamp(self.services.utils.getUtcTimestamp(), UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
            traceEntry = f"[{timestamp}] {contextText}\n"
            # Add data if provided - show full content without truncation
            if data is not None:
                if isinstance(data, (dict, list)):
                    # ensure_ascii=False preserves Unicode; indent=2 for readability
                    traceEntry += f"Data: {json.dumps(data, indent=2, default=str, ensure_ascii=False)}\n"
                else:
                    # For string data, show full content without truncation
                    traceEntry += f"Data: {str(data)}\n"
            traceEntry += "-" * 80 + "\n\n"
            with open(traceFile, "a", encoding="utf-8") as f:
                f.write(traceEntry)
        except Exception:
            # Don't log trace errors to avoid recursion
            pass

    def clearTraceLog(self) -> None:
        """Truncate the trace log file if it exists."""
        try:
            import os
            traceFile = self._traceFilePath()
            if os.path.exists(traceFile):
                with open(traceFile, "w", encoding="utf-8") as f:
                    f.write("")
                logger.info("Trace log cleared")
            else:
                logger.info("Trace log file does not exist, nothing to clear")
        except Exception as e:
            logger.error(f"Error clearing trace log: {str(e)}")

    async def prepareTaskHandover(self, taskStep, taskActions, taskResult, workflow):
        """Prepare task handover data for workflow coordination.

        Accepts either a ReviewResult (has ``met_criteria``/``to_dict``) or a
        plain TaskResult and normalizes it into a ``review_result`` dict.

        Returns:
            Handover dict with task id/description, serialized actions, the
            review result, workflow id and a timestamp — or {'error': ...}
            when preparation fails.
        """
        try:
            # Check workflow status before preparing task handover
            self._checkWorkflowStopped(workflow)
            # Normalize the result: ReviewResult objects serialize themselves,
            # TaskResult objects are mapped to a minimal status dict.
            if hasattr(taskResult, 'met_criteria'):
                reviewResult = taskResult.to_dict()
            else:
                reviewResult = {
                    'status': taskResult.status if taskResult else 'unknown',
                    'reason': taskResult.error if taskResult and hasattr(taskResult, 'error') else None,
                    'success': taskResult.success if taskResult else False
                }
            handoverData = {
                'task_id': taskStep.id,
                'task_description': taskStep.objective,
                'actions': [action.to_dict() for action in taskActions] if taskActions else [],
                'review_result': reviewResult,
                'workflow_id': workflow.id,
                'handover_time': self.services.utils.getUtcTimestamp()
            }
            logger.info(f"Prepared handover for task {taskStep.id} in workflow {workflow.id}")
            return handoverData
        except Exception as e:
            logger.error(f"Error in prepareTaskHandover: {str(e)}")
            return {'error': str(e)}

View file

@ -8,11 +8,11 @@ from modules.datamodels.datamodelChat import (
UserInputRequest, UserInputRequest,
ChatMessage, ChatMessage,
ChatWorkflow, ChatWorkflow,
ChatDocument, ChatDocument
WorkflowResult
) )
from modules.datamodels.datamodelWorkflow import TaskItem, TaskStatus, TaskContext from modules.datamodels.datamodelChat import TaskItem, TaskStatus, TaskContext
from modules.workflows.processing.handlingTasks import HandlingTasks, WorkflowStoppedException from modules.workflows.processing.workflowProcessor import WorkflowProcessor, WorkflowStoppedException
from modules.shared.timezoneUtils import get_utc_timestamp
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -22,11 +22,11 @@ class WorkflowManager:
def __init__(self, services): def __init__(self, services):
self.services = services self.services = services
self.handlingTasks = None self.workflowProcessor = None
# Exported functions # Exported functions
async def workflowStart(self, userInput: UserInputRequest, workflowId: Optional[str] = None, workflowMode: str = "Actionplan") -> ChatWorkflow: async def workflowStart(self, userInput: UserInputRequest, workflowId: Optional[str] = None, workflowMode: str = "React") -> ChatWorkflow:
"""Starts a new workflow or continues an existing one, then launches processing.""" """Starts a new workflow or continues an existing one, then launches processing."""
try: try:
# Debug log to check workflowMode parameter # Debug log to check workflowMode parameter
@ -38,8 +38,8 @@ class WorkflowManager:
if not workflow: if not workflow:
raise ValueError(f"Workflow {workflowId} not found") raise ValueError(f"Workflow {workflowId} not found")
# Add workflow to services # Store workflow in services for reference (don't overwrite the workflow service)
self.services.workflow = workflow self.services.currentWorkflow = workflow
if workflow.status == "running": if workflow.status == "running":
logger.info(f"Stopping running workflow {workflowId} before processing new prompt") logger.info(f"Stopping running workflow {workflowId} before processing new prompt")
@ -62,7 +62,8 @@ class WorkflowManager:
self.services.workflow.updateWorkflow(workflowId, { self.services.workflow.updateWorkflow(workflowId, {
"status": "running", "status": "running",
"lastActivity": currentTime, "lastActivity": currentTime,
"currentRound": newRound "currentRound": newRound,
"workflowMode": workflowMode # Update workflow mode for existing workflows
}) })
workflow = self.services.workflow.getWorkflow(workflowId) workflow = self.services.workflow.getWorkflow(workflowId)
@ -71,11 +72,14 @@ class WorkflowManager:
self.services.workflow.createLog({ self.services.workflow.createLog({
"workflowId": workflowId, "workflowId": workflowId,
"message": f"Workflow resumed (round {workflow.currentRound})", "message": f"Workflow resumed (round {workflow.currentRound}) with mode: {workflowMode}",
"type": "info", "type": "info",
"status": "running", "status": "running",
"progress": 0 "progress": 0
}) })
# CRITICAL: Update the workflow object's workflowMode attribute for immediate use
workflow.workflowMode = workflowMode
else: else:
workflowData = { workflowData = {
"name": "New Workflow", "name": "New Workflow",
@ -108,8 +112,8 @@ class WorkflowManager:
self.services.workflow.updateWorkflow(workflow.id, {"currentRound": 1}) self.services.workflow.updateWorkflow(workflow.id, {"currentRound": 1})
self.services.workflow.updateWorkflowStats(workflow.id, bytesSent=0, bytesReceived=0) self.services.workflow.updateWorkflowStats(workflow.id, bytesSent=0, bytesReceived=0)
# Add workflow to services # Store workflow in services for reference (don't overwrite the workflow service)
self.services.workflow = workflow self.services.currentWorkflow = workflow
# Start workflow processing asynchronously # Start workflow processing asynchronously
asyncio.create_task(self._workflowProcess(userInput, workflow)) asyncio.create_task(self._workflowProcess(userInput, workflow))
@ -149,11 +153,14 @@ class WorkflowManager:
async def _workflowProcess(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> None: async def _workflowProcess(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> None:
"""Process a workflow with user input""" """Process a workflow with user input"""
try: try:
self.handlingTasks = HandlingTasks(self.services, workflow) # Store the current user prompt in services for easy access throughout the workflow
self.services.rawUserPrompt = userInput.prompt
self.services.currentUserPrompt = userInput.prompt
self.workflowProcessor = WorkflowProcessor(self.services, workflow)
message = await self._sendFirstMessage(userInput, workflow) message = await self._sendFirstMessage(userInput, workflow)
task_plan = await self._planTasks(userInput, workflow) task_plan = await self._planTasks(userInput, workflow)
workflow_result = await self._executeTasks(task_plan, workflow) await self._executeTasks(task_plan, workflow)
await self._processWorkflowResults(workflow, workflow_result, message) await self._processWorkflowResults(workflow, message)
except WorkflowStoppedException: except WorkflowStoppedException:
self._handleWorkflowStop(workflow) self._handleWorkflowStop(workflow)
@ -166,14 +173,14 @@ class WorkflowManager:
async def _sendFirstMessage(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> ChatMessage: async def _sendFirstMessage(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> ChatMessage:
"""Send first message to start workflow""" """Send first message to start workflow"""
try: try:
self.handlingTasks._checkWorkflowStopped() self.workflowProcessor._checkWorkflowStopped(workflow)
# Create initial message using interface # Create initial message using interface
# Generate the correct documentsLabel that matches what getDocumentReferenceString will create # For first user message, include round info in the user context label
round_num = workflow.currentRound round_num = workflow.currentRound
task_num = 0 task_num = 0
action_num = 0 action_num = 0
context_label = f"round{round_num}_task{task_num}_action{action_num}_context" context_label = f"round{round_num}_usercontext"
messageData = { messageData = {
"workflowId": workflow.id, "workflowId": workflow.id,
@ -199,7 +206,7 @@ class WorkflowManager:
workflow.messages.append(message) workflow.messages.append(message)
# Clear trace log for new workflow session # Clear trace log for new workflow session
self.handlingTasks.clearTraceLog() self.workflowProcessor.clearTraceLog()
# Add documents if any, now with messageId # Add documents if any, now with messageId
if userInput.listFileId: if userInput.listFileId:
@ -208,6 +215,128 @@ class WorkflowManager:
message.documents = documents message.documents = documents
# Update the message with documents in database # Update the message with documents in database
self.services.workflow.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]}) self.services.workflow.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})
# Analyze the user's input to extract intent and offload bulky context into documents
try:
analyzerPrompt = (
"You are an input analyzer. Split the user's message into:\n"
"1) intent: the user's core request in one concise paragraph, normalized to the user's language.\n"
"2) contextItems: supportive data to attach as separate documents if significantly larger than the intent. "
"Include large literal data blocks, long lists/tables, code/JSON blocks, quoted transcripts, CSV fragments, or detailed specs. "
"Keep URLs in the intent unless they include large pasted content.\n\n"
"Rules:\n"
"- If total content length (intent + data) is less than 10% of the model's max tokens, do not extract; "
"return an empty contextItems and keep a compact, self-contained intent.\n"
"- If content exceeds that, move bulky parts into contextItems, keeping the intent short and clear.\n"
"- Preserve critical references (URLs, filenames) in the intent.\n"
"- Normalize the intent to the detected language. If mixed-language, use the primary detected language and normalize.\n\n"
"Output JSON only (no markdown):\n"
"{\n"
" \"detectedLanguage\": \"en\",\n"
" \"intent\": \"Concise normalized request...\",\n"
" \"contextItems\": [\n"
" {\n"
" \"title\": \"User context 1\",\n"
" \"mimeType\": \"text/plain\",\n"
" \"content\": \"Full extracted content block here\"\n"
" }\n"
" ]\n"
"}\n\n"
f"User message:\n{userInput.prompt}"
)
# Call AI analyzer
aiResponse = await self.services.ai.callAi(prompt=analyzerPrompt)
detectedLanguage = None
intentText = userInput.prompt
contextItems = []
# Parse analyzer response (JSON expected)
try:
import json
jsonStart = aiResponse.find('{') if aiResponse else -1
jsonEnd = aiResponse.rfind('}') + 1 if aiResponse else 0
if jsonStart != -1 and jsonEnd > jsonStart:
parsed = json.loads(aiResponse[jsonStart:jsonEnd])
detectedLanguage = parsed.get('detectedLanguage') or None
if parsed.get('intent'):
intentText = parsed.get('intent')
contextItems = parsed.get('contextItems') or []
except Exception:
contextItems = []
# Update services state
if detectedLanguage and isinstance(detectedLanguage, str):
self._setUserLanguage(detectedLanguage)
self.services.currentUserPrompt = intentText or userInput.prompt
# Telemetry (sizes and counts)
try:
inputSize = len(userInput.prompt.encode('utf-8')) if userInput and userInput.prompt else 0
outputSize = len(aiResponse.encode('utf-8')) if aiResponse else 0
self.services.workflow.createLog({
"workflowId": workflow.id,
"message": f"User prompt analyzed (input {inputSize} bytes, output {outputSize} bytes, items {len(contextItems)})",
"type": "info",
"status": "running",
"progress": 0
})
except Exception:
pass
# Create and attach documents for context items
if contextItems and isinstance(contextItems, list):
created_docs = []
for idx, item in enumerate(contextItems):
try:
title = item.get('title') if isinstance(item, dict) else None
mime = item.get('mimeType') if isinstance(item, dict) else None
content = item.get('content') if isinstance(item, dict) else None
if not content:
continue
fileName = (title or f"user_context_{idx+1}.txt").strip()
mimeType = (mime or "text/plain").strip()
# Create file in component storage
content_bytes = content.encode('utf-8')
file_item = self.services.interfaceDbComponent.createFile(
name=fileName,
mimeType=mimeType,
content=content_bytes
)
# Persist file data
self.services.interfaceDbComponent.createFileData(file_item.id, content_bytes)
# Collect file info
file_info = self.services.workflow.getFileInfo(file_item.id)
from modules.datamodels.datamodelChat import ChatDocument as _ChatDocument
doc = _ChatDocument(
messageId=message.id,
fileId=file_item.id,
fileName=file_info.get("fileName", fileName) if file_info else fileName,
fileSize=file_info.get("size", len(content_bytes)) if file_info else len(content_bytes),
mimeType=file_info.get("mimeType", mimeType) if file_info else mimeType
)
# Persist document record
self.services.interfaceDbChat.createDocument(doc.to_dict())
created_docs.append(doc)
except Exception:
continue
if created_docs:
# Attach to message and persist
if not message.documents:
message.documents = []
message.documents.extend(created_docs)
# Ensure label is user_context for discoverability
message.documentsLabel = context_label
self.services.workflow.updateMessage(message.id, {
"documents": [d.to_dict() for d in message.documents],
"documentsLabel": context_label
})
except Exception as e:
logger.warning(f"Prompt analysis failed or skipped: {str(e)}")
return message return message
else: else:
@ -219,7 +348,7 @@ class WorkflowManager:
async def _planTasks(self, userInput: UserInputRequest, workflow: ChatWorkflow): async def _planTasks(self, userInput: UserInputRequest, workflow: ChatWorkflow):
"""Generate task plan for workflow execution""" """Generate task plan for workflow execution"""
handling = self.handlingTasks handling = self.workflowProcessor
# Generate task plan first (shared for both modes) # Generate task plan first (shared for both modes)
task_plan = await handling.generateTaskPlan(userInput.prompt, workflow) task_plan = await handling.generateTaskPlan(userInput.prompt, workflow)
if not task_plan or not task_plan.tasks: if not task_plan or not task_plan.tasks:
@ -229,9 +358,9 @@ class WorkflowManager:
logger.info(f"Executing workflow mode={workflow_mode} with {len(task_plan.tasks)} tasks") logger.info(f"Executing workflow mode={workflow_mode} with {len(task_plan.tasks)} tasks")
return task_plan return task_plan
async def _executeTasks(self, task_plan, workflow: ChatWorkflow) -> WorkflowResult: async def _executeTasks(self, task_plan, workflow: ChatWorkflow) -> None:
"""Execute all tasks in the task plan""" """Execute all tasks in the task plan and update workflow status."""
handling = self.handlingTasks handling = self.workflowProcessor
total_tasks = len(task_plan.tasks) total_tasks = len(task_plan.tasks)
all_task_results: List = [] all_task_results: List = []
previous_results: List[str] = [] previous_results: List[str] = []
@ -240,7 +369,7 @@ class WorkflowManager:
current_task_index = idx + 1 current_task_index = idx + 1
logger.info(f"Task {current_task_index}/{total_tasks}: {task_step.objective}") logger.info(f"Task {current_task_index}/{total_tasks}: {task_step.objective}")
# Build TaskContext (mode-specific behavior is inside HandlingTasks) # Build TaskContext (mode-specific behavior is inside WorkflowProcessor)
task_context = TaskContext( task_context = TaskContext(
task_step=task_step, task_step=task_step,
workflow=workflow, workflow=workflow,
@ -274,19 +403,15 @@ class WorkflowManager:
if task_result.success and task_result.feedback: if task_result.success and task_result.feedback:
previous_results.append(task_result.feedback) previous_results.append(task_result.feedback)
return WorkflowResult( # Mark workflow as completed; error/stop cases update status elsewhere
status="completed", workflow.status = "completed"
completed_tasks=len(all_task_results), return None
total_tasks=total_tasks,
execution_time=0.0,
final_results_count=len(all_task_results)
)
async def _processWorkflowResults(self, workflow: ChatWorkflow, workflow_result: WorkflowResult, initial_message: ChatMessage) -> None: async def _processWorkflowResults(self, workflow: ChatWorkflow, initial_message: ChatMessage) -> None:
"""Process workflow results and create appropriate messages""" """Process workflow results based on workflow status and create appropriate messages"""
try: try:
try: try:
self.handlingTasks._checkWorkflowStopped() self.workflowProcessor._checkWorkflowStopped(workflow)
except WorkflowStoppedException: except WorkflowStoppedException:
logger.info(f"Workflow {workflow.id} was stopped during result processing") logger.info(f"Workflow {workflow.id} was stopped during result processing")
@ -321,7 +446,7 @@ class WorkflowManager:
}) })
return return
if workflow_result.status == 'stopped': if workflow.status == 'stopped':
# Create stopped message # Create stopped message
stopped_message = { stopped_message = {
"workflowId": workflow.id, "workflowId": workflow.id,
@ -363,12 +488,12 @@ class WorkflowManager:
"progress": 100 "progress": 100
}) })
return return
elif workflow_result.status == 'failed': elif workflow.status == 'failed':
# Create error message # Create error message
error_message = { error_message = {
"workflowId": workflow.id, "workflowId": workflow.id,
"role": "assistant", "role": "assistant",
"message": f"Workflow failed: {workflow_result.error or 'Unknown error'}", "message": f"Workflow failed: {'Unknown error'}",
"status": "last", "status": "last",
"sequenceNr": len(workflow.messages) + 1, "sequenceNr": len(workflow.messages) + 1,
"publishedAt": self.services.utils.getUtcTimestamp(), "publishedAt": self.services.utils.getUtcTimestamp(),
@ -399,7 +524,7 @@ class WorkflowManager:
# Add failed log entry # Add failed log entry
self.services.workflow.createLog({ self.services.workflow.createLog({
"workflowId": workflow.id, "workflowId": workflow.id,
"message": f"Workflow failed: {workflow_result.error or 'Unknown error'}", "message": "Workflow failed: Unknown error",
"type": "error", "type": "error",
"status": "failed", "status": "failed",
"progress": 100 "progress": 100
@ -504,7 +629,7 @@ class WorkflowManager:
async def _generateWorkflowFeedback(self, workflow: ChatWorkflow) -> str: async def _generateWorkflowFeedback(self, workflow: ChatWorkflow) -> str:
"""Generate feedback message for workflow completion""" """Generate feedback message for workflow completion"""
try: try:
self.handlingTasks._checkWorkflowStopped() self.workflowProcessor._checkWorkflowStopped(workflow)
# Count messages by role # Count messages by role
user_messages = [msg for msg in workflow.messages if msg.role == 'user'] user_messages = [msg for msg in workflow.messages if msg.role == 'user']

Binary file not shown.

Before

Width:  |  Height:  |  Size: 407 KiB

Binary file not shown.

Binary file not shown.