Merge branch 'int' into feat/chatbot
This commit is contained in:
commit
478e139730
95 changed files with 12300 additions and 7373 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -167,4 +167,5 @@ cython_debug/
|
||||||
# local data
|
# local data
|
||||||
gwserver/_database*
|
gwserver/_database*
|
||||||
gwserver/results/*
|
gwserver/results/*
|
||||||
*.log.*
|
*.log.*
|
||||||
|
test-chat
|
||||||
12
env_dev.env
12
env_dev.env
|
|
@ -64,12 +64,12 @@ Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
|
||||||
Connector_AiAnthropic_TEMPERATURE = 0.2
|
Connector_AiAnthropic_TEMPERATURE = 0.2
|
||||||
Connector_AiAnthropic_MAX_TOKENS = 2000
|
Connector_AiAnthropic_MAX_TOKENS = 2000
|
||||||
|
|
||||||
# LangDoc configuration
|
# Perplexity AI configuration
|
||||||
Connector_AiLangdoc_API_URL = https://api.langdock.com/v1/chat/completions
|
Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
|
||||||
Connector_AiLangdoc_API_SECRET = sk-9KaNH1FfEx7SkTijsFpXeTIc9_xOmoo7e0hW6SqrYavFq_bgjcULa7PXp3kWQpp4gfk8-U0B4L91CP6YpAJxZg
|
Connector_AiPerplexity_API_SECRET = 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
|
||||||
Connector_AiLangdoc_MODEL_NAME = gpt-4o
|
Connector_AiPerplexity_MODEL_NAME = sonar
|
||||||
Connector_AiLangdoc_TEMPERATURE = 0.2
|
Connector_AiPerplexity_TEMPERATURE = 0.2
|
||||||
Connector_AiLangdoc_MAX_TOKENS = 2000
|
Connector_AiPerplexity_MAX_TOKENS = 2000
|
||||||
|
|
||||||
# Agent Mail configuration
|
# Agent Mail configuration
|
||||||
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||||
|
|
|
||||||
12
env_int.env
12
env_int.env
|
|
@ -64,12 +64,12 @@ Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
|
||||||
Connector_AiAnthropic_TEMPERATURE = 0.2
|
Connector_AiAnthropic_TEMPERATURE = 0.2
|
||||||
Connector_AiAnthropic_MAX_TOKENS = 2000
|
Connector_AiAnthropic_MAX_TOKENS = 2000
|
||||||
|
|
||||||
# LangDoc configuration
|
# Perplexity AI configuration
|
||||||
Connector_AiLangdoc_API_URL = https://api.langdock.com/v1/chat/completions
|
Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
|
||||||
Connector_AiLangdoc_API_SECRET = sk-9KaNH1FfEx7SkTijsFpXeTIc9_xOmoo7e0hW6SqrYavFq_bgjcULa7PXp3kWQpp4gfk8-U0B4L91CP6YpAJxZg
|
Connector_AiPerplexity_API_SECRET = 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
|
||||||
Connector_AiLangdoc_MODEL_NAME = gpt-4o
|
Connector_AiPerplexity_MODEL_NAME = sonar
|
||||||
Connector_AiLangdoc_TEMPERATURE = 0.2
|
Connector_AiPerplexity_TEMPERATURE = 0.2
|
||||||
Connector_AiLangdoc_MAX_TOKENS = 2000
|
Connector_AiPerplexity_MAX_TOKENS = 2000
|
||||||
|
|
||||||
# Agent Mail configuration
|
# Agent Mail configuration
|
||||||
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||||
|
|
|
||||||
12
env_prod.env
12
env_prod.env
|
|
@ -64,12 +64,12 @@ Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
|
||||||
Connector_AiAnthropic_TEMPERATURE = 0.2
|
Connector_AiAnthropic_TEMPERATURE = 0.2
|
||||||
Connector_AiAnthropic_MAX_TOKENS = 2000
|
Connector_AiAnthropic_MAX_TOKENS = 2000
|
||||||
|
|
||||||
# LangDoc configuration
|
# Perplexity AI configuration
|
||||||
Connector_AiLangdoc_API_URL = https://api.langdock.com/v1/chat/completions
|
Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
|
||||||
Connector_AiLangdoc_API_SECRET = sk-9KaNH1FfEx7SkTijsFpXeTIc9_xOmoo7e0hW6SqrYavFq_bgjcULa7PXp3kWQpp4gfk8-U0B4L91CP6YpAJxZg
|
Connector_AiPerplexity_API_SECRET = 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
|
||||||
Connector_AiLangdoc_MODEL_NAME = gpt-4o
|
Connector_AiPerplexity_MODEL_NAME = sonar
|
||||||
Connector_AiLangdoc_TEMPERATURE = 0.2
|
Connector_AiPerplexity_TEMPERATURE = 0.2
|
||||||
Connector_AiLangdoc_MAX_TOKENS = 2000
|
Connector_AiPerplexity_MAX_TOKENS = 2000
|
||||||
|
|
||||||
# Agent Mail configuration
|
# Agent Mail configuration
|
||||||
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||||
|
|
|
||||||
|
|
@ -62,13 +62,52 @@ class AiAnthropic:
|
||||||
if maxTokens is None:
|
if maxTokens is None:
|
||||||
maxTokens = self.config.get("maxTokens", 2000)
|
maxTokens = self.config.get("maxTokens", 2000)
|
||||||
|
|
||||||
|
# Transform OpenAI-style messages to Anthropic format:
|
||||||
|
# - Move any 'system' role content to top-level 'system'
|
||||||
|
# - Keep only 'user'/'assistant' messages in the list
|
||||||
|
system_contents: List[str] = []
|
||||||
|
converted_messages: List[Dict[str, Any]] = []
|
||||||
|
for m in messages:
|
||||||
|
role = m.get("role")
|
||||||
|
content = m.get("content", "")
|
||||||
|
if role == "system":
|
||||||
|
# Collect system content; Anthropic expects top-level 'system'
|
||||||
|
if isinstance(content, list):
|
||||||
|
# Join text parts if provided as blocks
|
||||||
|
joined = "\n\n".join(
|
||||||
|
[
|
||||||
|
(part.get("text") if isinstance(part, dict) else str(part))
|
||||||
|
for part in content
|
||||||
|
]
|
||||||
|
)
|
||||||
|
system_contents.append(joined)
|
||||||
|
else:
|
||||||
|
system_contents.append(str(content))
|
||||||
|
continue
|
||||||
|
# For Anthropic, content can be a string; pass through strings, collapse blocks
|
||||||
|
if isinstance(content, list):
|
||||||
|
# Collapse to text if blocks are provided
|
||||||
|
collapsed = "\n\n".join(
|
||||||
|
[
|
||||||
|
(part.get("text") if isinstance(part, dict) else str(part))
|
||||||
|
for part in content
|
||||||
|
]
|
||||||
|
)
|
||||||
|
converted_messages.append({"role": role, "content": collapsed})
|
||||||
|
else:
|
||||||
|
converted_messages.append({"role": role, "content": content})
|
||||||
|
|
||||||
|
system_prompt = "\n\n".join([s for s in system_contents if s]) if system_contents else None
|
||||||
|
|
||||||
# Create Anthropic API payload
|
# Create Anthropic API payload
|
||||||
payload = {
|
payload: Dict[str, Any] = {
|
||||||
"model": self.modelName,
|
"model": self.modelName,
|
||||||
"messages": messages,
|
"messages": converted_messages,
|
||||||
"temperature": temperature,
|
"temperature": temperature,
|
||||||
"max_tokens": maxTokens
|
"max_tokens": maxTokens,
|
||||||
}
|
}
|
||||||
|
if system_prompt:
|
||||||
|
payload["system"] = system_prompt
|
||||||
|
|
||||||
response = await self.httpClient.post(
|
response = await self.httpClient.post(
|
||||||
self.apiUrl,
|
self.apiUrl,
|
||||||
|
|
@ -174,8 +213,8 @@ class AiAnthropic:
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
# Use the existing callApi function with the Vision model
|
# Use the existing callAiBasic function with the Vision model
|
||||||
response = await self.callApi(messages)
|
response = await self.callAiBasic(messages)
|
||||||
|
|
||||||
# Extract and return content
|
# Extract and return content
|
||||||
return response["choices"][0]["message"]["content"]
|
return response["choices"][0]["message"]["content"]
|
||||||
|
|
|
||||||
|
|
@ -1,406 +0,0 @@
|
||||||
import logging
|
|
||||||
import httpx
|
|
||||||
import asyncio
|
|
||||||
import re
|
|
||||||
from typing import Dict, Any, List, Union, Optional
|
|
||||||
from fastapi import HTTPException
|
|
||||||
from modules.shared.configuration import APP_CONFIG
|
|
||||||
|
|
||||||
# Configure logger
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
def loadConfigData():
|
|
||||||
"""Load configuration data for LangDoc connector"""
|
|
||||||
return {
|
|
||||||
"apiKey": APP_CONFIG.get('Connector_AiLangdoc_API_SECRET'),
|
|
||||||
"apiUrl": APP_CONFIG.get('Connector_AiLangdoc_API_URL'),
|
|
||||||
"modelName": APP_CONFIG.get('Connector_AiLangdoc_MODEL_NAME'),
|
|
||||||
"temperature": float(APP_CONFIG.get('Connector_AiLangdoc_TEMPERATURE')),
|
|
||||||
"maxTokens": int(APP_CONFIG.get('Connector_AiLangdoc_MAX_TOKENS'))
|
|
||||||
}
|
|
||||||
|
|
||||||
class AiLangdoc:
|
|
||||||
"""Connector for communication with the LangDoc API (OpenAI-compatible)."""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
# Load configuration
|
|
||||||
self.config = loadConfigData()
|
|
||||||
self.apiKey = self.config["apiKey"]
|
|
||||||
self.apiUrl = self.config["apiUrl"]
|
|
||||||
self.modelName = self.config["modelName"]
|
|
||||||
|
|
||||||
# HttpClient for API calls
|
|
||||||
self.httpClient = httpx.AsyncClient(
|
|
||||||
timeout=120.0, # Longer timeout for complex requests
|
|
||||||
headers={
|
|
||||||
"Authorization": f"Bearer {self.apiKey}",
|
|
||||||
"Content-Type": "application/json"
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(f"LangDoc Connector initialized with model: {self.modelName}")
|
|
||||||
|
|
||||||
async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> str:
|
|
||||||
"""
|
|
||||||
Calls the LangDoc API with the given messages.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
messages: List of messages in OpenAI format (role, content)
|
|
||||||
temperature: Temperature for response generation (0.0-1.0)
|
|
||||||
maxTokens: Maximum number of tokens in the response
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
The response from the LangDoc API
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
HTTPException: For errors in API communication
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Use parameters from configuration if none were overridden
|
|
||||||
if temperature is None:
|
|
||||||
temperature = self.config.get("temperature", 0.2)
|
|
||||||
|
|
||||||
if maxTokens is None:
|
|
||||||
maxTokens = self.config.get("maxTokens", 2000)
|
|
||||||
|
|
||||||
payload = {
|
|
||||||
"model": self.modelName,
|
|
||||||
"messages": messages,
|
|
||||||
"temperature": temperature,
|
|
||||||
"max_tokens": maxTokens
|
|
||||||
}
|
|
||||||
|
|
||||||
response = await self.httpClient.post(
|
|
||||||
self.apiUrl,
|
|
||||||
json=payload
|
|
||||||
)
|
|
||||||
|
|
||||||
if response.status_code != 200:
|
|
||||||
error_detail = f"LangDoc API error: {response.status_code} - {response.text}"
|
|
||||||
logger.error(error_detail)
|
|
||||||
|
|
||||||
# Provide more specific error messages based on status code
|
|
||||||
if response.status_code == 429:
|
|
||||||
error_message = "Rate limit exceeded. Please wait before making another request."
|
|
||||||
elif response.status_code == 401:
|
|
||||||
error_message = "Invalid API key. Please check your LangDoc API configuration."
|
|
||||||
elif response.status_code == 400:
|
|
||||||
error_message = f"Invalid request to LangDoc API: {response.text}"
|
|
||||||
else:
|
|
||||||
error_message = f"LangDoc API error ({response.status_code}): {response.text}"
|
|
||||||
|
|
||||||
raise HTTPException(status_code=500, detail=error_message)
|
|
||||||
|
|
||||||
responseJson = response.json()
|
|
||||||
content = responseJson["choices"][0]["message"]["content"]
|
|
||||||
return content
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error calling LangDoc API: {str(e)}")
|
|
||||||
raise HTTPException(status_code=500, detail=f"Error calling LangDoc API: {str(e)}")
|
|
||||||
|
|
||||||
async def callAiImage(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str:
|
|
||||||
"""
|
|
||||||
Analyzes an image using LangDoc's vision capabilities.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
imageData: Either a file path (str) or image data (bytes)
|
|
||||||
mimeType: The MIME type of the image (optional, only for binary data)
|
|
||||||
prompt: The prompt for analysis
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
The analysis response as text
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Distinguish between file path and binary data
|
|
||||||
if isinstance(imageData, str):
|
|
||||||
# It's a file path - import filehandling only when needed
|
|
||||||
from modules import agentserviceFilemanager as fileHandler
|
|
||||||
base64Data, autoMimeType = fileHandler.encodeFileToBase64(imageData)
|
|
||||||
mimeType = mimeType or autoMimeType
|
|
||||||
else:
|
|
||||||
# It's binary data
|
|
||||||
import base64
|
|
||||||
base64Data = base64.b64encode(imageData).decode('utf-8')
|
|
||||||
# MIME type must be specified for binary data
|
|
||||||
if not mimeType:
|
|
||||||
# Fallback to generic image type
|
|
||||||
mimeType = "image/png"
|
|
||||||
|
|
||||||
# Prepare the payload for the Vision API
|
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": [
|
|
||||||
{"type": "text", "text": prompt},
|
|
||||||
{
|
|
||||||
"type": "image_url",
|
|
||||||
"image_url": {
|
|
||||||
"url": f"data:{mimeType};base64,{base64Data}"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
# Use the existing callAiBasic function
|
|
||||||
response = await self.callAiBasic(messages)
|
|
||||||
|
|
||||||
return response
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error during image analysis: {str(e)}", exc_info=True)
|
|
||||||
return f"[Error during image analysis: {str(e)}]"
|
|
||||||
|
|
||||||
async def listModels(self) -> List[Dict[str, Any]]:
|
|
||||||
"""
|
|
||||||
Lists available models from the LangDoc API.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of available models with their details
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# LangDoc uses OpenAI-compatible endpoints
|
|
||||||
modelsUrl = self.apiUrl.replace("/chat/completions", "/models")
|
|
||||||
|
|
||||||
response = await self.httpClient.get(modelsUrl)
|
|
||||||
|
|
||||||
if response.status_code != 200:
|
|
||||||
error_detail = f"LangDoc API error listing models: {response.status_code} - {response.text}"
|
|
||||||
logger.error(error_detail)
|
|
||||||
raise HTTPException(status_code=500, detail=error_detail)
|
|
||||||
|
|
||||||
responseJson = response.json()
|
|
||||||
return responseJson.get("data", [])
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error listing LangDoc models: {str(e)}")
|
|
||||||
raise HTTPException(status_code=500, detail=f"Error listing LangDoc models: {str(e)}")
|
|
||||||
|
|
||||||
async def getModelInfo(self, modelName: str = None) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Gets information about a specific model.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
modelName: Name of the model to get info for (uses default if None)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Model information dictionary
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
if modelName is None:
|
|
||||||
modelName = self.modelName
|
|
||||||
|
|
||||||
models = await self.listModels()
|
|
||||||
|
|
||||||
for model in models:
|
|
||||||
if model.get("id") == modelName:
|
|
||||||
return model
|
|
||||||
|
|
||||||
raise HTTPException(status_code=404, detail=f"Model {modelName} not found")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error getting LangDoc model info: {str(e)}")
|
|
||||||
raise HTTPException(status_code=500, detail=f"Error getting LangDoc model info: {str(e)}")
|
|
||||||
|
|
||||||
async def generateImage(self, prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid") -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Generates an image using LangDoc's DALL-E 3 integration.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
prompt: Text description of the image to generate
|
|
||||||
size: Image size - "1024x1024", "1792x1024", or "1024x1792"
|
|
||||||
quality: Image quality - "standard" or "hd"
|
|
||||||
style: Image style - "vivid" or "natural"
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary containing the generated image data and metadata
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
HTTPException: For errors in API communication
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Use OpenAI-compatible images endpoint
|
|
||||||
imagesUrl = self.apiUrl.replace("/chat/completions", "/images/generations")
|
|
||||||
|
|
||||||
payload = {
|
|
||||||
"model": "dall-e-3",
|
|
||||||
"prompt": prompt,
|
|
||||||
"size": size,
|
|
||||||
"quality": quality,
|
|
||||||
"style": style,
|
|
||||||
"n": 1
|
|
||||||
}
|
|
||||||
|
|
||||||
response = await self.httpClient.post(
|
|
||||||
imagesUrl,
|
|
||||||
json=payload
|
|
||||||
)
|
|
||||||
|
|
||||||
if response.status_code != 200:
|
|
||||||
error_detail = f"LangDoc Image Generation API error: {response.status_code} - {response.text}"
|
|
||||||
logger.error(error_detail)
|
|
||||||
|
|
||||||
# Provide more specific error messages
|
|
||||||
if response.status_code == 429:
|
|
||||||
error_message = "Rate limit exceeded for image generation. Please wait before making another request."
|
|
||||||
elif response.status_code == 401:
|
|
||||||
error_message = "Invalid API key for image generation. Please check your LangDoc API configuration."
|
|
||||||
elif response.status_code == 400:
|
|
||||||
error_message = f"Invalid request to LangDoc Image API: {response.text}"
|
|
||||||
else:
|
|
||||||
error_message = f"LangDoc Image API error ({response.status_code}): {response.text}"
|
|
||||||
|
|
||||||
raise HTTPException(status_code=500, detail=error_message)
|
|
||||||
|
|
||||||
responseJson = response.json()
|
|
||||||
|
|
||||||
# Extract image data
|
|
||||||
imageData = responseJson.get("data", [])
|
|
||||||
if not imageData:
|
|
||||||
raise HTTPException(status_code=500, detail="No image data returned from LangDoc API")
|
|
||||||
|
|
||||||
imageInfo = imageData[0]
|
|
||||||
|
|
||||||
return {
|
|
||||||
"success": True,
|
|
||||||
"image_url": imageInfo.get("url"),
|
|
||||||
"revised_prompt": imageInfo.get("revised_prompt"),
|
|
||||||
"size": size,
|
|
||||||
"quality": quality,
|
|
||||||
"style": style,
|
|
||||||
"model": "dall-e-3",
|
|
||||||
"created": responseJson.get("created"),
|
|
||||||
"raw_response": responseJson
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error generating image with LangDoc: {str(e)}")
|
|
||||||
raise HTTPException(status_code=500, detail=f"Error generating image with LangDoc: {str(e)}")
|
|
||||||
|
|
||||||
async def generateImageWithVariations(self, prompt: str, variations: int = 1, size: str = "1024x1024", quality: str = "standard", style: str = "vivid") -> List[Dict[str, Any]]:
|
|
||||||
"""
|
|
||||||
Generates multiple image variations using LangDoc's DALL-E 3 integration.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
prompt: Text description of the image to generate
|
|
||||||
variations: Number of variations to generate (1-4)
|
|
||||||
size: Image size - "1024x1024", "1792x1024", or "1024x1792"
|
|
||||||
quality: Image quality - "standard" or "hd"
|
|
||||||
style: Image style - "vivid" or "natural"
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of dictionaries containing generated image data and metadata
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
HTTPException: For errors in API communication
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Limit variations to reasonable number
|
|
||||||
variations = min(max(variations, 1), 4)
|
|
||||||
|
|
||||||
# Use OpenAI-compatible images endpoint
|
|
||||||
imagesUrl = self.apiUrl.replace("/chat/completions", "/images/generations")
|
|
||||||
|
|
||||||
results = []
|
|
||||||
|
|
||||||
# Generate multiple variations by making multiple API calls
|
|
||||||
for i in range(variations):
|
|
||||||
# Add variation to prompt to get different results
|
|
||||||
variationPrompt = f"{prompt} (variation {i+1})"
|
|
||||||
|
|
||||||
payload = {
|
|
||||||
"model": "dall-e-3",
|
|
||||||
"prompt": variationPrompt,
|
|
||||||
"size": size,
|
|
||||||
"quality": quality,
|
|
||||||
"style": style,
|
|
||||||
"n": 1
|
|
||||||
}
|
|
||||||
|
|
||||||
response = await self.httpClient.post(
|
|
||||||
imagesUrl,
|
|
||||||
json=payload
|
|
||||||
)
|
|
||||||
|
|
||||||
if response.status_code != 200:
|
|
||||||
logger.warning(f"Failed to generate variation {i+1}: {response.status_code} - {response.text}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
responseJson = response.json()
|
|
||||||
imageData = responseJson.get("data", [])
|
|
||||||
|
|
||||||
if imageData:
|
|
||||||
imageInfo = imageData[0]
|
|
||||||
results.append({
|
|
||||||
"variation": i + 1,
|
|
||||||
"image_url": imageInfo.get("url"),
|
|
||||||
"revised_prompt": imageInfo.get("revised_prompt"),
|
|
||||||
"size": size,
|
|
||||||
"quality": quality,
|
|
||||||
"style": style,
|
|
||||||
"model": "dall-e-3",
|
|
||||||
"created": responseJson.get("created")
|
|
||||||
})
|
|
||||||
|
|
||||||
# Add small delay between requests to avoid rate limiting
|
|
||||||
if i < variations - 1:
|
|
||||||
await asyncio.sleep(1)
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error generating image variations with LangDoc: {str(e)}")
|
|
||||||
raise HTTPException(status_code=500, detail=f"Error generating image variations with LangDoc: {str(e)}")
|
|
||||||
|
|
||||||
async def generateImageWithChat(self, prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid") -> str:
|
|
||||||
"""
|
|
||||||
Generates an image using LangDoc's chat interface with image generation tools.
|
|
||||||
This method uses the chat completions endpoint with image generation capabilities.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
prompt: Text description of the image to generate
|
|
||||||
size: Image size - "1024x1024", "1792x1024", or "1024x1792"
|
|
||||||
quality: Image quality - "standard" or "hd"
|
|
||||||
style: Image style - "vivid" or "natural"
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Response text from the chat model (may include image references)
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
HTTPException: For errors in API communication
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Create a prompt that requests image generation
|
|
||||||
imagePrompt = f"Please generate an image with the following description: {prompt}. Size: {size}, Quality: {quality}, Style: {style}"
|
|
||||||
|
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": imagePrompt
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
# Use the chat completions endpoint
|
|
||||||
response = await self.callAiBasic(messages)
|
|
||||||
|
|
||||||
return response
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error generating image with chat: {str(e)}")
|
|
||||||
raise HTTPException(status_code=500, detail=f"Error generating image with chat: {str(e)}")
|
|
||||||
|
|
||||||
async def _testConnection(self) -> bool:
|
|
||||||
"""
|
|
||||||
Tests the connection to the LangDoc API.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
True if connection is successful, False otherwise
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Try to list models as a simple connection test
|
|
||||||
await self.listModels()
|
|
||||||
return True
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"LangDoc connection test failed: {str(e)}")
|
|
||||||
return False
|
|
||||||
255
modules/connectors/connectorAiPerplexity.py
Normal file
255
modules/connectors/connectorAiPerplexity.py
Normal file
|
|
@ -0,0 +1,255 @@
|
||||||
|
import logging
|
||||||
|
import httpx
|
||||||
|
import asyncio
|
||||||
|
from typing import Dict, Any, List, Union, Optional
|
||||||
|
from fastapi import HTTPException
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
|
# Configure logger
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def loadConfigData():
|
||||||
|
"""Load configuration data for Perplexity connector"""
|
||||||
|
return {
|
||||||
|
"apiKey": APP_CONFIG.get('Connector_AiPerplexity_API_SECRET'),
|
||||||
|
"apiUrl": APP_CONFIG.get('Connector_AiPerplexity_API_URL'),
|
||||||
|
"modelName": APP_CONFIG.get('Connector_AiPerplexity_MODEL_NAME'),
|
||||||
|
"temperature": float(APP_CONFIG.get('Connector_AiPerplexity_TEMPERATURE')),
|
||||||
|
"maxTokens": int(APP_CONFIG.get('Connector_AiPerplexity_MAX_TOKENS'))
|
||||||
|
}
|
||||||
|
|
||||||
|
class AiPerplexity:
|
||||||
|
"""Connector for communication with the Perplexity API."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
# Load configuration
|
||||||
|
self.config = loadConfigData()
|
||||||
|
self.apiKey = self.config["apiKey"]
|
||||||
|
self.apiUrl = self.config["apiUrl"]
|
||||||
|
self.modelName = self.config["modelName"]
|
||||||
|
|
||||||
|
# HttpClient for API calls
|
||||||
|
self.httpClient = httpx.AsyncClient(
|
||||||
|
timeout=120.0, # Longer timeout for complex requests
|
||||||
|
headers={
|
||||||
|
"Authorization": f"Bearer {self.apiKey}",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Accept": "application/json"
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"Perplexity Connector initialized with model: {self.modelName}")
|
||||||
|
|
||||||
|
async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> str:
|
||||||
|
"""
|
||||||
|
Calls the Perplexity API with the given messages.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
messages: List of messages in OpenAI format (role, content)
|
||||||
|
temperature: Temperature for response generation (0.0-1.0)
|
||||||
|
maxTokens: Maximum number of tokens in the response
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The response from the Perplexity API
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
HTTPException: For errors in API communication
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Use parameters from configuration if none were overridden
|
||||||
|
if temperature is None:
|
||||||
|
temperature = self.config.get("temperature", 0.2)
|
||||||
|
|
||||||
|
if maxTokens is None:
|
||||||
|
maxTokens = self.config.get("maxTokens", 2000)
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"model": self.modelName,
|
||||||
|
"messages": messages,
|
||||||
|
"temperature": temperature,
|
||||||
|
"max_tokens": maxTokens
|
||||||
|
}
|
||||||
|
|
||||||
|
response = await self.httpClient.post(
|
||||||
|
self.apiUrl,
|
||||||
|
json=payload
|
||||||
|
)
|
||||||
|
|
||||||
|
if response.status_code != 200:
|
||||||
|
error_detail = f"Perplexity API error: {response.status_code} - {response.text}"
|
||||||
|
logger.error(error_detail)
|
||||||
|
|
||||||
|
# Provide more specific error messages based on status code
|
||||||
|
if response.status_code == 429:
|
||||||
|
error_message = "Rate limit exceeded. Please wait before making another request."
|
||||||
|
elif response.status_code == 401:
|
||||||
|
error_message = "Invalid API key. Please check your Perplexity API configuration."
|
||||||
|
elif response.status_code == 400:
|
||||||
|
error_message = f"Invalid request to Perplexity API: {response.text}"
|
||||||
|
else:
|
||||||
|
error_message = f"Perplexity API error ({response.status_code}): {response.text}"
|
||||||
|
|
||||||
|
raise HTTPException(status_code=500, detail=error_message)
|
||||||
|
|
||||||
|
responseJson = response.json()
|
||||||
|
content = responseJson["choices"][0]["message"]["content"]
|
||||||
|
return content
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error calling Perplexity API: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"Error calling Perplexity API: {str(e)}")
|
||||||
|
|
||||||
|
async def callAiWithWebSearch(self, query: str, temperature: float = None, maxTokens: int = None) -> str:
|
||||||
|
"""
|
||||||
|
Calls Perplexity API with web search capabilities for research.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query: The research query or question
|
||||||
|
temperature: Temperature for response generation (0.0-1.0)
|
||||||
|
maxTokens: Maximum number of tokens in the response
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The response from Perplexity with web search context
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Use parameters from configuration if none were overridden
|
||||||
|
if temperature is None:
|
||||||
|
temperature = self.config.get("temperature", 0.2)
|
||||||
|
|
||||||
|
if maxTokens is None:
|
||||||
|
maxTokens = self.config.get("maxTokens", 2000)
|
||||||
|
|
||||||
|
# For web search, we use the configured model name
|
||||||
|
webSearchModel = self.modelName
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"model": webSearchModel,
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": query
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"temperature": temperature,
|
||||||
|
"max_tokens": maxTokens
|
||||||
|
}
|
||||||
|
|
||||||
|
response = await self.httpClient.post(
|
||||||
|
self.apiUrl,
|
||||||
|
json=payload
|
||||||
|
)
|
||||||
|
|
||||||
|
if response.status_code != 200:
|
||||||
|
error_detail = f"Perplexity Web Search API error: {response.status_code} - {response.text}"
|
||||||
|
logger.error(error_detail)
|
||||||
|
|
||||||
|
if response.status_code == 429:
|
||||||
|
error_message = "Rate limit exceeded for web search. Please wait before making another request."
|
||||||
|
elif response.status_code == 401:
|
||||||
|
error_message = "Invalid API key for web search. Please check your Perplexity API configuration."
|
||||||
|
elif response.status_code == 400:
|
||||||
|
error_message = f"Invalid request to Perplexity Web Search API: {response.text}"
|
||||||
|
else:
|
||||||
|
error_message = f"Perplexity Web Search API error ({response.status_code}): {response.text}"
|
||||||
|
|
||||||
|
raise HTTPException(status_code=500, detail=error_message)
|
||||||
|
|
||||||
|
responseJson = response.json()
|
||||||
|
content = responseJson["choices"][0]["message"]["content"]
|
||||||
|
return content
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error calling Perplexity Web Search API: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"Error calling Perplexity Web Search API: {str(e)}")
|
||||||
|
|
||||||
|
async def researchTopic(self, topic: str, depth: str = "basic") -> str:
|
||||||
|
"""
|
||||||
|
Research a topic using Perplexity's web search capabilities.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
topic: The topic to research
|
||||||
|
depth: Research depth - "basic", "detailed", or "comprehensive"
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Comprehensive research results on the topic
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Create research prompts based on depth
|
||||||
|
if depth == "basic":
|
||||||
|
prompt = f"Provide a basic overview of: {topic}"
|
||||||
|
elif depth == "detailed":
|
||||||
|
prompt = f"Provide a detailed analysis of: {topic}. Include recent developments, key facts, and important information."
|
||||||
|
else: # comprehensive
|
||||||
|
prompt = f"Provide a comprehensive research report on: {topic}. Include recent developments, key facts, statistics, expert opinions, and current trends."
|
||||||
|
|
||||||
|
return await self.callAiWithWebSearch(prompt)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error researching topic: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"Error researching topic: {str(e)}")
|
||||||
|
|
||||||
|
async def answerQuestion(self, question: str, context: str = None) -> str:
|
||||||
|
"""
|
||||||
|
Answer a question using web search for current information.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
question: The question to answer
|
||||||
|
context: Optional context to provide
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Answer with web search context
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if context:
|
||||||
|
prompt = f"Context: {context}\n\nQuestion: {question}\n\nPlease provide a comprehensive answer using current information from the web."
|
||||||
|
else:
|
||||||
|
prompt = f"Question: {question}\n\nPlease provide a comprehensive answer using current information from the web."
|
||||||
|
|
||||||
|
return await self.callAiWithWebSearch(prompt)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error answering question: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"Error answering question: {str(e)}")
|
||||||
|
|
||||||
|
async def getCurrentNews(self, topic: str = None, limit: int = 5) -> str:
|
||||||
|
"""
|
||||||
|
Get current news on a specific topic.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
topic: The topic to get news about (optional)
|
||||||
|
limit: Number of news items to retrieve
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Current news information
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if topic:
|
||||||
|
prompt = f"Get the latest news about {topic}. Provide {limit} recent news items with sources and dates."
|
||||||
|
else:
|
||||||
|
prompt = f"Get the latest news. Provide {limit} recent news items with sources and dates."
|
||||||
|
|
||||||
|
return await self.callAiWithWebSearch(prompt)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting current news: {str(e)}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"Error getting current news: {str(e)}")
|
||||||
|
|
||||||
|
async def _testConnection(self) -> bool:
|
||||||
|
"""
|
||||||
|
Tests the connection to the Perplexity API.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if connection is successful, False otherwise
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Try a simple test message
|
||||||
|
testMessages = [
|
||||||
|
{"role": "user", "content": "Hello, please respond with just 'OK' to confirm the connection works."}
|
||||||
|
]
|
||||||
|
|
||||||
|
response = await self.callAiBasic(testMessages)
|
||||||
|
return response and len(response.strip()) > 0
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Perplexity connection test failed: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
@ -4,6 +4,7 @@
|
||||||
import logging
|
import logging
|
||||||
import asyncio
|
import asyncio
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
from tavily import AsyncTavilyClient
|
from tavily import AsyncTavilyClient
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||||
|
|
@ -29,6 +30,7 @@ logger = logging.getLogger(__name__)
|
||||||
class WebSearchResult:
|
class WebSearchResult:
|
||||||
title: str
|
title: str
|
||||||
url: str
|
url: str
|
||||||
|
raw_content: Optional[str] = None
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class WebCrawlResult:
|
class WebCrawlResult:
|
||||||
|
|
@ -83,7 +85,11 @@ class ConnectorWeb:
|
||||||
return WebSearchActionResult(success=False, error=str(e))
|
return WebSearchActionResult(success=False, error=str(e))
|
||||||
|
|
||||||
result_items = [
|
result_items = [
|
||||||
WebSearchResultItem(title=result.title, url=result.url)
|
WebSearchResultItem(
|
||||||
|
title=result.title,
|
||||||
|
url=result.url,
|
||||||
|
raw_content=getattr(result, 'raw_content', None)
|
||||||
|
)
|
||||||
for result in raw_results
|
for result in raw_results
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -246,6 +252,15 @@ class ConnectorWeb:
|
||||||
urls = [result.url for result in search_results]
|
urls = [result.url for result in search_results]
|
||||||
return await self._crawl(urls, extract_depth=extract_depth, format=format)
|
return await self._crawl(urls, extract_depth=extract_depth, format=format)
|
||||||
|
|
||||||
|
def _clean_url(self, url: str) -> str:
|
||||||
|
"""Clean URL by removing extra text that might be appended."""
|
||||||
|
import re
|
||||||
|
# Extract just the URL part, removing any extra text after it
|
||||||
|
url_match = re.match(r'(https?://[^\s,]+)', url)
|
||||||
|
if url_match:
|
||||||
|
return url_match.group(1)
|
||||||
|
return url
|
||||||
|
|
||||||
async def _search(
|
async def _search(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
|
|
@ -289,7 +304,11 @@ class ConnectorWeb:
|
||||||
response = await self.client.search(**kwargs)
|
response = await self.client.search(**kwargs)
|
||||||
|
|
||||||
return [
|
return [
|
||||||
WebSearchResult(title=result["title"], url=result["url"])
|
WebSearchResult(
|
||||||
|
title=result["title"],
|
||||||
|
url=self._clean_url(result["url"]),
|
||||||
|
raw_content=result.get("raw_content")
|
||||||
|
)
|
||||||
for result in response["results"]
|
for result in response["results"]
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -304,26 +323,53 @@ class ConnectorWeb:
|
||||||
retry_delay = self.crawl_retry_delay
|
retry_delay = self.crawl_retry_delay
|
||||||
timeout = self.crawl_timeout
|
timeout = self.crawl_timeout
|
||||||
|
|
||||||
|
logger.debug(f"Starting crawl of {len(urls)} URLs: {urls}")
|
||||||
|
logger.debug(f"Crawl settings: extract_depth={extract_depth}, format={format}, timeout={timeout}s")
|
||||||
|
|
||||||
for attempt in range(max_retries + 1):
|
for attempt in range(max_retries + 1):
|
||||||
try:
|
try:
|
||||||
|
logger.debug(f"Crawl attempt {attempt + 1}/{max_retries + 1}")
|
||||||
|
|
||||||
# Use asyncio.wait_for for timeout
|
# Use asyncio.wait_for for timeout
|
||||||
# Build kwargs for extract
|
# Build kwargs for extract
|
||||||
kwargs_extract: dict = {"urls": urls}
|
kwargs_extract: dict = {"urls": urls}
|
||||||
kwargs_extract["extract_depth"] = extract_depth or "advanced"
|
kwargs_extract["extract_depth"] = extract_depth or "advanced"
|
||||||
kwargs_extract["format"] = format or "text"
|
kwargs_extract["format"] = format or "markdown" # Use markdown to get HTML structure
|
||||||
|
|
||||||
|
logger.debug(f"Sending request to Tavily with kwargs: {kwargs_extract}")
|
||||||
|
|
||||||
response = await asyncio.wait_for(
|
response = await asyncio.wait_for(
|
||||||
self.client.extract(**kwargs_extract),
|
self.client.extract(**kwargs_extract),
|
||||||
timeout=timeout
|
timeout=timeout
|
||||||
)
|
)
|
||||||
|
|
||||||
return [
|
logger.debug(f"Tavily response received: {list(response.keys())}")
|
||||||
WebCrawlResult(url=result["url"], content=result["raw_content"])
|
|
||||||
|
# Debug: Log what Tavily actually returns
|
||||||
|
if "results" in response and response["results"]:
|
||||||
|
logger.debug(f"Tavily returned {len(response['results'])} results")
|
||||||
|
logger.debug(f"First result keys: {list(response['results'][0].keys())}")
|
||||||
|
logger.debug(f"First result has raw_content: {'raw_content' in response['results'][0]}")
|
||||||
|
|
||||||
|
# Log each result
|
||||||
|
for i, result in enumerate(response["results"]):
|
||||||
|
logger.debug(f"Result {i+1}: URL={result.get('url', 'N/A')}, content_length={len(result.get('raw_content', result.get('content', '')))}")
|
||||||
|
else:
|
||||||
|
logger.warning(f"Tavily returned no results in response: {response}")
|
||||||
|
|
||||||
|
results = [
|
||||||
|
WebCrawlResult(
|
||||||
|
url=result["url"],
|
||||||
|
content=result.get("raw_content", result.get("content", "")) # Try raw_content first, fallback to content
|
||||||
|
)
|
||||||
for result in response["results"]
|
for result in response["results"]
|
||||||
]
|
]
|
||||||
|
|
||||||
|
logger.debug(f"Crawl successful: extracted {len(results)} results")
|
||||||
|
return results
|
||||||
|
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
logger.warning(f"Crawl attempt {attempt + 1} timed out after {timeout} seconds")
|
logger.warning(f"Crawl attempt {attempt + 1} timed out after {timeout} seconds for URLs: {urls}")
|
||||||
if attempt < max_retries:
|
if attempt < max_retries:
|
||||||
logger.info(f"Retrying in {retry_delay} seconds...")
|
logger.info(f"Retrying in {retry_delay} seconds...")
|
||||||
await asyncio.sleep(retry_delay)
|
await asyncio.sleep(retry_delay)
|
||||||
|
|
@ -331,7 +377,22 @@ class ConnectorWeb:
|
||||||
raise Exception(f"Crawl failed after {max_retries + 1} attempts due to timeout")
|
raise Exception(f"Crawl failed after {max_retries + 1} attempts due to timeout")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Crawl attempt {attempt + 1} failed: {str(e)}")
|
logger.warning(f"Crawl attempt {attempt + 1} failed for URLs {urls}: {str(e)}")
|
||||||
|
logger.debug(f"Full error details: {type(e).__name__}: {str(e)}")
|
||||||
|
|
||||||
|
# Check if it's a validation error and log more details
|
||||||
|
if "validation" in str(e).lower():
|
||||||
|
logger.debug(f"URL validation failed. Checking URL format:")
|
||||||
|
for i, url in enumerate(urls):
|
||||||
|
logger.debug(f" URL {i+1}: '{url}' (length: {len(url)})")
|
||||||
|
# Check for common URL issues
|
||||||
|
if ' ' in url:
|
||||||
|
logger.debug(f" WARNING: URL contains spaces!")
|
||||||
|
if not url.startswith(('http://', 'https://')):
|
||||||
|
logger.debug(f" WARNING: URL doesn't start with http/https!")
|
||||||
|
if len(url) > 2000:
|
||||||
|
logger.debug(f" WARNING: URL is very long ({len(url)} chars)")
|
||||||
|
|
||||||
if attempt < max_retries:
|
if attempt < max_retries:
|
||||||
logger.info(f"Retrying in {retry_delay} seconds...")
|
logger.info(f"Retrying in {retry_delay} seconds...")
|
||||||
await asyncio.sleep(retry_delay)
|
await asyncio.sleep(retry_delay)
|
||||||
|
|
|
||||||
|
|
@ -384,6 +384,57 @@ class DatabaseConnector:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Created table '{table}' with columns from Pydantic model"
|
f"Created table '{table}' with columns from Pydantic model"
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
# Table exists: ensure all columns from model are present (simple additive migration)
|
||||||
|
try:
|
||||||
|
cursor.execute(
|
||||||
|
"""
|
||||||
|
SELECT column_name FROM information_schema.columns
|
||||||
|
WHERE LOWER(table_name) = LOWER(%s) AND table_schema = 'public'
|
||||||
|
""",
|
||||||
|
(table,),
|
||||||
|
)
|
||||||
|
existing_columns = {
|
||||||
|
row["column_name"] for row in cursor.fetchall()
|
||||||
|
}
|
||||||
|
|
||||||
|
# Desired columns based on model
|
||||||
|
model_fields = _get_model_fields(model_class)
|
||||||
|
desired_columns = (
|
||||||
|
set(["id"])
|
||||||
|
| set(model_fields.keys())
|
||||||
|
| {"_createdAt", "_modifiedAt", "_createdBy", "_modifiedBy"}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add missing columns
|
||||||
|
for col in sorted(desired_columns - existing_columns):
|
||||||
|
# Determine SQL type
|
||||||
|
if col in ["id"]:
|
||||||
|
continue # primary key exists already
|
||||||
|
sql_type = model_fields.get(col)
|
||||||
|
if col in ["_createdAt"]:
|
||||||
|
sql_type = "DOUBLE PRECISION"
|
||||||
|
elif col in ["_modifiedAt"]:
|
||||||
|
sql_type = "DOUBLE PRECISION"
|
||||||
|
elif col in ["_createdBy", "_modifiedBy"]:
|
||||||
|
sql_type = "VARCHAR(255)"
|
||||||
|
if not sql_type:
|
||||||
|
sql_type = "TEXT"
|
||||||
|
try:
|
||||||
|
cursor.execute(
|
||||||
|
f'ALTER TABLE "{table}" ADD COLUMN "{col}" {sql_type}'
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"Added missing column '{col}' ({sql_type}) to '{table}'"
|
||||||
|
)
|
||||||
|
except Exception as add_err:
|
||||||
|
logger.warning(
|
||||||
|
f"Could not add column '{col}' to '{table}': {add_err}"
|
||||||
|
)
|
||||||
|
except Exception as ensure_err:
|
||||||
|
logger.warning(
|
||||||
|
f"Could not ensure columns for existing table '{table}': {ensure_err}"
|
||||||
|
)
|
||||||
|
|
||||||
self.connection.commit()
|
self.connection.commit()
|
||||||
return True
|
return True
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,6 @@ from . import datamodelWeb as web
|
||||||
from . import datamodelUam as uam
|
from . import datamodelUam as uam
|
||||||
from . import datamodelSecurity as security
|
from . import datamodelSecurity as security
|
||||||
from . import datamodelNeutralizer as neutralizer
|
from . import datamodelNeutralizer as neutralizer
|
||||||
from . import datamodelWorkflow as workflow
|
|
||||||
from . import datamodelChat as chat
|
from . import datamodelChat as chat
|
||||||
from . import datamodelFiles as files
|
from . import datamodelFiles as files
|
||||||
from . import datamodelVoice as voice
|
from . import datamodelVoice as voice
|
||||||
|
|
|
||||||
|
|
@ -111,6 +111,11 @@ class AiCallOptions(BaseModel):
|
||||||
callType: Literal["planning", "text"] = Field(default="text", description="Call type: planning or text")
|
callType: Literal["planning", "text"] = Field(default="text", description="Call type: planning or text")
|
||||||
safetyMargin: float = Field(default=0.1, ge=0.0, le=0.5, description="Safety margin for token limits (0.0-0.5)")
|
safetyMargin: float = Field(default=0.1, ge=0.0, le=0.5, description="Safety margin for token limits (0.0-0.5)")
|
||||||
modelCapabilities: Optional[List[str]] = Field(default=None, description="Required model capabilities for filtering")
|
modelCapabilities: Optional[List[str]] = Field(default=None, description="Required model capabilities for filtering")
|
||||||
|
|
||||||
|
# Model generation parameters
|
||||||
|
temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0, description="Temperature for response generation (0.0-2.0, lower = more consistent)")
|
||||||
|
maxTokens: Optional[int] = Field(default=None, ge=1, le=32000, description="Maximum tokens in response")
|
||||||
|
maxParts: Optional[int] = Field(default=1000, ge=1, le=1000, description="Maximum number of continuation parts to fetch")
|
||||||
|
|
||||||
|
|
||||||
class AiCallRequest(BaseModel):
|
class AiCallRequest(BaseModel):
|
||||||
|
|
|
||||||
|
|
@ -169,7 +169,7 @@ register_model_labels(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ExtractedContent(BaseModel, ModelMixin):
|
class ChatContentExtracted(BaseModel, ModelMixin):
|
||||||
id: str = Field(description="Reference to source ChatDocument")
|
id: str = Field(description="Reference to source ChatDocument")
|
||||||
contents: List[ContentItem] = Field(
|
contents: List[ContentItem] = Field(
|
||||||
default_factory=list, description="List of content items"
|
default_factory=list, description="List of content items"
|
||||||
|
|
@ -177,7 +177,7 @@ class ExtractedContent(BaseModel, ModelMixin):
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
"ExtractedContent",
|
"ChatContentExtracted",
|
||||||
{"en": "Extracted Content", "fr": "Contenu extrait"},
|
{"en": "Extracted Content", "fr": "Contenu extrait"},
|
||||||
{
|
{
|
||||||
"id": {"en": "Object ID", "fr": "ID de l'objet"},
|
"id": {"en": "Object ID", "fr": "ID de l'objet"},
|
||||||
|
|
@ -201,6 +201,9 @@ class ChatMessage(BaseModel, ModelMixin):
|
||||||
None, description="Label for the set of documents"
|
None, description="Label for the set of documents"
|
||||||
)
|
)
|
||||||
message: Optional[str] = Field(None, description="Message content")
|
message: Optional[str] = Field(None, description="Message content")
|
||||||
|
summary: Optional[str] = Field(
|
||||||
|
None, description="Short summary of this message for planning/history"
|
||||||
|
)
|
||||||
role: str = Field(description="Role of the message sender")
|
role: str = Field(description="Role of the message sender")
|
||||||
status: str = Field(description="Status of the message (first, step, last)")
|
status: str = Field(description="Status of the message (first, step, last)")
|
||||||
sequenceNr: int = Field(
|
sequenceNr: int = Field(
|
||||||
|
|
@ -244,6 +247,7 @@ register_model_labels(
|
||||||
"documents": {"en": "Documents", "fr": "Documents"},
|
"documents": {"en": "Documents", "fr": "Documents"},
|
||||||
"documentsLabel": {"en": "Documents Label", "fr": "Label des documents"},
|
"documentsLabel": {"en": "Documents Label", "fr": "Label des documents"},
|
||||||
"message": {"en": "Message", "fr": "Message"},
|
"message": {"en": "Message", "fr": "Message"},
|
||||||
|
"summary": {"en": "Summary", "fr": "Résumé"},
|
||||||
"role": {"en": "Role", "fr": "Rôle"},
|
"role": {"en": "Role", "fr": "Rôle"},
|
||||||
"status": {"en": "Status", "fr": "Statut"},
|
"status": {"en": "Status", "fr": "Statut"},
|
||||||
"sequenceNr": {"en": "Sequence Number", "fr": "Numéro de séquence"},
|
"sequenceNr": {"en": "Sequence Number", "fr": "Numéro de séquence"},
|
||||||
|
|
@ -419,34 +423,6 @@ register_model_labels(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class WorkflowResult(BaseModel, ModelMixin):
|
|
||||||
status: str
|
|
||||||
completed_tasks: int
|
|
||||||
total_tasks: int
|
|
||||||
execution_time: float
|
|
||||||
final_results_count: int
|
|
||||||
error: Optional[str] = None
|
|
||||||
phase: Optional[str] = None
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"WorkflowResult",
|
|
||||||
{"en": "Workflow Result", "fr": "Résultat du workflow"},
|
|
||||||
{
|
|
||||||
"status": {"en": "Status", "fr": "Statut"},
|
|
||||||
"completed_tasks": {"en": "Completed Tasks", "fr": "Tâches terminées"},
|
|
||||||
"total_tasks": {"en": "Total Tasks", "fr": "Total des tâches"},
|
|
||||||
"execution_time": {"en": "Execution Time", "fr": "Temps d'exécution"},
|
|
||||||
"final_results_count": {
|
|
||||||
"en": "Final Results Count",
|
|
||||||
"fr": "Nombre de résultats finaux",
|
|
||||||
},
|
|
||||||
"error": {"en": "Error", "fr": "Erreur"},
|
|
||||||
"phase": {"en": "Phase", "fr": "Phase"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class UserInputRequest(BaseModel, ModelMixin):
|
class UserInputRequest(BaseModel, ModelMixin):
|
||||||
prompt: str = Field(description="Prompt for the user")
|
prompt: str = Field(description="Prompt for the user")
|
||||||
listFileId: List[str] = Field(default_factory=list, description="List of file IDs")
|
listFileId: List[str] = Field(default_factory=list, description="List of file IDs")
|
||||||
|
|
@ -462,3 +438,519 @@ register_model_labels(
|
||||||
"userLanguage": {"en": "User Language", "fr": "Langue de l'utilisateur"},
|
"userLanguage": {"en": "User Language", "fr": "Langue de l'utilisateur"},
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ActionDocument(BaseModel, ModelMixin):
|
||||||
|
"""Clear document structure for action results"""
|
||||||
|
|
||||||
|
documentName: str = Field(description="Name of the document")
|
||||||
|
documentData: Any = Field(description="Content/data of the document")
|
||||||
|
mimeType: str = Field(description="MIME type of the document")
|
||||||
|
|
||||||
|
|
||||||
|
register_model_labels(
|
||||||
|
"ActionDocument",
|
||||||
|
{"en": "Action Document", "fr": "Document d'action"},
|
||||||
|
{
|
||||||
|
"documentName": {"en": "Document Name", "fr": "Nom du document"},
|
||||||
|
"documentData": {"en": "Document Data", "fr": "Données du document"},
|
||||||
|
"mimeType": {"en": "MIME Type", "fr": "Type MIME"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ActionResult(BaseModel, ModelMixin):
|
||||||
|
"""Clean action result with documents as primary output
|
||||||
|
|
||||||
|
IMPORTANT: Action methods should NOT set resultLabel in their return value.
|
||||||
|
The resultLabel is managed by the action handler using the action's execResultLabel
|
||||||
|
from the action plan. This ensures consistent document routing throughout the workflow.
|
||||||
|
"""
|
||||||
|
|
||||||
|
success: bool = Field(description="Whether execution succeeded")
|
||||||
|
error: Optional[str] = Field(None, description="Error message if failed")
|
||||||
|
documents: List[ActionDocument] = Field(
|
||||||
|
default_factory=list, description="Document outputs"
|
||||||
|
)
|
||||||
|
resultLabel: Optional[str] = Field(
|
||||||
|
None,
|
||||||
|
description="Label for document routing (set by action handler, not by action methods)",
|
||||||
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def isSuccess(cls, documents: List[ActionDocument] = None) -> "ActionResult":
|
||||||
|
return cls(success=True, documents=documents or [])
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def isFailure(
|
||||||
|
cls, error: str, documents: List[ActionDocument] = None
|
||||||
|
) -> "ActionResult":
|
||||||
|
return cls(success=False, documents=documents or [], error=error)
|
||||||
|
|
||||||
|
|
||||||
|
register_model_labels(
|
||||||
|
"ActionResult",
|
||||||
|
{"en": "Action Result", "fr": "Résultat de l'action"},
|
||||||
|
{
|
||||||
|
"success": {"en": "Success", "fr": "Succès"},
|
||||||
|
"error": {"en": "Error", "fr": "Erreur"},
|
||||||
|
"documents": {"en": "Documents", "fr": "Documents"},
|
||||||
|
"resultLabel": {"en": "Result Label", "fr": "Étiquette du résultat"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ActionSelection(BaseModel, ModelMixin):
|
||||||
|
method: str = Field(description="Method to execute (e.g., web, document, ai)")
|
||||||
|
name: str = Field(
|
||||||
|
description="Action name within the method (e.g., search, extract)"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
register_model_labels(
|
||||||
|
"ActionSelection",
|
||||||
|
{"en": "Action Selection", "fr": "Sélection d'action"},
|
||||||
|
{
|
||||||
|
"method": {"en": "Method", "fr": "Méthode"},
|
||||||
|
"name": {"en": "Action Name", "fr": "Nom de l'action"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ActionParameters(BaseModel, ModelMixin):
|
||||||
|
parameters: Dict[str, Any] = Field(
|
||||||
|
default_factory=dict, description="Parameters to execute the selected action"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
register_model_labels(
|
||||||
|
"ActionParameters",
|
||||||
|
{"en": "Action Parameters", "fr": "Paramètres d'action"},
|
||||||
|
{
|
||||||
|
"parameters": {"en": "Parameters", "fr": "Paramètres"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ObservationPreview(BaseModel, ModelMixin):
|
||||||
|
name: str = Field(description="Document name or URL label")
|
||||||
|
mime: str = Field(description="MIME type or kind")
|
||||||
|
snippet: str = Field(description="Short snippet or summary")
|
||||||
|
|
||||||
|
|
||||||
|
register_model_labels(
|
||||||
|
"ObservationPreview",
|
||||||
|
{"en": "Observation Preview", "fr": "Aperçu d'observation"},
|
||||||
|
{
|
||||||
|
"name": {"en": "Name", "fr": "Nom"},
|
||||||
|
"mime": {"en": "MIME", "fr": "MIME"},
|
||||||
|
"snippet": {"en": "Snippet", "fr": "Extrait"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Observation(BaseModel, ModelMixin):
|
||||||
|
success: bool = Field(description="Action execution success flag")
|
||||||
|
resultLabel: str = Field(description="Deterministic label for produced documents")
|
||||||
|
documentsCount: int = Field(description="Number of produced documents")
|
||||||
|
previews: List[ObservationPreview] = Field(
|
||||||
|
default_factory=list, description="Compact previews of outputs"
|
||||||
|
)
|
||||||
|
notes: List[str] = Field(
|
||||||
|
default_factory=list, description="Short notes or key facts"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
register_model_labels(
|
||||||
|
"Observation",
|
||||||
|
{"en": "Observation", "fr": "Observation"},
|
||||||
|
{
|
||||||
|
"success": {"en": "Success", "fr": "Succès"},
|
||||||
|
"resultLabel": {"en": "Result Label", "fr": "Étiquette du résultat"},
|
||||||
|
"documentsCount": {"en": "Documents Count", "fr": "Nombre de documents"},
|
||||||
|
"previews": {"en": "Previews", "fr": "Aperçus"},
|
||||||
|
"notes": {"en": "Notes", "fr": "Notes"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TaskStatus(str):
|
||||||
|
PENDING = "pending"
|
||||||
|
RUNNING = "running"
|
||||||
|
COMPLETED = "completed"
|
||||||
|
FAILED = "failed"
|
||||||
|
CANCELLED = "cancelled"
|
||||||
|
|
||||||
|
|
||||||
|
register_model_labels(
|
||||||
|
"TaskStatus",
|
||||||
|
{"en": "Task Status", "fr": "Statut de la tâche"},
|
||||||
|
{
|
||||||
|
"PENDING": {"en": "Pending", "fr": "En attente"},
|
||||||
|
"RUNNING": {"en": "Running", "fr": "En cours"},
|
||||||
|
"COMPLETED": {"en": "Completed", "fr": "Terminé"},
|
||||||
|
"FAILED": {"en": "Failed", "fr": "Échec"},
|
||||||
|
"CANCELLED": {"en": "Cancelled", "fr": "Annulé"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentExchange(BaseModel, ModelMixin):
|
||||||
|
documentsLabel: str = Field(description="Label for the set of documents")
|
||||||
|
documents: List[str] = Field(
|
||||||
|
default_factory=list, description="List of document references"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
register_model_labels(
|
||||||
|
"DocumentExchange",
|
||||||
|
{"en": "Document Exchange", "fr": "Échange de documents"},
|
||||||
|
{
|
||||||
|
"documentsLabel": {"en": "Documents Label", "fr": "Label des documents"},
|
||||||
|
"documents": {"en": "Documents", "fr": "Documents"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ActionItem(BaseModel, ModelMixin):
|
||||||
|
id: str = Field(..., description="Action ID")
|
||||||
|
execMethod: str = Field(..., description="Method to execute")
|
||||||
|
execAction: str = Field(..., description="Action to perform")
|
||||||
|
execParameters: Dict[str, Any] = Field(
|
||||||
|
default_factory=dict, description="Action parameters"
|
||||||
|
)
|
||||||
|
execResultLabel: Optional[str] = Field(
|
||||||
|
None, description="Label for the set of result documents"
|
||||||
|
)
|
||||||
|
expectedDocumentFormats: Optional[List[Dict[str, str]]] = Field(
|
||||||
|
None, description="Expected document formats (optional)"
|
||||||
|
)
|
||||||
|
userMessage: Optional[str] = Field(
|
||||||
|
None, description="User-friendly message in user's language"
|
||||||
|
)
|
||||||
|
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status")
|
||||||
|
error: Optional[str] = Field(None, description="Error message if action failed")
|
||||||
|
retryCount: int = Field(default=0, description="Number of retries attempted")
|
||||||
|
retryMax: int = Field(default=3, description="Maximum number of retries")
|
||||||
|
processingTime: Optional[float] = Field(
|
||||||
|
None, description="Processing time in seconds"
|
||||||
|
)
|
||||||
|
timestamp: float = Field(
|
||||||
|
..., description="When the action was executed (UTC timestamp in seconds)"
|
||||||
|
)
|
||||||
|
result: Optional[str] = Field(None, description="Result of the action")
|
||||||
|
|
||||||
|
def setSuccess(self, result: str = None) -> None:
|
||||||
|
"""Set the action as successful with optional result"""
|
||||||
|
self.status = TaskStatus.COMPLETED
|
||||||
|
self.error = None
|
||||||
|
if result is not None:
|
||||||
|
self.result = result
|
||||||
|
|
||||||
|
def setError(self, error_message: str) -> None:
|
||||||
|
"""Set the action as failed with error message"""
|
||||||
|
self.status = TaskStatus.FAILED
|
||||||
|
self.error = error_message
|
||||||
|
|
||||||
|
|
||||||
|
register_model_labels(
|
||||||
|
"ActionItem",
|
||||||
|
{"en": "Task Action", "fr": "Action de tâche"},
|
||||||
|
{
|
||||||
|
"id": {"en": "Action ID", "fr": "ID de l'action"},
|
||||||
|
"execMethod": {"en": "Method", "fr": "Méthode"},
|
||||||
|
"execAction": {"en": "Action", "fr": "Action"},
|
||||||
|
"execParameters": {"en": "Parameters", "fr": "Paramètres"},
|
||||||
|
"execResultLabel": {"en": "Result Label", "fr": "Label du résultat"},
|
||||||
|
"expectedDocumentFormats": {
|
||||||
|
"en": "Expected Document Formats",
|
||||||
|
"fr": "Formats de documents attendus",
|
||||||
|
},
|
||||||
|
"userMessage": {"en": "User Message", "fr": "Message utilisateur"},
|
||||||
|
"status": {"en": "Status", "fr": "Statut"},
|
||||||
|
"error": {"en": "Error", "fr": "Erreur"},
|
||||||
|
"retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"},
|
||||||
|
"retryMax": {"en": "Max Retries", "fr": "Tentatives max"},
|
||||||
|
"processingTime": {"en": "Processing Time", "fr": "Temps de traitement"},
|
||||||
|
"timestamp": {"en": "Timestamp", "fr": "Horodatage"},
|
||||||
|
"result": {"en": "Result", "fr": "Résultat"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TaskResult(BaseModel, ModelMixin):
|
||||||
|
taskId: str = Field(..., description="Task ID")
|
||||||
|
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status")
|
||||||
|
success: bool = Field(..., description="Whether the task was successful")
|
||||||
|
feedback: Optional[str] = Field(None, description="Task feedback message")
|
||||||
|
error: Optional[str] = Field(None, description="Error message if task failed")
|
||||||
|
|
||||||
|
|
||||||
|
register_model_labels(
|
||||||
|
"TaskResult",
|
||||||
|
{"en": "Task Result", "fr": "Résultat de tâche"},
|
||||||
|
{
|
||||||
|
"taskId": {"en": "Task ID", "fr": "ID de la tâche"},
|
||||||
|
"status": {"en": "Status", "fr": "Statut"},
|
||||||
|
"success": {"en": "Success", "fr": "Succès"},
|
||||||
|
"feedback": {"en": "Feedback", "fr": "Retour"},
|
||||||
|
"error": {"en": "Error", "fr": "Erreur"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TaskItem(BaseModel, ModelMixin):
|
||||||
|
id: str = Field(..., description="Task ID")
|
||||||
|
workflowId: str = Field(..., description="Workflow ID")
|
||||||
|
userInput: str = Field(..., description="User input that triggered the task")
|
||||||
|
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status")
|
||||||
|
error: Optional[str] = Field(None, description="Error message if task failed")
|
||||||
|
startedAt: Optional[float] = Field(
|
||||||
|
None, description="When the task started (UTC timestamp in seconds)"
|
||||||
|
)
|
||||||
|
finishedAt: Optional[float] = Field(
|
||||||
|
None, description="When the task finished (UTC timestamp in seconds)"
|
||||||
|
)
|
||||||
|
actionList: List[ActionItem] = Field(
|
||||||
|
default_factory=list, description="List of actions to execute"
|
||||||
|
)
|
||||||
|
retryCount: int = Field(default=0, description="Number of retries attempted")
|
||||||
|
retryMax: int = Field(default=3, description="Maximum number of retries")
|
||||||
|
rollbackOnFailure: bool = Field(
|
||||||
|
default=True, description="Whether to rollback on failure"
|
||||||
|
)
|
||||||
|
dependencies: List[str] = Field(
|
||||||
|
default_factory=list, description="List of task IDs this task depends on"
|
||||||
|
)
|
||||||
|
feedback: Optional[str] = Field(None, description="Task feedback message")
|
||||||
|
processingTime: Optional[float] = Field(
|
||||||
|
None, description="Total processing time in seconds"
|
||||||
|
)
|
||||||
|
resultLabels: Optional[Dict[str, Any]] = Field(
|
||||||
|
default_factory=dict, description="Map of result labels to their values"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
register_model_labels(
|
||||||
|
"TaskItem",
|
||||||
|
{"en": "Task", "fr": "Tâche"},
|
||||||
|
{
|
||||||
|
"id": {"en": "Task ID", "fr": "ID de la tâche"},
|
||||||
|
"workflowId": {"en": "Workflow ID", "fr": "ID du workflow"},
|
||||||
|
"userInput": {"en": "User Input", "fr": "Entrée utilisateur"},
|
||||||
|
"status": {"en": "Status", "fr": "Statut"},
|
||||||
|
"error": {"en": "Error", "fr": "Erreur"},
|
||||||
|
"startedAt": {"en": "Started At", "fr": "Démarré à"},
|
||||||
|
"finishedAt": {"en": "Finished At", "fr": "Terminé à"},
|
||||||
|
"actionList": {"en": "Actions", "fr": "Actions"},
|
||||||
|
"retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"},
|
||||||
|
"retryMax": {"en": "Max Retries", "fr": "Tentatives max"},
|
||||||
|
"processingTime": {"en": "Processing Time", "fr": "Temps de traitement"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TaskStep(BaseModel, ModelMixin):
    """One planned step: an objective plus its ordering and acceptance data."""

    id: str
    objective: str
    # IDs of steps that must complete before this one.
    dependencies: Optional[list[str]] = Field(default_factory=list)
    success_criteria: Optional[list[str]] = Field(default_factory=list)
    estimated_complexity: Optional[str] = None
    userMessage: Optional[str] = Field(default=None, description="User-friendly message in user's language")
|
||||||
|
|
||||||
|
# i18n display labels (English / French) for the TaskStep model and its fields.
register_model_labels(
    "TaskStep",
    {"en": "Task Step", "fr": "Étape de tâche"},
    {
        "id": {"en": "ID", "fr": "ID"},
        "objective": {"en": "Objective", "fr": "Objectif"},
        "dependencies": {"en": "Dependencies", "fr": "Dépendances"},
        "success_criteria": {"en": "Success Criteria", "fr": "Critères de succès"},
        "estimated_complexity": {
            "en": "Estimated Complexity",
            "fr": "Complexité estimée",
        },
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"},
    },
)
|
||||||
|
|
||||||
|
|
||||||
|
class TaskHandover(BaseModel, ModelMixin):
    """Context package passed to the next task (or phase/workflow) on handover."""

    taskId: str = Field(description="Target task ID")
    sourceTask: Optional[str] = Field(default=None, description="Source task ID")
    inputDocuments: List[DocumentExchange] = Field(default_factory=list, description="Available input documents")
    outputDocuments: List[DocumentExchange] = Field(default_factory=list, description="Produced output documents")
    context: Dict[str, Any] = Field(default_factory=dict, description="Task context")
    previousResults: List[str] = Field(default_factory=list, description="Previous result summaries")
    improvements: List[str] = Field(default_factory=list, description="Improvement suggestions")
    workflowSummary: Optional[str] = Field(default=None, description="Summarized workflow context")
    messageHistory: List[str] = Field(default_factory=list, description="Key message summaries")
    # Required field: a handover always records when it was created.
    timestamp: float = Field(..., description="When the handover was created (UTC timestamp in seconds)")
    handoverType: str = Field(default="task", description="Type of handover: task, phase, or workflow")
|
||||||
|
|
||||||
|
|
||||||
|
# i18n display labels (English / French) for the TaskHandover model and its fields.
register_model_labels(
    "TaskHandover",
    {"en": "Task Handover", "fr": "Transfert de tâche"},
    {
        "taskId": {"en": "Task ID", "fr": "ID de la tâche"},
        "sourceTask": {"en": "Source Task", "fr": "Tâche source"},
        "inputDocuments": {"en": "Input Documents", "fr": "Documents d'entrée"},
        "outputDocuments": {"en": "Output Documents", "fr": "Documents de sortie"},
        "context": {"en": "Context", "fr": "Contexte"},
        "previousResults": {"en": "Previous Results", "fr": "Résultats précédents"},
        "improvements": {"en": "Improvements", "fr": "Améliorations"},
        "workflowSummary": {"en": "Workflow Summary", "fr": "Résumé du workflow"},
        "messageHistory": {"en": "Message History", "fr": "Historique des messages"},
        "timestamp": {"en": "Timestamp", "fr": "Horodatage"},
        "handoverType": {"en": "Handover Type", "fr": "Type de transfert"},
    },
)
|
||||||
|
|
||||||
|
|
||||||
|
class TaskContext(BaseModel, ModelMixin):
    """Aggregated runtime context supplied to a task step when it executes."""

    task_step: TaskStep
    # Forward reference: ChatWorkflow is declared later in the module.
    workflow: Optional["ChatWorkflow"] = None
    workflow_id: Optional[str] = None
    available_documents: Optional[str] = "No documents available"
    available_connections: Optional[list[str]] = Field(default_factory=list)
    previous_results: Optional[list[str]] = Field(default_factory=list)
    previous_handover: Optional[TaskHandover] = None
    improvements: Optional[list[str]] = Field(default_factory=list)
    retry_count: Optional[int] = 0
    previous_action_results: Optional[list] = Field(default_factory=list)
    previous_review_result: Optional[dict] = None
    is_regeneration: Optional[bool] = False
    failure_patterns: Optional[list[str]] = Field(default_factory=list)
    failed_actions: Optional[list] = Field(default_factory=list)
    successful_actions: Optional[list] = Field(default_factory=list)
    criteria_progress: Optional[dict] = None

    def getDocumentReferences(self) -> List[str]:
        """Return the distinct document references carried by the previous handover.

        Order of the result is unspecified (deduplicated via a set).
        """
        if not self.previous_handover:
            return []
        references: List[str] = []
        for exchange in self.previous_handover.inputDocuments:
            references.extend(exchange.documents)
        return list(set(references))

    def addImprovement(self, improvement: str) -> None:
        """Record an improvement suggestion, ignoring duplicates."""
        if self.improvements is None:
            self.improvements = []
        if improvement not in self.improvements:
            self.improvements.append(improvement)
|
||||||
|
|
||||||
|
|
||||||
|
class ReviewContext(BaseModel, ModelMixin):
    """Inputs collected for reviewing the outcome of a single task step."""

    task_step: TaskStep
    task_actions: Optional[list] = Field(default_factory=list)
    action_results: Optional[list] = Field(default_factory=list)
    step_result: Optional[dict] = Field(default_factory=dict)
    workflow_id: Optional[str] = None
    previous_results: Optional[list[str]] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class ReviewResult(BaseModel, ModelMixin):
    """Outcome of reviewing a task step against its success criteria."""

    status: str
    reason: Optional[str] = None
    improvements: Optional[list[str]] = Field(default_factory=list)
    # Defaults chosen by the authors: mid-scale score and confidence.
    quality_score: Optional[int] = 5
    missing_outputs: Optional[list[str]] = Field(default_factory=list)
    met_criteria: Optional[list[str]] = Field(default_factory=list)
    unmet_criteria: Optional[list[str]] = Field(default_factory=list)
    confidence: Optional[float] = 0.5
    userMessage: Optional[str] = Field(default=None, description="User-friendly message in user's language")
|
||||||
|
|
||||||
|
|
||||||
|
# i18n display labels (English / French) for the ReviewResult model and its fields.
register_model_labels(
    "ReviewResult",
    {"en": "Review Result", "fr": "Résultat de l'évaluation"},
    {
        "status": {"en": "Status", "fr": "Statut"},
        "reason": {"en": "Reason", "fr": "Raison"},
        "improvements": {"en": "Improvements", "fr": "Améliorations"},
        "quality_score": {"en": "Quality Score", "fr": "Score de qualité"},
        "missing_outputs": {"en": "Missing Outputs", "fr": "Sorties manquantes"},
        "met_criteria": {"en": "Met Criteria", "fr": "Critères respectés"},
        "unmet_criteria": {"en": "Unmet Criteria", "fr": "Critères non respectés"},
        "confidence": {"en": "Confidence", "fr": "Confiance"},
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"},
    },
)
|
||||||
|
|
||||||
|
|
||||||
|
class TaskPlan(BaseModel, ModelMixin):
    """An execution plan: a textual overview plus its ordered task steps."""

    overview: str
    tasks: list[TaskStep]
    userMessage: Optional[str] = Field(default=None, description="Overall user-friendly message for the task plan")
|
||||||
|
|
||||||
|
|
||||||
|
# i18n display labels (English / French) for the TaskPlan model and its fields.
register_model_labels(
    "TaskPlan",
    {"en": "Task Plan", "fr": "Plan de tâches"},
    {
        "overview": {"en": "Overview", "fr": "Aperçu"},
        "tasks": {"en": "Tasks", "fr": "Tâches"},
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"},
    },
)
|
||||||
|
|
||||||
|
# Resolve forward references
# TaskContext declares workflow: Optional["ChatWorkflow"]; resolve the string
# annotation once all dependent models are defined.
# NOTE(review): update_forward_refs() is the Pydantic v1 API; on Pydantic v2
# this should be TaskContext.model_rebuild() — confirm the project's version.
TaskContext.update_forward_refs()
|
||||||
|
|
||||||
|
|
||||||
|
class PromptPlaceholder(BaseModel, ModelMixin):
    """A labelled content slot that is substituted into a prompt template."""

    label: str
    content: str
    summaryAllowed: bool = Field(default=False, description="Whether host may summarize content before sending to AI")
|
||||||
|
|
||||||
|
|
||||||
|
# i18n display labels (English / French) for the PromptPlaceholder model.
register_model_labels(
    "PromptPlaceholder",
    {"en": "Prompt Placeholder", "fr": "Espace réservé d'invite"},
    {
        "label": {"en": "Label", "fr": "Libellé"},
        "content": {"en": "Content", "fr": "Contenu"},
        "summaryAllowed": {"en": "Summary Allowed", "fr": "Résumé autorisé"},
    },
)
|
||||||
|
|
||||||
|
|
||||||
|
class PromptBundle(BaseModel, ModelMixin):
    """A prompt string together with the placeholders it references."""

    prompt: str
    placeholders: List[PromptPlaceholder] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# i18n display labels (English / French) for the PromptBundle model.
register_model_labels(
    "PromptBundle",
    {"en": "Prompt Bundle", "fr": "Lot d'invite"},
    {
        "prompt": {"en": "Prompt", "fr": "Invite"},
        "placeholders": {"en": "Placeholders", "fr": "Espaces réservés"},
    },
)
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional, Literal
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -12,8 +12,114 @@ class ContentPart(BaseModel):
|
||||||
metadata: Dict[str, Any] = Field(default_factory=dict, description="Arbitrary metadata for the part")
|
metadata: Dict[str, Any] = Field(default_factory=dict, description="Arbitrary metadata for the part")
|
||||||
|
|
||||||
|
|
||||||
class ExtractedContent(BaseModel):
|
class ContentExtracted(BaseModel):
|
||||||
id: str = Field(description="Extraction id or source document id")
|
id: str = Field(description="Extraction id or source document id")
|
||||||
parts: List[ContentPart] = Field(default_factory=list, description="List of extracted parts")
|
parts: List[ContentPart] = Field(default_factory=list, description="List of extracted parts")
|
||||||
summary: Optional[Dict[str, Any]] = Field(default=None, description="Optional extraction summary")
|
summary: Optional[Dict[str, Any]] = Field(default=None, description="Optional extraction summary")
|
||||||
|
|
||||||
|
|
||||||
|
class MergeStrategy(BaseModel):
    """Strategy configuration for merging content parts and AI results."""

    # Grouping configuration
    groupBy: str = Field(default="typeGroup", description="Field to group parts by (typeGroup, parentId, label, etc.)")

    # Ordering configuration
    orderBy: str = Field(default="id", description="Field to order parts within groups (id, order, pageIndex, etc.)")

    # Merge behavior
    mergeType: Literal["concatenate", "hierarchical", "intelligent"] = Field(default="concatenate", description="How to merge content within groups")

    # Size limits
    maxSize: Optional[int] = Field(default=None, description="Maximum size for merged content in bytes")

    # Type-specific merge settings
    textMerge: Optional[Dict[str, Any]] = Field(default=None, description="Text-specific merge settings (separator, formatting, etc.)")
    tableMerge: Optional[Dict[str, Any]] = Field(default=None, description="Table-specific merge settings (header handling, etc.)")
    structureMerge: Optional[Dict[str, Any]] = Field(default=None, description="Structure-specific merge settings (hierarchy, etc.)")

    # AI result merging
    aiResultMerge: Optional[Dict[str, Any]] = Field(default=None, description="AI result merging settings (prompt, context, etc.)")

    # Chunk handling
    preserveChunks: bool = Field(default=False, description="Whether to preserve individual chunks or merge them")
    chunkSeparator: str = Field(default="\n\n---\n\n", description="Separator between chunks when merging")

    # Metadata handling
    preserveMetadata: bool = Field(default=True, description="Whether to preserve metadata from original parts")
    metadataFields: Optional[List[str]] = Field(default=None, description="Specific metadata fields to preserve (None = all)")

    # Error handling
    onError: Literal["skip", "include", "fail"] = Field(default="skip", description="How to handle errors during merging")

    # Validation
    validateContent: bool = Field(default=True, description="Whether to validate content before merging")

    def getTypeSpecificSettings(self, typeGroup: str) -> Dict[str, Any]:
        """Return the merge settings dict for a content type, or {} when unset.

        Known types are "text", "table" and "structure"; any other value
        (or an unset/empty settings field) yields an empty dict.
        """
        per_type = {
            "text": self.textMerge,
            "table": self.tableMerge,
            "structure": self.structureMerge,
        }
        return per_type.get(typeGroup) or {}

    def shouldPreserveChunk(self, chunk: Dict[str, Any]) -> bool:
        """Decide whether an individual chunk should be kept instead of merged."""
        if not self.preserveChunks:
            return False
        # Under the "skip" error policy, chunks flagged with error metadata
        # are not preserved.
        if self.onError == "skip" and chunk.get("metadata", {}).get("error"):
            return False
        return True
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -19,8 +19,6 @@ class FileItem(BaseModel, ModelMixin):
|
||||||
|
|
||||||
def to_dict(self) -> Dict[str, Any]:
|
def to_dict(self) -> Dict[str, Any]:
|
||||||
return super().to_dict()
|
return super().to_dict()
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
"FileItem",
|
"FileItem",
|
||||||
{"en": "File Item", "fr": "Élément de fichier"},
|
{"en": "File Item", "fr": "Élément de fichier"},
|
||||||
|
|
@ -35,7 +33,6 @@ register_model_labels(
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class FilePreview(BaseModel, ModelMixin):
|
class FilePreview(BaseModel, ModelMixin):
|
||||||
content: Union[str, bytes] = Field(description="File content (text or binary)")
|
content: Union[str, bytes] = Field(description="File content (text or binary)")
|
||||||
mimeType: str = Field(description="MIME type of the file")
|
mimeType: str = Field(description="MIME type of the file")
|
||||||
|
|
@ -49,8 +46,6 @@ class FilePreview(BaseModel, ModelMixin):
|
||||||
if isinstance(data.get("content"), bytes):
|
if isinstance(data.get("content"), bytes):
|
||||||
data["content"] = base64.b64encode(data["content"]).decode("utf-8")
|
data["content"] = base64.b64encode(data["content"]).decode("utf-8")
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
"FilePreview",
|
"FilePreview",
|
||||||
{"en": "File Preview", "fr": "Aperçu du fichier"},
|
{"en": "File Preview", "fr": "Aperçu du fichier"},
|
||||||
|
|
@ -64,13 +59,10 @@ register_model_labels(
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class FileData(BaseModel, ModelMixin):
|
class FileData(BaseModel, ModelMixin):
|
||||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
|
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
|
||||||
data: str = Field(description="File data content")
|
data: str = Field(description="File data content")
|
||||||
base64Encoded: bool = Field(description="Whether the data is base64 encoded")
|
base64Encoded: bool = Field(description="Whether the data is base64 encoded")
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
"FileData",
|
"FileData",
|
||||||
{"en": "File Data", "fr": "Données de fichier"},
|
{"en": "File Data", "fr": "Données de fichier"},
|
||||||
|
|
@ -80,5 +72,3 @@ register_model_labels(
|
||||||
"base64Encoded": {"en": "Base64 Encoded", "fr": "Encodé en Base64"},
|
"base64Encoded": {"en": "Base64 Encoded", "fr": "Encodé en Base64"},
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,8 +14,6 @@ class DataNeutraliserConfig(BaseModel, ModelMixin):
|
||||||
namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", frontend_type="textarea", frontend_readonly=False, frontend_required=False)
|
namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", frontend_type="textarea", frontend_readonly=False, frontend_required=False)
|
||||||
sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", frontend_type="text", frontend_readonly=False, frontend_required=False)
|
sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", frontend_type="text", frontend_readonly=False, frontend_required=False)
|
||||||
sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", frontend_type="text", frontend_readonly=False, frontend_required=False)
|
sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", frontend_type="text", frontend_readonly=False, frontend_required=False)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
"DataNeutraliserConfig",
|
"DataNeutraliserConfig",
|
||||||
{"en": "Data Neutralization Config", "fr": "Configuration de neutralisation des données"},
|
{"en": "Data Neutralization Config", "fr": "Configuration de neutralisation des données"},
|
||||||
|
|
@ -30,7 +28,6 @@ register_model_labels(
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DataNeutralizerAttributes(BaseModel, ModelMixin):
|
class DataNeutralizerAttributes(BaseModel, ModelMixin):
|
||||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the attribute mapping (used as UID in neutralized files)", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the attribute mapping (used as UID in neutralized files)", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
||||||
mandateId: str = Field(description="ID of the mandate this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=True)
|
mandateId: str = Field(description="ID of the mandate this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=True)
|
||||||
|
|
@ -38,8 +35,6 @@ class DataNeutralizerAttributes(BaseModel, ModelMixin):
|
||||||
originalText: str = Field(description="Original text that was neutralized", frontend_type="text", frontend_readonly=True, frontend_required=True)
|
originalText: str = Field(description="Original text that was neutralized", frontend_type="text", frontend_readonly=True, frontend_required=True)
|
||||||
fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
||||||
patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", frontend_type="text", frontend_readonly=True, frontend_required=True)
|
patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", frontend_type="text", frontend_readonly=True, frontend_required=True)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
"DataNeutralizerAttributes",
|
"DataNeutralizerAttributes",
|
||||||
{"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"},
|
{"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"},
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,8 @@ class Token(BaseModel, ModelMixin):
|
||||||
None, description="Mandate ID for tenant scoping of the token"
|
None, description="Mandate ID for tenant scoping of the token"
|
||||||
)
|
)
|
||||||
|
|
||||||
model_config = ConfigDict(use_enum_values=True)
|
class Config:
|
||||||
|
use_enum_values = True
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,6 @@ class TicketFieldAttribute(BaseModel):
|
||||||
fieldName: str = Field(description="Human-readable field name")
|
fieldName: str = Field(description="Human-readable field name")
|
||||||
field: str = Field(description="Ticket field ID/key")
|
field: str = Field(description="Ticket field ID/key")
|
||||||
|
|
||||||
|
|
||||||
class TicketBase(ABC):
|
class TicketBase(ABC):
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def read_attributes(self) -> list[TicketFieldAttribute]: ...
|
async def read_attributes(self) -> list[TicketFieldAttribute]: ...
|
||||||
|
|
|
||||||
|
|
@ -13,20 +13,17 @@ class AuthAuthority(str, Enum):
|
||||||
GOOGLE = "google"
|
GOOGLE = "google"
|
||||||
MSFT = "msft"
|
MSFT = "msft"
|
||||||
|
|
||||||
|
|
||||||
class UserPrivilege(str, Enum):
|
class UserPrivilege(str, Enum):
|
||||||
SYSADMIN = "sysadmin"
|
SYSADMIN = "sysadmin"
|
||||||
ADMIN = "admin"
|
ADMIN = "admin"
|
||||||
USER = "user"
|
USER = "user"
|
||||||
|
|
||||||
|
|
||||||
class ConnectionStatus(str, Enum):
|
class ConnectionStatus(str, Enum):
|
||||||
ACTIVE = "active"
|
ACTIVE = "active"
|
||||||
EXPIRED = "expired"
|
EXPIRED = "expired"
|
||||||
REVOKED = "revoked"
|
REVOKED = "revoked"
|
||||||
PENDING = "pending"
|
PENDING = "pending"
|
||||||
|
|
||||||
|
|
||||||
class Mandate(BaseModel, ModelMixin):
|
class Mandate(BaseModel, ModelMixin):
|
||||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the mandate", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the mandate", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
||||||
name: str = Field(description="Name of the mandate", frontend_type="text", frontend_readonly=False, frontend_required=True)
|
name: str = Field(description="Name of the mandate", frontend_type="text", frontend_readonly=False, frontend_required=True)
|
||||||
|
|
@ -37,8 +34,6 @@ class Mandate(BaseModel, ModelMixin):
|
||||||
{"value": "it", "label": {"en": "Italiano", "fr": "Italien"}},
|
{"value": "it", "label": {"en": "Italiano", "fr": "Italien"}},
|
||||||
])
|
])
|
||||||
enabled: bool = Field(default=True, description="Indicates whether the mandate is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)
|
enabled: bool = Field(default=True, description="Indicates whether the mandate is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
"Mandate",
|
"Mandate",
|
||||||
{"en": "Mandate", "fr": "Mandat"},
|
{"en": "Mandate", "fr": "Mandat"},
|
||||||
|
|
@ -50,7 +45,6 @@ register_model_labels(
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class UserConnection(BaseModel, ModelMixin):
|
class UserConnection(BaseModel, ModelMixin):
|
||||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the connection", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the connection", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
||||||
userId: str = Field(description="ID of the user this connection belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
userId: str = Field(description="ID of the user this connection belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
||||||
|
|
@ -77,8 +71,6 @@ class UserConnection(BaseModel, ModelMixin):
|
||||||
{"value": "none", "label": {"en": "None", "fr": "Aucun"}},
|
{"value": "none", "label": {"en": "None", "fr": "Aucun"}},
|
||||||
])
|
])
|
||||||
tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
"UserConnection",
|
"UserConnection",
|
||||||
{"en": "User Connection", "fr": "Connexion utilisateur"},
|
{"en": "User Connection", "fr": "Connexion utilisateur"},
|
||||||
|
|
@ -98,7 +90,6 @@ register_model_labels(
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class User(BaseModel, ModelMixin):
|
class User(BaseModel, ModelMixin):
|
||||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the user", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the user", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
||||||
username: str = Field(description="Username for login", frontend_type="text", frontend_readonly=False, frontend_required=True)
|
username: str = Field(description="Username for login", frontend_type="text", frontend_readonly=False, frontend_required=True)
|
||||||
|
|
@ -122,8 +113,6 @@ class User(BaseModel, ModelMixin):
|
||||||
{"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}},
|
{"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}},
|
||||||
])
|
])
|
||||||
mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
"User",
|
"User",
|
||||||
{"en": "User", "fr": "Utilisateur"},
|
{"en": "User", "fr": "Utilisateur"},
|
||||||
|
|
@ -140,15 +129,10 @@ register_model_labels(
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class UserInDB(User):
|
class UserInDB(User):
|
||||||
hashedPassword: Optional[str] = Field(None, description="Hash of the user password")
|
hashedPassword: Optional[str] = Field(None, description="Hash of the user password")
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
"UserInDB",
|
"UserInDB",
|
||||||
{"en": "User Access", "fr": "Accès de l'utilisateur"},
|
{"en": "User Access", "fr": "Accès de l'utilisateur"},
|
||||||
{"hashedPassword": {"en": "Password hash", "fr": "Hachage de mot de passe"}},
|
{"hashedPassword": {"en": "Password hash", "fr": "Hachage de mot de passe"}},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,8 +10,6 @@ class Prompt(BaseModel, ModelMixin):
|
||||||
mandateId: str = Field(description="ID of the mandate this prompt belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
mandateId: str = Field(description="ID of the mandate this prompt belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
|
||||||
content: str = Field(description="Content of the prompt", frontend_type="textarea", frontend_readonly=False, frontend_required=True)
|
content: str = Field(description="Content of the prompt", frontend_type="textarea", frontend_readonly=False, frontend_required=True)
|
||||||
name: str = Field(description="Name of the prompt", frontend_type="text", frontend_readonly=False, frontend_required=True)
|
name: str = Field(description="Name of the prompt", frontend_type="text", frontend_readonly=False, frontend_required=True)
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
"Prompt",
|
"Prompt",
|
||||||
{"en": "Prompt", "fr": "Invite"},
|
{"en": "Prompt", "fr": "Invite"},
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,6 @@ class VoiceSettings(BaseModel, ModelMixin):
|
||||||
def to_dict(self) -> Dict[str, Any]:
|
def to_dict(self) -> Dict[str, Any]:
|
||||||
return super().to_dict()
|
return super().to_dict()
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
register_model_labels(
|
||||||
"VoiceSettings",
|
"VoiceSettings",
|
||||||
{"en": "Voice Settings", "fr": "Paramètres vocaux"},
|
{"en": "Voice Settings", "fr": "Paramètres vocaux"},
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,8 @@
|
||||||
"""Web-related modules"""
|
"""Web-related modules"""
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from pydantic import BaseModel, Field, HttpUrl
|
from pydantic import BaseModel, Field, HttpUrl
|
||||||
from typing import List, Optional, Literal
|
from typing import List, Optional, Literal, Dict, Any
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from modules.datamodels.datamodelWorkflow import ActionDocument, ActionResult
|
from modules.datamodels.datamodelChat import ActionDocument, ActionResult
|
||||||
|
|
||||||
|
|
||||||
WEB_SEARCH_MAX_QUERY_LENGTH: int = int(APP_CONFIG.get("Web_Search_MAX_QUERY_LENGTH", "400"))
|
WEB_SEARCH_MAX_QUERY_LENGTH: int = int(APP_CONFIG.get("Web_Search_MAX_QUERY_LENGTH", "400"))
|
||||||
|
|
@ -12,130 +10,133 @@ WEB_SEARCH_MAX_RESULTS: int = int(APP_CONFIG.get("Web_Search_MAX_RESULTS", "20")
|
||||||
WEB_SEARCH_MIN_RESULTS: int = int(APP_CONFIG.get("Web_Search_MIN_RESULTS", "1"))
|
WEB_SEARCH_MIN_RESULTS: int = int(APP_CONFIG.get("Web_Search_MIN_RESULTS", "1"))
|
||||||
|
|
||||||
|
|
||||||
class WebSearchRequest(BaseModel):
|
class WebResearchOptions(BaseModel):
|
||||||
query: str = Field(min_length=1, max_length=WEB_SEARCH_MAX_QUERY_LENGTH)
|
"""Advanced options for web research workflow"""
|
||||||
max_results: int = Field(ge=WEB_SEARCH_MIN_RESULTS, le=WEB_SEARCH_MAX_RESULTS)
|
max_pages: int = Field(default=10, ge=1, le=50, description="Maximum pages to crawl")
|
||||||
# Tavily tuning options
|
search_depth: Literal["basic", "advanced"] = Field(default="basic", description="Tavily search depth")
|
||||||
search_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
|
extract_depth: Literal["basic", "advanced"] = Field(default="advanced", description="Tavily extract depth")
|
||||||
time_range: Optional[Literal["d", "w", "m", "y"]] = Field(
|
format: Literal["text", "markdown"] = Field(default="markdown", description="Content format")
|
||||||
default=None, description="Limit results to last day/week/month/year"
|
return_report: bool = Field(default=True, description="Return formatted report or raw data")
|
||||||
)
|
pages_search_depth: int = Field(default=1, ge=1, le=5, description="How deep to crawl: 1=main pages only, 2=main+sub-pages, 3=main+sub+sub-sub, etc.")
|
||||||
topic: Optional[Literal["general", "news", "academic"]] = Field(default=None)
|
country: Optional[str] = Field(default=None, description="Country code for search bias")
|
||||||
include_domains: Optional[List[str]] = Field(default=None)
|
time_range: Optional[Literal["d", "w", "m", "y"]] = Field(default=None, description="Time range for search")
|
||||||
exclude_domains: Optional[List[str]] = Field(default=None)
|
topic: Optional[Literal["general", "news", "academic"]] = Field(default=None, description="Search topic")
|
||||||
language: Optional[str] = Field(default=None, description="ISO language code like 'en', 'de'")
|
language: Optional[str] = Field(default=None, description="Language code")
|
||||||
include_answer: Optional[bool] = Field(default=None)
|
include_answer: Optional[bool] = Field(default=None, description="Include AI answer")
|
||||||
include_raw_content: Optional[bool] = Field(default=None)
|
include_raw_content: Optional[bool] = Field(default=None, description="Include raw content")
|
||||||
|
|
||||||
|
class WebResearchRequest(BaseModel):
|
||||||
|
"""Main web research request"""
|
||||||
|
user_prompt: str = Field(min_length=1, max_length=WEB_SEARCH_MAX_QUERY_LENGTH, description="User's research question or prompt")
|
||||||
|
urls: Optional[List[str]] = Field(default=None, description="Specific URLs to crawl (optional)")
|
||||||
|
max_results: int = Field(default=5, ge=1, le=WEB_SEARCH_MAX_RESULTS, description="Max search results")
|
||||||
|
options: WebResearchOptions = Field(default_factory=WebResearchOptions, description="Advanced options")
|
||||||
|
|
||||||
class WebSearchResultItem(BaseModel):
|
class WebSearchResultItem(BaseModel):
|
||||||
"""Individual search result"""
|
"""Individual search result"""
|
||||||
|
|
||||||
title: str
|
title: str
|
||||||
url: HttpUrl
|
url: HttpUrl
|
||||||
|
raw_content: Optional[str] = Field(default=None, description="Raw HTML content")
|
||||||
|
|
||||||
|
class WebCrawlResultItem(BaseModel):
|
||||||
|
"""Individual crawl result"""
|
||||||
|
url: HttpUrl
|
||||||
|
content: str
|
||||||
|
|
||||||
|
class WebResearchDocumentData(BaseModel):
|
||||||
|
"""Complete web research results"""
|
||||||
|
user_prompt: str
|
||||||
|
websites_analyzed: int
|
||||||
|
additional_links_found: int
|
||||||
|
analysis_result: str
|
||||||
|
sources: List[WebSearchResultItem]
|
||||||
|
additional_links: List[str]
|
||||||
|
individual_content: Optional[Dict[str, str]] = None # URL -> content mapping
|
||||||
|
debug_info: Optional[Dict[str, Any]] = None
|
||||||
|
|
||||||
|
class WebResearchActionDocument(ActionDocument):
|
||||||
|
documentData: WebResearchDocumentData
|
||||||
|
|
||||||
|
class WebResearchActionResult(ActionResult):
|
||||||
|
documents: List[WebResearchActionDocument] = Field(default_factory=list)
|
||||||
|
|
||||||
|
# Legacy models for connector compatibility
|
||||||
|
|
||||||
class WebSearchDocumentData(BaseModel):
|
class WebSearchDocumentData(BaseModel):
|
||||||
"""Complete search (and scrape) results document"""
|
"""Search results document data"""
|
||||||
|
query: str
|
||||||
query: str = Field(min_length=1, max_length=WEB_SEARCH_MAX_QUERY_LENGTH)
|
results: List[WebSearchResultItem]
|
||||||
# Allow both WebSearchResultItem and WebScrapeResultItem to be stored here
|
|
||||||
results: List[object]
|
|
||||||
total_count: int
|
total_count: int
|
||||||
|
|
||||||
|
|
||||||
class WebSearchActionDocument(ActionDocument):
|
class WebSearchActionDocument(ActionDocument):
|
||||||
documentData: WebSearchDocumentData
|
documentData: WebSearchDocumentData
|
||||||
|
|
||||||
|
|
||||||
class WebSearchActionResult(ActionResult):
|
class WebSearchActionResult(ActionResult):
|
||||||
documents: List[WebSearchActionDocument] = Field(default_factory=list)
|
documents: List[WebSearchActionDocument] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
class WebSearchBase(ABC):
|
|
||||||
@abstractmethod
|
|
||||||
async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult: ...
|
|
||||||
|
|
||||||
|
|
||||||
# --- Web crawl ---
|
|
||||||
|
|
||||||
|
|
||||||
class WebCrawlRequest(BaseModel):
|
|
||||||
urls: List[HttpUrl]
|
|
||||||
# Tavily extract options
|
|
||||||
extract_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
|
|
||||||
format: Optional[Literal["text", "markdown"]] = Field(default=None)
|
|
||||||
|
|
||||||
|
|
||||||
class WebCrawlResultItem(BaseModel):
|
|
||||||
"""Individual crawl result"""
|
|
||||||
|
|
||||||
url: HttpUrl
|
|
||||||
content: str
|
|
||||||
|
|
||||||
|
|
||||||
class WebCrawlDocumentData(BaseModel):
|
class WebCrawlDocumentData(BaseModel):
|
||||||
"""Complete crawl results document"""
|
"""Crawl results document data"""
|
||||||
|
|
||||||
urls: List[HttpUrl]
|
urls: List[HttpUrl]
|
||||||
results: List[WebCrawlResultItem]
|
results: List[WebCrawlResultItem]
|
||||||
total_count: int
|
total_count: int
|
||||||
|
|
||||||
|
|
||||||
class WebCrawlActionDocument(ActionDocument):
|
class WebCrawlActionDocument(ActionDocument):
|
||||||
documentData: WebCrawlDocumentData = Field(
|
documentData: WebCrawlDocumentData
|
||||||
description="The data extracted from crawled URLs"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class WebCrawlActionResult(ActionResult):
|
class WebCrawlActionResult(ActionResult):
|
||||||
documents: List[WebCrawlActionDocument] = Field(default_factory=list)
|
documents: List[WebCrawlActionDocument] = Field(default_factory=list)
|
||||||
|
|
||||||
|
class WebScrapeDocumentData(BaseModel):
|
||||||
class WebCrawlBase(ABC):
|
"""Scrape results document data"""
|
||||||
@abstractmethod
|
query: str
|
||||||
async def crawl_urls(self, request: WebCrawlRequest) -> WebCrawlActionResult: ...
|
results: List[WebSearchResultItem]
|
||||||
|
total_count: int
|
||||||
|
|
||||||
# --- Web scrape ---
|
|
||||||
|
|
||||||
|
|
||||||
class WebScrapeRequest(BaseModel):
|
|
||||||
query: str = Field(min_length=1, max_length=WEB_SEARCH_MAX_QUERY_LENGTH)
|
|
||||||
max_results: int = Field(ge=WEB_SEARCH_MIN_RESULTS, le=WEB_SEARCH_MAX_RESULTS)
|
|
||||||
# Pass-through search options
|
|
||||||
search_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
|
|
||||||
time_range: Optional[Literal["d", "w", "m", "y"]] = Field(default=None)
|
|
||||||
topic: Optional[Literal["general", "news", "academic"]] = Field(default=None)
|
|
||||||
include_domains: Optional[List[str]] = Field(default=None)
|
|
||||||
exclude_domains: Optional[List[str]] = Field(default=None)
|
|
||||||
language: Optional[str] = Field(default=None)
|
|
||||||
include_answer: Optional[bool] = Field(default=None)
|
|
||||||
include_raw_content: Optional[bool] = Field(default=None)
|
|
||||||
# Extract options
|
|
||||||
extract_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
|
|
||||||
format: Optional[Literal["text", "markdown"]] = Field(default=None)
|
|
||||||
|
|
||||||
|
|
||||||
class WebScrapeResultItem(BaseModel):
|
|
||||||
"""Individual scrape result"""
|
|
||||||
|
|
||||||
url: HttpUrl
|
|
||||||
content: str
|
|
||||||
|
|
||||||
|
|
||||||
class WebScrapeActionDocument(ActionDocument):
|
class WebScrapeActionDocument(ActionDocument):
|
||||||
documentData: WebSearchDocumentData = Field(
|
documentData: WebScrapeDocumentData
|
||||||
description="The data extracted from scraped URLs"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class WebScrapeActionResult(ActionResult):
|
class WebScrapeActionResult(ActionResult):
|
||||||
documents: List[WebScrapeActionDocument] = Field(default_factory=list)
|
documents: List[WebScrapeActionDocument] = Field(default_factory=list)
|
||||||
|
|
||||||
|
class WebSearchRequest(BaseModel):
|
||||||
|
"""Search request for Tavily"""
|
||||||
|
query: str
|
||||||
|
max_results: int = 5
|
||||||
|
search_depth: Optional[Literal["basic", "advanced"]] = None
|
||||||
|
time_range: Optional[Literal["d", "w", "m", "y"]] = None
|
||||||
|
topic: Optional[Literal["general", "news", "academic"]] = None
|
||||||
|
include_domains: Optional[List[str]] = None
|
||||||
|
exclude_domains: Optional[List[str]] = None
|
||||||
|
language: Optional[str] = None
|
||||||
|
include_answer: Optional[bool] = None
|
||||||
|
include_raw_content: Optional[bool] = None
|
||||||
|
auto_parameters: Optional[bool] = None
|
||||||
|
country: Optional[str] = None
|
||||||
|
|
||||||
class WebScrapeBase(ABC):
|
class WebCrawlRequest(BaseModel):
|
||||||
@abstractmethod
|
"""Crawl request for Tavily"""
|
||||||
async def scrape(self, request: WebScrapeRequest) -> WebScrapeActionResult: ...
|
urls: List[HttpUrl]
|
||||||
|
extract_depth: Optional[Literal["basic", "advanced"]] = None
|
||||||
|
format: Optional[Literal["text", "markdown"]] = None
|
||||||
|
|
||||||
|
class WebScrapeRequest(BaseModel):
|
||||||
|
"""Scrape request for Tavily"""
|
||||||
|
query: str
|
||||||
|
max_results: int = 5
|
||||||
|
search_depth: Optional[Literal["basic", "advanced"]] = None
|
||||||
|
time_range: Optional[Literal["d", "w", "m", "y"]] = None
|
||||||
|
topic: Optional[Literal["general", "news", "academic"]] = None
|
||||||
|
include_domains: Optional[List[str]] = None
|
||||||
|
exclude_domains: Optional[List[str]] = None
|
||||||
|
language: Optional[str] = None
|
||||||
|
include_answer: Optional[bool] = None
|
||||||
|
include_raw_content: Optional[bool] = None
|
||||||
|
auto_parameters: Optional[bool] = None
|
||||||
|
country: Optional[str] = None
|
||||||
|
extract_depth: Optional[Literal["basic", "advanced"]] = None
|
||||||
|
format: Optional[Literal["text", "markdown"]] = None
|
||||||
|
|
||||||
|
class WebScrapeResultItem(BaseModel):
|
||||||
|
"""Individual scrape result"""
|
||||||
|
url: HttpUrl
|
||||||
|
content: str
|
||||||
|
|
|
||||||
|
|
@ -1,474 +0,0 @@
|
||||||
"""Workflow-related base datamodels and step/task structures."""
|
|
||||||
|
|
||||||
from enum import Enum
|
|
||||||
from typing import List, Dict, Any, Optional
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
from modules.shared.attributeUtils import register_model_labels, ModelMixin
|
|
||||||
|
|
||||||
|
|
||||||
class ActionDocument(BaseModel, ModelMixin):
|
|
||||||
"""Clear document structure for action results"""
|
|
||||||
|
|
||||||
documentName: str = Field(description="Name of the document")
|
|
||||||
documentData: Any = Field(description="Content/data of the document")
|
|
||||||
mimeType: str = Field(description="MIME type of the document")
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"ActionDocument",
|
|
||||||
{"en": "Action Document", "fr": "Document d'action"},
|
|
||||||
{
|
|
||||||
"documentName": {"en": "Document Name", "fr": "Nom du document"},
|
|
||||||
"documentData": {"en": "Document Data", "fr": "Données du document"},
|
|
||||||
"mimeType": {"en": "MIME Type", "fr": "Type MIME"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ActionResult(BaseModel, ModelMixin):
|
|
||||||
"""Clean action result with documents as primary output
|
|
||||||
|
|
||||||
IMPORTANT: Action methods should NOT set resultLabel in their return value.
|
|
||||||
The resultLabel is managed by the action handler using the action's execResultLabel
|
|
||||||
from the action plan. This ensures consistent document routing throughout the workflow.
|
|
||||||
"""
|
|
||||||
|
|
||||||
success: bool = Field(description="Whether execution succeeded")
|
|
||||||
error: Optional[str] = Field(None, description="Error message if failed")
|
|
||||||
documents: List[ActionDocument] = Field(
|
|
||||||
default_factory=list, description="Document outputs"
|
|
||||||
)
|
|
||||||
resultLabel: Optional[str] = Field(
|
|
||||||
None,
|
|
||||||
description="Label for document routing (set by action handler, not by action methods)",
|
|
||||||
)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def isSuccess(cls, documents: List[ActionDocument] = None) -> "ActionResult":
|
|
||||||
return cls(success=True, documents=documents or [])
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def isFailure(
|
|
||||||
cls, error: str, documents: List[ActionDocument] = None
|
|
||||||
) -> "ActionResult":
|
|
||||||
return cls(success=False, documents=documents or [], error=error)
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"ActionResult",
|
|
||||||
{"en": "Action Result", "fr": "Résultat de l'action"},
|
|
||||||
{
|
|
||||||
"success": {"en": "Success", "fr": "Succès"},
|
|
||||||
"error": {"en": "Error", "fr": "Erreur"},
|
|
||||||
"documents": {"en": "Documents", "fr": "Documents"},
|
|
||||||
"resultLabel": {"en": "Result Label", "fr": "Étiquette du résultat"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ActionSelection(BaseModel, ModelMixin):
|
|
||||||
method: str = Field(description="Method to execute (e.g., web, document, ai)")
|
|
||||||
name: str = Field(
|
|
||||||
description="Action name within the method (e.g., search, extract)"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"ActionSelection",
|
|
||||||
{"en": "Action Selection", "fr": "Sélection d'action"},
|
|
||||||
{
|
|
||||||
"method": {"en": "Method", "fr": "Méthode"},
|
|
||||||
"name": {"en": "Action Name", "fr": "Nom de l'action"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ActionParameters(BaseModel, ModelMixin):
|
|
||||||
parameters: Dict[str, Any] = Field(
|
|
||||||
default_factory=dict, description="Parameters to execute the selected action"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"ActionParameters",
|
|
||||||
{"en": "Action Parameters", "fr": "Paramètres d'action"},
|
|
||||||
{
|
|
||||||
"parameters": {"en": "Parameters", "fr": "Paramètres"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ObservationPreview(BaseModel, ModelMixin):
|
|
||||||
name: str = Field(description="Document name or URL label")
|
|
||||||
mime: str = Field(description="MIME type or kind")
|
|
||||||
snippet: str = Field(description="Short snippet or summary")
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"ObservationPreview",
|
|
||||||
{"en": "Observation Preview", "fr": "Aperçu d'observation"},
|
|
||||||
{
|
|
||||||
"name": {"en": "Name", "fr": "Nom"},
|
|
||||||
"mime": {"en": "MIME", "fr": "MIME"},
|
|
||||||
"snippet": {"en": "Snippet", "fr": "Extrait"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class Observation(BaseModel, ModelMixin):
|
|
||||||
success: bool = Field(description="Action execution success flag")
|
|
||||||
resultLabel: str = Field(description="Deterministic label for produced documents")
|
|
||||||
documentsCount: int = Field(description="Number of produced documents")
|
|
||||||
previews: List[ObservationPreview] = Field(
|
|
||||||
default_factory=list, description="Compact previews of outputs"
|
|
||||||
)
|
|
||||||
notes: List[str] = Field(
|
|
||||||
default_factory=list, description="Short notes or key facts"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"Observation",
|
|
||||||
{"en": "Observation", "fr": "Observation"},
|
|
||||||
{
|
|
||||||
"success": {"en": "Success", "fr": "Succès"},
|
|
||||||
"resultLabel": {"en": "Result Label", "fr": "Étiquette du résultat"},
|
|
||||||
"documentsCount": {"en": "Documents Count", "fr": "Nombre de documents"},
|
|
||||||
"previews": {"en": "Previews", "fr": "Aperçus"},
|
|
||||||
"notes": {"en": "Notes", "fr": "Notes"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TaskStatus(str, Enum):
|
|
||||||
"""Task status enumeration."""
|
|
||||||
|
|
||||||
PENDING = "pending"
|
|
||||||
RUNNING = "running"
|
|
||||||
COMPLETED = "completed"
|
|
||||||
FAILED = "failed"
|
|
||||||
CANCELLED = "cancelled"
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"TaskStatus",
|
|
||||||
{"en": "Task Status", "fr": "Statut de la tâche"},
|
|
||||||
{
|
|
||||||
"PENDING": {"en": "Pending", "fr": "En attente"},
|
|
||||||
"RUNNING": {"en": "Running", "fr": "En cours"},
|
|
||||||
"COMPLETED": {"en": "Completed", "fr": "Terminé"},
|
|
||||||
"FAILED": {"en": "Failed", "fr": "Échec"},
|
|
||||||
"CANCELLED": {"en": "Cancelled", "fr": "Annulé"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class DocumentExchange(BaseModel, ModelMixin):
|
|
||||||
documentsLabel: str = Field(description="Label for the set of documents")
|
|
||||||
documents: List[str] = Field(
|
|
||||||
default_factory=list, description="List of document references"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"DocumentExchange",
|
|
||||||
{"en": "Document Exchange", "fr": "Échange de documents"},
|
|
||||||
{
|
|
||||||
"documentsLabel": {"en": "Documents Label", "fr": "Label des documents"},
|
|
||||||
"documents": {"en": "Documents", "fr": "Documents"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TaskAction(BaseModel, ModelMixin):
|
|
||||||
id: str = Field(..., description="Action ID")
|
|
||||||
execMethod: str = Field(..., description="Method to execute")
|
|
||||||
execAction: str = Field(..., description="Action to perform")
|
|
||||||
execParameters: Dict[str, Any] = Field(
|
|
||||||
default_factory=dict, description="Action parameters"
|
|
||||||
)
|
|
||||||
execResultLabel: Optional[str] = Field(
|
|
||||||
None, description="Label for the set of result documents"
|
|
||||||
)
|
|
||||||
expectedDocumentFormats: Optional[List[Dict[str, str]]] = Field(
|
|
||||||
None, description="Expected document formats (optional)"
|
|
||||||
)
|
|
||||||
userMessage: Optional[str] = Field(
|
|
||||||
None, description="User-friendly message in user's language"
|
|
||||||
)
|
|
||||||
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status")
|
|
||||||
error: Optional[str] = Field(None, description="Error message if action failed")
|
|
||||||
retryCount: int = Field(default=0, description="Number of retries attempted")
|
|
||||||
retryMax: int = Field(default=3, description="Maximum number of retries")
|
|
||||||
processingTime: Optional[float] = Field(
|
|
||||||
None, description="Processing time in seconds"
|
|
||||||
)
|
|
||||||
timestamp: float = Field(
|
|
||||||
..., description="When the action was executed (UTC timestamp in seconds)"
|
|
||||||
)
|
|
||||||
result: Optional[str] = Field(None, description="Result of the action")
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"TaskAction",
|
|
||||||
{"en": "Task Action", "fr": "Action de tâche"},
|
|
||||||
{
|
|
||||||
"id": {"en": "Action ID", "fr": "ID de l'action"},
|
|
||||||
"execMethod": {"en": "Method", "fr": "Méthode"},
|
|
||||||
"execAction": {"en": "Action", "fr": "Action"},
|
|
||||||
"execParameters": {"en": "Parameters", "fr": "Paramètres"},
|
|
||||||
"execResultLabel": {"en": "Result Label", "fr": "Label du résultat"},
|
|
||||||
"expectedDocumentFormats": {
|
|
||||||
"en": "Expected Document Formats",
|
|
||||||
"fr": "Formats de documents attendus",
|
|
||||||
},
|
|
||||||
"userMessage": {"en": "User Message", "fr": "Message utilisateur"},
|
|
||||||
"status": {"en": "Status", "fr": "Statut"},
|
|
||||||
"error": {"en": "Error", "fr": "Erreur"},
|
|
||||||
"retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"},
|
|
||||||
"retryMax": {"en": "Max Retries", "fr": "Tentatives max"},
|
|
||||||
"processingTime": {"en": "Processing Time", "fr": "Temps de traitement"},
|
|
||||||
"timestamp": {"en": "Timestamp", "fr": "Horodatage"},
|
|
||||||
"result": {"en": "Result", "fr": "Résultat"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TaskResult(BaseModel, ModelMixin):
|
|
||||||
taskId: str = Field(..., description="Task ID")
|
|
||||||
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status")
|
|
||||||
success: bool = Field(..., description="Whether the task was successful")
|
|
||||||
feedback: Optional[str] = Field(None, description="Task feedback message")
|
|
||||||
error: Optional[str] = Field(None, description="Error message if task failed")
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"TaskResult",
|
|
||||||
{"en": "Task Result", "fr": "Résultat de tâche"},
|
|
||||||
{
|
|
||||||
"taskId": {"en": "Task ID", "fr": "ID de la tâche"},
|
|
||||||
"status": {"en": "Status", "fr": "Statut"},
|
|
||||||
"success": {"en": "Success", "fr": "Succès"},
|
|
||||||
"feedback": {"en": "Feedback", "fr": "Retour"},
|
|
||||||
"error": {"en": "Error", "fr": "Erreur"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TaskItem(BaseModel, ModelMixin):
|
|
||||||
id: str = Field(..., description="Task ID")
|
|
||||||
workflowId: str = Field(..., description="Workflow ID")
|
|
||||||
userInput: str = Field(..., description="User input that triggered the task")
|
|
||||||
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status")
|
|
||||||
error: Optional[str] = Field(None, description="Error message if task failed")
|
|
||||||
startedAt: Optional[float] = Field(
|
|
||||||
None, description="When the task started (UTC timestamp in seconds)"
|
|
||||||
)
|
|
||||||
finishedAt: Optional[float] = Field(
|
|
||||||
None, description="When the task finished (UTC timestamp in seconds)"
|
|
||||||
)
|
|
||||||
actionList: List[TaskAction] = Field(
|
|
||||||
default_factory=list, description="List of actions to execute"
|
|
||||||
)
|
|
||||||
retryCount: int = Field(default=0, description="Number of retries attempted")
|
|
||||||
retryMax: int = Field(default=3, description="Maximum number of retries")
|
|
||||||
rollbackOnFailure: bool = Field(
|
|
||||||
default=True, description="Whether to rollback on failure"
|
|
||||||
)
|
|
||||||
dependencies: List[str] = Field(
|
|
||||||
default_factory=list, description="List of task IDs this task depends on"
|
|
||||||
)
|
|
||||||
feedback: Optional[str] = Field(None, description="Task feedback message")
|
|
||||||
processingTime: Optional[float] = Field(
|
|
||||||
None, description="Total processing time in seconds"
|
|
||||||
)
|
|
||||||
resultLabels: Optional[Dict[str, Any]] = Field(
|
|
||||||
default_factory=dict, description="Map of result labels to their values"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"TaskItem",
|
|
||||||
{"en": "Task", "fr": "Tâche"},
|
|
||||||
{
|
|
||||||
"id": {"en": "Task ID", "fr": "ID de la tâche"},
|
|
||||||
"workflowId": {"en": "Workflow ID", "fr": "ID du workflow"},
|
|
||||||
"userInput": {"en": "User Input", "fr": "Entrée utilisateur"},
|
|
||||||
"status": {"en": "Status", "fr": "Statut"},
|
|
||||||
"error": {"en": "Error", "fr": "Erreur"},
|
|
||||||
"startedAt": {"en": "Started At", "fr": "Démarré à"},
|
|
||||||
"finishedAt": {"en": "Finished At", "fr": "Terminé à"},
|
|
||||||
"actionList": {"en": "Actions", "fr": "Actions"},
|
|
||||||
"retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"},
|
|
||||||
"retryMax": {"en": "Max Retries", "fr": "Tentatives max"},
|
|
||||||
"processingTime": {"en": "Processing Time", "fr": "Temps de traitement"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TaskStep(BaseModel, ModelMixin):
|
|
||||||
id: str
|
|
||||||
objective: str
|
|
||||||
dependencies: Optional[list[str]] = Field(default_factory=list)
|
|
||||||
success_criteria: Optional[list[str]] = Field(default_factory=list)
|
|
||||||
estimated_complexity: Optional[str] = None
|
|
||||||
userMessage: Optional[str] = Field(
|
|
||||||
None, description="User-friendly message in user's language"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"TaskStep",
|
|
||||||
{"en": "Task Step", "fr": "Étape de tâche"},
|
|
||||||
{
|
|
||||||
"id": {"en": "ID", "fr": "ID"},
|
|
||||||
"objective": {"en": "Objective", "fr": "Objectif"},
|
|
||||||
"dependencies": {"en": "Dependencies", "fr": "Dépendances"},
|
|
||||||
"success_criteria": {"en": "Success Criteria", "fr": "Critères de succès"},
|
|
||||||
"estimated_complexity": {
|
|
||||||
"en": "Estimated Complexity",
|
|
||||||
"fr": "Complexité estimée",
|
|
||||||
},
|
|
||||||
"userMessage": {"en": "User Message", "fr": "Message utilisateur"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TaskHandover(BaseModel, ModelMixin):
|
|
||||||
taskId: str = Field(description="Target task ID")
|
|
||||||
sourceTask: Optional[str] = Field(None, description="Source task ID")
|
|
||||||
inputDocuments: List[DocumentExchange] = Field(
|
|
||||||
default_factory=list, description="Available input documents"
|
|
||||||
)
|
|
||||||
outputDocuments: List[DocumentExchange] = Field(
|
|
||||||
default_factory=list, description="Produced output documents"
|
|
||||||
)
|
|
||||||
context: Dict[str, Any] = Field(default_factory=dict, description="Task context")
|
|
||||||
previousResults: List[str] = Field(
|
|
||||||
default_factory=list, description="Previous result summaries"
|
|
||||||
)
|
|
||||||
improvements: List[str] = Field(
|
|
||||||
default_factory=list, description="Improvement suggestions"
|
|
||||||
)
|
|
||||||
workflowSummary: Optional[str] = Field(
|
|
||||||
None, description="Summarized workflow context"
|
|
||||||
)
|
|
||||||
messageHistory: List[str] = Field(
|
|
||||||
default_factory=list, description="Key message summaries"
|
|
||||||
)
|
|
||||||
timestamp: float = Field(
|
|
||||||
..., description="When the handover was created (UTC timestamp in seconds)"
|
|
||||||
)
|
|
||||||
handoverType: str = Field(
|
|
||||||
default="task", description="Type of handover: task, phase, or workflow"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"TaskHandover",
|
|
||||||
{"en": "Task Handover", "fr": "Transfert de tâche"},
|
|
||||||
{
|
|
||||||
"taskId": {"en": "Task ID", "fr": "ID de la tâche"},
|
|
||||||
"sourceTask": {"en": "Source Task", "fr": "Tâche source"},
|
|
||||||
"inputDocuments": {"en": "Input Documents", "fr": "Documents d'entrée"},
|
|
||||||
"outputDocuments": {"en": "Output Documents", "fr": "Documents de sortie"},
|
|
||||||
"context": {"en": "Context", "fr": "Contexte"},
|
|
||||||
"previousResults": {"en": "Previous Results", "fr": "Résultats précédents"},
|
|
||||||
"improvements": {"en": "Improvements", "fr": "Améliorations"},
|
|
||||||
"workflowSummary": {"en": "Workflow Summary", "fr": "Résumé du workflow"},
|
|
||||||
"messageHistory": {"en": "Message History", "fr": "Historique des messages"},
|
|
||||||
"timestamp": {"en": "Timestamp", "fr": "Horodatage"},
|
|
||||||
"handoverType": {"en": "Handover Type", "fr": "Type de transfert"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TaskContext(BaseModel, ModelMixin):
|
|
||||||
task_step: TaskStep
|
|
||||||
workflow: Optional["ChatWorkflow"] = None
|
|
||||||
workflow_id: Optional[str] = None
|
|
||||||
available_documents: Optional[str] = "No documents available"
|
|
||||||
available_connections: Optional[list[str]] = Field(default_factory=list)
|
|
||||||
previous_results: Optional[list[str]] = Field(default_factory=list)
|
|
||||||
previous_handover: Optional[TaskHandover] = None
|
|
||||||
improvements: Optional[list[str]] = Field(default_factory=list)
|
|
||||||
retry_count: Optional[int] = 0
|
|
||||||
previous_action_results: Optional[list] = Field(default_factory=list)
|
|
||||||
previous_review_result: Optional[dict] = None
|
|
||||||
is_regeneration: Optional[bool] = False
|
|
||||||
failure_patterns: Optional[list[str]] = Field(default_factory=list)
|
|
||||||
failed_actions: Optional[list] = Field(default_factory=list)
|
|
||||||
successful_actions: Optional[list] = Field(default_factory=list)
|
|
||||||
criteria_progress: Optional[dict] = None
|
|
||||||
|
|
||||||
def getDocumentReferences(self) -> List[str]:
|
|
||||||
docs = []
|
|
||||||
if self.previous_handover:
|
|
||||||
for doc_exchange in self.previous_handover.inputDocuments:
|
|
||||||
docs.extend(doc_exchange.documents)
|
|
||||||
return list(set(docs))
|
|
||||||
|
|
||||||
def addImprovement(self, improvement: str) -> None:
|
|
||||||
if improvement not in (self.improvements or []):
|
|
||||||
if self.improvements is None:
|
|
||||||
self.improvements = []
|
|
||||||
self.improvements.append(improvement)
|
|
||||||
|
|
||||||
|
|
||||||
class ReviewContext(BaseModel, ModelMixin):
|
|
||||||
task_step: TaskStep
|
|
||||||
task_actions: Optional[list] = Field(default_factory=list)
|
|
||||||
action_results: Optional[list] = Field(default_factory=list)
|
|
||||||
step_result: Optional[dict] = Field(default_factory=dict)
|
|
||||||
workflow_id: Optional[str] = None
|
|
||||||
previous_results: Optional[list[str]] = Field(default_factory=list)
|
|
||||||
|
|
||||||
|
|
||||||
class ReviewResult(BaseModel, ModelMixin):
|
|
||||||
status: str
|
|
||||||
reason: Optional[str] = None
|
|
||||||
improvements: Optional[list[str]] = Field(default_factory=list)
|
|
||||||
quality_score: Optional[int] = 5
|
|
||||||
missing_outputs: Optional[list[str]] = Field(default_factory=list)
|
|
||||||
met_criteria: Optional[list[str]] = Field(default_factory=list)
|
|
||||||
unmet_criteria: Optional[list[str]] = Field(default_factory=list)
|
|
||||||
confidence: Optional[float] = 0.5
|
|
||||||
userMessage: Optional[str] = Field(
|
|
||||||
None, description="User-friendly message in user's language"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"ReviewResult",
|
|
||||||
{"en": "Review Result", "fr": "Résultat de l'évaluation"},
|
|
||||||
{
|
|
||||||
"status": {"en": "Status", "fr": "Statut"},
|
|
||||||
"reason": {"en": "Reason", "fr": "Raison"},
|
|
||||||
"improvements": {"en": "Improvements", "fr": "Améliorations"},
|
|
||||||
"quality_score": {"en": "Quality Score", "fr": "Score de qualité"},
|
|
||||||
"missing_outputs": {"en": "Missing Outputs", "fr": "Sorties manquantes"},
|
|
||||||
"met_criteria": {"en": "Met Criteria", "fr": "Critères respectés"},
|
|
||||||
"unmet_criteria": {"en": "Unmet Criteria", "fr": "Critères non respectés"},
|
|
||||||
"confidence": {"en": "Confidence", "fr": "Confiance"},
|
|
||||||
"userMessage": {"en": "User Message", "fr": "Message utilisateur"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TaskPlan(BaseModel, ModelMixin):
|
|
||||||
overview: str
|
|
||||||
tasks: list[TaskStep]
|
|
||||||
userMessage: Optional[str] = Field(
|
|
||||||
None, description="Overall user-friendly message for the task plan"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
register_model_labels(
|
|
||||||
"TaskPlan",
|
|
||||||
{"en": "Task Plan", "fr": "Plan de tâches"},
|
|
||||||
{
|
|
||||||
"overview": {"en": "Overview", "fr": "Aperçu"},
|
|
||||||
"tasks": {"en": "Tasks", "fr": "Tâches"},
|
|
||||||
"userMessage": {"en": "User Message", "fr": "Message utilisateur"},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
@ -1,10 +1,12 @@
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Any, List, Union
|
from typing import Dict, Any, List, Union, Tuple, Optional
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
from modules.connectors.connectorAiOpenai import AiOpenai
|
from modules.connectors.connectorAiOpenai import AiOpenai
|
||||||
from modules.connectors.connectorAiAnthropic import AiAnthropic
|
from modules.connectors.connectorAiAnthropic import AiAnthropic
|
||||||
from modules.connectors.connectorAiLangdoc import AiLangdoc
|
from modules.connectors.connectorAiPerplexity import AiPerplexity
|
||||||
from modules.connectors.connectorAiTavily import ConnectorWeb
|
from modules.connectors.connectorAiTavily import ConnectorWeb
|
||||||
from modules.datamodels.datamodelAi import (
|
from modules.datamodels.datamodelAi import (
|
||||||
AiCallOptions,
|
AiCallOptions,
|
||||||
|
|
@ -18,26 +20,14 @@ from modules.datamodels.datamodelAi import (
|
||||||
PROCESSING_MODE_PRIORITY_MAPPING
|
PROCESSING_MODE_PRIORITY_MAPPING
|
||||||
)
|
)
|
||||||
from modules.datamodels.datamodelWeb import (
|
from modules.datamodels.datamodelWeb import (
|
||||||
WebCrawlActionResult,
|
WebResearchRequest,
|
||||||
WebCrawlActionDocument,
|
WebResearchActionResult,
|
||||||
WebCrawlDocumentData,
|
|
||||||
WebCrawlRequest,
|
|
||||||
WebCrawlResultItem,
|
|
||||||
WebScrapeActionResult,
|
|
||||||
WebScrapeActionDocument,
|
|
||||||
WebSearchDocumentData as WebScrapeDocumentData,
|
|
||||||
WebScrapeRequest,
|
|
||||||
WebScrapeResultItem,
|
|
||||||
WebSearchActionResult,
|
|
||||||
WebSearchActionDocument,
|
|
||||||
WebSearchDocumentData,
|
|
||||||
WebSearchRequest,
|
|
||||||
WebSearchResultItem,
|
WebSearchResultItem,
|
||||||
|
WebCrawlResultItem,
|
||||||
|
WebSearchRequest,
|
||||||
|
WebCrawlRequest,
|
||||||
)
|
)
|
||||||
from modules.datamodels.datamodelWorkflow import ActionDocument
|
from modules.datamodels.datamodelChat import ActionDocument
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
# Comprehensive model registry with capability tags and function mapping
|
# Comprehensive model registry with capability tags and function mapping
|
||||||
|
|
@ -52,8 +42,8 @@ aiModels: Dict[str, Dict[str, Any]] = {
|
||||||
"costPer1kTokensOutput": 0.06,
|
"costPer1kTokensOutput": 0.06,
|
||||||
"speedRating": 8,
|
"speedRating": 8,
|
||||||
"qualityRating": 9,
|
"qualityRating": 9,
|
||||||
"capabilities": ["text_generation", "chat", "reasoning"],
|
"capabilities": ["text_generation", "chat", "reasoning", "analysis"],
|
||||||
"tags": ["text", "chat", "reasoning", "general"]
|
"tags": ["text", "chat", "reasoning", "analysis", "general"]
|
||||||
},
|
},
|
||||||
"openai_callAiBasic_gpt35": {
|
"openai_callAiBasic_gpt35": {
|
||||||
"connector": "openai",
|
"connector": "openai",
|
||||||
|
|
@ -118,90 +108,66 @@ aiModels: Dict[str, Dict[str, Any]] = {
|
||||||
"tags": ["image", "vision", "multimodal", "high_quality"]
|
"tags": ["image", "vision", "multimodal", "high_quality"]
|
||||||
},
|
},
|
||||||
|
|
||||||
# LangDoc Models
|
# Perplexity Models
|
||||||
"langdoc_callAiBasic": {
|
"perplexity_callAiBasic": {
|
||||||
"connector": "langdoc",
|
"connector": "perplexity",
|
||||||
"function": "callAiBasic",
|
"function": "callAiBasic",
|
||||||
"llmName": "gpt-4o",
|
"llmName": "llama-3.1-sonar-large-128k-online",
|
||||||
"contextLength": 128000,
|
"contextLength": 128000,
|
||||||
"costPer1kTokens": 0.02,
|
"costPer1kTokens": 0.005,
|
||||||
"costPer1kTokensOutput": 0.04,
|
"costPer1kTokensOutput": 0.005,
|
||||||
"speedRating": 8,
|
"speedRating": 8,
|
||||||
"qualityRating": 9,
|
"qualityRating": 8,
|
||||||
"capabilities": ["text_generation", "chat", "reasoning"],
|
"capabilities": ["text_generation", "chat", "reasoning", "web_search"],
|
||||||
"tags": ["text", "chat", "reasoning", "general", "cost_effective"]
|
"tags": ["text", "chat", "reasoning", "web_search", "cost_effective"]
|
||||||
},
|
},
|
||||||
"langdoc_callAiImage": {
|
"perplexity_callAiWithWebSearch": {
|
||||||
"connector": "langdoc",
|
"connector": "perplexity",
|
||||||
"function": "callAiImage",
|
"function": "callAiWithWebSearch",
|
||||||
"llmName": "gpt-4o",
|
"llmName": "sonar-pro",
|
||||||
"contextLength": 128000,
|
"contextLength": 128000,
|
||||||
"costPer1kTokens": 0.02,
|
"costPer1kTokens": 0.01,
|
||||||
"costPer1kTokensOutput": 0.04,
|
"costPer1kTokensOutput": 0.01,
|
||||||
"speedRating": 7,
|
"speedRating": 7,
|
||||||
"qualityRating": 9,
|
"qualityRating": 9,
|
||||||
"capabilities": ["image_analysis", "vision", "multimodal"],
|
"capabilities": ["text_generation", "web_search", "research"],
|
||||||
"tags": ["image", "vision", "multimodal", "cost_effective"]
|
"tags": ["text", "web_search", "research", "high_quality"]
|
||||||
},
|
},
|
||||||
"langdoc_generateImage": {
|
"perplexity_researchTopic": {
|
||||||
"connector": "langdoc",
|
"connector": "perplexity",
|
||||||
"function": "generateImage",
|
"function": "researchTopic",
|
||||||
"llmName": "dall-e-3",
|
"llmName": "mistral-7b-instruct",
|
||||||
"contextLength": 0,
|
"contextLength": 32000,
|
||||||
"costPer1kTokens": 0.04,
|
"costPer1kTokens": 0.002,
|
||||||
"costPer1kTokensOutput": 0.0,
|
"costPer1kTokensOutput": 0.002,
|
||||||
"speedRating": 6,
|
"speedRating": 8,
|
||||||
"qualityRating": 9,
|
|
||||||
"capabilities": ["image_generation", "art", "visual_creation"],
|
|
||||||
"tags": ["image_generation", "art", "visual", "cost_effective"]
|
|
||||||
},
|
|
||||||
"langdoc_generateImageWithVariations": {
|
|
||||||
"connector": "langdoc",
|
|
||||||
"function": "generateImageWithVariations",
|
|
||||||
"llmName": "dall-e-3",
|
|
||||||
"contextLength": 0,
|
|
||||||
"costPer1kTokens": 0.04,
|
|
||||||
"costPer1kTokensOutput": 0.0,
|
|
||||||
"speedRating": 5,
|
|
||||||
"qualityRating": 9,
|
|
||||||
"capabilities": ["image_generation", "art", "visual_creation", "variations"],
|
|
||||||
"tags": ["image_generation", "art", "visual", "variations", "cost_effective"]
|
|
||||||
},
|
|
||||||
"langdoc_generateImageWithChat": {
|
|
||||||
"connector": "langdoc",
|
|
||||||
"function": "generateImageWithChat",
|
|
||||||
"llmName": "gpt-4o",
|
|
||||||
"contextLength": 128000,
|
|
||||||
"costPer1kTokens": 0.02,
|
|
||||||
"costPer1kTokensOutput": 0.04,
|
|
||||||
"speedRating": 6,
|
|
||||||
"qualityRating": 8,
|
"qualityRating": 8,
|
||||||
"capabilities": ["image_generation", "chat", "visual_creation"],
|
"capabilities": ["web_search", "research", "information_gathering"],
|
||||||
"tags": ["image_generation", "chat", "visual", "cost_effective"]
|
"tags": ["web_search", "research", "information", "cost_effective"]
|
||||||
},
|
},
|
||||||
"langdoc_listModels": {
|
"perplexity_answerQuestion": {
|
||||||
"connector": "langdoc",
|
"connector": "perplexity",
|
||||||
"function": "listModels",
|
"function": "answerQuestion",
|
||||||
"llmName": "api",
|
"llmName": "mistral-7b-instruct",
|
||||||
"contextLength": 0,
|
"contextLength": 32000,
|
||||||
"costPer1kTokens": 0.0,
|
"costPer1kTokens": 0.002,
|
||||||
"costPer1kTokensOutput": 0.0,
|
"costPer1kTokensOutput": 0.002,
|
||||||
"speedRating": 9,
|
"speedRating": 8,
|
||||||
"qualityRating": 5,
|
"qualityRating": 8,
|
||||||
"capabilities": ["model_listing", "api_info"],
|
"capabilities": ["web_search", "question_answering", "research"],
|
||||||
"tags": ["api", "info", "models"]
|
"tags": ["web_search", "qa", "research", "cost_effective"]
|
||||||
},
|
},
|
||||||
"langdoc_getModelInfo": {
|
"perplexity_getCurrentNews": {
|
||||||
"connector": "langdoc",
|
"connector": "perplexity",
|
||||||
"function": "getModelInfo",
|
"function": "getCurrentNews",
|
||||||
"llmName": "api",
|
"llmName": "mistral-7b-instruct",
|
||||||
"contextLength": 0,
|
"contextLength": 32000,
|
||||||
"costPer1kTokens": 0.0,
|
"costPer1kTokens": 0.002,
|
||||||
"costPer1kTokensOutput": 0.0,
|
"costPer1kTokensOutput": 0.002,
|
||||||
"speedRating": 9,
|
"speedRating": 8,
|
||||||
"qualityRating": 5,
|
"qualityRating": 8,
|
||||||
"capabilities": ["model_info", "api_info"],
|
"capabilities": ["web_search", "news", "current_events"],
|
||||||
"tags": ["api", "info", "models"]
|
"tags": ["web_search", "news", "current_events", "cost_effective"]
|
||||||
},
|
},
|
||||||
|
|
||||||
# Tavily Web Models
|
# Tavily Web Models
|
||||||
|
|
@ -250,7 +216,7 @@ class AiObjects:
|
||||||
|
|
||||||
openaiService: AiOpenai
|
openaiService: AiOpenai
|
||||||
anthropicService: AiAnthropic
|
anthropicService: AiAnthropic
|
||||||
langdocService: AiLangdoc
|
perplexityService: AiPerplexity
|
||||||
tavilyService: ConnectorWeb
|
tavilyService: ConnectorWeb
|
||||||
|
|
||||||
def __post_init__(self) -> None:
|
def __post_init__(self) -> None:
|
||||||
|
|
@ -258,8 +224,8 @@ class AiObjects:
|
||||||
raise TypeError("openaiService must be provided")
|
raise TypeError("openaiService must be provided")
|
||||||
if self.anthropicService is None:
|
if self.anthropicService is None:
|
||||||
raise TypeError("anthropicService must be provided")
|
raise TypeError("anthropicService must be provided")
|
||||||
if self.langdocService is None:
|
if self.perplexityService is None:
|
||||||
raise TypeError("langdocService must be provided")
|
raise TypeError("perplexityService must be provided")
|
||||||
if self.tavilyService is None:
|
if self.tavilyService is None:
|
||||||
raise TypeError("tavilyService must be provided")
|
raise TypeError("tavilyService must be provided")
|
||||||
|
|
||||||
|
|
@ -268,13 +234,13 @@ class AiObjects:
|
||||||
"""Create AiObjects instance with all connectors initialized."""
|
"""Create AiObjects instance with all connectors initialized."""
|
||||||
openaiService = AiOpenai()
|
openaiService = AiOpenai()
|
||||||
anthropicService = AiAnthropic()
|
anthropicService = AiAnthropic()
|
||||||
langdocService = AiLangdoc()
|
perplexityService = AiPerplexity()
|
||||||
tavilyService = await ConnectorWeb.create()
|
tavilyService = await ConnectorWeb.create()
|
||||||
|
|
||||||
return cls(
|
return cls(
|
||||||
openaiService=openaiService,
|
openaiService=openaiService,
|
||||||
anthropicService=anthropicService,
|
anthropicService=anthropicService,
|
||||||
langdocService=langdocService,
|
perplexityService=perplexityService,
|
||||||
tavilyService=tavilyService
|
tavilyService=tavilyService
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -330,11 +296,22 @@ class AiObjects:
|
||||||
elif options.operationType == OperationType.IMAGE_GENERATION:
|
elif options.operationType == OperationType.IMAGE_GENERATION:
|
||||||
return "openai_generateImage"
|
return "openai_generateImage"
|
||||||
elif options.operationType == OperationType.WEB_RESEARCH:
|
elif options.operationType == OperationType.WEB_RESEARCH:
|
||||||
return "langdoc_callAiBasic"
|
return "perplexity_callAiWithWebSearch"
|
||||||
else:
|
else:
|
||||||
return "openai_callAiBasic_gpt35"
|
return "openai_callAiBasic_gpt35"
|
||||||
|
|
||||||
# Select based on priority
|
# Special handling for planning operations - use Claude for consistency
|
||||||
|
if options.operationType in [OperationType.GENERATE_PLAN, OperationType.ANALYSE_CONTENT]:
|
||||||
|
if "anthropic_callAiBasic" in candidates:
|
||||||
|
logger.info("Planning operation: Selected Claude (anthropic_callAiBasic) for highest quality")
|
||||||
|
return "anthropic_callAiBasic"
|
||||||
|
|
||||||
|
# Fallback to GPT-4o if Claude not available
|
||||||
|
if "openai_callAiBasic" in candidates:
|
||||||
|
logger.info("Planning operation: Selected GPT-4o (openai_callAiBasic) as fallback")
|
||||||
|
return "openai_callAiBasic"
|
||||||
|
|
||||||
|
# Select based on priority for other operations
|
||||||
if effectivePriority == Priority.SPEED:
|
if effectivePriority == Priority.SPEED:
|
||||||
return max(candidates, key=lambda k: candidates[k]["speedRating"])
|
return max(candidates, key=lambda k: candidates[k]["speedRating"])
|
||||||
elif effectivePriority == Priority.QUALITY:
|
elif effectivePriority == Priority.QUALITY:
|
||||||
|
|
@ -355,8 +332,8 @@ class AiObjects:
|
||||||
return self.openaiService
|
return self.openaiService
|
||||||
elif connectorType == "anthropic":
|
elif connectorType == "anthropic":
|
||||||
return self.anthropicService
|
return self.anthropicService
|
||||||
elif connectorType == "langdoc":
|
elif connectorType == "perplexity":
|
||||||
return self.langdocService
|
return self.perplexityService
|
||||||
elif connectorType == "tavily":
|
elif connectorType == "tavily":
|
||||||
return self.tavilyService
|
return self.tavilyService
|
||||||
else:
|
else:
|
||||||
|
|
@ -383,6 +360,17 @@ class AiObjects:
|
||||||
# Select model for text generation
|
# Select model for text generation
|
||||||
modelName = self._selectModel(prompt, context, options)
|
modelName = self._selectModel(prompt, context, options)
|
||||||
|
|
||||||
|
# Derive generation parameters
|
||||||
|
temperature = getattr(options, "temperature", None)
|
||||||
|
if temperature is None:
|
||||||
|
temperature = 0.2
|
||||||
|
maxTokens = getattr(options, "maxTokens", None)
|
||||||
|
# Provide a generous default to avoid truncation for long outputs
|
||||||
|
if maxTokens is None:
|
||||||
|
# If resultFormat suggests large outputs (e.g., html, json), allow more tokens
|
||||||
|
wants_large = str(getattr(options, "resultFormat", "")).lower() in ["html", "json", "md", "markdown"]
|
||||||
|
maxTokens = 8000 if wants_large else 2000
|
||||||
|
|
||||||
messages: List[Dict[str, Any]] = []
|
messages: List[Dict[str, Any]] = []
|
||||||
if context:
|
if context:
|
||||||
messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
|
messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
|
||||||
|
|
@ -394,10 +382,27 @@ class AiObjects:
|
||||||
# Call the appropriate function
|
# Call the appropriate function
|
||||||
if functionName == "callAiBasic":
|
if functionName == "callAiBasic":
|
||||||
if aiModels[modelName]["connector"] == "openai":
|
if aiModels[modelName]["connector"] == "openai":
|
||||||
content = await connector.callAiBasic(messages)
|
content = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
|
||||||
|
elif aiModels[modelName]["connector"] == "perplexity":
|
||||||
|
content = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
|
||||||
else:
|
else:
|
||||||
response = await connector.callAiBasic(messages)
|
response = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
|
||||||
content = response["choices"][0]["message"]["content"]
|
content = response["choices"][0]["message"]["content"]
|
||||||
|
elif functionName == "callAiWithWebSearch":
|
||||||
|
# Perplexity web search function
|
||||||
|
query = prompt
|
||||||
|
if context:
|
||||||
|
query = f"Context: {context}\n\nQuery: {prompt}"
|
||||||
|
content = await connector.callAiWithWebSearch(query)
|
||||||
|
elif functionName == "researchTopic":
|
||||||
|
# Perplexity research function
|
||||||
|
content = await connector.researchTopic(prompt)
|
||||||
|
elif functionName == "answerQuestion":
|
||||||
|
# Perplexity question answering function
|
||||||
|
content = await connector.answerQuestion(prompt, context)
|
||||||
|
elif functionName == "getCurrentNews":
|
||||||
|
# Perplexity news function
|
||||||
|
content = await connector.getCurrentNews(prompt)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Function {functionName} not supported for text generation")
|
raise ValueError(f"Function {functionName} not supported for text generation")
|
||||||
|
|
||||||
|
|
@ -446,21 +451,331 @@ class AiObjects:
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Function {functionName} not supported for image generation")
|
raise ValueError(f"Function {functionName} not supported for image generation")
|
||||||
|
|
||||||
# Web functionality methods
|
# Web functionality methods - Simple interface to Tavily connector
|
||||||
async def webSearch(self, web_search_request: WebSearchRequest) -> WebSearchActionResult:
|
async def search_websites(self, query: str, max_results: int = 5, **kwargs) -> List[WebSearchResultItem]:
|
||||||
"""Perform web search using Tavily."""
|
"""Search for websites using Tavily."""
|
||||||
return await self.tavilyService.search(web_search_request)
|
request = WebSearchRequest(
|
||||||
|
query=query,
|
||||||
|
max_results=max_results,
|
||||||
|
**kwargs
|
||||||
|
)
|
||||||
|
result = await self.tavilyService.search(request)
|
||||||
|
|
||||||
|
if result.success and result.documents:
|
||||||
|
return result.documents[0].documentData.results
|
||||||
|
return []
|
||||||
|
|
||||||
async def webCrawl(self, web_crawl_request: WebCrawlRequest) -> WebCrawlActionResult:
|
async def crawl_websites(self, urls: List[str], extract_depth: str = "advanced", format: str = "markdown") -> List[WebCrawlResultItem]:
|
||||||
"""Crawl web pages using Tavily."""
|
"""Crawl websites using Tavily."""
|
||||||
return await self.tavilyService.crawl(web_crawl_request)
|
from pydantic import HttpUrl
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
# Safely create HttpUrl objects with proper scheme handling
|
||||||
|
http_urls = []
|
||||||
|
for url in urls:
|
||||||
|
try:
|
||||||
|
# Ensure URL has a scheme
|
||||||
|
parsed = urlparse(url)
|
||||||
|
if not parsed.scheme:
|
||||||
|
url = f"https://{url}"
|
||||||
|
|
||||||
|
# Use HttpUrl with scheme parameter (this works for all URLs)
|
||||||
|
http_urls.append(HttpUrl(url, scheme="https"))
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Skipping invalid URL {url}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not http_urls:
|
||||||
|
return []
|
||||||
|
|
||||||
|
request = WebCrawlRequest(
|
||||||
|
urls=http_urls,
|
||||||
|
extract_depth=extract_depth,
|
||||||
|
format=format
|
||||||
|
)
|
||||||
|
result = await self.tavilyService.crawl(request)
|
||||||
|
|
||||||
|
if result.success and result.documents:
|
||||||
|
return result.documents[0].documentData.results
|
||||||
|
return []
|
||||||
|
|
||||||
async def webScrape(self, web_scrape_request: WebScrapeRequest) -> WebScrapeActionResult:
|
async def extract_content(self, urls: List[str], extract_depth: str = "advanced", format: str = "markdown") -> Dict[str, str]:
|
||||||
"""Scrape web content using Tavily."""
|
"""Extract content from URLs and return as dictionary."""
|
||||||
return await self.tavilyService.scrape(web_scrape_request)
|
crawl_results = await self.crawl_websites(urls, extract_depth, format)
|
||||||
|
return {str(result.url): result.content for result in crawl_results}
|
||||||
|
|
||||||
|
# Core Web Tools - Clean interface for web operations
|
||||||
|
async def readPage(self, url: str, extract_depth: str = "advanced") -> Optional[str]:
|
||||||
|
"""Read a single web page and return its content (HTML/Markdown)."""
|
||||||
|
logger.debug(f"Reading page: {url}")
|
||||||
|
try:
|
||||||
|
# URL encode the URL to handle spaces and special characters
|
||||||
|
from urllib.parse import quote, urlparse, urlunparse
|
||||||
|
parsed = urlparse(url)
|
||||||
|
encoded_url = urlunparse((
|
||||||
|
parsed.scheme,
|
||||||
|
parsed.netloc,
|
||||||
|
parsed.path,
|
||||||
|
parsed.params,
|
||||||
|
parsed.query,
|
||||||
|
parsed.fragment
|
||||||
|
))
|
||||||
|
|
||||||
|
# Manually encode query parameters to handle spaces
|
||||||
|
if parsed.query:
|
||||||
|
encoded_query = quote(parsed.query, safe='=&')
|
||||||
|
encoded_url = urlunparse((
|
||||||
|
parsed.scheme,
|
||||||
|
parsed.netloc,
|
||||||
|
parsed.path,
|
||||||
|
parsed.params,
|
||||||
|
encoded_query,
|
||||||
|
parsed.fragment
|
||||||
|
))
|
||||||
|
|
||||||
|
logger.debug(f"URL encoded: {url} -> {encoded_url}")
|
||||||
|
|
||||||
|
content = await self.extract_content([encoded_url], extract_depth, "markdown")
|
||||||
|
result = content.get(encoded_url)
|
||||||
|
if result:
|
||||||
|
logger.debug(f"Successfully read page {encoded_url}: {len(result)} chars")
|
||||||
|
else:
|
||||||
|
logger.warning(f"No content returned for page {encoded_url}")
|
||||||
|
return result
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to read page {url}: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def getUrlsFromPage(self, url: str, extract_depth: str = "advanced") -> List[str]:
|
||||||
|
"""Get all URLs from a web page, with redundancies removed."""
|
||||||
|
try:
|
||||||
|
content = await self.readPage(url, extract_depth)
|
||||||
|
if not content:
|
||||||
|
return []
|
||||||
|
|
||||||
|
links = self._extractLinksFromContent(content, url)
|
||||||
|
# Remove duplicates while preserving order
|
||||||
|
seen = set()
|
||||||
|
unique_links = []
|
||||||
|
for link in links:
|
||||||
|
if link not in seen:
|
||||||
|
seen.add(link)
|
||||||
|
unique_links.append(link)
|
||||||
|
|
||||||
|
logger.debug(f"Extracted {len(unique_links)} unique URLs from {url}")
|
||||||
|
return unique_links
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to get URLs from page {url}: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
def filterUrlsOnlyPages(self, urls: List[str], max_per_domain: int = 10) -> List[str]:
|
||||||
|
"""Filter URLs to get only links for pages to follow (no images, etc.)."""
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
def _isHtmlCandidate(url: str) -> bool:
|
||||||
|
lower = url.lower()
|
||||||
|
blocked = ('.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.ico', '.bmp',
|
||||||
|
'.mp4', '.mp3', '.avi', '.mov', '.mkv',
|
||||||
|
'.pdf', '.zip', '.rar', '.7z', '.tar', '.gz',
|
||||||
|
'.css', '.js', '.woff', '.woff2', '.ttf', '.eot')
|
||||||
|
return not lower.endswith(blocked)
|
||||||
|
|
||||||
|
# Group by domain
|
||||||
|
domain_links = {}
|
||||||
|
for link in urls:
|
||||||
|
domain = urlparse(link).netloc
|
||||||
|
if domain not in domain_links:
|
||||||
|
domain_links[domain] = []
|
||||||
|
domain_links[domain].append(link)
|
||||||
|
|
||||||
|
# Filter and cap per domain
|
||||||
|
filtered_links = []
|
||||||
|
for domain, domain_link_list in domain_links.items():
|
||||||
|
seen = set()
|
||||||
|
domain_filtered = []
|
||||||
|
|
||||||
|
for link in domain_link_list:
|
||||||
|
if link in seen:
|
||||||
|
continue
|
||||||
|
if not _isHtmlCandidate(link):
|
||||||
|
continue
|
||||||
|
seen.add(link)
|
||||||
|
domain_filtered.append(link)
|
||||||
|
if len(domain_filtered) >= max_per_domain:
|
||||||
|
break
|
||||||
|
|
||||||
|
filtered_links.extend(domain_filtered)
|
||||||
|
logger.debug(f"Domain {domain}: {len(domain_link_list)} -> {len(domain_filtered)} links")
|
||||||
|
|
||||||
|
return filtered_links
|
||||||
|
|
||||||
|
def _extractLinksFromContent(self, content: str, base_url: str) -> List[str]:
|
||||||
|
"""Extract links from HTML/Markdown content."""
|
||||||
|
try:
|
||||||
|
import re
|
||||||
|
from urllib.parse import urljoin, urlparse, quote, urlunparse
|
||||||
|
|
||||||
|
def _cleanUrl(url: str) -> str:
|
||||||
|
"""Clean and encode URL to remove spaces and invalid characters."""
|
||||||
|
# Remove quotes and extra spaces
|
||||||
|
url = url.strip().strip('"\'')
|
||||||
|
|
||||||
|
# If it's a relative URL, make it absolute first
|
||||||
|
if not url.startswith(('http://', 'https://')):
|
||||||
|
url = urljoin(base_url, url)
|
||||||
|
|
||||||
|
# Parse and re-encode the URL properly
|
||||||
|
parsed = urlparse(url)
|
||||||
|
if parsed.query:
|
||||||
|
# Encode query parameters properly
|
||||||
|
encoded_query = quote(parsed.query, safe='=&')
|
||||||
|
url = urlunparse((
|
||||||
|
parsed.scheme,
|
||||||
|
parsed.netloc,
|
||||||
|
parsed.path,
|
||||||
|
parsed.params,
|
||||||
|
encoded_query,
|
||||||
|
parsed.fragment
|
||||||
|
))
|
||||||
|
|
||||||
|
return url
|
||||||
|
|
||||||
|
links = []
|
||||||
|
|
||||||
|
# Extract HTML links: <a href="url"> format
|
||||||
|
html_link_pattern = r'<a[^>]+href=["\']([^"\']+)["\'][^>]*>'
|
||||||
|
html_links = re.findall(html_link_pattern, content, re.IGNORECASE)
|
||||||
|
|
||||||
|
for url in html_links:
|
||||||
|
if url and not url.startswith('#') and not url.startswith('javascript:'):
|
||||||
|
try:
|
||||||
|
cleaned_url = _cleanUrl(url)
|
||||||
|
links.append(cleaned_url)
|
||||||
|
logger.debug(f"Extracted HTML link: {url} -> {cleaned_url}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Failed to clean HTML link {url}: {e}")
|
||||||
|
|
||||||
|
# Extract markdown links: [text](url) format
|
||||||
|
markdown_link_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
|
||||||
|
markdown_links = re.findall(markdown_link_pattern, content)
|
||||||
|
|
||||||
|
for text, url in markdown_links:
|
||||||
|
if url and not url.startswith('#'):
|
||||||
|
try:
|
||||||
|
cleaned_url = _cleanUrl(url)
|
||||||
|
# Only keep URLs from the same domain
|
||||||
|
if urlparse(cleaned_url).netloc == urlparse(base_url).netloc:
|
||||||
|
links.append(cleaned_url)
|
||||||
|
logger.debug(f"Extracted markdown link: {url} -> {cleaned_url}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Failed to clean markdown link {url}: {e}")
|
||||||
|
|
||||||
|
# Extract plain URLs in the text
|
||||||
|
url_pattern = r'https?://[^\s\)]+'
|
||||||
|
plain_urls = re.findall(url_pattern, content)
|
||||||
|
|
||||||
|
for url in plain_urls:
|
||||||
|
try:
|
||||||
|
clean_url = url.rstrip('.,;!?')
|
||||||
|
cleaned_url = _cleanUrl(clean_url)
|
||||||
|
if urlparse(cleaned_url).netloc == urlparse(base_url).netloc:
|
||||||
|
if cleaned_url not in links: # Avoid duplicates
|
||||||
|
links.append(cleaned_url)
|
||||||
|
logger.debug(f"Extracted plain URL: {url} -> {cleaned_url}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Failed to clean plain URL {url}: {e}")
|
||||||
|
|
||||||
|
logger.debug(f"Total links extracted and cleaned: {len(links)}")
|
||||||
|
return links
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to extract links from content: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
async def crawlRecursively(self, urls: List[str], max_depth: int, extract_depth: str = "advanced", max_per_domain: int = 10) -> Dict[str, str]:
|
||||||
|
"""
|
||||||
|
Recursively crawl URLs up to specified depth.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
urls: List of starting URLs to crawl
|
||||||
|
max_depth: Maximum depth to crawl (1=main pages only, 2=main+sub-pages, etc.)
|
||||||
|
extract_depth: Tavily extract depth setting
|
||||||
|
max_per_domain: Maximum URLs per domain per level
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary mapping URL -> content for all crawled pages
|
||||||
|
"""
|
||||||
|
logger.info(f"Starting recursive crawl: {len(urls)} starting URLs, max_depth={max_depth}")
|
||||||
|
|
||||||
|
# URL index to track all processed URLs
|
||||||
|
processed_urls = set()
|
||||||
|
all_content = {}
|
||||||
|
|
||||||
|
# Current level URLs to process
|
||||||
|
current_level_urls = urls.copy()
|
||||||
|
|
||||||
|
for depth in range(1, max_depth + 1):
|
||||||
|
logger.info(f"=== DEPTH LEVEL {depth}/{max_depth} ===")
|
||||||
|
logger.info(f"Processing {len(current_level_urls)} URLs at depth {depth}")
|
||||||
|
|
||||||
|
# URLs found at this level (for next iteration)
|
||||||
|
next_level_urls = []
|
||||||
|
|
||||||
|
for url in current_level_urls:
|
||||||
|
if url in processed_urls:
|
||||||
|
logger.debug(f"URL {url} already processed, skipping")
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
logger.info(f"Processing URL at depth {depth}: {url}")
|
||||||
|
|
||||||
|
# Read page content
|
||||||
|
content = await self.readPage(url, extract_depth)
|
||||||
|
if content:
|
||||||
|
all_content[url] = content
|
||||||
|
processed_urls.add(url)
|
||||||
|
logger.info(f"✓ Successfully processed {url}: {len(content)} chars")
|
||||||
|
|
||||||
|
# Get URLs from this page for next level
|
||||||
|
page_urls = await self.getUrlsFromPage(url, extract_depth)
|
||||||
|
logger.info(f"Found {len(page_urls)} URLs on {url}")
|
||||||
|
|
||||||
|
# Filter URLs and add to next level
|
||||||
|
filtered_urls = self.filterUrlsOnlyPages(page_urls, max_per_domain)
|
||||||
|
logger.info(f"Filtered to {len(filtered_urls)} valid URLs")
|
||||||
|
|
||||||
|
# Add new URLs to next level (avoiding already processed ones)
|
||||||
|
new_urls_count = 0
|
||||||
|
for new_url in filtered_urls:
|
||||||
|
if new_url not in processed_urls:
|
||||||
|
next_level_urls.append(new_url)
|
||||||
|
new_urls_count += 1
|
||||||
|
|
||||||
|
logger.info(f"Added {new_urls_count} new URLs to next level from {url}")
|
||||||
|
else:
|
||||||
|
logger.warning(f"✗ No content extracted from {url}")
|
||||||
|
processed_urls.add(url) # Mark as processed to avoid retry
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"✗ Failed to process URL {url} at depth {depth}: {e}")
|
||||||
|
processed_urls.add(url) # Mark as processed to avoid retry
|
||||||
|
|
||||||
|
# Prepare for next iteration
|
||||||
|
current_level_urls = next_level_urls
|
||||||
|
logger.info(f"Depth {depth} completed. Found {len(next_level_urls)} URLs for next level")
|
||||||
|
|
||||||
|
# Stop if no more URLs to process
|
||||||
|
if not current_level_urls:
|
||||||
|
logger.info(f"No more URLs found at depth {depth}, stopping recursion")
|
||||||
|
break
|
||||||
|
|
||||||
|
logger.info(f"Recursive crawl completed: {len(all_content)} total pages crawled")
|
||||||
|
return all_content
|
||||||
|
|
||||||
async def webQuery(self, query: str, context: str = "", options: AiCallOptions = None) -> str:
|
async def webQuery(self, query: str, context: str = "", options: AiCallOptions = None) -> str:
|
||||||
"""Use LangDoc AI to provide the best answers for web-related queries."""
|
"""Use Perplexity AI to provide the best answers for web-related queries."""
|
||||||
if options is None:
|
if options is None:
|
||||||
options = AiCallOptions(operationType=OperationType.WEB_RESEARCH)
|
options = AiCallOptions(operationType=OperationType.WEB_RESEARCH)
|
||||||
|
|
||||||
|
|
@ -480,14 +795,12 @@ Please provide:
|
||||||
|
|
||||||
Format your response in a clear, professional manner that would be helpful for someone researching this topic."""
|
Format your response in a clear, professional manner that would be helpful for someone researching this topic."""
|
||||||
|
|
||||||
messages = [{"role": "user", "content": webPrompt}]
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Use LangDoc for the best answers
|
# Use Perplexity for web research with search capabilities
|
||||||
response = await self.langdocService.callAiBasic(messages)
|
response = await self.perplexityService.callAiWithWebSearch(webPrompt)
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"LangDoc web query failed: {str(e)}")
|
logger.error(f"Perplexity web query failed: {str(e)}")
|
||||||
raise Exception(f"Failed to process web query: {str(e)}")
|
raise Exception(f"Failed to process web query: {str(e)}")
|
||||||
|
|
||||||
# Utility methods
|
# Utility methods
|
||||||
|
|
@ -511,3 +824,157 @@ Format your response in a clear, professional manner that would be helpful for s
|
||||||
"""Get model names that have a specific tag."""
|
"""Get model names that have a specific tag."""
|
||||||
return [name for name, info in aiModels.items() if tag in info.get("tags", [])]
|
return [name for name, info in aiModels.items() if tag in info.get("tags", [])]
|
||||||
|
|
||||||
|
async def selectRelevantWebsites(self, websites: List[str], userQuestion: str) -> Tuple[List[str], str]:
|
||||||
|
"""Select most relevant websites using AI analysis. Returns (selected_websites, ai_response)."""
|
||||||
|
if len(websites) <= 1:
|
||||||
|
return websites, "Only one website available, no selection needed"
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create website summaries for AI analysis
|
||||||
|
websiteSummaries = []
|
||||||
|
for i, url in enumerate(websites, 1):
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
domain = urlparse(url).netloc
|
||||||
|
summary = f"{i}. {url} (Domain: {domain})"
|
||||||
|
websiteSummaries.append(summary)
|
||||||
|
|
||||||
|
selectionPrompt = f"""
|
||||||
|
Based on this user request: "{userQuestion}"
|
||||||
|
|
||||||
|
I have {len(websites)} websites found. Please select the most relevant website(s) for this request.
|
||||||
|
|
||||||
|
Available websites:
|
||||||
|
{chr(10).join(websiteSummaries)}
|
||||||
|
|
||||||
|
Please respond with the website number(s) (1, 2, 3, etc.) that are most relevant.
|
||||||
|
Format: 1,3,5 (or just 1 for single selection)
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Use Perplexity to select the best websites
|
||||||
|
response = await self.webQuery(selectionPrompt)
|
||||||
|
|
||||||
|
# Parse the selection
|
||||||
|
import re
|
||||||
|
numbers = re.findall(r'\d+', response)
|
||||||
|
if numbers:
|
||||||
|
selectedWebsites = []
|
||||||
|
for num in numbers:
|
||||||
|
index = int(num) - 1
|
||||||
|
if 0 <= index < len(websites):
|
||||||
|
selectedWebsites.append(websites[index])
|
||||||
|
|
||||||
|
if selectedWebsites:
|
||||||
|
logger.info(f"AI selected {len(selectedWebsites)} websites")
|
||||||
|
return selectedWebsites, response
|
||||||
|
|
||||||
|
# Fallback to first website
|
||||||
|
logger.warning("AI selection failed, using first website")
|
||||||
|
return websites[:1], f"AI selection failed, fallback to first website. AI response: {response}"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in website selection: {str(e)}")
|
||||||
|
return websites[:1], f"Error in website selection: {str(e)}"
|
||||||
|
|
||||||
|
async def analyzeContentWithChunking(self, allContent: Dict[str, str], userQuestion: str) -> str:
    """Analyze content using AI with chunking for large content.

    Each website's text is filtered, split into <=50k-character pieces,
    analyzed chunk-by-chunk via ``self.webQuery``, and the per-chunk
    findings are synthesized into one final answer.

    Args:
        allContent: Mapping of source URL -> raw page text.
        userQuestion: The user's query driving the analysis.

    Returns:
        The synthesized answer, the concatenated per-chunk analyses if the
        final synthesis fails, or a fixed message when nothing analyzable.
    """
    logger.info(f"Analyzing {len(allContent)} websites with AI")

    # Split every page into pieces small enough for a single AI call.
    chunkSize = 50000  # 50k chars per chunk
    allChunks = []
    for url, content in allContent.items():
        cleaned = self._filterContent(content)
        if len(cleaned) <= chunkSize:
            allChunks.append((url, cleaned))
            logger.info(f"Content from {url}: {len(cleaned)} chars (single chunk)")
            continue
        pieces = (len(cleaned) + chunkSize - 1) // chunkSize
        logger.info(f"Content from {url}: {len(cleaned)} chars (split into {pieces} chunks)")
        for partNum, start in enumerate(range(0, len(cleaned), chunkSize), 1):
            allChunks.append((f"{url} (part {partNum})", cleaned[start:start + chunkSize]))

    logger.info(f"Processing {len(allChunks)} content chunks")

    # Analyze each chunk independently; a failed chunk is logged and skipped
    # (best-effort — partial results are still synthesized below).
    chunkAnalyses = []
    for idx, (source, piece) in enumerate(allChunks, 1):
        logger.info(f"Analyzing chunk {idx}/{len(allChunks)}: {source}")
        try:
            analysisPrompt = f"""
Analyze this web content and extract relevant information for: {userQuestion}

Source: {source}
Content: {piece}

Please extract key information relevant to the query.
"""
            chunkAnalyses.append(await self.webQuery(analysisPrompt))
            logger.info(f"Chunk {idx}/{len(allChunks)} analyzed successfully")
        except Exception as e:
            logger.error(f"Chunk {idx}/{len(allChunks)} error: {e}")

    if not chunkAnalyses:
        logger.error("No content could be analyzed")
        return "No content could be analyzed"

    # Merge the per-chunk findings into a single comprehensive answer.
    logger.info(f"Combining {len(chunkAnalyses)} chunk analyses")
    combinedAnalysis = "\n\n".join(chunkAnalyses)
    try:
        logger.info("Performing final synthesis of all analyses")
        synthesisPrompt = f"""
Based on these partial analyses, provide a comprehensive answer to: {userQuestion}

Partial analyses:
{combinedAnalysis}

Please provide a clear, well-structured answer to the query.
"""
        finalAnalysis = await self.webQuery(synthesisPrompt)
        logger.info("Final synthesis completed successfully")
        return finalAnalysis
    except Exception as e:
        # Synthesis is best-effort: fall back to the raw concatenation.
        logger.error(f"Synthesis error: {e}")
        return combinedAnalysis
|
||||||
|
|
||||||
|
def _filterContent(self, content: str) -> str:
|
||||||
|
"""Filter out navigation, ads, and other nonsense content."""
|
||||||
|
lines = content.split('\n')
|
||||||
|
filteredLines = []
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
line = line.strip()
|
||||||
|
# Skip empty lines
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
# Skip navigation elements
|
||||||
|
if any(skip in line.lower() for skip in [
|
||||||
|
'toggle navigation', 'log in', 'sign up', 'cookies', 'privacy policy',
|
||||||
|
'terms of service', 'subscribe', 'newsletter', 'follow us', 'share this',
|
||||||
|
'advertisement', 'sponsored', 'banner', 'popup', 'modal'
|
||||||
|
]):
|
||||||
|
continue
|
||||||
|
# Skip image references without context
|
||||||
|
if line.startswith(' and line.endswith(')') and '---' in line:
|
||||||
|
continue
|
||||||
|
# Keep meaningful content
|
||||||
|
if len(line) > 10: # Skip very short lines
|
||||||
|
filteredLines.append(line)
|
||||||
|
|
||||||
|
return '\n'.join(filteredLines)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,8 +12,8 @@ from typing import Dict, Any, List, Optional, Union, get_origin, get_args
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
from modules.interfaces.interfaceDbChatAccess import ChatAccess
|
from modules.interfaces.interfaceDbChatAccess import ChatAccess
|
||||||
from modules.datamodels.datamodelWorkflow import (
|
from modules.datamodels.datamodelChat import (
|
||||||
TaskAction,
|
ActionItem,
|
||||||
TaskResult,
|
TaskResult,
|
||||||
TaskItem,
|
TaskItem,
|
||||||
TaskStatus,
|
TaskStatus,
|
||||||
|
|
@ -549,7 +549,7 @@ class ChatObjects:
|
||||||
created_documents.append(created_doc)
|
created_documents.append(created_doc)
|
||||||
|
|
||||||
# Convert to ChatMessage model
|
# Convert to ChatMessage model
|
||||||
return ChatMessage(
|
chat_message = ChatMessage(
|
||||||
id=createdMessage["id"],
|
id=createdMessage["id"],
|
||||||
workflowId=createdMessage["workflowId"],
|
workflowId=createdMessage["workflowId"],
|
||||||
parentMessageId=createdMessage.get("parentMessageId"),
|
parentMessageId=createdMessage.get("parentMessageId"),
|
||||||
|
|
@ -570,6 +570,11 @@ class ChatObjects:
|
||||||
actionMethod=createdMessage.get("actionMethod"),
|
actionMethod=createdMessage.get("actionMethod"),
|
||||||
actionName=createdMessage.get("actionName")
|
actionName=createdMessage.get("actionName")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Debug: Store message and documents for debugging TODO REMOVE
|
||||||
|
self._storeDebugMessageAndDocuments(chat_message)
|
||||||
|
|
||||||
|
return chat_message
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error creating workflow message: {str(e)}")
|
logger.error(f"Error creating workflow message: {str(e)}")
|
||||||
|
|
@ -1045,6 +1050,120 @@ class ChatObjects:
|
||||||
|
|
||||||
return {"items": items}
|
return {"items": items}
|
||||||
|
|
||||||
|
def _storeDebugMessageAndDocuments(self, message: ChatMessage) -> None:
    """
    Store message and documents for debugging purposes in fileshare.
    Structure: gateway/test-chat/messages/m_round_task_action_timestamp/documentlist_label/documents

    Best-effort: any failure is logged (with traceback) and swallowed so the
    debug dump can never break message creation.

    Args:
        message: ChatMessage object to store
    """
    try:
        import os
        import json
        from datetime import datetime, UTC

        # Root folder for all debug dumps.
        debug_root = "./test-chat/messages"
        os.makedirs(debug_root, exist_ok=True)

        # Millisecond-precision timestamp keeps folder names unique and sortable.
        timestamp = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]

        # Folder name encodes round/task/action; missing values fall back to "0".
        roundPart = str(message.roundNumber) if message.roundNumber is not None else "0"
        taskPart = str(message.taskNumber) if message.taskNumber is not None else "0"
        actionPart = str(message.actionNumber) if message.actionNumber is not None else "0"
        message_path = os.path.join(debug_root, f"{timestamp}_m_{roundPart}_{taskPart}_{actionPart}")
        os.makedirs(message_path, exist_ok=True)

        # Serialize the message field-by-field; avoids relying on model_dump().
        payload = {
            "id": message.id,
            "workflowId": message.workflowId,
            "parentMessageId": message.parentMessageId,
            "message": message.message,
            "role": message.role,
            "status": message.status,
            "sequenceNr": message.sequenceNr,
            "publishedAt": message.publishedAt,
            "roundNumber": message.roundNumber,
            "taskNumber": message.taskNumber,
            "actionNumber": message.actionNumber,
            "documentsLabel": message.documentsLabel,
            "actionId": message.actionId,
            "actionMethod": message.actionMethod,
            "actionName": message.actionName,
            "success": message.success,
            "documents": []
        }
        with open(os.path.join(message_path, "message.json"), "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2, ensure_ascii=False, default=str)

        # Plain-text copy of the message body for quick inspection.
        if message.message:
            with open(os.path.join(message_path, "message_text.txt"), "w", encoding="utf-8") as f:
                f.write(str(message.message))

        if message.documents and len(message.documents) > 0:
            logger.info(f"Debug: Processing {len(message.documents)} documents")

            # NOTE(review): the label comes from the message, not from each
            # document, so all documents land under the same label folder —
            # confirm this is intended.
            documents_by_label = {}
            for doc in message.documents:
                documents_by_label.setdefault(message.documentsLabel or 'default', []).append(doc)

            for label, docs in documents_by_label.items():
                # Sanitize the label so it is safe as a directory name.
                safe_label = "".join(c for c in str(label) if c.isalnum() or c in (' ', '-', '_')).rstrip()
                safe_label = safe_label.replace(' ', '_') or "default"

                label_folder = os.path.join(message_path, safe_label)
                os.makedirs(label_folder, exist_ok=True)
                logger.info(f"Debug: Created document folder: {label_folder}")

                # One metadata JSON per document, numbered for stable ordering.
                for i, doc in enumerate(docs):
                    doc_meta = {
                        "id": doc.id,
                        "messageId": doc.messageId,
                        "fileId": doc.fileId,
                        "fileName": doc.fileName,
                        "fileSize": doc.fileSize,
                        "mimeType": doc.mimeType,
                        "roundNumber": doc.roundNumber,
                        "taskNumber": doc.taskNumber,
                        "actionNumber": doc.actionNumber,
                        "actionId": doc.actionId
                    }
                    meta_path = os.path.join(label_folder, f"document_{i+1:03d}_metadata.json")
                    with open(meta_path, "w", encoding="utf-8") as f:
                        json.dump(doc_meta, f, indent=2, ensure_ascii=False, default=str)
                    logger.info(f"Debug: Stored document metadata for {doc.fileName}")

        logger.info(f"Debug: Stored message and documents in {message_path}")

    except Exception as e:
        logger.error(f"Debug: Failed to store message and documents: {e}")
        import traceback
        logger.error(f"Debug: Traceback: {traceback.format_exc()}")
|
||||||
|
|
||||||
|
|
||||||
def getInterface(currentUser: Optional[User] = None) -> 'ChatObjects':
|
def getInterface(currentUser: Optional[User] = None) -> 'ChatObjects':
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,8 @@ class Services:
|
||||||
def __init__(self, user: User, workflow: ChatWorkflow = None):
|
def __init__(self, user: User, workflow: ChatWorkflow = None):
|
||||||
self.user: User = user
|
self.user: User = user
|
||||||
self.workflow: ChatWorkflow = workflow
|
self.workflow: ChatWorkflow = workflow
|
||||||
|
self.currentUserPrompt: str = "" # Cleaned/normalized user intent for the current round
|
||||||
|
self.rawUserPrompt: str = "" # Original raw user message for the current round
|
||||||
|
|
||||||
# Initialize interfaces
|
# Initialize interfaces
|
||||||
|
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
182
modules/services/serviceExtraction/chunking/image_chunker.py
Normal file
182
modules/services/serviceExtraction/chunking/image_chunker.py
Normal file
|
|
@ -0,0 +1,182 @@
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
|
||||||
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
|
from ..subRegistry import Chunker
|
||||||
|
|
||||||
|
|
||||||
|
class ImageChunker(Chunker):
    """Chunker for reducing image size through resizing, compression, and tiling."""

    def chunk(self, part: ContentPart, options: Dict[str, Any]) -> list[Dict[str, Any]]:
        """
        Chunk an image by reducing its size through various strategies.

        Strategies are tried in order:
          1. return the image unchanged when it already fits both limits;
          2. resize to fit within the pixel budget (aspect ratio preserved);
          3. re-encode as JPEG at decreasing quality until the byte budget fits;
          4. split into tiles when compression alone is not enough.

        Args:
            part: ContentPart containing image data (base64 encoded)
            options: Chunking options including:
                - imageChunkSize: Maximum size in bytes for each chunk
                - imageMaxPixels: Maximum pixels (width*height) for the image
                - imageQuality: JPEG quality (0-100, default 85)
                - imageTileSize: Size for tiling if image is still too large

        Returns:
            List of image chunks with reduced size. On any error the original
            data is returned untouched with an "error_fallback" strategy tag.
        """
        maxBytes = int(options.get("imageChunkSize", 1000000))  # 1MB default
        maxPixels = int(options.get("imageMaxPixels", 1024 * 1024))  # 1MP default
        quality = int(options.get("imageQuality", 85))
        tileSize = int(options.get("imageTileSize", 512))  # 512x512 tiles

        chunks: List[Dict[str, Any]] = []

        try:
            # Lazy import PIL to avoid hanging during module import
            from PIL import Image

            # Decode base64 image data
            imageData = base64.b64decode(part.data)
            image = Image.open(io.BytesIO(imageData))

            originalWidth, originalHeight = image.size
            originalPixels = originalWidth * originalHeight

            # Strategy 1: If image is small enough, return as-is
            if len(part.data) <= maxBytes and originalPixels <= maxPixels:
                chunks.append({
                    "data": part.data,
                    "size": len(part.data),
                    "order": 0,
                    "metadata": {
                        "originalSize": len(part.data),
                        "originalPixels": originalPixels,
                        "strategy": "original"
                    }
                })
                return chunks

            # Strategy 2: Resize to fit within pixel limit, keeping aspect ratio.
            if originalPixels > maxPixels:
                scale = (maxPixels / originalPixels) ** 0.5
                newWidth = max(int(originalWidth * scale), 64)   # enforce minimum size
                newHeight = max(int(originalHeight * scale), 64)
                image = image.resize((newWidth, newHeight), Image.Resampling.LANCZOS)

            # JPEG cannot encode alpha or palette images; normalize the mode
            # before saving so Strategy 3/4 cannot raise OSError on RGBA/P input.
            if image.mode not in ("RGB", "L"):
                image = image.convert("RGB")

            # Strategy 3: Re-encode at decreasing quality.  We always encode at
            # least once: the previous implementation compared the *original*
            # base64 size against the budget, so a resized image that already
            # fit skipped this loop entirely and fell through to a quality-10
            # fallback, degrading it far more than necessary.
            currentQuality = quality
            while currentQuality > 10:
                output = io.BytesIO()
                image.save(output, format='JPEG', quality=currentQuality, optimize=True)
                compressedB64 = base64.b64encode(output.getvalue()).decode('utf-8')

                if len(compressedB64) <= maxBytes:
                    chunks.append({
                        "data": compressedB64,
                        "size": len(compressedB64),
                        "order": 0,
                        "metadata": {
                            "originalSize": len(part.data),
                            "originalPixels": originalPixels,
                            "compressedSize": len(compressedB64),
                            "quality": currentQuality,
                            "strategy": "compressed"
                        }
                    })
                    return chunks

                currentQuality -= 10

            # Strategy 4: Even quality ~15 is over budget — tile the image.
            chunks = self._tileImage(image, maxBytes, tileSize, quality, originalPixels)
            return chunks

        except Exception as e:
            # Fallback: Return original data with error metadata
            chunks.append({
                "data": part.data,
                "size": len(part.data),
                "order": 0,
                "metadata": {
                    "originalSize": len(part.data),
                    "strategy": "error_fallback",
                    "error": str(e)
                }
            })

        return chunks

    def _tileImage(self, image: "Image.Image", maxBytes: int, tileSize: int, quality: int, originalPixels: int) -> List[Dict[str, Any]]:
        """Split image into tiles if it's still too large after compression.

        Tiles are emitted row-major; each chunk's ``order`` encodes its grid
        position and the metadata records the pixel bounds of the tile.
        """
        chunks = []
        width, height = image.size

        # Calculate tile grid (ceiling division so edge tiles are included).
        tilesX = (width + tileSize - 1) // tileSize
        tilesY = (height + tileSize - 1) // tileSize

        for y in range(tilesY):
            for x in range(tilesX):
                # Tile boundaries, clamped to the image edge.
                left = x * tileSize
                top = y * tileSize
                right = min(left + tileSize, width)
                bottom = min(top + tileSize, height)

                tile = image.crop((left, top, right, bottom))

                # Compress each tile independently at the requested quality.
                output = io.BytesIO()
                tile.save(output, format='JPEG', quality=quality, optimize=True)
                tileB64 = base64.b64encode(output.getvalue()).decode('utf-8')

                chunks.append({
                    "data": tileB64,
                    "size": len(tileB64),
                    "order": y * tilesX + x,
                    "metadata": {
                        "originalSize": len(image.tobytes()),
                        "originalPixels": originalPixels,
                        "tileSize": tileSize,
                        "tilePosition": f"{x},{y}",
                        "tileBounds": f"{left},{top},{right},{bottom}",
                        "quality": quality,
                        "strategy": "tiled"
                    }
                })

        return chunks
|
||||||
|
|
@ -1,12 +1,17 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
|
import logging
|
||||||
|
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..subRegistry import Chunker
|
from ..subRegistry import Chunker
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class TextChunker(Chunker):
|
class TextChunker(Chunker):
|
||||||
def chunk(self, part: ContentPart, options: Dict[str, Any]) -> list[Dict[str, Any]]:
|
def chunk(self, part: ContentPart, options: Dict[str, Any]) -> list[Dict[str, Any]]:
|
||||||
maxBytes = int(options.get("textChunkSize", 40000))
|
maxBytes = int(options.get("textChunkSize", 40000))
|
||||||
|
logger.debug(f"TextChunker: textChunkSize from options: {options.get('textChunkSize', 'NOT_FOUND')}")
|
||||||
|
logger.debug(f"TextChunker: using maxBytes: {maxBytes}")
|
||||||
chunks: List[Dict[str, Any]] = []
|
chunks: List[Dict[str, Any]] = []
|
||||||
current: List[str] = []
|
current: List[str] = []
|
||||||
size = 0
|
size = 0
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
import base64
|
import base64
|
||||||
|
|
||||||
from ..utils import makeId
|
from ..subUtils import makeId
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..subRegistry import Extractor
|
from ..subRegistry import Extractor
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..utils import makeId
|
from ..subUtils import makeId
|
||||||
from ..subRegistry import Extractor
|
from ..subRegistry import Extractor
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
import io
|
import io
|
||||||
|
|
||||||
from ..utils import makeId
|
from ..subUtils import makeId
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..subRegistry import Extractor
|
from ..subRegistry import Extractor
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ from typing import Any, Dict, List
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..utils import makeId
|
from ..subUtils import makeId
|
||||||
from ..subRegistry import Extractor
|
from ..subRegistry import Extractor
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
import base64
|
import base64
|
||||||
|
|
||||||
from ..utils import makeId
|
from ..subUtils import makeId
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..subRegistry import Extractor
|
from ..subRegistry import Extractor
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ from typing import Any, Dict, List
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..utils import makeId
|
from ..subUtils import makeId
|
||||||
from ..subRegistry import Extractor
|
from ..subRegistry import Extractor
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ from typing import Any, Dict, List
|
||||||
import base64
|
import base64
|
||||||
import io
|
import io
|
||||||
|
|
||||||
from ..utils import makeId
|
from ..subUtils import makeId
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..subRegistry import Extractor
|
from ..subRegistry import Extractor
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..utils import makeId
|
from ..subUtils import makeId
|
||||||
from ..subRegistry import Extractor
|
from ..subRegistry import Extractor
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ from typing import Any, Dict, List
|
||||||
import io
|
import io
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from ..utils import makeId
|
from ..subUtils import makeId
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..subRegistry import Extractor
|
from ..subRegistry import Extractor
|
||||||
|
|
||||||
|
|
@ -75,7 +75,8 @@ class XlsxExtractor(Extractor):
|
||||||
elif isinstance(v, datetime):
|
elif isinstance(v, datetime):
|
||||||
cells.append(v.strftime("%Y-%m-%d %H:%M:%S"))
|
cells.append(v.strftime("%Y-%m-%d %H:%M:%S"))
|
||||||
else:
|
else:
|
||||||
cells.append(f'"{str(v).replace("\"", "\"\"")}"')
|
escaped_value = str(v).replace('"', '""')
|
||||||
|
cells.append(f'"{escaped_value}"')
|
||||||
lines.append(",".join(cells))
|
lines.append(",".join(cells))
|
||||||
csvData = "\n".join(lines)
|
csvData = "\n".join(lines)
|
||||||
parts.append(ContentPart(
|
parts.append(ContentPart(
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ from typing import Any, Dict, List
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..utils import makeId
|
from ..subUtils import makeId
|
||||||
from ..subRegistry import Extractor
|
from ..subRegistry import Extractor
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,14 @@
|
||||||
from typing import Any, Dict, List, Optional, Union
|
from typing import Any, Dict, List, Optional, Union
|
||||||
import uuid
|
import uuid
|
||||||
|
import logging
|
||||||
|
|
||||||
from .subRegistry import ExtractorRegistry, ChunkerRegistry
|
from .subRegistry import ExtractorRegistry, ChunkerRegistry
|
||||||
from .subPipeline import runExtraction, poolAndLimit, applyAiIfRequested
|
from .subPipeline import runExtraction, poolAndLimit, applyAiIfRequested
|
||||||
from modules.datamodels.datamodelExtraction import ExtractedContent, ContentPart
|
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy
|
||||||
|
from modules.datamodels.datamodelChat import ChatDocument
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class ExtractionService:
|
class ExtractionService:
|
||||||
|
|
@ -12,45 +17,325 @@ class ExtractionService:
|
||||||
self._extractorRegistry = ExtractorRegistry()
|
self._extractorRegistry = ExtractorRegistry()
|
||||||
self._chunkerRegistry = ChunkerRegistry()
|
self._chunkerRegistry = ChunkerRegistry()
|
||||||
|
|
||||||
def extractContent(self, documents: List[ChatDocument], options: Dict[str, Any]) -> List[ContentExtracted]:
    """
    Extract content from a list of ChatDocument objects.

    Args:
        documents: List of ChatDocument objects to extract content from
        options: Extraction options including maxSize, chunkAllowed, mergeStrategy, etc.

    Returns:
        List of ContentExtracted objects, one per input document

    Raises:
        ValueError: when no file bytes can be resolved for a document's fileId.
    """
    results: List[ContentExtracted] = []

    # Lazy import to avoid circular deps and heavy init at module import
    from modules.interfaces.interfaceDbComponentObjects import getInterface
    dbInterface = getInterface()

    for i, doc in enumerate(documents):
        logger.info(f"=== DOCUMENT {i}: {doc.fileName} ===")
        logger.info(f"Initial MIME type: {doc.mimeType}")

        # Resolve raw bytes for this document using interface
        documentBytes = dbInterface.getFileData(doc.fileId)
        if not documentBytes:
            raise ValueError(f"No file data found for fileId={doc.fileId}")

        # Run the extractor/chunker pipeline on the raw bytes.
        ec = runExtraction(
            extractorRegistry=self._extractorRegistry,
            chunkerRegistry=self._chunkerRegistry,
            documentBytes=documentBytes,
            fileName=doc.fileName,
            mimeType=doc.mimeType,
            options=options
        )

        # Log content parts metadata
        logger.debug(f"Content parts: {len(ec.parts)}")
        for j, part in enumerate(ec.parts):
            logger.debug(f"  Part {j}: {part.typeGroup} ({part.mimeType}) - {len(part.data) if part.data else 0} chars")
            if part.metadata:
                logger.debug(f"    Metadata: {part.metadata}")

        # Attach document id to parts if missing
        for p in ec.parts:
            if "documentId" not in p.metadata:
                p.metadata["documentId"] = doc.id or str(uuid.uuid4())

        # Log chunking information
        chunked_parts = [p for p in ec.parts if p.metadata.get("chunk", False)]
        if chunked_parts:
            logger.debug(f"=== CHUNKING RESULTS ===")
            logger.debug(f"Total parts: {len(ec.parts)}")
            logger.debug(f"Chunked parts: {len(chunked_parts)}")
            for chunk in chunked_parts:
                logger.debug(f"  Chunk: {chunk.label} - {len(chunk.data)} chars (parent: {chunk.parentId})")
        else:
            logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits")

        # Optional AI post-processing, then collect the result.
        ec = applyAiIfRequested(ec, options)
        results.append(ec)

    return results
|
||||||
|
|
||||||
async def extractContentFromDocument(self, prompt: str, documents: List[Dict[str, Any]], options: Optional[Dict[str, Any]] = None) -> List[ExtractedContent]:
|
def mergeAiResults(
|
||||||
|
self,
|
||||||
|
extractedContent: List[ContentExtracted],
|
||||||
|
aiResults: List[str],
|
||||||
|
strategy: MergeStrategy
|
||||||
|
) -> ContentExtracted:
|
||||||
"""
|
"""
|
||||||
Batch extract content from multiple documents.
|
Merge AI results from chunked content back into a single ContentExtracted.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
prompt: Instructional prompt for optional AI post-processing/selection.
|
extractedContent: List of ContentExtracted objects that were processed
|
||||||
documents: List of dicts with keys: id, bytes, fileName, mimeType.
|
aiResults: List of AI response strings, one per chunk
|
||||||
options: Optional extraction options. "ai" config may be provided.
|
strategy: Merge strategy configuration (dict or MergeStrategy object)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[ExtractedContent]: one per input document in order.
|
Single ContentExtracted with merged AI results
|
||||||
"""
|
"""
|
||||||
# Build options safely and inject prompt for downstream AI selection if desired
|
logger.debug(f"=== MERGING AI RESULTS ===")
|
||||||
effectiveOptions: Dict[str, Any] = options.copy() if options else {}
|
logger.debug(f"Extracted content: {len(extractedContent)} documents")
|
||||||
aiCfg = effectiveOptions.get("ai") or {}
|
logger.debug(f"AI results: {len(aiResults)} responses")
|
||||||
if prompt:
|
logger.debug(f"Merge strategy: {strategy.mergeType}")
|
||||||
aiCfg["prompt"] = prompt
|
|
||||||
effectiveOptions["ai"] = aiCfg
|
mergeStrategy = strategy
|
||||||
|
|
||||||
# Delegate to existing synchronous pipeline
|
# Collect all parts from all extracted content
|
||||||
return self.extractContent(documents, effectiveOptions)
|
allParts: List[ContentPart] = []
|
||||||
|
for ec in extractedContent:
|
||||||
|
allParts.extend(ec.parts)
|
||||||
|
|
||||||
|
logger.debug(f"Total original parts: {len(allParts)}")
|
||||||
|
|
||||||
|
# Create AI result parts
|
||||||
|
aiResultParts: List[ContentPart] = []
|
||||||
|
for i, aiResult in enumerate(aiResults):
|
||||||
|
aiPart = ContentPart(
|
||||||
|
id=f"ai_result_{i}",
|
||||||
|
parentId=None, # Will be set based on strategy
|
||||||
|
label="ai_result",
|
||||||
|
typeGroup="text",
|
||||||
|
mimeType="text/plain",
|
||||||
|
data=aiResult,
|
||||||
|
metadata={
|
||||||
|
"aiResult": True,
|
||||||
|
"order": i,
|
||||||
|
"size": len(aiResult.encode('utf-8'))
|
||||||
|
}
|
||||||
|
)
|
||||||
|
aiResultParts.append(aiPart)
|
||||||
|
|
||||||
|
logger.debug(f"Created {len(aiResultParts)} AI result parts")
|
||||||
|
|
||||||
|
# Apply merging strategy
|
||||||
|
if mergeStrategy.mergeType == "concatenate":
|
||||||
|
mergedParts = self._mergeConcatenate(allParts, aiResultParts, mergeStrategy)
|
||||||
|
elif mergeStrategy.mergeType == "hierarchical":
|
||||||
|
mergedParts = self._mergeHierarchical(allParts, aiResultParts, mergeStrategy)
|
||||||
|
elif mergeStrategy.mergeType == "intelligent":
|
||||||
|
mergedParts = self._mergeIntelligent(allParts, aiResultParts, mergeStrategy)
|
||||||
|
else:
|
||||||
|
# Default to concatenate
|
||||||
|
mergedParts = self._mergeConcatenate(allParts, aiResultParts, mergeStrategy)
|
||||||
|
|
||||||
|
# Create final ContentExtracted
|
||||||
|
mergedContent = ContentExtracted(
|
||||||
|
id=f"merged_{uuid.uuid4()}",
|
||||||
|
parts=mergedParts
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.debug(f"=== MERGE COMPLETED ===")
|
||||||
|
logger.debug(f"Final merged parts: {len(mergedParts)}")
|
||||||
|
logger.debug(f"Merged content ID: {mergedContent.id}")
|
||||||
|
|
||||||
|
return mergedContent
|
||||||
|
|
||||||
|
def _mergeConcatenate(
|
||||||
|
self,
|
||||||
|
originalParts: List[ContentPart],
|
||||||
|
aiResultParts: List[ContentPart],
|
||||||
|
strategy: MergeStrategy
|
||||||
|
) -> List[ContentPart]:
|
||||||
|
"""Merge parts by simple concatenation."""
|
||||||
|
mergedParts = []
|
||||||
|
|
||||||
|
# Add original parts (filtered if needed)
|
||||||
|
for part in originalParts:
|
||||||
|
if strategy.preserveChunks or not part.metadata.get("chunk", False):
|
||||||
|
mergedParts.append(part)
|
||||||
|
|
||||||
|
# Add AI results
|
||||||
|
if aiResultParts:
|
||||||
|
# Group AI results by parentId if available
|
||||||
|
aiResultsByParent = {}
|
||||||
|
for aiPart in aiResultParts:
|
||||||
|
parentId = aiPart.parentId or "root"
|
||||||
|
if parentId not in aiResultsByParent:
|
||||||
|
aiResultsByParent[parentId] = []
|
||||||
|
aiResultsByParent[parentId].append(aiPart)
|
||||||
|
|
||||||
|
# Merge AI results for each parent
|
||||||
|
for parentId, aiParts in aiResultsByParent.items():
|
||||||
|
if len(aiParts) == 1:
|
||||||
|
mergedParts.append(aiParts[0])
|
||||||
|
else:
|
||||||
|
# Concatenate multiple AI results for same parent
|
||||||
|
combinedData = strategy.chunkSeparator.join([p.data for p in aiParts])
|
||||||
|
combinedPart = ContentPart(
|
||||||
|
id=f"merged_ai_{parentId}",
|
||||||
|
parentId=parentId if parentId != "root" else None,
|
||||||
|
label="merged_ai_result",
|
||||||
|
typeGroup="text",
|
||||||
|
mimeType="text/plain",
|
||||||
|
data=combinedData,
|
||||||
|
metadata={
|
||||||
|
"aiResult": True,
|
||||||
|
"merged": True,
|
||||||
|
"sourceCount": len(aiParts),
|
||||||
|
"size": len(combinedData.encode('utf-8'))
|
||||||
|
}
|
||||||
|
)
|
||||||
|
mergedParts.append(combinedPart)
|
||||||
|
|
||||||
|
return mergedParts
|
||||||
|
|
||||||
|
def _mergeHierarchical(
|
||||||
|
self,
|
||||||
|
originalParts: List[ContentPart],
|
||||||
|
aiResultParts: List[ContentPart],
|
||||||
|
strategy: MergeStrategy
|
||||||
|
) -> List[ContentPart]:
|
||||||
|
"""Merge parts hierarchically based on parentId relationships."""
|
||||||
|
# Group parts by parentId
|
||||||
|
partsByParent = {}
|
||||||
|
for part in originalParts:
|
||||||
|
parentId = part.parentId or "root"
|
||||||
|
if parentId not in partsByParent:
|
||||||
|
partsByParent[parentId] = []
|
||||||
|
partsByParent[parentId].append(part)
|
||||||
|
|
||||||
|
# Group AI results by parentId
|
||||||
|
aiResultsByParent = {}
|
||||||
|
for aiPart in aiResultParts:
|
||||||
|
parentId = aiPart.parentId or "root"
|
||||||
|
if parentId not in aiResultsByParent:
|
||||||
|
aiResultsByParent[parentId] = []
|
||||||
|
aiResultsByParent[parentId].append(aiPart)
|
||||||
|
|
||||||
|
mergedParts = []
|
||||||
|
|
||||||
|
# Process each parent group
|
||||||
|
for parentId in set(list(partsByParent.keys()) + list(aiResultsByParent.keys())):
|
||||||
|
originalGroup = partsByParent.get(parentId, [])
|
||||||
|
aiGroup = aiResultsByParent.get(parentId, [])
|
||||||
|
|
||||||
|
# Add original parts
|
||||||
|
mergedParts.extend(originalGroup)
|
||||||
|
|
||||||
|
# Add AI results for this parent
|
||||||
|
if aiGroup:
|
||||||
|
if len(aiGroup) == 1:
|
||||||
|
mergedParts.append(aiGroup[0])
|
||||||
|
else:
|
||||||
|
# Merge multiple AI results
|
||||||
|
combinedData = strategy.chunkSeparator.join([p.data for p in aiGroup])
|
||||||
|
combinedPart = ContentPart(
|
||||||
|
id=f"hierarchical_ai_{parentId}",
|
||||||
|
parentId=parentId if parentId != "root" else None,
|
||||||
|
label="hierarchical_ai_result",
|
||||||
|
typeGroup="text",
|
||||||
|
mimeType="text/plain",
|
||||||
|
data=combinedData,
|
||||||
|
metadata={
|
||||||
|
"aiResult": True,
|
||||||
|
"hierarchical": True,
|
||||||
|
"sourceCount": len(aiGroup),
|
||||||
|
"size": len(combinedData.encode('utf-8'))
|
||||||
|
}
|
||||||
|
)
|
||||||
|
mergedParts.append(combinedPart)
|
||||||
|
|
||||||
|
return mergedParts
|
||||||
|
|
||||||
|
def _mergeIntelligent(
|
||||||
|
self,
|
||||||
|
originalParts: List[ContentPart],
|
||||||
|
aiResultParts: List[ContentPart],
|
||||||
|
strategy: MergeStrategy
|
||||||
|
) -> List[ContentPart]:
|
||||||
|
"""Merge parts using intelligent strategies based on content type."""
|
||||||
|
mergedParts = []
|
||||||
|
|
||||||
|
# Group by typeGroup for intelligent merging
|
||||||
|
partsByType = {}
|
||||||
|
for part in originalParts:
|
||||||
|
typeGroup = part.typeGroup
|
||||||
|
if typeGroup not in partsByType:
|
||||||
|
partsByType[typeGroup] = []
|
||||||
|
partsByType[typeGroup].append(part)
|
||||||
|
|
||||||
|
# Process each type group
|
||||||
|
for typeGroup, parts in partsByType.items():
|
||||||
|
if typeGroup == "text":
|
||||||
|
mergedParts.extend(self._mergeTextIntelligent(parts, aiResultParts, strategy))
|
||||||
|
elif typeGroup == "table":
|
||||||
|
mergedParts.extend(self._mergeTableIntelligent(parts, aiResultParts, strategy))
|
||||||
|
elif typeGroup == "structure":
|
||||||
|
mergedParts.extend(self._mergeStructureIntelligent(parts, aiResultParts, strategy))
|
||||||
|
else:
|
||||||
|
# Default handling for other types
|
||||||
|
mergedParts.extend(parts)
|
||||||
|
|
||||||
|
# Add any remaining AI results that weren't merged
|
||||||
|
for aiPart in aiResultParts:
|
||||||
|
if not any(p.id == aiPart.id for p in mergedParts):
|
||||||
|
mergedParts.append(aiPart)
|
||||||
|
|
||||||
|
return mergedParts
|
||||||
|
|
||||||
|
def _mergeTextIntelligent(
|
||||||
|
self,
|
||||||
|
textParts: List[ContentPart],
|
||||||
|
aiResultParts: List[ContentPart],
|
||||||
|
strategy: MergeStrategy
|
||||||
|
) -> List[ContentPart]:
|
||||||
|
"""Intelligent merging for text content."""
|
||||||
|
# For now, use concatenate strategy
|
||||||
|
# This could be enhanced with semantic analysis, summarization, etc.
|
||||||
|
return self._mergeConcatenate(textParts, aiResultParts, strategy)
|
||||||
|
|
||||||
|
def _mergeTableIntelligent(
|
||||||
|
self,
|
||||||
|
tableParts: List[ContentPart],
|
||||||
|
aiResultParts: List[ContentPart],
|
||||||
|
strategy: MergeStrategy
|
||||||
|
) -> List[ContentPart]:
|
||||||
|
"""Intelligent merging for table content."""
|
||||||
|
# For now, use concatenate strategy
|
||||||
|
# This could be enhanced with table merging logic
|
||||||
|
return self._mergeConcatenate(tableParts, aiResultParts, strategy)
|
||||||
|
|
||||||
|
def _mergeStructureIntelligent(
|
||||||
|
self,
|
||||||
|
structureParts: List[ContentPart],
|
||||||
|
aiResultParts: List[ContentPart],
|
||||||
|
strategy: MergeStrategy
|
||||||
|
) -> List[ContentPart]:
|
||||||
|
"""Intelligent merging for structured content."""
|
||||||
|
# For now, use concatenate strategy
|
||||||
|
# This could be enhanced with structure-aware merging
|
||||||
|
return self._mergeConcatenate(structureParts, aiResultParts, strategy)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..utils import makeId
|
from ..subUtils import makeId
|
||||||
|
|
||||||
|
|
||||||
class TableMerger:
|
class TableMerger:
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from ..utils import makeId
|
from ..subUtils import makeId
|
||||||
|
|
||||||
|
|
||||||
class TextMerger:
|
class TextMerger:
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,61 @@
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
from modules.datamodels.datamodelExtraction import ExtractedContent, ContentPart
|
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
|
||||||
from .utils import makeId
|
from .subUtils import makeId
|
||||||
from .subRegistry import ExtractorRegistry, ChunkerRegistry
|
from .subRegistry import ExtractorRegistry, ChunkerRegistry
|
||||||
from .merging.text_merger import TextMerger
|
from .merging.text_merger import TextMerger
|
||||||
from .merging.table_merger import TableMerger
|
from .merging.table_merger import TableMerger
|
||||||
from .merging.default_merger import DefaultMerger
|
from .merging.default_merger import DefaultMerger
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: ChunkerRegistry, documentBytes: bytes, fileName: str, mimeType: str, options: Dict[str, Any]) -> ExtractedContent:
|
|
||||||
|
def _mergeParts(parts: List[ContentPart], mergeStrategy: Dict[str, Any]) -> List[ContentPart]:
|
||||||
|
"""Merge parts based on the provided strategy."""
|
||||||
|
if not parts or not mergeStrategy:
|
||||||
|
return parts
|
||||||
|
|
||||||
|
groupBy = mergeStrategy.get("groupBy", "typeGroup")
|
||||||
|
orderBy = mergeStrategy.get("orderBy", "id")
|
||||||
|
|
||||||
|
# Group parts by the specified field
|
||||||
|
groups = {}
|
||||||
|
for part in parts:
|
||||||
|
key = getattr(part, groupBy, "unknown")
|
||||||
|
if key not in groups:
|
||||||
|
groups[key] = []
|
||||||
|
groups[key].append(part)
|
||||||
|
|
||||||
|
# Merge each group
|
||||||
|
merged_parts = []
|
||||||
|
for group_key, group_parts in groups.items():
|
||||||
|
if len(group_parts) == 1:
|
||||||
|
merged_parts.extend(group_parts)
|
||||||
|
else:
|
||||||
|
# Sort by orderBy field if specified
|
||||||
|
if orderBy:
|
||||||
|
group_parts.sort(key=lambda p: getattr(p, orderBy, ""))
|
||||||
|
|
||||||
|
# Use appropriate merger based on type
|
||||||
|
type_group = group_parts[0].typeGroup if group_parts else "unknown"
|
||||||
|
|
||||||
|
if type_group == "text":
|
||||||
|
merger = TextMerger()
|
||||||
|
elif type_group == "table":
|
||||||
|
merger = TableMerger()
|
||||||
|
else:
|
||||||
|
merger = DefaultMerger()
|
||||||
|
|
||||||
|
# Merge the group
|
||||||
|
merged = merger.merge(group_parts, mergeStrategy)
|
||||||
|
merged_parts.extend(merged)
|
||||||
|
|
||||||
|
return merged_parts
|
||||||
|
|
||||||
|
|
||||||
|
def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: ChunkerRegistry, documentBytes: bytes, fileName: str, mimeType: str, options: Dict[str, Any]) -> ContentExtracted:
|
||||||
extractor = extractorRegistry.resolve(mimeType, fileName)
|
extractor = extractorRegistry.resolve(mimeType, fileName)
|
||||||
if extractor is None:
|
if extractor is None:
|
||||||
# fallback: single binary part
|
# fallback: single binary part
|
||||||
|
|
@ -21,14 +68,66 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker
|
||||||
data="",
|
data="",
|
||||||
metadata={"warning": "No extractor registered"}
|
metadata={"warning": "No extractor registered"}
|
||||||
)
|
)
|
||||||
return ExtractedContent(id=makeId(), parts=[part])
|
return ContentExtracted(id=makeId(), parts=[part])
|
||||||
|
|
||||||
parts = extractor.extract(documentBytes, {"fileName": fileName, "mimeType": mimeType, "options": options})
|
parts = extractor.extract(documentBytes, {"fileName": fileName, "mimeType": mimeType, "options": options})
|
||||||
# Optional merge step
|
|
||||||
|
# Apply chunking and size limiting
|
||||||
|
parts = poolAndLimit(parts, chunkerRegistry, options)
|
||||||
|
|
||||||
|
# Optional merge step - but preserve chunks
|
||||||
mergeStrategy = options.get("mergeStrategy", {})
|
mergeStrategy = options.get("mergeStrategy", {})
|
||||||
if mergeStrategy:
|
if mergeStrategy:
|
||||||
parts = _mergeParts(parts, mergeStrategy)
|
|
||||||
return ExtractedContent(id=makeId(), parts=parts)
|
# Don't merge chunks - they should stay separate for processing
|
||||||
|
non_chunk_parts = [p for p in parts if not p.metadata.get("chunk", False)]
|
||||||
|
chunk_parts = [p for p in parts if p.metadata.get("chunk", False)]
|
||||||
|
|
||||||
|
logger.debug(f"runExtraction: Preserving {len(chunk_parts)} chunks from merging")
|
||||||
|
|
||||||
|
if non_chunk_parts:
|
||||||
|
non_chunk_parts = _mergeParts(non_chunk_parts, mergeStrategy)
|
||||||
|
|
||||||
|
# Combine non-chunk parts with chunk parts (chunks stay separate)
|
||||||
|
parts = non_chunk_parts + chunk_parts
|
||||||
|
|
||||||
|
logger.debug(f"runExtraction: Final parts after merging: {len(parts)} (chunks: {len(chunk_parts)})")
|
||||||
|
# DEBUG: dump parts and chunks to files TODO TO REMOVE
|
||||||
|
try:
|
||||||
|
base_dir = "./test-chat/ai"
|
||||||
|
os.makedirs(base_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# Generate timestamp for consistent naming
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]
|
||||||
|
|
||||||
|
# Write a summary file
|
||||||
|
summary_lines: List[str] = [f"fileName: {fileName}", f"mimeType: {mimeType}", f"totalParts: {len(parts)}"]
|
||||||
|
text_index = 0
|
||||||
|
for idx, part in enumerate(parts):
|
||||||
|
is_texty = part.typeGroup in ("text", "table", "structure")
|
||||||
|
size = int(part.metadata.get("size", 0) or 0)
|
||||||
|
is_chunk = bool(part.metadata.get("chunk", False))
|
||||||
|
summary_lines.append(
|
||||||
|
f"part[{idx}]: typeGroup={part.typeGroup}, label={part.label}, size={size}, chunk={is_chunk}"
|
||||||
|
)
|
||||||
|
if is_texty and getattr(part, "data", None):
|
||||||
|
text_index += 1
|
||||||
|
fname = f"{ts}_extract_{fileName}_part_{idx:03d}_{'chunk' if is_chunk else 'full'}_{text_index:03d}.txt"
|
||||||
|
fpath = os.path.join(base_dir, fname)
|
||||||
|
with open(fpath, "w", encoding="utf-8") as f:
|
||||||
|
f.write(f"# typeGroup: {part.typeGroup}\n# label: {part.label}\n# chunk: {is_chunk}\n# size: {size}\n\n")
|
||||||
|
f.write(str(part.data))
|
||||||
|
|
||||||
|
# Write summary file
|
||||||
|
summary_fname = f"{ts}_extract_{fileName}_summary.txt"
|
||||||
|
summary_fpath = os.path.join(base_dir, summary_fname)
|
||||||
|
with open(summary_fpath, "w", encoding="utf-8") as f:
|
||||||
|
f.write("\n".join(summary_lines))
|
||||||
|
except Exception as _e:
|
||||||
|
logger.debug(f"Debug dump skipped: {_e}")
|
||||||
|
|
||||||
|
return ContentExtracted(id=makeId(), parts=parts)
|
||||||
|
|
||||||
|
|
||||||
def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, options: Dict[str, Any]) -> List[ContentPart]:
|
def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, options: Dict[str, Any]) -> List[ContentPart]:
|
||||||
|
|
@ -57,28 +156,54 @@ def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, opt
|
||||||
|
|
||||||
# If we have remaining parts and chunking is allowed, try chunking
|
# If we have remaining parts and chunking is allowed, try chunking
|
||||||
if remaining and chunkAllowed:
|
if remaining and chunkAllowed:
|
||||||
|
logger.debug(f"=== CHUNKING ACTIVATED ===")
|
||||||
|
logger.debug(f"Remaining parts to chunk: {len(remaining)}")
|
||||||
|
logger.debug(f"Max size limit: {maxSize} bytes")
|
||||||
|
logger.debug(f"Current size used: {current} bytes")
|
||||||
|
|
||||||
for p in remaining:
|
for p in remaining:
|
||||||
if p.typeGroup in ("text", "table", "structure"):
|
if p.typeGroup in ("text", "table", "structure", "image"):
|
||||||
|
logger.debug(f"Chunking {p.typeGroup} part: {len(p.data)} chars")
|
||||||
chunks = chunkerRegistry.resolve(p.typeGroup).chunk(p, options)
|
chunks = chunkerRegistry.resolve(p.typeGroup).chunk(p, options)
|
||||||
|
logger.debug(f"Created {len(chunks)} chunks")
|
||||||
|
|
||||||
|
chunks_added = 0
|
||||||
for ch in chunks:
|
for ch in chunks:
|
||||||
chSize = int(ch.get("size", 0) or 0)
|
chSize = int(ch.get("size", 0) or 0)
|
||||||
if current + chSize <= maxSize:
|
# Add all chunks - don't limit by maxSize since they'll be processed separately
|
||||||
kept.append(ContentPart(
|
kept.append(ContentPart(
|
||||||
id=makeId(),
|
id=makeId(),
|
||||||
parentId=p.id,
|
parentId=p.id,
|
||||||
label=f"chunk_{ch.get('order', 0)}",
|
label=f"chunk_{ch.get('order', 0)}",
|
||||||
typeGroup=p.typeGroup,
|
typeGroup=p.typeGroup,
|
||||||
mimeType=p.mimeType,
|
mimeType=p.mimeType,
|
||||||
data=ch.get("data", ""),
|
data=ch.get("data", ""),
|
||||||
metadata={"size": chSize, "chunk": True}
|
metadata={
|
||||||
))
|
"size": chSize,
|
||||||
current += chSize
|
"chunk": True,
|
||||||
else:
|
**ch.get("metadata", {})
|
||||||
break
|
}
|
||||||
|
))
|
||||||
|
chunks_added += 1
|
||||||
|
logger.debug(f"Added chunk {ch.get('order', 0)}: {chSize} bytes")
|
||||||
|
|
||||||
|
logger.debug(f"Added {chunks_added} chunks from {p.typeGroup} part")
|
||||||
|
|
||||||
# Apply merging strategy if provided
|
# Apply merging strategy if provided, but preserve chunks
|
||||||
if mergeStrategy:
|
if mergeStrategy:
|
||||||
kept = _applyMerging(kept, mergeStrategy)
|
# Don't merge chunks - they should stay separate for processing
|
||||||
|
non_chunk_parts = [p for p in kept if not p.metadata.get("chunk", False)]
|
||||||
|
chunk_parts = [p for p in kept if p.metadata.get("chunk", False)]
|
||||||
|
|
||||||
|
logger.debug(f"Preserving {len(chunk_parts)} chunks from merging")
|
||||||
|
|
||||||
|
if non_chunk_parts:
|
||||||
|
non_chunk_parts = _applyMerging(non_chunk_parts, mergeStrategy)
|
||||||
|
|
||||||
|
# Combine non-chunk parts with chunk parts (chunks stay separate)
|
||||||
|
kept = non_chunk_parts + chunk_parts
|
||||||
|
|
||||||
|
logger.debug(f"Final parts after merging: {len(kept)} (chunks: {len(chunk_parts)})")
|
||||||
|
|
||||||
# Re-check size after merging
|
# Re-check size after merging
|
||||||
totalSize = sum(int(p.metadata.get("size", 0) or 0) for p in kept)
|
totalSize = sum(int(p.metadata.get("size", 0) or 0) for p in kept)
|
||||||
|
|
@ -151,7 +276,7 @@ def _applySizeLimit(parts: List[ContentPart], maxSize: int) -> List[ContentPart]
|
||||||
return kept
|
return kept
|
||||||
|
|
||||||
|
|
||||||
def applyAiIfRequested(extracted: ExtractedContent, options: Dict[str, Any]) -> ExtractedContent:
|
def applyAiIfRequested(extracted: ContentExtracted, options: Dict[str, Any]) -> ContentExtracted:
|
||||||
"""
|
"""
|
||||||
Apply AI processing if requested in options.
|
Apply AI processing if requested in options.
|
||||||
This is a placeholder for actual AI integration.
|
This is a placeholder for actual AI integration.
|
||||||
|
|
|
||||||
|
|
@ -59,8 +59,11 @@ class ExtractorRegistry:
|
||||||
self.register("xlsm", XlsxExtractor())
|
self.register("xlsm", XlsxExtractor())
|
||||||
# fallback
|
# fallback
|
||||||
self.setFallback(BinaryExtractor())
|
self.setFallback(BinaryExtractor())
|
||||||
except Exception:
|
print(f"✅ ExtractorRegistry: Successfully registered {len(self._map)} extractors")
|
||||||
pass
|
except Exception as e:
|
||||||
|
print(f"❌ ExtractorRegistry: Failed to register extractors: {str(e)}")
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
def register(self, key: str, extractor: Extractor):
|
def register(self, key: str, extractor: Extractor):
|
||||||
self._map[key] = extractor
|
self._map[key] = extractor
|
||||||
|
|
@ -88,11 +91,16 @@ class ChunkerRegistry:
|
||||||
from .chunking.text_chunker import TextChunker
|
from .chunking.text_chunker import TextChunker
|
||||||
from .chunking.table_chunker import TableChunker
|
from .chunking.table_chunker import TableChunker
|
||||||
from .chunking.structure_chunker import StructureChunker
|
from .chunking.structure_chunker import StructureChunker
|
||||||
|
# Skip ImageChunker for now to avoid PIL import hang
|
||||||
|
# from .chunking.image_chunker import ImageChunker
|
||||||
self.register("text", TextChunker())
|
self.register("text", TextChunker())
|
||||||
self.register("table", TableChunker())
|
self.register("table", TableChunker())
|
||||||
self.register("structure", StructureChunker())
|
self.register("structure", StructureChunker())
|
||||||
except Exception:
|
# self.register("image", ImageChunker())
|
||||||
pass
|
except Exception as e:
|
||||||
|
print(f"❌ ChunkerRegistry: Failed to register chunkers: {str(e)}")
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
def register(self, typeGroup: str, chunker: Chunker):
|
def register(self, typeGroup: str, chunker: Chunker):
|
||||||
self._map[typeGroup] = chunker
|
self._map[typeGroup] = chunker
|
||||||
|
|
|
||||||
|
|
@ -3,5 +3,3 @@ import uuid
|
||||||
|
|
||||||
def makeId() -> str:
|
def makeId() -> str:
|
||||||
return str(uuid.uuid4())
|
return str(uuid.uuid4())
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -105,12 +105,49 @@ class GenerationService:
|
||||||
|
|
||||||
logger.info(f"Document {document_name} has content: {len(content)} characters")
|
logger.info(f"Document {document_name} has content: {len(content)} characters")
|
||||||
|
|
||||||
|
# Normalize file extension based on mime type if missing or incorrect
|
||||||
|
try:
|
||||||
|
mime_to_ext = {
|
||||||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
|
||||||
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
|
||||||
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
|
||||||
|
"application/pdf": ".pdf",
|
||||||
|
"text/html": ".html",
|
||||||
|
"text/markdown": ".md",
|
||||||
|
"text/plain": ".txt",
|
||||||
|
"application/json": ".json",
|
||||||
|
}
|
||||||
|
expected_ext = mime_to_ext.get(mime_type)
|
||||||
|
if expected_ext:
|
||||||
|
if not document_name.lower().endswith(expected_ext):
|
||||||
|
# Append/replace extension to match mime type
|
||||||
|
if "." in document_name:
|
||||||
|
document_name = document_name.rsplit(".", 1)[0] + expected_ext
|
||||||
|
else:
|
||||||
|
document_name = document_name + expected_ext
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text
|
||||||
|
base64encoded = False
|
||||||
|
try:
|
||||||
|
binary_mime_types = {
|
||||||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||||
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||||
|
"application/pdf",
|
||||||
|
}
|
||||||
|
if isinstance(document_data, str) and mime_type in binary_mime_types:
|
||||||
|
base64encoded = True
|
||||||
|
except Exception:
|
||||||
|
base64encoded = False
|
||||||
|
|
||||||
# Create document with file in one step using interfaces directly
|
# Create document with file in one step using interfaces directly
|
||||||
document = self._createDocument(
|
document = self._createDocument(
|
||||||
fileName=document_name,
|
fileName=document_name,
|
||||||
mimeType=mime_type,
|
mimeType=mime_type,
|
||||||
content=content,
|
content=content,
|
||||||
base64encoded=False,
|
base64encoded=base64encoded,
|
||||||
messageId=message_id
|
messageId=message_id
|
||||||
)
|
)
|
||||||
if document:
|
if document:
|
||||||
|
|
@ -257,4 +294,109 @@ class GenerationService:
|
||||||
'totalActions': 0,
|
'totalActions': 0,
|
||||||
'workflowStatus': 'unknown',
|
'workflowStatus': 'unknown',
|
||||||
'workflowId': 'unknown'
|
'workflowId': 'unknown'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async def renderReport(self, extracted_content: str, output_format: str, title: str) -> tuple[str, str]:
|
||||||
|
"""
|
||||||
|
Render extracted content to the specified output format.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
extracted_content: Content extracted by AI using format-specific prompt
|
||||||
|
output_format: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
|
||||||
|
title: Report title
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tuple: (rendered_content, mime_type)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# DEBUG: dump renderer input to diagnose JSON+HTML mixtures TODO REMOVE
|
||||||
|
try:
|
||||||
|
import os
|
||||||
|
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||||
|
debug_root = "./test-chat/ai"
|
||||||
|
debug_dir = os.path.join(debug_root, f"render_input_{ts}")
|
||||||
|
os.makedirs(debug_dir, exist_ok=True)
|
||||||
|
with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f:
|
||||||
|
f.write(f"title: {title}\nformat: {output_format}\nlength: {len(extracted_content or '')}\nstarts_with_brace: {str(extracted_content.strip().startswith('{') if extracted_content else False)}\n")
|
||||||
|
with open(os.path.join(debug_dir, "extracted_content.txt"), "w", encoding="utf-8") as f:
|
||||||
|
f.write(extracted_content or "")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Get the appropriate renderer for the format
|
||||||
|
renderer = self._getFormatRenderer(output_format)
|
||||||
|
if not renderer:
|
||||||
|
raise ValueError(f"Unsupported output format: {output_format}")
|
||||||
|
|
||||||
|
# Render the content
|
||||||
|
rendered_content, mime_type = await renderer.render(extracted_content, title)
|
||||||
|
# DEBUG: dump rendered output
|
||||||
|
try:
|
||||||
|
import os
|
||||||
|
with open(os.path.join(debug_dir, "rendered_output.txt"), "w", encoding="utf-8") as f:
|
||||||
|
f.write(rendered_content or "")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
logger.info(f"Successfully rendered report to {output_format} format: {len(rendered_content)} characters")
|
||||||
|
return rendered_content, mime_type
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error rendering report to {output_format}: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def getExtractionPrompt(self, output_format: str, user_prompt: str, title: str) -> str:
|
||||||
|
"""
|
||||||
|
Get the format-specific extraction prompt for AI content extraction.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
output_format: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
|
||||||
|
user_prompt: User's original prompt for report generation
|
||||||
|
title: Report title
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: Format-specific prompt for AI extraction
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Get the appropriate renderer for the format
|
||||||
|
renderer = self._getFormatRenderer(output_format)
|
||||||
|
if not renderer:
|
||||||
|
raise ValueError(f"Unsupported output format: {output_format}")
|
||||||
|
|
||||||
|
# Build centralized prompt with generic rules + format-specific guidelines
|
||||||
|
from .prompt_builder import buildExtractionPrompt
|
||||||
|
extraction_prompt = buildExtractionPrompt(
|
||||||
|
output_format=output_format,
|
||||||
|
renderer=renderer,
|
||||||
|
user_prompt=user_prompt,
|
||||||
|
title=title
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"Generated {output_format}-specific extraction prompt: {len(extraction_prompt)} characters")
|
||||||
|
return extraction_prompt
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting extraction prompt for {output_format}: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _getFormatRenderer(self, output_format: str):
|
||||||
|
"""Get the appropriate renderer for the specified format using auto-discovery."""
|
||||||
|
try:
|
||||||
|
from .renderers.registry import get_renderer
|
||||||
|
renderer = get_renderer(output_format)
|
||||||
|
|
||||||
|
if renderer:
|
||||||
|
return renderer
|
||||||
|
|
||||||
|
# Fallback to text renderer if no specific renderer found
|
||||||
|
logger.warning(f"No renderer found for format {output_format}, falling back to text")
|
||||||
|
fallback_renderer = get_renderer('text')
|
||||||
|
if fallback_renderer:
|
||||||
|
return fallback_renderer
|
||||||
|
|
||||||
|
logger.error("Even text renderer fallback failed")
|
||||||
|
return None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting renderer for {output_format}: {str(e)}")
|
||||||
|
return None
|
||||||
72
modules/services/serviceGeneration/prompt_builder.py
Normal file
72
modules/services/serviceGeneration/prompt_builder.py
Normal file
|
|
@ -0,0 +1,72 @@
|
||||||
|
"""
|
||||||
|
Centralized prompt builder for document generation across formats.
|
||||||
|
|
||||||
|
Builds a robust prompt that:
|
||||||
|
- Accepts any user intent (no fixed structure assumptions)
|
||||||
|
- Injects format-specific guidelines from the selected renderer
|
||||||
|
- Adds a common policy section to always use real data from source docs
|
||||||
|
- Requires the AI to output a filename header that we can parse and use
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Protocol
|
||||||
|
|
||||||
|
|
||||||
|
class _RendererLike(Protocol):
|
||||||
|
def getExtractionPrompt(self, user_prompt: str, title: str) -> str: # returns only format-specific guidelines
|
||||||
|
...
|
||||||
|
|
||||||
|
|
||||||
|
def buildExtractionPrompt(
|
||||||
|
output_format: str,
|
||||||
|
renderer: _RendererLike,
|
||||||
|
user_prompt: str,
|
||||||
|
title: str
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Build the final extraction prompt by combining:
|
||||||
|
- The raw user prompt (verbatim)
|
||||||
|
- Generic cross-format instructions (filename header + real-data policy)
|
||||||
|
- Format-specific guidelines snippet provided by the renderer
|
||||||
|
|
||||||
|
The AI must place a single filename header at the very top:
|
||||||
|
FILENAME: <safe-file-name-with-extension>
|
||||||
|
followed by a blank line and then ONLY the document content according to the target format.
|
||||||
|
"""
|
||||||
|
|
||||||
|
format_guidelines = renderer.getExtractionPrompt(user_prompt, title)
|
||||||
|
|
||||||
|
# Generic block appears once for every format
|
||||||
|
generic_intro = f"""
|
||||||
|
{user_prompt}
|
||||||
|
|
||||||
|
You are generating a document in {output_format.upper()} format for the title: "{title}".
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- The user's intent fully defines the structure. Do not assume a fixed template or headings.
|
||||||
|
- Use only factual information extracted from the supplied source documents.
|
||||||
|
- Do not invent, hallucinate, or include placeholders (e.g., "lorem ipsum", "TBD").
|
||||||
|
- The output must strictly follow the target format and be ready for saving without extra wrapping.
|
||||||
|
- At the VERY TOP output exactly one line with the filename header:
|
||||||
|
FILENAME: <safe-file-name-with-extension>
|
||||||
|
- The base name should be short, descriptive, and kebab-case or snake-case without spaces.
|
||||||
|
- Include the correct extension for the requested format (e.g., .html, .pdf, .docx, .md, .txt, .json, .csv, .xlsx).
|
||||||
|
- Avoid special characters beyond [a-zA-Z0-9-_].
|
||||||
|
- After this header, insert a single blank line and then provide ONLY the document content.
|
||||||
|
|
||||||
|
Common policy:
|
||||||
|
- Use the actual data from the source documents to create the content.
|
||||||
|
- Do not generate placeholder text or templates.
|
||||||
|
- Extract and use the real data provided in the source documents to create meaningful content.
|
||||||
|
""".strip()
|
||||||
|
|
||||||
|
# Final assembly
|
||||||
|
final_prompt = (
|
||||||
|
generic_intro
|
||||||
|
+ "\n\nFORMAT-SPECIFIC GUIDELINES:\n"
|
||||||
|
+ format_guidelines.strip()
|
||||||
|
+ "\n\nGenerate the complete document content now based on the source documents below:"
|
||||||
|
)
|
||||||
|
|
||||||
|
return final_prompt
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,86 @@
|
||||||
|
"""
|
||||||
|
Base renderer class for all format renderers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class BaseRenderer(ABC):
|
||||||
|
"""Base class for all format renderers."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.logger = logger
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_supported_formats(cls) -> List[str]:
|
||||||
|
"""
|
||||||
|
Return list of supported format names for this renderer.
|
||||||
|
Override this method in subclasses to specify supported formats.
|
||||||
|
"""
|
||||||
|
return []
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_format_aliases(cls) -> List[str]:
|
||||||
|
"""
|
||||||
|
Return list of format aliases for this renderer.
|
||||||
|
Override this method in subclasses to specify format aliases.
|
||||||
|
"""
|
||||||
|
return []
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_priority(cls) -> int:
|
||||||
|
"""
|
||||||
|
Return priority for this renderer (higher number = higher priority).
|
||||||
|
Used when multiple renderers support the same format.
|
||||||
|
"""
|
||||||
|
return 0
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
|
||||||
|
"""
|
||||||
|
Get the format-specific extraction prompt for AI content extraction.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
user_prompt: User's original prompt for report generation
|
||||||
|
title: Report title
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: Format-specific prompt for AI extraction
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
|
||||||
|
"""
|
||||||
|
Render extracted content to the target format.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
extracted_content: Raw content extracted by AI using format-specific prompt
|
||||||
|
title: Report title
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tuple: (rendered_content, mime_type)
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _extract_sections(self, report_data: Dict[str, Any]) -> list:
|
||||||
|
"""Extract sections from report data."""
|
||||||
|
return report_data.get('sections', [])
|
||||||
|
|
||||||
|
def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Extract metadata from report data."""
|
||||||
|
return report_data.get('metadata', {})
|
||||||
|
|
||||||
|
def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
|
||||||
|
"""Get title from report data or use fallback."""
|
||||||
|
return report_data.get('title', fallback_title)
|
||||||
|
|
||||||
|
def _format_timestamp(self, timestamp: str = None) -> str:
|
||||||
|
"""Format timestamp for display."""
|
||||||
|
if timestamp:
|
||||||
|
return timestamp
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
|
||||||
64
modules/services/serviceGeneration/renderers/csv_renderer.py
Normal file
64
modules/services/serviceGeneration/renderers/csv_renderer.py
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
"""
|
||||||
|
CSV renderer for report generation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .base_renderer import BaseRenderer
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
|
|
||||||
|
class CsvRenderer(BaseRenderer):
|
||||||
|
"""Renders content to CSV format with format-specific extraction."""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_supported_formats(cls) -> List[str]:
|
||||||
|
"""Return supported CSV formats."""
|
||||||
|
return ['csv']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_format_aliases(cls) -> List[str]:
|
||||||
|
"""Return format aliases."""
|
||||||
|
return ['spreadsheet', 'table']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_priority(cls) -> int:
|
||||||
|
"""Return priority for CSV renderer."""
|
||||||
|
return 70
|
||||||
|
|
||||||
|
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
|
||||||
|
"""Return only CSV-specific guidelines; global prompt is built centrally."""
|
||||||
|
return (
|
||||||
|
"CSV FORMAT GUIDELINES:\n"
|
||||||
|
"- Emit ONLY CSV text without fences or commentary.\n"
|
||||||
|
"- Include a single header row with clear column names.\n"
|
||||||
|
"- Quote fields containing commas, quotes, or newlines; escape quotes by doubling them.\n"
|
||||||
|
"- Use rows to represent items/records derived from sources.\n"
|
||||||
|
"- Keep cells concise; include units in headers when useful.\n"
|
||||||
|
"OUTPUT: Return ONLY valid CSV content that can be imported."
|
||||||
|
)
|
||||||
|
|
||||||
|
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
|
||||||
|
"""Render extracted content to CSV format."""
|
||||||
|
try:
|
||||||
|
# The extracted content should already be CSV from the AI
|
||||||
|
# Just clean it up
|
||||||
|
csv_content = self._clean_csv_content(extracted_content, title)
|
||||||
|
|
||||||
|
return csv_content, "text/csv"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error rendering CSV: {str(e)}")
|
||||||
|
# Return minimal CSV fallback
|
||||||
|
return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv"
|
||||||
|
|
||||||
|
def _clean_csv_content(self, content: str, title: str) -> str:
|
||||||
|
"""Clean and validate CSV content from AI."""
|
||||||
|
content = content.strip()
|
||||||
|
|
||||||
|
# Remove markdown code blocks if present
|
||||||
|
if content.startswith("```") and content.endswith("```"):
|
||||||
|
lines = content.split('\n')
|
||||||
|
if len(lines) > 2:
|
||||||
|
content = '\n'.join(lines[1:-1]).strip()
|
||||||
|
|
||||||
|
return content
|
||||||
249
modules/services/serviceGeneration/renderers/docx_renderer.py
Normal file
249
modules/services/serviceGeneration/renderers/docx_renderer.py
Normal file
|
|
@ -0,0 +1,249 @@
|
||||||
|
"""
|
||||||
|
DOCX renderer for report generation using python-docx.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .base_renderer import BaseRenderer
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
import io
|
||||||
|
import base64
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
|
||||||
|
try:
|
||||||
|
from docx import Document
|
||||||
|
from docx.shared import Inches, Pt
|
||||||
|
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||||||
|
from docx.enum.table import WD_TABLE_ALIGNMENT
|
||||||
|
from docx.oxml.shared import OxmlElement, qn
|
||||||
|
from docx.oxml.ns import nsdecls
|
||||||
|
from docx.oxml import parse_xml
|
||||||
|
DOCX_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
DOCX_AVAILABLE = False
|
||||||
|
|
||||||
|
class DocxRenderer(BaseRenderer):
|
||||||
|
"""Renders content to DOCX format using python-docx."""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_supported_formats(cls) -> List[str]:
|
||||||
|
"""Return supported DOCX formats."""
|
||||||
|
return ['docx', 'doc']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_format_aliases(cls) -> List[str]:
|
||||||
|
"""Return format aliases."""
|
||||||
|
return ['word', 'document']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_priority(cls) -> int:
|
||||||
|
"""Return priority for DOCX renderer."""
|
||||||
|
return 115
|
||||||
|
|
||||||
|
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
|
||||||
|
"""Return only DOCX-specific guidelines; global prompt is built centrally."""
|
||||||
|
return (
|
||||||
|
"DOCX FORMAT GUIDELINES:\n"
|
||||||
|
"- Provide plain text content suitable for Word generation (no markdown/HTML).\n"
|
||||||
|
"- Use clear section hierarchy; bullet and numbered lists where needed.\n"
|
||||||
|
"- Include tables as simple pipe-delimited lines if tabular data is needed.\n"
|
||||||
|
"OUTPUT: Return ONLY the structured plain text to be converted into DOCX."
|
||||||
|
)
|
||||||
|
|
||||||
|
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
|
||||||
|
"""Render extracted content to DOCX format."""
|
||||||
|
try:
|
||||||
|
if not DOCX_AVAILABLE:
|
||||||
|
# Fallback to HTML if python-docx not available
|
||||||
|
from .html_renderer import HtmlRenderer
|
||||||
|
html_renderer = HtmlRenderer()
|
||||||
|
html_content, _ = await html_renderer.render(extracted_content, title)
|
||||||
|
return html_content, "text/html"
|
||||||
|
|
||||||
|
# Generate DOCX using python-docx
|
||||||
|
docx_content = self._generate_docx(extracted_content, title)
|
||||||
|
|
||||||
|
return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error rendering DOCX: {str(e)}")
|
||||||
|
# Return minimal fallback
|
||||||
|
return f"DOCX Generation Error: {str(e)}", "text/plain"
|
||||||
|
|
||||||
|
def _generate_docx(self, content: str, title: str) -> str:
|
||||||
|
"""Generate DOCX content using python-docx."""
|
||||||
|
try:
|
||||||
|
# Create new document
|
||||||
|
doc = Document()
|
||||||
|
|
||||||
|
# Set up document styles
|
||||||
|
self._setup_document_styles(doc)
|
||||||
|
|
||||||
|
# Add title
|
||||||
|
title_para = doc.add_heading(title, 0)
|
||||||
|
title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||||
|
|
||||||
|
# Add generation date
|
||||||
|
date_para = doc.add_paragraph(f"Generated: {self._format_timestamp()}")
|
||||||
|
date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||||
|
|
||||||
|
# Add page break
|
||||||
|
doc.add_page_break()
|
||||||
|
|
||||||
|
# Process content
|
||||||
|
lines = content.split('\n')
|
||||||
|
current_section = []
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Check for ALL CAPS headings (major headings)
|
||||||
|
if line.isupper() and len(line) > 3 and not line.startswith('-') and not line.startswith('*'):
|
||||||
|
if current_section:
|
||||||
|
self._process_section(doc, current_section)
|
||||||
|
current_section = []
|
||||||
|
doc.add_heading(line, level=1)
|
||||||
|
# Check for Title Case headings (subheadings)
|
||||||
|
elif line.istitle() and len(line) > 5 and not line.startswith('-') and not line.startswith('*') and not line.startswith(('1.', '2.', '3.', '4.', '5.')):
|
||||||
|
if current_section:
|
||||||
|
self._process_section(doc, current_section)
|
||||||
|
current_section = []
|
||||||
|
doc.add_heading(line, level=2)
|
||||||
|
# Check for markdown headings (fallback)
|
||||||
|
elif line.startswith('# '):
|
||||||
|
# H1 heading
|
||||||
|
if current_section:
|
||||||
|
self._process_section(doc, current_section)
|
||||||
|
current_section = []
|
||||||
|
doc.add_heading(line[2:], level=1)
|
||||||
|
elif line.startswith('## '):
|
||||||
|
# H2 heading
|
||||||
|
if current_section:
|
||||||
|
self._process_section(doc, current_section)
|
||||||
|
current_section = []
|
||||||
|
doc.add_heading(line[3:], level=2)
|
||||||
|
elif line.startswith('### '):
|
||||||
|
# H3 heading
|
||||||
|
if current_section:
|
||||||
|
self._process_section(doc, current_section)
|
||||||
|
current_section = []
|
||||||
|
doc.add_heading(line[4:], level=3)
|
||||||
|
else:
|
||||||
|
current_section.append(line)
|
||||||
|
|
||||||
|
# Process remaining content
|
||||||
|
if current_section:
|
||||||
|
self._process_section(doc, current_section)
|
||||||
|
|
||||||
|
# Save to buffer
|
||||||
|
buffer = io.BytesIO()
|
||||||
|
doc.save(buffer)
|
||||||
|
buffer.seek(0)
|
||||||
|
|
||||||
|
# Convert to base64
|
||||||
|
docx_bytes = buffer.getvalue()
|
||||||
|
docx_base64 = base64.b64encode(docx_bytes).decode('utf-8')
|
||||||
|
|
||||||
|
return docx_base64
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error generating DOCX: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _setup_document_styles(self, doc):
|
||||||
|
"""Set up document styles."""
|
||||||
|
try:
|
||||||
|
# Set default font
|
||||||
|
style = doc.styles['Normal']
|
||||||
|
font = style.font
|
||||||
|
font.name = 'Calibri'
|
||||||
|
font.size = Pt(11)
|
||||||
|
|
||||||
|
# Set heading styles
|
||||||
|
for i in range(1, 4):
|
||||||
|
heading_style = doc.styles[f'Heading {i}']
|
||||||
|
heading_font = heading_style.font
|
||||||
|
heading_font.name = 'Calibri'
|
||||||
|
heading_font.size = Pt(16 - i * 2)
|
||||||
|
heading_font.bold = True
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not set up document styles: {str(e)}")
|
||||||
|
|
||||||
|
def _process_section(self, doc, lines: list):
|
||||||
|
"""Process a section of content into DOCX elements."""
|
||||||
|
for line in lines:
|
||||||
|
if not line.strip():
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Check for tables (lines with |)
|
||||||
|
if '|' in line and not line.startswith('|'):
|
||||||
|
# This might be part of a table, process as table
|
||||||
|
table_data = self._extract_table_data(lines)
|
||||||
|
if table_data:
|
||||||
|
self._add_table(doc, table_data)
|
||||||
|
return
|
||||||
|
|
||||||
|
# Check for lists
|
||||||
|
if line.startswith('- ') or line.startswith('* '):
|
||||||
|
# This is a list item
|
||||||
|
doc.add_paragraph(line[2:], style='List Bullet')
|
||||||
|
elif line.startswith(('1. ', '2. ', '3. ', '4. ', '5. ')):
|
||||||
|
# This is a numbered list item
|
||||||
|
doc.add_paragraph(line[3:], style='List Number')
|
||||||
|
else:
|
||||||
|
# Regular paragraph
|
||||||
|
doc.add_paragraph(line)
|
||||||
|
|
||||||
|
def _extract_table_data(self, lines: list) -> list:
|
||||||
|
"""Extract table data from lines."""
|
||||||
|
table_data = []
|
||||||
|
in_table = False
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
if '|' in line:
|
||||||
|
if not in_table:
|
||||||
|
in_table = True
|
||||||
|
# Split by | and clean up
|
||||||
|
cells = [cell.strip() for cell in line.split('|') if cell.strip()]
|
||||||
|
if cells:
|
||||||
|
table_data.append(cells)
|
||||||
|
elif in_table and not line.strip():
|
||||||
|
# Empty line, might be end of table
|
||||||
|
break
|
||||||
|
|
||||||
|
return table_data if len(table_data) > 1 else []
|
||||||
|
|
||||||
|
def _add_table(self, doc, table_data: list):
|
||||||
|
"""Add a table to the document."""
|
||||||
|
try:
|
||||||
|
if not table_data:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Create table
|
||||||
|
table = doc.add_table(rows=len(table_data), cols=len(table_data[0]))
|
||||||
|
table.alignment = WD_TABLE_ALIGNMENT.CENTER
|
||||||
|
|
||||||
|
# Add data to table
|
||||||
|
for row_idx, row_data in enumerate(table_data):
|
||||||
|
for col_idx, cell_data in enumerate(row_data):
|
||||||
|
if col_idx < len(table.rows[row_idx].cells):
|
||||||
|
table.rows[row_idx].cells[col_idx].text = cell_data
|
||||||
|
|
||||||
|
# Style the table
|
||||||
|
self._style_table(table)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not add table: {str(e)}")
|
||||||
|
|
||||||
|
def _style_table(self, table):
|
||||||
|
"""Apply styling to the table."""
|
||||||
|
try:
|
||||||
|
# Style header row
|
||||||
|
if len(table.rows) > 0:
|
||||||
|
header_cells = table.rows[0].cells
|
||||||
|
for cell in header_cells:
|
||||||
|
for paragraph in cell.paragraphs:
|
||||||
|
for run in paragraph.runs:
|
||||||
|
run.bold = True
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not style table: {str(e)}")
|
||||||
210
modules/services/serviceGeneration/renderers/excel_renderer.py
Normal file
210
modules/services/serviceGeneration/renderers/excel_renderer.py
Normal file
|
|
@ -0,0 +1,210 @@
|
||||||
|
"""
|
||||||
|
Excel renderer for report generation using openpyxl.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .base_renderer import BaseRenderer
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
import io
|
||||||
|
import base64
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
|
||||||
|
try:
|
||||||
|
from openpyxl import Workbook
|
||||||
|
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
||||||
|
from openpyxl.utils import get_column_letter
|
||||||
|
from openpyxl.worksheet.table import Table, TableStyleInfo
|
||||||
|
OPENPYXL_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
OPENPYXL_AVAILABLE = False
|
||||||
|
|
||||||
|
class ExcelRenderer(BaseRenderer):
|
||||||
|
"""Renders content to Excel format using openpyxl."""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_supported_formats(cls) -> List[str]:
|
||||||
|
"""Return supported Excel formats."""
|
||||||
|
return ['xlsx', 'xls', 'excel']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_format_aliases(cls) -> List[str]:
|
||||||
|
"""Return format aliases."""
|
||||||
|
return ['spreadsheet', 'workbook']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_priority(cls) -> int:
|
||||||
|
"""Return priority for Excel renderer."""
|
||||||
|
return 110
|
||||||
|
|
||||||
|
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
|
||||||
|
"""Return only Excel-specific guidelines; global prompt is built centrally."""
|
||||||
|
return (
|
||||||
|
"EXCEL FORMAT GUIDELINES:\n"
|
||||||
|
"- Output one or more pipe-delimited tables with a single header row.\n"
|
||||||
|
"- Let user intent define columns; use clear names and ISO dates.\n"
|
||||||
|
"- Separate multiple tables by a single blank line.\n"
|
||||||
|
"- No markdown/HTML/code fences; tables only unless user explicitly asks for notes.\n"
|
||||||
|
"OUTPUT: Return ONLY pipe-delimited tables suitable for import."
|
||||||
|
)
|
||||||
|
|
||||||
|
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
|
||||||
|
"""Render extracted content to Excel format."""
|
||||||
|
try:
|
||||||
|
if not OPENPYXL_AVAILABLE:
|
||||||
|
# Fallback to CSV if openpyxl not available
|
||||||
|
from .csv_renderer import CsvRenderer
|
||||||
|
csv_renderer = CsvRenderer()
|
||||||
|
csv_content, _ = await csv_renderer.render(extracted_content, title)
|
||||||
|
return csv_content, "text/csv"
|
||||||
|
|
||||||
|
# Generate Excel using openpyxl
|
||||||
|
excel_content = self._generate_excel(extracted_content, title)
|
||||||
|
|
||||||
|
return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error rendering Excel: {str(e)}")
|
||||||
|
# Return CSV fallback
|
||||||
|
return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv"
|
||||||
|
|
||||||
|
def _generate_excel(self, content: str, title: str) -> str:
|
||||||
|
"""Generate Excel content using openpyxl."""
|
||||||
|
try:
|
||||||
|
# Create workbook
|
||||||
|
wb = Workbook()
|
||||||
|
|
||||||
|
# Remove default sheet
|
||||||
|
wb.remove(wb.active)
|
||||||
|
|
||||||
|
# Create sheets
|
||||||
|
summary_sheet = wb.create_sheet("Summary", 0)
|
||||||
|
data_sheet = wb.create_sheet("Data", 1)
|
||||||
|
analysis_sheet = wb.create_sheet("Analysis", 2)
|
||||||
|
|
||||||
|
# Add content to sheets
|
||||||
|
self._populate_summary_sheet(summary_sheet, title)
|
||||||
|
self._populate_data_sheet(data_sheet, content)
|
||||||
|
self._populate_analysis_sheet(analysis_sheet, content)
|
||||||
|
|
||||||
|
# Save to buffer
|
||||||
|
buffer = io.BytesIO()
|
||||||
|
wb.save(buffer)
|
||||||
|
buffer.seek(0)
|
||||||
|
|
||||||
|
# Convert to base64
|
||||||
|
excel_bytes = buffer.getvalue()
|
||||||
|
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
|
||||||
|
|
||||||
|
return excel_base64
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error generating Excel: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _populate_summary_sheet(self, sheet, title: str):
|
||||||
|
"""Populate the summary sheet."""
|
||||||
|
try:
|
||||||
|
# Title
|
||||||
|
sheet['A1'] = title
|
||||||
|
sheet['A1'].font = Font(size=16, bold=True)
|
||||||
|
sheet['A1'].alignment = Alignment(horizontal='center')
|
||||||
|
|
||||||
|
# Generation info
|
||||||
|
sheet['A3'] = "Generated:"
|
||||||
|
sheet['B3'] = self._format_timestamp()
|
||||||
|
sheet['A4'] = "Status:"
|
||||||
|
sheet['B4'] = "Generated Successfully"
|
||||||
|
|
||||||
|
# Key metrics placeholder
|
||||||
|
sheet['A6'] = "Key Metrics:"
|
||||||
|
sheet['A6'].font = Font(bold=True)
|
||||||
|
sheet['A7'] = "Total Items:"
|
||||||
|
sheet['B7'] = "=COUNTA(Data!A:A)-1" # Count non-empty cells in Data sheet
|
||||||
|
|
||||||
|
# Auto-adjust column widths
|
||||||
|
sheet.column_dimensions['A'].width = 20
|
||||||
|
sheet.column_dimensions['B'].width = 30
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not populate summary sheet: {str(e)}")
|
||||||
|
|
||||||
|
def _populate_data_sheet(self, sheet, content: str):
|
||||||
|
"""Populate the data sheet."""
|
||||||
|
try:
|
||||||
|
# Headers
|
||||||
|
headers = ["Item/Category", "Value/Amount", "Percentage", "Source Document", "Notes/Comments"]
|
||||||
|
for col, header in enumerate(headers, 1):
|
||||||
|
cell = sheet.cell(row=1, column=col, value=header)
|
||||||
|
cell.font = Font(bold=True)
|
||||||
|
cell.fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")
|
||||||
|
|
||||||
|
# Process content
|
||||||
|
lines = content.split('\n')
|
||||||
|
row = 2
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Check for table data (lines with |)
|
||||||
|
if '|' in line:
|
||||||
|
cells = [cell.strip() for cell in line.split('|') if cell.strip()]
|
||||||
|
for col, cell_data in enumerate(cells[:5], 1): # Limit to 5 columns
|
||||||
|
sheet.cell(row=row, column=col, value=cell_data)
|
||||||
|
row += 1
|
||||||
|
else:
|
||||||
|
# Regular content
|
||||||
|
sheet.cell(row=row, column=1, value=line)
|
||||||
|
row += 1
|
||||||
|
|
||||||
|
# Auto-adjust column widths
|
||||||
|
for col in range(1, 6):
|
||||||
|
sheet.column_dimensions[get_column_letter(col)].width = 20
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not populate data sheet: {str(e)}")
|
||||||
|
|
||||||
|
def _populate_analysis_sheet(self, sheet, content: str):
|
||||||
|
"""Populate the analysis sheet."""
|
||||||
|
try:
|
||||||
|
# Title
|
||||||
|
sheet['A1'] = "Analysis & Insights"
|
||||||
|
sheet['A1'].font = Font(size=14, bold=True)
|
||||||
|
|
||||||
|
# Content analysis
|
||||||
|
lines = content.split('\n')
|
||||||
|
row = 3
|
||||||
|
|
||||||
|
sheet['A3'] = "Content Analysis:"
|
||||||
|
sheet['A3'].font = Font(bold=True)
|
||||||
|
row += 1
|
||||||
|
|
||||||
|
# Count different types of content
|
||||||
|
table_lines = sum(1 for line in lines if '|' in line)
|
||||||
|
list_lines = sum(1 for line in lines if line.startswith(('- ', '* ')))
|
||||||
|
text_lines = len(lines) - table_lines - list_lines
|
||||||
|
|
||||||
|
sheet[f'A{row}'] = f"Total Lines: {len(lines)}"
|
||||||
|
row += 1
|
||||||
|
sheet[f'A{row}'] = f"Table Rows: {table_lines}"
|
||||||
|
row += 1
|
||||||
|
sheet[f'A{row}'] = f"List Items: {list_lines}"
|
||||||
|
row += 1
|
||||||
|
sheet[f'A{row}'] = f"Text Lines: {text_lines}"
|
||||||
|
row += 2
|
||||||
|
|
||||||
|
# Recommendations
|
||||||
|
sheet[f'A{row}'] = "Recommendations:"
|
||||||
|
sheet[f'A{row}'].font = Font(bold=True)
|
||||||
|
row += 1
|
||||||
|
sheet[f'A{row}'] = "1. Review data accuracy"
|
||||||
|
row += 1
|
||||||
|
sheet[f'A{row}'] = "2. Consider additional analysis"
|
||||||
|
row += 1
|
||||||
|
sheet[f'A{row}'] = "3. Update regularly"
|
||||||
|
|
||||||
|
# Auto-adjust column width
|
||||||
|
sheet.column_dimensions['A'].width = 30
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
|
||||||
|
|
@ -0,0 +1,69 @@
|
||||||
|
"""
|
||||||
|
HTML renderer for report generation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .base_renderer import BaseRenderer
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
|
||||||
|
class HtmlRenderer(BaseRenderer):
|
||||||
|
"""Renders content to HTML format with format-specific extraction."""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_supported_formats(cls) -> List[str]:
|
||||||
|
"""Return supported HTML formats."""
|
||||||
|
return ['html', 'htm']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_format_aliases(cls) -> List[str]:
|
||||||
|
"""Return format aliases."""
|
||||||
|
return ['web', 'webpage']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_priority(cls) -> int:
|
||||||
|
"""Return priority for HTML renderer."""
|
||||||
|
return 100
|
||||||
|
|
||||||
|
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
|
||||||
|
"""Return only HTML-specific guidelines; global prompt is built centrally."""
|
||||||
|
return (
|
||||||
|
"HTML FORMAT GUIDELINES:\n"
|
||||||
|
"- Output a complete HTML5 document starting with <!DOCTYPE html>.\n"
|
||||||
|
"- Include <html>, <head> with <meta charset=\"UTF-8\"> and <title>, and <body>.\n"
|
||||||
|
"- Use semantic elements: <header>, <main>, <section>, <article>, <footer>.\n"
|
||||||
|
"- Provide professional CSS in a <style> block; responsive, clean typography.\n"
|
||||||
|
"- Use h1/h2/h3 for headings; tables and lists for structure.\n"
|
||||||
|
"OUTPUT: Return ONLY valid HTML (no markdown, no code fences)."
|
||||||
|
)
|
||||||
|
|
||||||
|
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
|
||||||
|
"""Render extracted content to HTML format."""
|
||||||
|
try:
|
||||||
|
# The extracted content should already be HTML from the AI
|
||||||
|
# Just clean it up and ensure it's valid
|
||||||
|
html_content = self._clean_html_content(extracted_content, title)
|
||||||
|
|
||||||
|
return html_content, "text/html"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error rendering HTML: {str(e)}")
|
||||||
|
# Return minimal HTML fallback
|
||||||
|
return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"
|
||||||
|
|
||||||
|
def _clean_html_content(self, content: str, title: str) -> str:
|
||||||
|
"""Clean and validate HTML content from AI."""
|
||||||
|
content = content.strip()
|
||||||
|
|
||||||
|
# Remove markdown code blocks if present
|
||||||
|
if content.startswith("```") and content.endswith("```"):
|
||||||
|
lines = content.split('\n')
|
||||||
|
if len(lines) > 2:
|
||||||
|
content = '\n'.join(lines[1:-1]).strip()
|
||||||
|
|
||||||
|
# Ensure it starts with DOCTYPE
|
||||||
|
if not content.startswith('<!DOCTYPE'):
|
||||||
|
if content.startswith('<html'):
|
||||||
|
content = '<!DOCTYPE html>\n' + content
|
||||||
|
else:
|
||||||
|
content = f'<!DOCTYPE html>\n<html>\n<head><meta charset="UTF-8"><title>{title}</title></head>\n<body>\n{content}\n</body>\n</html>'
|
||||||
|
|
||||||
|
return content
|
||||||
|
|
@ -0,0 +1,74 @@
|
||||||
|
"""
|
||||||
|
JSON renderer for report generation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .base_renderer import BaseRenderer
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
import json
|
||||||
|
|
||||||
|
class JsonRenderer(BaseRenderer):
    """Renders content to JSON format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported JSON formats."""
        return ['json']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['data']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for JSON renderer."""
        return 80

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only JSON-specific guidelines; global prompt is built centrally."""
        guideline_parts = [
            "JSON FORMAT GUIDELINES:\n",
            "- Output ONLY a single valid JSON object (no fences, no pre/post text).\n",
            "- Choose a structure that best fits the user's intent; include a top-level title and data.\n",
            "- Prefer arrays/objects that map cleanly to the extracted facts.\n",
            "- Include minimal metadata only if useful (e.g., generatedAt, sources).\n",
            "OUTPUT: Return ONLY valid, parseable JSON.",
        ]
        return ''.join(guideline_parts)

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to JSON format.

        Returns a (content, mime_type) tuple; on failure a minimal JSON
        error document is returned instead of raising.
        """
        try:
            # The AI is expected to emit JSON already; validate and pretty-print it.
            return self._clean_json_content(extracted_content, title), "application/json"
        except Exception as e:
            self.logger.error(f"Error rendering JSON: {str(e)}")
            # Minimal, well-formed fallback document describing the failure.
            fallback_data = {
                "title": title,
                "sections": [{"type": "text", "content": f"Error rendering report: {str(e)}"}],
                "metadata": {"error": str(e)},
            }
            return json.dumps(fallback_data, indent=2), "application/json"

    def _clean_json_content(self, content: str, title: str) -> str:
        """Clean and validate JSON content from AI."""
        text = content.strip()

        # Strip a surrounding markdown code fence, if the AI added one.
        if text.startswith("```") and text.endswith("```"):
            fenced = text.split('\n')
            if len(fenced) > 2:
                text = '\n'.join(fenced[1:-1]).strip()

        # Pretty-print when the payload parses as JSON; otherwise pass through.
        try:
            return json.dumps(json.loads(text), indent=2, ensure_ascii=False)
        except json.JSONDecodeError:
            return text
|
||||||
|
|
@ -0,0 +1,65 @@
|
||||||
|
"""
|
||||||
|
Markdown renderer for report generation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .base_renderer import BaseRenderer
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
|
||||||
|
class MarkdownRenderer(BaseRenderer):
    """Renders content to Markdown format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported Markdown formats."""
        return ['md', 'markdown']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['mdown', 'mkd']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for markdown renderer."""
        return 95

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only Markdown-specific guidelines; global prompt is built centrally."""
        guideline_parts = [
            "MARKDOWN FORMAT GUIDELINES:\n",
            "- Use proper Markdown syntax only (no HTML wrappers).\n",
            "- # for main title, ## for sections, ### for subsections.\n",
            "- Tables with | separators and a header row.\n",
            "- Bullet lists with - or *.\n",
            "- Emphasis with **bold** and *italic*.\n",
            "- Code blocks with ```language.\n",
            "- Horizontal rules (---) to separate major sections when helpful.\n",
            "- Include links [text](url) and images  when referenced by sources.\n",
            "OUTPUT: Return ONLY raw Markdown content without code fences.",
        ]
        return ''.join(guideline_parts)

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to Markdown format.

        Returns a (content, mime_type) tuple; on failure a minimal Markdown
        error document is returned instead of raising.
        """
        try:
            # The AI is expected to emit Markdown already; just tidy it.
            return self._clean_markdown_content(extracted_content, title), "text/markdown"
        except Exception as e:
            self.logger.error(f"Error rendering markdown: {str(e)}")
            # Minimal fallback document describing the failure.
            return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"

    def _clean_markdown_content(self, content: str, title: str) -> str:
        """Clean and validate Markdown content from AI."""
        text = content.strip()

        # Strip a surrounding markdown code fence, if the AI added one.
        if text.startswith("```") and text.endswith("```"):
            fenced = text.split('\n')
            if len(fenced) > 2:
                text = '\n'.join(fenced[1:-1]).strip()

        return text
|
||||||
225
modules/services/serviceGeneration/renderers/pdf_renderer.py
Normal file
225
modules/services/serviceGeneration/renderers/pdf_renderer.py
Normal file
|
|
@ -0,0 +1,225 @@
|
||||||
|
"""
|
||||||
|
PDF renderer for report generation using reportlab.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .base_renderer import BaseRenderer
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
import io
|
||||||
|
import base64
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
|
||||||
|
try:
|
||||||
|
from reportlab.lib.pagesizes import letter, A4
|
||||||
|
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
||||||
|
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||||
|
from reportlab.lib.units import inch
|
||||||
|
from reportlab.lib import colors
|
||||||
|
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
|
||||||
|
REPORTLAB_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
REPORTLAB_AVAILABLE = False
|
||||||
|
|
||||||
|
class PdfRenderer(BaseRenderer):
    """Renders content to PDF format using reportlab.

    When reportlab is not installed (REPORTLAB_AVAILABLE is False), render()
    degrades gracefully to HTML output via HtmlRenderer instead of failing.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported PDF formats."""
        return ['pdf']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['document', 'print']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for PDF renderer."""
        return 120

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only PDF-specific guidelines; global prompt is built centrally."""
        return (
            "PDF FORMAT GUIDELINES:\n"
            "- Provide structured content suitable for pagination and headings (H1/H2/H3-like).\n"
            "- Use bullet lists and tables where useful; separate major sections clearly.\n"
            "- Avoid markdown/HTML; produce clean, plain content that can be laid out as PDF.\n"
            "OUTPUT: Return ONLY the PDF-ready textual content (no fences)."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to PDF format.

        Returns a (content, mime_type) tuple. On success the content is the
        base64-encoded PDF bytes (see _generate_pdf) with mime
        "application/pdf"; without reportlab it falls back to HTML, and on
        any error to a plain-text message.
        """
        try:
            if not REPORTLAB_AVAILABLE:
                # Fallback to HTML if reportlab not available
                from .html_renderer import HtmlRenderer
                html_renderer = HtmlRenderer()
                html_content, _ = await html_renderer.render(extracted_content, title)
                return html_content, "text/html"

            # Generate PDF using reportlab
            pdf_content = self._generate_pdf(extracted_content, title)

            return pdf_content, "application/pdf"

        except Exception as e:
            self.logger.error(f"Error rendering PDF: {str(e)}")
            # Return minimal fallback
            return f"PDF Generation Error: {str(e)}", "text/plain"

    def _generate_pdf(self, content: str, title: str) -> str:
        """Generate PDF content using reportlab.

        Parses *content* line by line: markdown-style '# '/'## '/'### '
        prefixes delimit headings, everything else accumulates into sections
        handled by _process_section. Returns the finished PDF as a base64
        string. Re-raises on any reportlab failure after logging.
        """
        try:
            # Create a buffer to hold the PDF
            buffer = io.BytesIO()

            # Create PDF document
            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18
            )

            # Get styles
            styles = getSampleStyleSheet()

            # Create custom styles
            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=24,
                spaceAfter=30,
                alignment=TA_CENTER,
                textColor=colors.darkblue
            )

            heading_style = ParagraphStyle(
                'CustomHeading',
                parent=styles['Heading2'],
                fontSize=16,
                spaceAfter=12,
                spaceBefore=12,
                textColor=colors.darkblue
            )

            # Build PDF content
            story = []

            # Title page
            story.append(Paragraph(title, title_style))
            story.append(Spacer(1, 20))
            story.append(Paragraph(f"Generated: {self._format_timestamp()}", styles['Normal']))
            story.append(PageBreak())

            # Process content
            lines = content.split('\n')
            current_section = []

            for line in lines:
                line = line.strip()
                if not line:
                    continue

                # Check for headings
                if line.startswith('# '):
                    # H1 heading: flush the accumulated section first
                    if current_section:
                        story.extend(self._process_section(current_section, styles))
                        current_section = []
                    story.append(Paragraph(line[2:], title_style))
                    story.append(Spacer(1, 12))
                elif line.startswith('## '):
                    # H2 heading
                    if current_section:
                        story.extend(self._process_section(current_section, styles))
                        current_section = []
                    story.append(Paragraph(line[3:], heading_style))
                    story.append(Spacer(1, 8))
                elif line.startswith('### '):
                    # H3 heading
                    if current_section:
                        story.extend(self._process_section(current_section, styles))
                        current_section = []
                    story.append(Paragraph(line[4:], styles['Heading3']))
                    story.append(Spacer(1, 6))
                else:
                    current_section.append(line)

            # Process remaining content
            if current_section:
                story.extend(self._process_section(current_section, styles))

            # Build PDF
            doc.build(story)

            # Get PDF content as base64
            buffer.seek(0)
            pdf_bytes = buffer.getvalue()
            pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')

            return pdf_base64

        except Exception as e:
            self.logger.error(f"Error generating PDF: {str(e)}")
            raise

    def _process_section(self, lines: list, styles) -> list:
        """Process a section of content into PDF elements.

        Produces a flat list of reportlab flowables: a styled Table when a
        '|'-delimited row is encountered, bullet paragraphs for '- '/'* '
        items, and plain paragraphs otherwise.
        """
        elements = []

        for line in lines:
            if not line.strip():
                continue

            # Check for tables (lines with |)
            if '|' in line and not line.startswith('|'):
                # This might be part of a table, process as table
                table_data = self._extract_table_data(lines)
                if table_data:
                    table = Table(table_data)
                    table.setStyle(TableStyle([
                        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                        ('FONTSIZE', (0, 0), (-1, 0), 14),
                        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                        ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                        ('GRID', (0, 0), (-1, -1), 1, colors.black)
                    ]))
                    elements.append(table)
                    elements.append(Spacer(1, 12))
                # NOTE(review): returns as soon as the first table-like line is
                # seen, dropping any lines after it in this section — confirm
                # this early exit is intended.
                return elements

            # Check for lists
            if line.startswith('- ') or line.startswith('* '):
                # This is a list item
                elements.append(Paragraph(f"• {line[2:]}", styles['Normal']))
            else:
                # Regular paragraph
                elements.append(Paragraph(line, styles['Normal']))

        # Trailing spacer separates this section from the next.
        elements.append(Spacer(1, 6))
        return elements

    def _extract_table_data(self, lines: list) -> list:
        """Extract table data from lines.

        Collects '|'-separated rows (empty cells dropped); a blank line after
        the table ends collection. Returns [] unless at least two rows
        (header + one data row) were found.
        """
        table_data = []
        in_table = False

        for line in lines:
            if '|' in line:
                if not in_table:
                    in_table = True
                # Split by | and clean up
                cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                if cells:
                    table_data.append(cells)
            elif in_table and not line.strip():
                # Empty line, might be end of table
                break

        return table_data if len(table_data) > 1 else []
|
||||||
157
modules/services/serviceGeneration/renderers/registry.py
Normal file
157
modules/services/serviceGeneration/renderers/registry.py
Normal file
|
|
@ -0,0 +1,157 @@
|
||||||
|
"""
|
||||||
|
Renderer registry for automatic discovery and registration of renderers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import importlib
|
||||||
|
import pkgutil
|
||||||
|
from typing import Dict, Type, List, Optional
|
||||||
|
from .base_renderer import BaseRenderer
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class RendererRegistry:
    """Registry for automatic renderer discovery and management.

    Scans the renderers package for BaseRenderer subclasses, registers them
    by their supported formats (plus aliases), and hands out renderer
    instances on demand.
    """

    def __init__(self):
        # format name (lowercase) -> renderer class
        self._renderers: Dict[str, Type[BaseRenderer]] = {}
        # alias (lowercase) -> canonical format name
        self._format_mappings: Dict[str, str] = {}
        # Guards against rescanning the package on every lookup.
        self._discovered = False

    def discover_renderers(self) -> None:
        """Automatically discover and register all renderers by scanning files."""
        if self._discovered:
            return

        try:
            from pathlib import Path

            # Sibling modules of this registry file are candidate renderers.
            renderers_dir = Path(__file__).parent

            # Package name derived dynamically so layout changes don't break discovery.
            package_name = __name__.rsplit('.', 1)[0]

            # Scan all Python files in the renderers directory.
            for file_path in renderers_dir.glob("*.py"):
                # Skip infrastructure modules that cannot contain renderers.
                if file_path.name in ['registry.py', 'base_renderer.py', '__init__.py']:
                    continue

                module_name = file_path.stem

                try:
                    # Import the module dynamically.
                    full_module_name = f"{package_name}.{module_name}"
                    module = importlib.import_module(full_module_name)

                    # Register every concrete BaseRenderer subclass found.
                    for attr_name in dir(module):
                        attr = getattr(module, attr_name)
                        if (isinstance(attr, type) and
                                issubclass(attr, BaseRenderer) and
                                attr != BaseRenderer and
                                hasattr(attr, 'get_supported_formats')):
                            self._register_renderer_class(attr)
                            logger.info(f"Discovered renderer: {attr.__name__} from {module_name}")

                except Exception as e:
                    # A broken renderer module must not abort discovery of the rest.
                    logger.warning(f"Could not load renderer from {module_name}: {str(e)}")
                    continue

            self._discovered = True
            logger.info(f"Renderer discovery completed. Found {len(self._renderers)} renderers.")

        except Exception as e:
            logger.error(f"Error during renderer discovery: {str(e)}")
            self._discovered = True  # Mark as discovered to avoid repeated attempts

    def _register_renderer_class(self, renderer_class: Type[BaseRenderer]) -> None:
        """Register a renderer class with its supported formats and aliases."""
        try:
            # Get supported formats from the renderer class.
            supported_formats = renderer_class.get_supported_formats()
            if not supported_formats:
                logger.warning(f"{renderer_class.__name__} declares no supported formats; skipping")
                return

            for format_name in supported_formats:
                # Register primary format.
                self._renderers[format_name.lower()] = renderer_class

            # Fix: register aliases once, mapped to the renderer's primary
            # (first) format, instead of re-registering them inside the loop
            # above, which redundantly rebound every alias on each iteration
            # and left it pointing at whichever format happened to be last.
            if hasattr(renderer_class, 'get_format_aliases'):
                primary_format = supported_formats[0].lower()
                for alias in renderer_class.get_format_aliases():
                    self._format_mappings[alias.lower()] = primary_format

            logger.debug(f"Registered {renderer_class.__name__} for formats: {supported_formats}")

        except Exception as e:
            logger.error(f"Error registering renderer {renderer_class.__name__}: {str(e)}")

    def get_renderer(self, output_format: str) -> Optional[BaseRenderer]:
        """Get a renderer instance for the specified format.

        Returns None when no renderer supports the format or instantiation fails.
        """
        if not self._discovered:
            self.discover_renderers()

        # Normalize format name.
        format_name = output_format.lower().strip()

        # Resolve aliases to their canonical format first.
        if format_name in self._format_mappings:
            format_name = self._format_mappings[format_name]

        renderer_class = self._renderers.get(format_name)

        if renderer_class:
            try:
                return renderer_class()
            except Exception as e:
                logger.error(f"Error creating renderer instance for {format_name}: {str(e)}")
                return None

        logger.warning(f"No renderer found for format: {output_format}")
        return None

    def get_supported_formats(self) -> List[str]:
        """Get sorted list of all supported formats, including aliases."""
        if not self._discovered:
            self.discover_renderers()

        formats = list(self._renderers.keys())
        formats.extend(self._format_mappings.keys())
        return sorted(set(formats))

    def get_renderer_info(self) -> Dict[str, Dict[str, str]]:
        """Get information about all registered renderers.

        Maps each format name to the renderer's class name, module, and the
        first line of its docstring.
        """
        if not self._discovered:
            self.discover_renderers()

        info = {}
        for format_name, renderer_class in self._renderers.items():
            info[format_name] = {
                'class_name': renderer_class.__name__,
                'module': renderer_class.__module__,
                'description': getattr(renderer_class, '__doc__', 'No description').strip().split('\n')[0] if renderer_class.__doc__ else 'No description'
            }

        return info
|
||||||
|
|
||||||
|
# Global registry instance.
# Module-level singleton; the functions below are the public facade so callers
# never construct a RendererRegistry themselves.
_registry = RendererRegistry()


def get_renderer(output_format: str) -> Optional[BaseRenderer]:
    """Get a renderer instance for the specified format.

    Delegates to the module-level registry singleton; returns None when no
    renderer supports the format.
    """
    return _registry.get_renderer(output_format)


def get_supported_formats() -> List[str]:
    """Get sorted list of all supported formats, including aliases."""
    return _registry.get_supported_formats()


def get_renderer_info() -> Dict[str, Dict[str, str]]:
    """Get information about all registered renderers (class, module, description)."""
    return _registry.get_renderer_info()
|
||||||
|
|
@ -0,0 +1,94 @@
|
||||||
|
"""
|
||||||
|
Text renderer for report generation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .base_renderer import BaseRenderer
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
|
||||||
|
class TextRenderer(BaseRenderer):
    """Renders content to plain text format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported text formats (excluding formats with dedicated renderers)."""
        return [
            'txt', 'text', 'plain',
            # Programming languages
            'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx',
            'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp',
            'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust',
            'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc',
            'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd',
            # Web technologies (excluding html/htm which have dedicated renderer)
            'css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg',
            # Data formats (excluding csv, md/markdown which have dedicated renderers)
            'tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore',
            # Configuration files
            'env', 'properties', 'conf', 'config', 'rc',
            'gitattributes', 'editorconfig', 'eslintrc',
            # Documentation
            'readme', 'changelog', 'license', 'authors',
            'contributing', 'todo', 'notes', 'docs'
        ]

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return [
            'ascii', 'utf8', 'utf-8', 'code', 'source',
            'script', 'program', 'file', 'document',
            'raw', 'unformatted', 'plaintext'
        ]

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for text renderer."""
        return 90

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only plain-text guidelines; global prompt is built centrally."""
        guideline_parts = [
            "TEXT FORMAT GUIDELINES:\n",
            "- Output ONLY plain text (no markdown or HTML).\n",
            "- Use clear headings (you may underline with === or --- when helpful).\n",
            "- Use simple bullet lists with '-' and tables with '|' when needed.\n",
            "- Preserve indentation for code-like content if present.\n",
            "OUTPUT: Return ONLY the raw text content.",
        ]
        return ''.join(guideline_parts)

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to plain text format.

        Returns a (content, mime_type) tuple; on failure a minimal text
        error document is returned instead of raising.
        """
        try:
            # The AI is expected to emit formatted text already; just tidy it.
            return self._clean_text_content(extracted_content, title), "text/plain"
        except Exception as e:
            self.logger.error(f"Error rendering text: {str(e)}")
            # Minimal fallback document describing the failure.
            return f"{title}\n\nError rendering report: {str(e)}", "text/plain"

    def _clean_text_content(self, content: str, title: str) -> str:
        """Clean and validate text content from AI."""
        import re

        text = content.strip()

        # Strip a surrounding markdown code fence, if the AI added one.
        if text.startswith("```") and text.endswith("```"):
            fenced = text.split('\n')
            if len(fenced) > 2:
                text = '\n'.join(fenced[1:-1]).strip()

        # Drop leftover markdown emphasis markers entirely.
        for marker in ('*', '_'):
            text = text.replace(marker, '')

        # Remove any HTML-like tags that slipped through.
        text = re.sub(r'<[^>]+>', '', text)

        # Normalize line endings to \n.
        text = text.replace('\r\n', '\n').replace('\r', '\n')

        return text
|
||||||
|
|
@ -3,7 +3,7 @@ import uuid
|
||||||
from typing import Dict, Any, List, Optional
|
from typing import Dict, Any, List, Optional
|
||||||
from modules.datamodels.datamodelUam import User, UserConnection
|
from modules.datamodels.datamodelUam import User, UserConnection
|
||||||
from modules.datamodels.datamodelChat import ChatDocument, ChatMessage
|
from modules.datamodels.datamodelChat import ChatDocument, ChatMessage
|
||||||
from modules.datamodels.datamodelChat import ExtractedContent
|
from modules.datamodels.datamodelChat import ChatContentExtracted
|
||||||
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
||||||
from modules.services.serviceGeneration.subDocumentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
|
from modules.services.serviceGeneration.subDocumentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||||
|
|
@ -78,6 +78,12 @@ class WorkflowService:
|
||||||
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
|
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
|
||||||
"""Get ChatDocuments from a list of document references using all three formats."""
|
"""Get ChatDocuments from a list of document references using all three formats."""
|
||||||
try:
|
try:
|
||||||
|
# Get the current workflow from services (same pattern as setWorkflowContext)
|
||||||
|
workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
|
||||||
|
if not workflow:
|
||||||
|
logger.error("No workflow available for document list resolution")
|
||||||
|
return []
|
||||||
|
|
||||||
all_documents = []
|
all_documents = []
|
||||||
for doc_ref in documentList:
|
for doc_ref in documentList:
|
||||||
if doc_ref.startswith("docItem:"):
|
if doc_ref.startswith("docItem:"):
|
||||||
|
|
@ -86,7 +92,7 @@ class WorkflowService:
|
||||||
if len(parts) >= 2:
|
if len(parts) >= 2:
|
||||||
doc_id = parts[1]
|
doc_id = parts[1]
|
||||||
# Find the document by ID
|
# Find the document by ID
|
||||||
for message in self.workflow.messages:
|
for message in workflow.messages:
|
||||||
if message.documents:
|
if message.documents:
|
||||||
for doc in message.documents:
|
for doc in message.documents:
|
||||||
if doc.id == doc_id:
|
if doc.id == doc_id:
|
||||||
|
|
@ -101,9 +107,15 @@ class WorkflowService:
|
||||||
# Format: docList:<messageId>:<label>
|
# Format: docList:<messageId>:<label>
|
||||||
message_id = parts[1]
|
message_id = parts[1]
|
||||||
label = parts[2]
|
label = parts[2]
|
||||||
|
logger.debug(f"Looking for message with ID: {message_id} and label: {label}")
|
||||||
|
|
||||||
# Find the message by ID and get all its documents
|
# Find the message by ID and get all its documents
|
||||||
for message in self.workflow.messages:
|
message_found = False
|
||||||
|
for message in workflow.messages:
|
||||||
|
logger.debug(f"Checking message ID: {message.id} (looking for: {message_id})")
|
||||||
if str(message.id) == message_id:
|
if str(message.id) == message_id:
|
||||||
|
message_found = True
|
||||||
|
logger.debug(f"Found message {message.id} with documentsLabel: {getattr(message, 'documentsLabel', 'None')}")
|
||||||
if message.documents:
|
if message.documents:
|
||||||
doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
|
doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
|
||||||
logger.debug(f"Found docList reference {doc_ref}: {len(message.documents)} documents - {doc_names}")
|
logger.debug(f"Found docList reference {doc_ref}: {len(message.documents)} documents - {doc_names}")
|
||||||
|
|
@ -111,13 +123,16 @@ class WorkflowService:
|
||||||
else:
|
else:
|
||||||
logger.debug(f"Found docList reference {doc_ref} but message has no documents")
|
logger.debug(f"Found docList reference {doc_ref} but message has no documents")
|
||||||
break
|
break
|
||||||
|
|
||||||
|
if not message_found:
|
||||||
|
logger.warning(f"Message with ID {message_id} not found in workflow. Available message IDs: {[str(msg.id) for msg in workflow.messages]}")
|
||||||
elif len(parts) >= 2:
|
elif len(parts) >= 2:
|
||||||
# Format: docList:<label> - find message by documentsLabel
|
# Format: docList:<label> - find message by documentsLabel
|
||||||
label = parts[1]
|
label = parts[1]
|
||||||
logger.debug(f"Looking for message with documentsLabel: {label}")
|
logger.debug(f"Looking for message with documentsLabel: {label}")
|
||||||
# Find messages with matching documentsLabel
|
# Find messages with matching documentsLabel
|
||||||
matching_messages = []
|
matching_messages = []
|
||||||
for message in self.workflow.messages:
|
for message in workflow.messages:
|
||||||
# Check both attribute and raw data for documentsLabel
|
# Check both attribute and raw data for documentsLabel
|
||||||
msg_label = getattr(message, 'documentsLabel', None)
|
msg_label = getattr(message, 'documentsLabel', None)
|
||||||
if msg_label == label:
|
if msg_label == label:
|
||||||
|
|
@ -158,7 +173,7 @@ class WorkflowService:
|
||||||
# Find messages with matching documentsLabel (this is the correct way!)
|
# Find messages with matching documentsLabel (this is the correct way!)
|
||||||
# In case of retries, we want the NEWEST message (most recent publishedAt)
|
# In case of retries, we want the NEWEST message (most recent publishedAt)
|
||||||
matching_messages = []
|
matching_messages = []
|
||||||
for message in self.workflow.messages:
|
for message in workflow.messages:
|
||||||
msg_documents_label = getattr(message, 'documentsLabel', '')
|
msg_documents_label = getattr(message, 'documentsLabel', '')
|
||||||
|
|
||||||
# Check if this message's documentsLabel matches our reference
|
# Check if this message's documentsLabel matches our reference
|
||||||
|
|
@ -187,7 +202,7 @@ class WorkflowService:
|
||||||
# Fallback: also check if any message has this documentsLabel as a prefix
|
# Fallback: also check if any message has this documentsLabel as a prefix
|
||||||
logger.debug(f"Trying fallback search for messages with documentsLabel containing: {doc_ref}")
|
logger.debug(f"Trying fallback search for messages with documentsLabel containing: {doc_ref}")
|
||||||
fallback_messages = []
|
fallback_messages = []
|
||||||
for message in self.workflow.messages:
|
for message in workflow.messages:
|
||||||
msg_documents_label = getattr(message, 'documentsLabel', '')
|
msg_documents_label = getattr(message, 'documentsLabel', '')
|
||||||
if msg_documents_label and msg_documents_label.startswith(doc_ref):
|
if msg_documents_label and msg_documents_label.startswith(doc_ref):
|
||||||
fallback_messages.append(message)
|
fallback_messages.append(message)
|
||||||
|
|
@ -422,24 +437,30 @@ class WorkflowService:
|
||||||
def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
|
def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
|
||||||
"""Set current workflow context for document generation and routing"""
|
"""Set current workflow context for document generation and routing"""
|
||||||
try:
|
try:
|
||||||
|
# Get the current workflow from services
|
||||||
|
workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
|
||||||
|
if not workflow:
|
||||||
|
logger.error("No workflow available for context setting")
|
||||||
|
return
|
||||||
|
|
||||||
# Prepare update data
|
# Prepare update data
|
||||||
update_data = {}
|
update_data = {}
|
||||||
|
|
||||||
if round_number is not None:
|
if round_number is not None:
|
||||||
self.workflow.currentRound = round_number
|
workflow.currentRound = round_number
|
||||||
update_data["currentRound"] = round_number
|
update_data["currentRound"] = round_number
|
||||||
if task_number is not None:
|
if task_number is not None:
|
||||||
self.workflow.currentTask = task_number
|
workflow.currentTask = task_number
|
||||||
update_data["currentTask"] = task_number
|
update_data["currentTask"] = task_number
|
||||||
if action_number is not None:
|
if action_number is not None:
|
||||||
self.workflow.currentAction = action_number
|
workflow.currentAction = action_number
|
||||||
update_data["currentAction"] = action_number
|
update_data["currentAction"] = action_number
|
||||||
|
|
||||||
# Persist changes to database if any updates were made
|
# Persist changes to database if any updates were made
|
||||||
if update_data:
|
if update_data:
|
||||||
self.interfaceDbChat.updateWorkflow(self.workflow.id, update_data)
|
self.interfaceDbChat.updateWorkflow(workflow.id, update_data)
|
||||||
|
|
||||||
logger.debug(f"Updated workflow context: Round {self.workflow.currentRound if hasattr(self.workflow, 'currentRound') else 'N/A'}, Task {self.workflow.currentTask if hasattr(self.workflow, 'currentTask') else 'N/A'}, Action {self.workflow.currentAction if hasattr(self.workflow, 'currentAction') else 'N/A'}")
|
logger.debug(f"Updated workflow context: Round {workflow.currentRound if hasattr(workflow, 'currentRound') else 'N/A'}, Task {workflow.currentTask if hasattr(workflow, 'currentTask') else 'N/A'}, Action {workflow.currentAction if hasattr(workflow, 'currentAction') else 'N/A'}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error setting workflow context: {str(e)}")
|
logger.error(f"Error setting workflow context: {str(e)}")
|
||||||
|
|
||||||
|
|
@ -467,3 +488,376 @@ class WorkflowService:
|
||||||
'workflowStatus': 'unknown',
|
'workflowStatus': 'unknown',
|
||||||
'workflowId': 'unknown'
|
'workflowId': 'unknown'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def createWorkflow(self, workflowData: Dict[str, Any]):
|
||||||
|
"""Create a new workflow by delegating to the chat interface"""
|
||||||
|
try:
|
||||||
|
return self.interfaceDbChat.createWorkflow(workflowData)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error creating workflow: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def updateWorkflow(self, workflowId: str, updateData: Dict[str, Any]):
|
||||||
|
"""Update workflow by delegating to the chat interface"""
|
||||||
|
try:
|
||||||
|
return self.interfaceDbChat.updateWorkflow(workflowId, updateData)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error updating workflow: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def updateWorkflowStats(self, workflowId: str, **kwargs):
|
||||||
|
"""Update workflow statistics by delegating to the chat interface"""
|
||||||
|
try:
|
||||||
|
return self.interfaceDbChat.updateWorkflowStats(workflowId, **kwargs)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error updating workflow stats: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def getWorkflow(self, workflowId: str):
|
||||||
|
"""Get workflow by ID by delegating to the chat interface"""
|
||||||
|
try:
|
||||||
|
return self.interfaceDbChat.getWorkflow(workflowId)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting workflow: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def createMessage(self, messageData: Dict[str, Any]):
|
||||||
|
"""Create a new message by delegating to the chat interface"""
|
||||||
|
try:
|
||||||
|
return self.interfaceDbChat.createMessage(messageData)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error creating message: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def updateMessage(self, messageId: str, messageData: Dict[str, Any]):
|
||||||
|
"""Update message by delegating to the chat interface"""
|
||||||
|
try:
|
||||||
|
return self.interfaceDbChat.updateMessage(messageId, messageData)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error updating message: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def createLog(self, logData: Dict[str, Any]):
|
||||||
|
"""Create a new log entry by delegating to the chat interface"""
|
||||||
|
try:
|
||||||
|
return self.interfaceDbChat.createLog(logData)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error creating log: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def getDocumentCount(self) -> str:
|
||||||
|
"""Get document count for task planning (matching old handlingTasks.py logic)"""
|
||||||
|
try:
|
||||||
|
# Get the current workflow from services
|
||||||
|
workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
|
||||||
|
if not workflow:
|
||||||
|
return "No documents available"
|
||||||
|
|
||||||
|
# Count documents from all messages in the workflow (like old system)
|
||||||
|
total_docs = 0
|
||||||
|
for message in workflow.messages:
|
||||||
|
if hasattr(message, 'documents') and message.documents:
|
||||||
|
total_docs += len(message.documents)
|
||||||
|
|
||||||
|
if total_docs == 0:
|
||||||
|
return "No documents available"
|
||||||
|
|
||||||
|
return f"{total_docs} document(s) available"
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting document count: {str(e)}")
|
||||||
|
return "No documents available"
|
||||||
|
|
||||||
|
def getWorkflowHistoryContext(self) -> str:
|
||||||
|
"""Get workflow history context for task planning (matching old handlingTasks.py logic)"""
|
||||||
|
try:
|
||||||
|
# Get the current workflow from services
|
||||||
|
workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
|
||||||
|
if not workflow:
|
||||||
|
return "No previous round context available"
|
||||||
|
|
||||||
|
# Check if there are any previous rounds by looking for "first" messages
|
||||||
|
has_previous_rounds = False
|
||||||
|
for message in workflow.messages:
|
||||||
|
if hasattr(message, 'status') and message.status == "first":
|
||||||
|
has_previous_rounds = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if not has_previous_rounds:
|
||||||
|
return "No previous round context available"
|
||||||
|
|
||||||
|
# Get document reference list to show what documents are available from previous rounds
|
||||||
|
document_list = self._getDocumentReferenceList(workflow)
|
||||||
|
|
||||||
|
# Build context string showing previous rounds
|
||||||
|
context = "Previous workflow rounds contain documents:\n"
|
||||||
|
|
||||||
|
# Show history exchanges (previous rounds)
|
||||||
|
if document_list["history"]:
|
||||||
|
for exchange in document_list["history"]:
|
||||||
|
# Find the message that corresponds to this exchange
|
||||||
|
message_id = None
|
||||||
|
for message in workflow.messages:
|
||||||
|
if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange['documentsLabel']:
|
||||||
|
message_id = message.id
|
||||||
|
break
|
||||||
|
|
||||||
|
if message_id:
|
||||||
|
doc_list_ref = f"docList:{message_id}:{exchange['documentsLabel']}"
|
||||||
|
else:
|
||||||
|
doc_list_ref = f"docList:{exchange['documentsLabel']}"
|
||||||
|
|
||||||
|
context += f"- {doc_list_ref} ({len(exchange['documents'])} documents)\n"
|
||||||
|
else:
|
||||||
|
context = "No previous round context available"
|
||||||
|
|
||||||
|
return context
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting workflow history context: {str(e)}")
|
||||||
|
return "No previous round context available"
|
||||||
|
|
||||||
|
def getAvailableDocuments(self, workflow) -> str:
|
||||||
|
"""Get available documents formatted for AI prompts (exact copy of old ServiceCenter.getEnhancedDocumentContext)"""
|
||||||
|
try:
|
||||||
|
if not workflow or not hasattr(workflow, 'messages'):
|
||||||
|
return "No documents available"
|
||||||
|
|
||||||
|
# Get document reference list using the exact same logic as old system
|
||||||
|
document_list = self._getDocumentReferenceList(workflow)
|
||||||
|
|
||||||
|
# Build technical context string for AI action planning (exact copy of old system)
|
||||||
|
context = "AVAILABLE DOCUMENTS:\n\n"
|
||||||
|
|
||||||
|
# Process chat exchanges (current round) - exact copy of old system
|
||||||
|
if document_list["chat"]:
|
||||||
|
context += "CURRENT ROUND DOCUMENTS:\n"
|
||||||
|
for exchange in document_list["chat"]:
|
||||||
|
# Generate docList reference for the exchange (using message ID and label)
|
||||||
|
# Find the message that corresponds to this exchange
|
||||||
|
message_id = None
|
||||||
|
for message in workflow.messages:
|
||||||
|
if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange['documentsLabel']:
|
||||||
|
message_id = message.id
|
||||||
|
break
|
||||||
|
|
||||||
|
if message_id:
|
||||||
|
doc_list_ref = f"docList:{message_id}:{exchange['documentsLabel']}"
|
||||||
|
else:
|
||||||
|
# Fallback to label-only format if message ID not found
|
||||||
|
doc_list_ref = f"docList:{exchange['documentsLabel']}"
|
||||||
|
|
||||||
|
context += f"- {doc_list_ref} contains:\n"
|
||||||
|
# Generate docItem references for each document in the list
|
||||||
|
for doc_ref in exchange['documents']:
|
||||||
|
if doc_ref.startswith("docItem:"):
|
||||||
|
context += f" - {doc_ref}\n"
|
||||||
|
else:
|
||||||
|
# Convert to proper docItem format if needed
|
||||||
|
context += f" - docItem:{doc_ref}\n"
|
||||||
|
context += "\n"
|
||||||
|
|
||||||
|
# Process history exchanges (previous rounds) - exact copy of old system
|
||||||
|
if document_list["history"]:
|
||||||
|
context += "WORKFLOW HISTORY DOCUMENTS:\n"
|
||||||
|
for exchange in document_list["history"]:
|
||||||
|
# Generate docList reference for the exchange (using message ID and label)
|
||||||
|
# Find the message that corresponds to this exchange
|
||||||
|
message_id = None
|
||||||
|
for message in workflow.messages:
|
||||||
|
if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange['documentsLabel']:
|
||||||
|
message_id = message.id
|
||||||
|
break
|
||||||
|
|
||||||
|
if message_id:
|
||||||
|
doc_list_ref = f"docList:{message_id}:{exchange['documentsLabel']}"
|
||||||
|
else:
|
||||||
|
# Fallback to label-only format if message ID not found
|
||||||
|
doc_list_ref = f"docList:{exchange['documentsLabel']}"
|
||||||
|
|
||||||
|
context += f"- {doc_list_ref} contains:\n"
|
||||||
|
# Generate docItem references for each document in the list
|
||||||
|
for doc_ref in exchange['documents']:
|
||||||
|
if doc_ref.startswith("docItem:"):
|
||||||
|
context += f" - {doc_ref}\n"
|
||||||
|
else:
|
||||||
|
# Convert to proper docItem format if needed
|
||||||
|
context += f" - docItem:{doc_ref}\n"
|
||||||
|
context += "\n"
|
||||||
|
|
||||||
|
if not document_list["chat"] and not document_list["history"]:
|
||||||
|
context += "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n"
|
||||||
|
|
||||||
|
return context
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting available documents: {str(e)}")
|
||||||
|
return "NO DOCUMENTS AVAILABLE - Error generating document context."
|
||||||
|
|
||||||
|
def _getDocumentReferenceList(self, workflow) -> Dict[str, List]:
|
||||||
|
"""Get list of document exchanges with new labeling format, sorted by recency (exact copy of old system)"""
|
||||||
|
# Collect all documents first and refresh their attributes
|
||||||
|
all_documents = []
|
||||||
|
for message in workflow.messages:
|
||||||
|
if message.documents:
|
||||||
|
all_documents.extend(message.documents)
|
||||||
|
|
||||||
|
# Refresh file attributes for all documents
|
||||||
|
if all_documents:
|
||||||
|
self._refreshDocumentFileAttributes(all_documents)
|
||||||
|
|
||||||
|
chat_exchanges = []
|
||||||
|
history_exchanges = []
|
||||||
|
|
||||||
|
# Process messages in reverse order; "first" marks boundary
|
||||||
|
in_current_round = True
|
||||||
|
for message in reversed(workflow.messages):
|
||||||
|
is_first = message.status == "first" if hasattr(message, 'status') else False
|
||||||
|
|
||||||
|
# Build a DocumentExchange if message has documents
|
||||||
|
doc_exchange = None
|
||||||
|
if message.documents:
|
||||||
|
if message.actionId and message.documentsLabel:
|
||||||
|
# Validate that we use the same label as in the message
|
||||||
|
validated_label = self._validateDocumentLabelConsistency(message)
|
||||||
|
|
||||||
|
# Use the message's actual documentsLabel
|
||||||
|
doc_refs = []
|
||||||
|
for doc in message.documents:
|
||||||
|
doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
|
||||||
|
doc_refs.append(doc_ref)
|
||||||
|
|
||||||
|
doc_exchange = {
|
||||||
|
'documentsLabel': validated_label,
|
||||||
|
'documents': doc_refs
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
# Generate new labels for documents without explicit labels
|
||||||
|
doc_refs = []
|
||||||
|
for doc in message.documents:
|
||||||
|
doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
|
||||||
|
doc_refs.append(doc_ref)
|
||||||
|
|
||||||
|
if doc_refs:
|
||||||
|
# Create a label based on message context
|
||||||
|
context_prefix = self._generateWorkflowContextPrefix(message)
|
||||||
|
context_label = f"{context_prefix}_context"
|
||||||
|
|
||||||
|
doc_exchange = {
|
||||||
|
'documentsLabel': context_label,
|
||||||
|
'documents': doc_refs
|
||||||
|
}
|
||||||
|
|
||||||
|
# Append to appropriate container based on boundary
|
||||||
|
if doc_exchange:
|
||||||
|
if in_current_round:
|
||||||
|
chat_exchanges.append(doc_exchange)
|
||||||
|
else:
|
||||||
|
history_exchanges.append(doc_exchange)
|
||||||
|
|
||||||
|
# Flip boundary after including the "first" message in chat
|
||||||
|
if in_current_round and is_first:
|
||||||
|
in_current_round = False
|
||||||
|
|
||||||
|
# Sort by recency: most recent first, then current round, then earlier rounds
|
||||||
|
# Sort chat exchanges by message sequence number (most recent first)
|
||||||
|
chat_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x, workflow), reverse=True)
|
||||||
|
# Sort history exchanges by message sequence number (most recent first)
|
||||||
|
history_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x, workflow), reverse=True)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"chat": chat_exchanges,
|
||||||
|
"history": history_exchanges
|
||||||
|
}
|
||||||
|
|
||||||
|
def _refreshDocumentFileAttributes(self, documents) -> None:
|
||||||
|
"""Update file attributes (fileName, fileSize, mimeType) for documents"""
|
||||||
|
for doc in documents:
|
||||||
|
try:
|
||||||
|
# Use the proper WorkflowService method to get file info
|
||||||
|
file_info = self.getFileInfo(doc.fileId)
|
||||||
|
if file_info:
|
||||||
|
doc.fileName = file_info.get("fileName", doc.fileName)
|
||||||
|
doc.fileSize = file_info.get("size", doc.fileSize)
|
||||||
|
doc.mimeType = file_info.get("mimeType", doc.mimeType)
|
||||||
|
else:
|
||||||
|
logger.warning(f"File not found for document {doc.id}, fileId: {doc.fileId}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error refreshing file attributes for document {doc.id}: {e}")
|
||||||
|
|
||||||
|
def _generateWorkflowContextPrefix(self, message) -> str:
|
||||||
|
"""Generate workflow context prefix: round{num}_task{num}_action{num}"""
|
||||||
|
round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1
|
||||||
|
task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0
|
||||||
|
action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0
|
||||||
|
return f"round{round_num}_task{task_num}_action{action_num}"
|
||||||
|
|
||||||
|
def _getDocumentReferenceFromChatDocument(self, document, message) -> str:
|
||||||
|
"""Get document reference using document ID and filename."""
|
||||||
|
try:
|
||||||
|
# Use document ID and filename for simple reference
|
||||||
|
return f"docItem:{document.id}:{document.fileName}"
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Critical error creating document reference for document {document.id}: {str(e)}")
|
||||||
|
# Re-raise the error to prevent workflow from continuing with invalid data
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _getMessageSequenceForExchange(self, exchange, workflow) -> int:
|
||||||
|
"""Get message sequence number for sorting exchanges by recency"""
|
||||||
|
try:
|
||||||
|
# Extract message ID from the first document reference
|
||||||
|
if exchange['documents'] and len(exchange['documents']) > 0:
|
||||||
|
first_doc_ref = exchange['documents'][0]
|
||||||
|
if first_doc_ref.startswith("docItem:"):
|
||||||
|
# docItem:<id>:<label> - extract ID
|
||||||
|
parts = first_doc_ref.split(':')
|
||||||
|
if len(parts) >= 2:
|
||||||
|
doc_id = parts[1]
|
||||||
|
# Find the message containing this document
|
||||||
|
for message in workflow.messages:
|
||||||
|
if message.documents:
|
||||||
|
for doc in message.documents:
|
||||||
|
if doc.id == doc_id:
|
||||||
|
return message.sequenceNr if hasattr(message, 'sequenceNr') else 0
|
||||||
|
elif first_doc_ref.startswith("docList:"):
|
||||||
|
# docList:<message_id>:<label> - extract message ID
|
||||||
|
parts = first_doc_ref.split(':')
|
||||||
|
if len(parts) >= 2:
|
||||||
|
message_id = parts[1]
|
||||||
|
# Find the message by ID
|
||||||
|
for message in workflow.messages:
|
||||||
|
if str(message.id) == message_id:
|
||||||
|
return message.sequenceNr if hasattr(message, 'sequenceNr') else 0
|
||||||
|
return 0
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting message sequence for exchange: {str(e)}")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def _validateDocumentLabelConsistency(self, message) -> str:
|
||||||
|
"""Validate that the document label used for references matches the message's actual label"""
|
||||||
|
if not hasattr(message, 'documentsLabel') or not message.documentsLabel:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Simply return the message's actual documentsLabel - no correction, just validation
|
||||||
|
return message.documentsLabel
|
||||||
|
|
||||||
|
def getConnectionReferenceList(self) -> List[str]:
|
||||||
|
"""Get connection reference list (matching old handlingTasks.py logic)"""
|
||||||
|
try:
|
||||||
|
# Get connections from the database using the same logic as the old system
|
||||||
|
if hasattr(self.serviceCenter, 'interfaceDbApp') and hasattr(self.serviceCenter, 'user'):
|
||||||
|
userId = self.serviceCenter.user.id
|
||||||
|
connections = self.serviceCenter.interfaceDbApp.getUserConnections(userId)
|
||||||
|
if connections:
|
||||||
|
# Format connections as reference strings using the same pattern as the old system
|
||||||
|
connectionRefs = []
|
||||||
|
for conn in connections:
|
||||||
|
# Create reference string in format: connection:{authority}:{username}:{id} [status:..., token:...]
|
||||||
|
# This matches the format expected by getUserConnectionFromConnectionReference()
|
||||||
|
ref = self.getConnectionReferenceFromUserConnection(conn)
|
||||||
|
connectionRefs.append(ref)
|
||||||
|
return connectionRefs
|
||||||
|
|
||||||
|
return []
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting connection reference list: {str(e)}")
|
||||||
|
return []
|
||||||
|
|
@ -1,226 +0,0 @@
|
||||||
import asyncio
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
from typing import List, Dict, Any
|
|
||||||
|
|
||||||
# Ensure relative imports work when running directly
|
|
||||||
CURRENT_DIR = os.path.dirname(__file__)
|
|
||||||
GATEWAY_DIR = os.path.dirname(os.path.dirname(CURRENT_DIR))
|
|
||||||
if GATEWAY_DIR not in sys.path:
|
|
||||||
sys.path.append(GATEWAY_DIR)
|
|
||||||
|
|
||||||
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
|
||||||
from modules.services.serviceGeneration.mainServiceGeneration import DocumentGenerationService
|
|
||||||
from modules.datamodels.datamodelWorkflow import ActionResult, ActionDocument
|
|
||||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
|
|
||||||
from modules.services.serviceAi.mainServiceAi import AiService
|
|
||||||
|
|
||||||
|
|
||||||
TESTDATA_DIR = os.path.join(GATEWAY_DIR, "testdata")
|
|
||||||
|
|
||||||
|
|
||||||
def _read_test_files() -> List[Dict[str, Any]]:
|
|
||||||
files = []
|
|
||||||
for name in os.listdir(TESTDATA_DIR):
|
|
||||||
path = os.path.join(TESTDATA_DIR, name)
|
|
||||||
if not os.path.isfile(path):
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
with open(path, "rb") as f:
|
|
||||||
data = f.read()
|
|
||||||
mime = _guess_mime(name)
|
|
||||||
files.append({
|
|
||||||
"id": name,
|
|
||||||
"bytes": data,
|
|
||||||
"fileName": name,
|
|
||||||
"mimeType": mime,
|
|
||||||
})
|
|
||||||
except Exception:
|
|
||||||
continue
|
|
||||||
return files
|
|
||||||
|
|
||||||
|
|
||||||
def _guess_mime(name: str) -> str:
|
|
||||||
lower = name.lower()
|
|
||||||
if lower.endswith(".pdf"):
|
|
||||||
return "application/pdf"
|
|
||||||
if lower.endswith(".xlsx"):
|
|
||||||
return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
||||||
if lower.endswith(".jpg") or lower.endswith(".jpeg"):
|
|
||||||
return "image/jpeg"
|
|
||||||
if lower.endswith(".png"):
|
|
||||||
return "image/png"
|
|
||||||
return "application/octet-stream"
|
|
||||||
|
|
||||||
|
|
||||||
def run_extraction_1000_bytes() -> None:
|
|
||||||
svc = ExtractionService()
|
|
||||||
docs = _read_test_files()
|
|
||||||
options = {
|
|
||||||
# cap total pooled size per document set
|
|
||||||
"maxSize": 1000,
|
|
||||||
# allow chunking to respect the cap across parts
|
|
||||||
"chunkAllowed": True,
|
|
||||||
# chunk sizes for different content types to help fit under the cap
|
|
||||||
"textChunkSize": 500,
|
|
||||||
"tableChunkSize": 500,
|
|
||||||
"structureChunkSize": 500,
|
|
||||||
# simple merge strategy if supported
|
|
||||||
"mergeStrategy": {},
|
|
||||||
}
|
|
||||||
results = svc.extractContent(docs, options)
|
|
||||||
print("[extraction] documents:", len(docs), "results:", len(results))
|
|
||||||
for i, ec in enumerate(results):
|
|
||||||
total = sum(int(p.metadata.get("size", 0) or 0) for p in ec.parts)
|
|
||||||
print(f" - doc[{i}] parts={len(ec.parts)} pooledBytes={total}")
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
|
||||||
print("=== serviceExtraction: compress to 1000 bytes ===")
|
|
||||||
run_extraction_1000_bytes()
|
|
||||||
print("\n=== serviceGeneration: create ActionResult and write output to testdata ===")
|
|
||||||
await run_generation_write_file()
|
|
||||||
print("\n=== serviceAi: planning call + image + pdf extraction ===")
|
|
||||||
await run_ai_tests()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
asyncio.run(main())
|
|
||||||
|
|
||||||
async def run_generation_write_file() -> None:
|
|
||||||
# Minimal stubs for interfaces expected by DocumentGenerationService
|
|
||||||
class _FileItem:
|
|
||||||
def __init__(self, file_id: str, file_name: str, mime_type: str, content: bytes):
|
|
||||||
self.id = file_id
|
|
||||||
self.fileName = file_name
|
|
||||||
self.mimeType = mime_type
|
|
||||||
self.fileSize = len(content)
|
|
||||||
|
|
||||||
class _ComponentInterface:
|
|
||||||
def __init__(self):
|
|
||||||
self._files = {}
|
|
||||||
def createFile(self, name: str, mimeType: str, content: bytes):
|
|
||||||
fid = f"test_{len(self._files)+1}"
|
|
||||||
item = _FileItem(fid, name, mimeType, content)
|
|
||||||
self._files[fid] = item
|
|
||||||
return item
|
|
||||||
def createFileData(self, fileId: str, content: bytes):
|
|
||||||
# Persist into testdata directory as requested
|
|
||||||
item = self._files[fileId]
|
|
||||||
out_path = os.path.join(TESTDATA_DIR, f"output_{fileId}_{item.fileName}")
|
|
||||||
with open(out_path, "wb") as f:
|
|
||||||
f.write(content)
|
|
||||||
def getFile(self, fileId: str):
|
|
||||||
return self._files.get(fileId)
|
|
||||||
|
|
||||||
class _ServiceCenter:
|
|
||||||
def __init__(self, comp):
|
|
||||||
self.interfaceDbComponent = comp
|
|
||||||
self.interfaceDbChat = None
|
|
||||||
self.workflow = type("_Wf", (), {"id": "wf_test", "currentRound": 1, "currentTask": 1, "currentAction": 1, "status": "running", "totalTasks": 1, "totalActions": 1})()
|
|
||||||
|
|
||||||
component = _ComponentInterface()
|
|
||||||
center = _ServiceCenter(component)
|
|
||||||
gen = DocumentGenerationService(center)
|
|
||||||
|
|
||||||
# Build a fake action and ActionResult with a small text document
|
|
||||||
class _Action:
|
|
||||||
def __init__(self):
|
|
||||||
self.id = "action_test"
|
|
||||||
self.execMethod = "document"
|
|
||||||
self.execAction = "generate"
|
|
||||||
self.execParameters = {}
|
|
||||||
self.execResultLabel = "round1_task1_action1_results"
|
|
||||||
action = _Action()
|
|
||||||
|
|
||||||
content = "This is a generated test file from serviceGeneration test."
|
|
||||||
action_doc = ActionDocument(documentName="test_generated.txt", documentData=content, mimeType="text/plain")
|
|
||||||
action_result = ActionResult(success=True, documents=[action_doc])
|
|
||||||
|
|
||||||
docs = gen.createDocumentsFromActionResult(action_result, action, center.workflow, message_id="msg_test")
|
|
||||||
print("[generation] created documents:", len(docs))
|
|
||||||
|
|
||||||
|
|
||||||
async def run_ai_tests() -> None:
|
|
||||||
# Create AiService instance (uses internal default model registry; no external creds required for this test)
|
|
||||||
ai = await AiService.create()
|
|
||||||
|
|
||||||
# Planning AI call (like in handlingTasks.generateTaskPlan)
|
|
||||||
plan_options = AiCallOptions(
|
|
||||||
operationType=OperationType.GENERATE_PLAN,
|
|
||||||
priority=Priority.QUALITY,
|
|
||||||
compressPrompt=False,
|
|
||||||
compressContext=False,
|
|
||||||
processingMode=ProcessingMode.DETAILED,
|
|
||||||
maxCost=0.05,
|
|
||||||
maxProcessingTime=10,
|
|
||||||
)
|
|
||||||
plan_prompt = """
|
|
||||||
You are a planning assistant. Return a compact JSON with fields: tasks:[{id, objective, success_criteria:[]}], languageUserDetected:"en".
|
|
||||||
Create exactly one simple task id:"task_1" objective:"Test planning" success_criteria:["done"].
|
|
||||||
""".strip()
|
|
||||||
plan_resp = await ai.callAi(prompt=plan_prompt, placeholders=None, options=plan_options)
|
|
||||||
print("[ai] planning response length:", len(plan_resp) if plan_resp else 0)
|
|
||||||
|
|
||||||
# Image content extraction prompt using test JPEG
|
|
||||||
img_path = os.path.join(TESTDATA_DIR, "00Untitled.jpg")
|
|
||||||
img_resp = None
|
|
||||||
if os.path.exists(img_path):
|
|
||||||
try:
|
|
||||||
with open(img_path, "rb") as f:
|
|
||||||
img_bytes = f.read()
|
|
||||||
img_options = AiCallOptions(
|
|
||||||
operationType=OperationType.ANALYSE_CONTENT,
|
|
||||||
priority=Priority.BALANCED,
|
|
||||||
compressPrompt=True,
|
|
||||||
compressContext=False,
|
|
||||||
processingMode=ProcessingMode.ADVANCED,
|
|
||||||
maxCost=0.02,
|
|
||||||
maxProcessingTime=10,
|
|
||||||
)
|
|
||||||
img_resp = await ai.callAiImage(
|
|
||||||
prompt="Describe the content of this image succinctly.",
|
|
||||||
imageData=img_bytes,
|
|
||||||
mimeType="image/jpeg",
|
|
||||||
options=img_options,
|
|
||||||
)
|
|
||||||
print("[ai] image analysis response length:", len(img_resp) if img_resp else 0)
|
|
||||||
except Exception as e:
|
|
||||||
print("[ai] image analysis error:", str(e))
|
|
||||||
else:
|
|
||||||
print("[ai] image test file not found; skipping")
|
|
||||||
|
|
||||||
# PDF extraction prompt: emulate text call with document context built via ExtractionService
|
|
||||||
pdf_path = os.path.join(TESTDATA_DIR, "diagramm_komponenten.pdf")
|
|
||||||
if os.path.exists(pdf_path):
|
|
||||||
try:
|
|
||||||
# Build a minimal ChatDocument-like shim that AiService._callAiText expects via extraction
|
|
||||||
class _Doc:
|
|
||||||
def __init__(self, file_path: str, mime: str):
|
|
||||||
self.id = "doc_pdf"
|
|
||||||
self.fileName = os.path.basename(file_path)
|
|
||||||
self.mimeType = mime
|
|
||||||
with open(file_path, "rb") as f:
|
|
||||||
self.fileData = f.read()
|
|
||||||
pdf_doc = _Doc(pdf_path, "application/pdf")
|
|
||||||
|
|
||||||
pdf_options = AiCallOptions(
|
|
||||||
operationType=OperationType.ANALYSE_CONTENT,
|
|
||||||
priority=Priority.BALANCED,
|
|
||||||
compressPrompt=True,
|
|
||||||
compressContext=True,
|
|
||||||
processingMode=ProcessingMode.ADVANCED,
|
|
||||||
maxContextBytes=1000,
|
|
||||||
chunkAllowed=True,
|
|
||||||
maxCost=0.02,
|
|
||||||
maxProcessingTime=10,
|
|
||||||
)
|
|
||||||
pdf_prompt = "Extract key information from the attached PDF."
|
|
||||||
pdf_resp = await ai.callAi(prompt=pdf_prompt, documents=[pdf_doc], options=pdf_options)
|
|
||||||
print("[ai] pdf extraction response length:", len(pdf_resp) if pdf_resp else 0)
|
|
||||||
except Exception as e:
|
|
||||||
print("[ai] pdf extraction error:", str(e))
|
|
||||||
else:
|
|
||||||
print("[ai] pdf test file not found; skipping")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -324,7 +324,7 @@ def getModelClasses() -> Dict[str, Type[BaseModel]]:
|
||||||
os.path.dirname(os.path.dirname(__file__)), "interfaces"
|
os.path.dirname(os.path.dirname(__file__)), "interfaces"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Find all model files
|
# Find all model files in interfaces directory
|
||||||
for fileName in os.listdir(interfaces_dir):
|
for fileName in os.listdir(interfaces_dir):
|
||||||
if fileName.endswith("Model.py"):
|
if fileName.endswith("Model.py"):
|
||||||
# Convert fileName to module name (e.g., gatewayModel.py -> gatewayModel)
|
# Convert fileName to module name (e.g., gatewayModel.py -> gatewayModel)
|
||||||
|
|
@ -342,6 +342,29 @@ def getModelClasses() -> Dict[str, Type[BaseModel]]:
|
||||||
):
|
):
|
||||||
modelClasses[name] = obj
|
modelClasses[name] = obj
|
||||||
|
|
||||||
|
# Also get models from datamodels directory
|
||||||
|
datamodels_dir = os.path.join(
|
||||||
|
os.path.dirname(os.path.dirname(__file__)), "datamodels"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Find all model files in datamodels directory
|
||||||
|
for fileName in os.listdir(datamodels_dir):
|
||||||
|
if fileName.startswith("datamodel") and fileName.endswith(".py"):
|
||||||
|
# Convert fileName to module name (e.g., datamodelUtils.py -> datamodelUtils)
|
||||||
|
module_name = fileName[:-3]
|
||||||
|
|
||||||
|
# Import the module dynamically
|
||||||
|
module = importlib.import_module(f"modules.datamodels.{module_name}")
|
||||||
|
|
||||||
|
# Get all classes from the module
|
||||||
|
for name, obj in inspect.getmembers(module):
|
||||||
|
if (
|
||||||
|
inspect.isclass(obj)
|
||||||
|
and issubclass(obj, BaseModel)
|
||||||
|
and obj != BaseModel
|
||||||
|
):
|
||||||
|
modelClasses[name] = obj
|
||||||
|
|
||||||
return modelClasses
|
return modelClasses
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,8 +8,10 @@ from typing import Dict, Any, List, Optional
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
|
|
||||||
from modules.workflows.methods.methodBase import MethodBase, action
|
from modules.workflows.methods.methodBase import MethodBase, action
|
||||||
from modules.datamodels.datamodelWorkflow import ActionResult
|
from modules.datamodels.datamodelChat import ActionResult
|
||||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, Priority
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, Priority
|
||||||
|
from modules.datamodels.datamodelChat import ChatDocument
|
||||||
|
from modules.datamodels.datamodelWeb import WebResearchRequest, WebResearchOptions
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -28,26 +30,36 @@ class MethodAi(MethodBase):
|
||||||
@action
|
@action
|
||||||
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
"""
|
||||||
Perform an AI call for any type of task with optional document references
|
GENERAL:
|
||||||
|
- Purpose: AI-based analysis and content generation with optional document context.
|
||||||
|
- Input requirements: aiPrompt (required); optional documentList, resultType, processingMode, includeMetadata, operationType, priority, maxCost, maxProcessingTime, requiredTags.
|
||||||
|
- Output format: Single or multiple documents in requested format.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
aiPrompt (str): The AI prompt for processing
|
- aiPrompt (str, required): Instruction for the AI.
|
||||||
documentList (list, optional): List of document references to include in context
|
- documentList (list, optional): Document reference(s) for context.
|
||||||
expectedDocumentFormat (str, optional): Expected document output format with extension, mimeType, description
|
- resultType (str, optional): Output extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png). Default: txt.
|
||||||
processingMode (str, optional): Processing mode - use 'basic', 'advanced', or 'detailed' (defaults to 'basic')
|
- processingMode (str, optional): basic | advanced | detailed. Default: basic.
|
||||||
includeMetadata (bool, optional): Whether to include metadata (default: True)
|
- includeMetadata (bool, optional): Include metadata when available. Default: True.
|
||||||
operationType (str, optional): Operation type - use 'general', 'generate_plan', 'analyse_content', 'generate_content', 'web_research', 'image_analysis', or 'image_generation'
|
- operationType (str, optional): general | generate_plan | analyse_content | generate_content | web_research | image_analysis | image_generation. Default: general.
|
||||||
priority (str, optional): Priority level - use 'speed', 'quality', 'cost', or 'balanced'
|
- priority (str, optional): speed | quality | cost | balanced. Default: balanced.
|
||||||
maxCost (float, optional): Maximum cost budget for the AI call
|
- maxCost (float, optional): Cost limit.
|
||||||
maxProcessingTime (int, optional): Maximum processing time in seconds
|
- maxProcessingTime (int, optional): Time limit in seconds.
|
||||||
requiredTags (list, optional): Required model tags - use 'text', 'chat', 'reasoning', 'analysis', 'image', 'vision', 'web', 'search', etc.
|
- requiredTags (list, optional): Capability tags (e.g., text, chat, reasoning, analysis, image, vision, web, search).
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
# Debug logging to see what parameters are received
|
||||||
|
logger.info(f"MethodAi.process received parameters: {parameters}")
|
||||||
|
logger.info(f"Parameters type: {type(parameters)}")
|
||||||
|
logger.info(f"Parameters keys: {list(parameters.keys()) if isinstance(parameters, dict) else 'Not a dict'}")
|
||||||
|
|
||||||
aiPrompt = parameters.get("aiPrompt")
|
aiPrompt = parameters.get("aiPrompt")
|
||||||
|
logger.info(f"aiPrompt extracted: '{aiPrompt}' (type: {type(aiPrompt)})")
|
||||||
|
|
||||||
documentList = parameters.get("documentList", [])
|
documentList = parameters.get("documentList", [])
|
||||||
if isinstance(documentList, str):
|
if isinstance(documentList, str):
|
||||||
documentList = [documentList]
|
documentList = [documentList]
|
||||||
expectedDocumentFormat = parameters.get("expectedDocumentFormat", "")
|
resultType = parameters.get("resultType", "txt")
|
||||||
processingMode = parameters.get("processingMode", "basic")
|
processingMode = parameters.get("processingMode", "basic")
|
||||||
includeMetadata = parameters.get("includeMetadata", True)
|
includeMetadata = parameters.get("includeMetadata", True)
|
||||||
operationType = parameters.get("operationType", "general")
|
operationType = parameters.get("operationType", "general")
|
||||||
|
|
@ -57,102 +69,23 @@ class MethodAi(MethodBase):
|
||||||
requiredTags = parameters.get("requiredTags")
|
requiredTags = parameters.get("requiredTags")
|
||||||
|
|
||||||
if not aiPrompt:
|
if not aiPrompt:
|
||||||
|
logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
|
||||||
return ActionResult.isFailure(
|
return ActionResult.isFailure(
|
||||||
error="AI prompt is required"
|
error="AI prompt is required"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Determine output format first (needed for context building)
|
# Determine output extension and default MIME type without duplicating service logic
|
||||||
output_extension = ".txt" # Default
|
normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
|
||||||
output_mime_type = "text/plain" # Default
|
output_extension = f".{normalized_result_type}"
|
||||||
|
output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available
|
||||||
|
logger.info(f"Using result type: {resultType} -> {output_extension}")
|
||||||
|
|
||||||
if expectedDocumentFormat:
|
# Get ChatDocuments for AI service - let AI service handle all document processing
|
||||||
output_extension = expected_format.get("extension", ".txt")
|
chatDocuments = []
|
||||||
output_mime_type = expected_format.get("mimeType", "text/plain")
|
|
||||||
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
|
||||||
|
|
||||||
# Build context from documents if provided
|
|
||||||
context = ""
|
|
||||||
if documentList:
|
if documentList:
|
||||||
chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
|
chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
|
||||||
if chatDocuments:
|
if chatDocuments:
|
||||||
context_parts = []
|
logger.info(f"Prepared {len(chatDocuments)} documents for AI processing")
|
||||||
# Build batch payload for extraction
|
|
||||||
batch_docs = []
|
|
||||||
for doc in chatDocuments:
|
|
||||||
try:
|
|
||||||
fileBytes = self.services.workflow.getFileData(doc.fileId) if hasattr(doc, 'fileId') else None
|
|
||||||
except Exception:
|
|
||||||
fileBytes = None
|
|
||||||
batch_docs.append({
|
|
||||||
"id": getattr(doc, 'id', None),
|
|
||||||
"bytes": fileBytes or b"",
|
|
||||||
"fileName": getattr(doc, 'fileName', 'unknown'),
|
|
||||||
"mimeType": getattr(doc, 'mimeType', None) or "application/octet-stream"
|
|
||||||
})
|
|
||||||
|
|
||||||
extraction_prompt = (
|
|
||||||
f"Extract content for AI task context. Task: {aiPrompt}. Mode: {processingMode}."
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
extracted_list = await self.services.extraction.extractContentFromDocuments(
|
|
||||||
prompt=extraction_prompt,
|
|
||||||
documents=batch_docs,
|
|
||||||
options={"ai": {"enabled": False}, "mergeStrategy": {}}
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
extracted_list = []
|
|
||||||
|
|
||||||
# Helper to aggregate readable text from parts
|
|
||||||
def _partsToText(parts) -> str:
|
|
||||||
lines: List[str] = []
|
|
||||||
for p in (parts or []):
|
|
||||||
try:
|
|
||||||
if getattr(p, 'typeGroup', '') in ("text", "table", "structure") and getattr(p, 'data', None):
|
|
||||||
lines.append(p.data)
|
|
||||||
except Exception:
|
|
||||||
continue
|
|
||||||
return "\n\n".join(lines)
|
|
||||||
|
|
||||||
for i, doc in enumerate(chatDocuments):
|
|
||||||
file_info = self.services.workflow.getFileInfo(doc.fileId)
|
|
||||||
content = ""
|
|
||||||
try:
|
|
||||||
ec = extracted_list[i] if i < len(extracted_list) else None
|
|
||||||
if ec:
|
|
||||||
content = _partsToText(getattr(ec, 'parts', []))
|
|
||||||
except Exception:
|
|
||||||
content = ""
|
|
||||||
|
|
||||||
if content.strip():
|
|
||||||
metadata_info = ""
|
|
||||||
if file_info and includeMetadata:
|
|
||||||
metadata_info = f" (Size: {file_info.get('fileSize', 'unknown')}, Type: {file_info.get('mimeType', 'unknown')})"
|
|
||||||
base_length = 5000 if processingMode == "detailed" else 3000 if processingMode == "advanced" else 2000
|
|
||||||
if processingMode == "detailed":
|
|
||||||
context_parts.append(
|
|
||||||
f"Document: {doc.fileName}{metadata_info}\nRelevance to AI Task: This document contains content directly related to '{aiPrompt[:100]}...'\nContent:\n{content[:base_length]}..."
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
context_parts.append(
|
|
||||||
f"Document: {doc.fileName}{metadata_info}\nContent:\n{content[:base_length]}..."
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
context_parts.append(f"Document: {doc.fileName} [No readable text content - binary file]")
|
|
||||||
|
|
||||||
if context_parts:
|
|
||||||
context_header = f"""
|
|
||||||
=== DOCUMENT CONTEXT FOR AI PROCESSING ===
|
|
||||||
AI Task: {aiPrompt[:100]}...
|
|
||||||
Processing Mode: {processingMode}
|
|
||||||
Expected Output Format: {output_extension.upper()}
|
|
||||||
Total Documents: {len(chatDocuments)}
|
|
||||||
|
|
||||||
The following documents contain content relevant to your task.
|
|
||||||
Use this information to provide the most accurate and helpful response.
|
|
||||||
================================================
|
|
||||||
"""
|
|
||||||
context = context_header + "\n\n" + "\n\n".join(context_parts)
|
|
||||||
logger.info(f"Included {len(chatDocuments)} documents in AI context with task-specific extraction")
|
|
||||||
|
|
||||||
# Build enhanced prompt
|
# Build enhanced prompt
|
||||||
enhanced_prompt = aiPrompt
|
enhanced_prompt = aiPrompt
|
||||||
|
|
@ -163,52 +96,14 @@ class MethodAi(MethodBase):
|
||||||
elif processingMode == "advanced":
|
elif processingMode == "advanced":
|
||||||
enhanced_prompt += "\n\nPlease provide an advanced response with deep insights."
|
enhanced_prompt += "\n\nPlease provide an advanced response with deep insights."
|
||||||
|
|
||||||
# Add custom instructions if provided
|
# Note: customInstructions parameter was removed as it's not defined in the method signature
|
||||||
if customInstructions:
|
|
||||||
enhanced_prompt += f"\n\nAdditional Instructions: {customInstructions}"
|
|
||||||
|
|
||||||
# Add format-specific instructions only if non-text format is requested
|
# Add format guidance to prompt
|
||||||
if output_extension != ".txt":
|
if normalized_result_type != "txt":
|
||||||
if output_extension == ".csv":
|
enhanced_prompt += f"\n\nPlease deliver the result in {normalized_result_type.upper()} format. Ensure the output follows the proper {normalized_result_type.upper()} syntax and structure."
|
||||||
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows."
|
|
||||||
elif output_extension == ".json":
|
|
||||||
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure JSON data without any markdown formatting, code blocks, or additional text. Output only the JSON content."
|
|
||||||
elif output_extension == ".xml":
|
|
||||||
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure XML data without any markdown formatting, code blocks, or additional text. Output only the XML content."
|
|
||||||
else:
|
|
||||||
enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure {output_extension.upper()} data without any markdown formatting, code blocks, or additional text."
|
|
||||||
|
|
||||||
# Call appropriate AI service based on processing mode
|
# Build options and delegate document handling to AI/Extraction/Generation services
|
||||||
logger.info(f"Executing AI call with mode: {processingMode}, prompt length: {len(enhanced_prompt)}")
|
|
||||||
if context:
|
|
||||||
logger.info(f"Including context from {len(documentList)} documents")
|
|
||||||
|
|
||||||
# Encourage longer, structured outputs with a min-length hint
|
|
||||||
min_tokens_hint = "\n\nPlease ensure the response is substantial and complete."
|
|
||||||
call_prompt = enhanced_prompt + min_tokens_hint
|
|
||||||
|
|
||||||
# Centralized AI call with optional document context
|
|
||||||
documents = []
|
|
||||||
try:
|
|
||||||
if documentList:
|
|
||||||
for d in (chatDocuments or []):
|
|
||||||
try:
|
|
||||||
file_data = self.services.workflow.getFileData(d.fileId)
|
|
||||||
documents.append(
|
|
||||||
ChatDocument(
|
|
||||||
fileData=file_data,
|
|
||||||
fileName=d.fileName,
|
|
||||||
mimeType=d.mimeType
|
|
||||||
)
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
continue
|
|
||||||
except Exception:
|
|
||||||
documents = None
|
|
||||||
|
|
||||||
output_format = output_extension.replace('.', '') or 'txt'
|
output_format = output_extension.replace('.', '') or 'txt'
|
||||||
|
|
||||||
# Build options using new AiCallOptions format
|
|
||||||
options = AiCallOptions(
|
options = AiCallOptions(
|
||||||
operationType=operationType,
|
operationType=operationType,
|
||||||
priority=priority,
|
priority=priority,
|
||||||
|
|
@ -221,76 +116,240 @@ class MethodAi(MethodBase):
|
||||||
maxProcessingTime=maxProcessingTime,
|
maxProcessingTime=maxProcessingTime,
|
||||||
requiredTags=requiredTags
|
requiredTags=requiredTags
|
||||||
)
|
)
|
||||||
|
|
||||||
|
supported_generation_formats = {"html", "pdf", "docx", "txt", "md", "json", "csv", "xlsx"}
|
||||||
|
output_format_arg = output_format if output_format in supported_generation_formats else None
|
||||||
|
|
||||||
result = await self.services.ai.callAi(
|
result = await self.services.ai.callAi(
|
||||||
prompt=call_prompt,
|
prompt=enhanced_prompt,
|
||||||
documents=documents or None,
|
documents=chatDocuments if chatDocuments else None,
|
||||||
options=options
|
options=options,
|
||||||
|
outputFormat=output_format_arg
|
||||||
)
|
)
|
||||||
|
|
||||||
# If expected JSON and too short/not JSON, retry with stricter JSON guardrails
|
from modules.datamodels.datamodelChat import ActionDocument
|
||||||
if output_extension == ".json":
|
|
||||||
import json
|
|
||||||
cleaned = (result or "").strip()
|
|
||||||
if cleaned.startswith('```json'):
|
|
||||||
cleaned = cleaned[7:]
|
|
||||||
if cleaned.endswith('```'):
|
|
||||||
cleaned = cleaned[:-3]
|
|
||||||
cleaned = cleaned.strip()
|
|
||||||
needs_retry = False
|
|
||||||
try:
|
|
||||||
parsed = json.loads(cleaned)
|
|
||||||
# Heuristic: small dict -> possibly underfilled
|
|
||||||
if isinstance(parsed, dict) and len(parsed.keys()) <= 2:
|
|
||||||
needs_retry = True
|
|
||||||
except Exception:
|
|
||||||
needs_retry = True
|
|
||||||
|
|
||||||
if needs_retry:
|
if isinstance(result, dict) and isinstance(result.get("documents"), list):
|
||||||
guardrail_prompt = (
|
action_documents = []
|
||||||
enhanced_prompt
|
for d in result["documents"]:
|
||||||
+ "\n\nCRITICAL: Return ONLY valid JSON, no markdown, no code fences. "
|
action_documents.append(ActionDocument(
|
||||||
"Include all requested fields with detailed content."
|
documentName=d.get("documentName"),
|
||||||
)
|
documentData=d.get("documentData"),
|
||||||
try:
|
mimeType=d.get("mimeType") or output_mime_type
|
||||||
result = await self.services.ai.callAi(
|
))
|
||||||
prompt=guardrail_prompt,
|
return ActionResult.isSuccess(documents=action_documents)
|
||||||
documents=context or None,
|
|
||||||
options=AiCallOptions(
|
|
||||||
operationType=OperationType.GENERATE_CONTENT,
|
|
||||||
priority=Priority.QUALITY,
|
|
||||||
compressPrompt=False,
|
|
||||||
compressContext=True,
|
|
||||||
processDocumentsIndividually=True,
|
|
||||||
processingMode="detailed",
|
|
||||||
resultFormat="json",
|
|
||||||
maxCost=0.03,
|
|
||||||
maxProcessingTime=30
|
|
||||||
)
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
result = cleaned # fallback to first attempt
|
|
||||||
|
|
||||||
# Create result document
|
|
||||||
fileName = f"ai_{processingMode}_{self._format_timestamp_for_filename()}{output_extension}"
|
|
||||||
|
|
||||||
|
|
||||||
|
extension = output_extension.lstrip('.')
|
||||||
# Return result in the standard ActionResult format
|
meaningful_name = self._generateMeaningfulFileName(
|
||||||
return ActionResult.isSuccess(
|
base_name="ai",
|
||||||
documents=[{
|
extension=extension,
|
||||||
"documentName": fileName,
|
action_name="result"
|
||||||
"documentData": {
|
|
||||||
"result": result,
|
|
||||||
"fileName": fileName,
|
|
||||||
"processedDocuments": len(documentList) if documentList else 0
|
|
||||||
},
|
|
||||||
"mimeType": output_mime_type
|
|
||||||
}]
|
|
||||||
)
|
)
|
||||||
|
action_document = ActionDocument(
|
||||||
|
documentName=meaningful_name,
|
||||||
|
documentData=result,
|
||||||
|
mimeType=output_mime_type
|
||||||
|
)
|
||||||
|
return ActionResult.isSuccess(documents=[action_document])
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in AI processing: {str(e)}")
|
logger.error(f"Error in AI processing: {str(e)}")
|
||||||
return ActionResult.isFailure(
|
return ActionResult.isFailure(
|
||||||
error=str(e)
|
error=str(e)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@action
|
||||||
|
async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
|
"""
|
||||||
|
GENERAL:
|
||||||
|
- Purpose: Web research and information gathering with basic analysis and sources.
|
||||||
|
- Input requirements: user_prompt (required); optional urls, max_results, max_pages, search_depth, extract_depth, pages_search_depth, country, time_range, topic, language.
|
||||||
|
- Output format: JSON with results and sources.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
- user_prompt (str, required): Research question or topic.
|
||||||
|
- urls (list, optional): Specific URLs to crawl.
|
||||||
|
- max_results (int, optional): Max search results. Default: 10.
|
||||||
|
- max_pages (int, optional): Max pages to crawl per site. Default: 10.
|
||||||
|
- search_depth (str, optional): basic | advanced. Default: basic.
|
||||||
|
- extract_depth (str, optional): basic | advanced. Default: advanced.
|
||||||
|
- pages_search_depth (int, optional): Crawl depth level. Default: 2.
|
||||||
|
- country (str, optional): Country code for bias.
|
||||||
|
- time_range (str, optional): d | w | m | y.
|
||||||
|
- topic (str, optional): general | news | academic.
|
||||||
|
- language (str, optional): Language code (e.g., de, en, fr).
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
user_prompt = parameters.get("user_prompt")
|
||||||
|
urls = parameters.get("urls")
|
||||||
|
max_results = parameters.get("max_results", 10)
|
||||||
|
max_pages = parameters.get("max_pages", 10)
|
||||||
|
search_depth = parameters.get("search_depth", "basic")
|
||||||
|
extract_depth = parameters.get("extract_depth", "advanced")
|
||||||
|
pages_search_depth = parameters.get("pages_search_depth", 2)
|
||||||
|
country = parameters.get("country")
|
||||||
|
time_range = parameters.get("time_range")
|
||||||
|
topic = parameters.get("topic")
|
||||||
|
language = parameters.get("language")
|
||||||
|
|
||||||
|
if not user_prompt:
|
||||||
|
return ActionResult.isFailure(
|
||||||
|
error="Search query is required"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Build WebResearchOptions
|
||||||
|
options = WebResearchOptions(
|
||||||
|
max_pages=max_pages,
|
||||||
|
search_depth=search_depth,
|
||||||
|
extract_depth=extract_depth,
|
||||||
|
pages_search_depth=pages_search_depth,
|
||||||
|
country=country,
|
||||||
|
time_range=time_range,
|
||||||
|
topic=topic,
|
||||||
|
language=language
|
||||||
|
)
|
||||||
|
|
||||||
|
# Build WebResearchRequest
|
||||||
|
request = WebResearchRequest(
|
||||||
|
user_prompt=user_prompt,
|
||||||
|
urls=urls,
|
||||||
|
max_results=max_results,
|
||||||
|
options=options
|
||||||
|
)
|
||||||
|
|
||||||
|
# Call web research service
|
||||||
|
logger.info(f"Performing comprehensive web research for: {user_prompt}")
|
||||||
|
logger.info(f"Max results: {max_results}, Max pages: {max_pages}")
|
||||||
|
if urls:
|
||||||
|
logger.info(f"Using provided URLs: {len(urls)}")
|
||||||
|
|
||||||
|
result = await self.services.ai.webResearch(request)
|
||||||
|
|
||||||
|
if not result.success:
|
||||||
|
return ActionResult.isFailure(error=result.error)
|
||||||
|
|
||||||
|
# Convert WebResearchActionResult to ActionResult format
|
||||||
|
documents = []
|
||||||
|
for doc in result.documents:
|
||||||
|
documents.append({
|
||||||
|
"documentName": doc.documentName,
|
||||||
|
"documentData": {
|
||||||
|
"user_prompt": doc.documentData.user_prompt,
|
||||||
|
"websites_analyzed": doc.documentData.websites_analyzed,
|
||||||
|
"additional_links_found": doc.documentData.additional_links_found,
|
||||||
|
"analysis_result": doc.documentData.analysis_result,
|
||||||
|
"sources": [{"title": s.title, "url": str(s.url)} for s in doc.documentData.sources],
|
||||||
|
"additional_links": doc.documentData.additional_links,
|
||||||
|
"debug_info": doc.documentData.debug_info
|
||||||
|
},
|
||||||
|
"mimeType": doc.mimeType
|
||||||
|
})
|
||||||
|
|
||||||
|
# Return result in the standard ActionResult format
|
||||||
|
return ActionResult.isSuccess(
|
||||||
|
documents=documents
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in web research: {str(e)}")
|
||||||
|
return ActionResult.isFailure(
|
||||||
|
error=str(e)
|
||||||
|
)
|
||||||
|
|
||||||
|
def _mergeDataChunks(self, chunks: List[str], resultType: str, mimeType: str) -> str:
|
||||||
|
"""Intelligently merge data chunks using strategies based on content type"""
|
||||||
|
try:
|
||||||
|
if resultType == "json":
|
||||||
|
return self._mergeJsonChunks(chunks)
|
||||||
|
elif resultType in ["csv", "table"]:
|
||||||
|
return self._mergeTableChunks(chunks)
|
||||||
|
elif resultType in ["txt", "md", "text"]:
|
||||||
|
return self._mergeTextChunks(chunks)
|
||||||
|
else:
|
||||||
|
# Default: simple concatenation
|
||||||
|
return "\n".join(str(chunk) for chunk in chunks)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to merge chunks intelligently: {str(e)}, using simple concatenation")
|
||||||
|
return "\n".join(str(chunk) for chunk in chunks)
|
||||||
|
|
||||||
|
def _mergeJsonChunks(self, chunks: List[str]) -> str:
|
||||||
|
"""Merge JSON chunks intelligently"""
|
||||||
|
import json
|
||||||
|
|
||||||
|
merged_data = []
|
||||||
|
for i, chunk in enumerate(chunks):
|
||||||
|
try:
|
||||||
|
if isinstance(chunk, str):
|
||||||
|
chunk_data = json.loads(chunk)
|
||||||
|
else:
|
||||||
|
chunk_data = chunk
|
||||||
|
|
||||||
|
if isinstance(chunk_data, list):
|
||||||
|
merged_data.extend(chunk_data)
|
||||||
|
elif isinstance(chunk_data, dict):
|
||||||
|
# For objects, merge by combining keys
|
||||||
|
if not merged_data:
|
||||||
|
merged_data = chunk_data
|
||||||
|
else:
|
||||||
|
if isinstance(merged_data, dict):
|
||||||
|
merged_data.update(chunk_data)
|
||||||
|
else:
|
||||||
|
merged_data.append(chunk_data)
|
||||||
|
else:
|
||||||
|
merged_data.append(chunk_data)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to parse chunk {i}: {str(e)}")
|
||||||
|
# Add as string if JSON parsing fails
|
||||||
|
merged_data.append(str(chunk))
|
||||||
|
|
||||||
|
return json.dumps(merged_data, indent=2)
|
||||||
|
|
||||||
|
def _mergeTableChunks(self, chunks: List[str]) -> str:
|
||||||
|
"""Merge table chunks (CSV) intelligently"""
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
|
|
||||||
|
merged_rows = []
|
||||||
|
headers = None
|
||||||
|
|
||||||
|
for i, chunk in enumerate(chunks):
|
||||||
|
try:
|
||||||
|
# Parse CSV chunk
|
||||||
|
reader = csv.reader(io.StringIO(str(chunk)))
|
||||||
|
rows = list(reader)
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# First chunk: capture headers
|
||||||
|
if i == 0:
|
||||||
|
headers = rows[0] if rows else []
|
||||||
|
merged_rows.extend(rows)
|
||||||
|
else:
|
||||||
|
# Subsequent chunks: skip header if it matches
|
||||||
|
if rows and rows[0] == headers:
|
||||||
|
merged_rows.extend(rows[1:]) # Skip duplicate header
|
||||||
|
else:
|
||||||
|
merged_rows.extend(rows)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to parse table chunk {i}: {str(e)}")
|
||||||
|
# Add as raw text if CSV parsing fails
|
||||||
|
merged_rows.append([f"Raw chunk {i}: {str(chunk)[:100]}..."])
|
||||||
|
|
||||||
|
# Convert back to CSV
|
||||||
|
output = io.StringIO()
|
||||||
|
writer = csv.writer(output)
|
||||||
|
writer.writerows(merged_rows)
|
||||||
|
return output.getvalue()
|
||||||
|
|
||||||
|
def _mergeTextChunks(self, chunks: List[str]) -> str:
|
||||||
|
"""Merge text chunks intelligently"""
|
||||||
|
# Simple concatenation with proper spacing
|
||||||
|
merged = []
|
||||||
|
for chunk in chunks:
|
||||||
|
chunk_str = str(chunk).strip()
|
||||||
|
if chunk_str:
|
||||||
|
merged.append(chunk_str)
|
||||||
|
|
||||||
|
return "\n\n".join(merged) # Double newline between chunks for readability
|
||||||
|
|
|
||||||
|
|
@ -130,6 +130,9 @@ class MethodBase:
|
||||||
# Extract parameter name and type
|
# Extract parameter name and type
|
||||||
if '(' in paramPart:
|
if '(' in paramPart:
|
||||||
paramName = paramPart.split('(')[0].strip()
|
paramName = paramPart.split('(')[0].strip()
|
||||||
|
# Normalize bullet-prefixed parameter names like "- aiPrompt" or "* aiPrompt"
|
||||||
|
if paramName.startswith('-') or paramName.startswith('*'):
|
||||||
|
paramName = paramName[1:].strip()
|
||||||
paramType = paramPart[paramPart.find('(')+1:paramPart.find(')')].strip()
|
paramType = paramPart[paramPart.find('(')+1:paramPart.find(')')].strip()
|
||||||
descriptions[paramName] = descPart
|
descriptions[paramName] = descPart
|
||||||
types[paramName] = paramType
|
types[paramName] = paramType
|
||||||
|
|
@ -165,4 +168,54 @@ class MethodBase:
|
||||||
elif hasattr(type_annotation, '_name'):
|
elif hasattr(type_annotation, '_name'):
|
||||||
return type_annotation._name
|
return type_annotation._name
|
||||||
else:
|
else:
|
||||||
return str(type_annotation)
|
return str(type_annotation)
|
||||||
|
|
||||||
|
def _generateMeaningfulFileName(self, base_name: str, extension: str, workflow_context: Dict[str, Any] = None, action_name: str = None) -> str:
|
||||||
|
"""
|
||||||
|
Generate a meaningful file name with round/task/action information.
|
||||||
|
|
||||||
|
Format: {base_name}_alpha_r{round}t{task}a{action}.{extension}
|
||||||
|
Example: report_alpha_r1t3a4.json
|
||||||
|
|
||||||
|
Args:
|
||||||
|
base_name: Base name for the file (e.g., "report", "analysis", "summary")
|
||||||
|
extension: File extension without dot (e.g., "json", "html", "txt")
|
||||||
|
workflow_context: Dictionary with currentRound, currentTask, currentAction
|
||||||
|
action_name: Name of the action being performed (optional, for additional context)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Formatted file name string
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Get workflow context from services if not provided
|
||||||
|
if workflow_context is None and hasattr(self.services, 'workflow'):
|
||||||
|
workflow_context = self.services.workflow.getWorkflowContext()
|
||||||
|
|
||||||
|
# Extract round, task, action numbers
|
||||||
|
round_num = workflow_context.get('currentRound', 0) if workflow_context else 0
|
||||||
|
task_num = workflow_context.get('currentTask', 0) if workflow_context else 0
|
||||||
|
action_num = workflow_context.get('currentAction', 0) if workflow_context else 0
|
||||||
|
|
||||||
|
# Clean base name (remove special characters, spaces)
|
||||||
|
clean_base = base_name.lower().replace(' ', '_').replace('-', '_')
|
||||||
|
# Remove any non-alphanumeric characters except underscores
|
||||||
|
import re
|
||||||
|
clean_base = re.sub(r'[^a-z0-9_]', '', clean_base)
|
||||||
|
|
||||||
|
# Add action name if provided
|
||||||
|
if action_name:
|
||||||
|
clean_action = action_name.lower().replace(' ', '_').replace('-', '_')
|
||||||
|
clean_action = re.sub(r'[^a-z0-9_]', '', clean_action)
|
||||||
|
clean_base = f"{clean_base}_{clean_action}"
|
||||||
|
|
||||||
|
# Generate the meaningful file name
|
||||||
|
meaningful_name = f"{clean_base}_r{round_num}t{task_num}a{action_num}.{extension}"
|
||||||
|
|
||||||
|
self.logger.debug(f"Generated meaningful file name: {meaningful_name} (Round: {round_num}, Task: {task_num}, Action: {action_num})")
|
||||||
|
return meaningful_name
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error generating meaningful file name, using fallback: {str(e)}")
|
||||||
|
# Fallback to timestamp-based naming
|
||||||
|
timestamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
|
||||||
|
return f"{base_name}_{timestamp}.{extension}"
|
||||||
|
|
@ -9,7 +9,8 @@ from typing import Dict, Any, List, Optional
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
|
|
||||||
from modules.workflows.methods.methodBase import MethodBase, action
|
from modules.workflows.methods.methodBase import MethodBase, action
|
||||||
from modules.datamodels.datamodelWorkflow import ActionResult, ChatDocument
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
from modules.datamodels.datamodelChat import ChatDocument
|
||||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, Priority
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, Priority
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -30,19 +31,34 @@ class MethodDocument(MethodBase):
|
||||||
@action
|
@action
|
||||||
async def extract(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def extract(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
"""
|
||||||
Extract content from any document using AI prompt.
|
GENERAL:
|
||||||
|
- Purpose: Extract and analyze content from existing documents using AI.
|
||||||
|
- Input requirements: documentList (required); prompt (required).
|
||||||
|
- Output format: Plain text per source document (.txt by default).
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
documentList (list): Document list reference(s)
|
- documentList (list, required): Document reference(s) to extract from.
|
||||||
aiPrompt (str): AI prompt for extraction
|
- prompt (str, required): Instruction describing what to extract.
|
||||||
expectedDocumentFormats (list, optional): Output formats
|
- operationType (str, optional): extract_content | analyze_document | summarize_content. Default: extract_content.
|
||||||
includeMetadata (bool, optional): Include metadata (default: True)
|
- processDocumentsIndividually (bool, optional): Process each document separately. Default: True.
|
||||||
|
- chunkAllowed (bool, optional): Allow chunking for large inputs. Default: True.
|
||||||
|
- mergeStrategy (dict, optional): Merge strategy for chunked content.
|
||||||
|
- expectedDocumentFormats (list, optional): Desired output format specs.
|
||||||
|
- includeMetadata (bool, optional): Include file metadata. Default: True.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
documentList = parameters.get("documentList")
|
documentList = parameters.get("documentList")
|
||||||
if isinstance(documentList, str):
|
if isinstance(documentList, str):
|
||||||
documentList = [documentList]
|
documentList = [documentList]
|
||||||
aiPrompt = parameters.get("aiPrompt")
|
prompt = parameters.get("prompt")
|
||||||
|
operationType = parameters.get("operationType", "extract_content")
|
||||||
|
processDocumentsIndividually = parameters.get("processDocumentsIndividually", True)
|
||||||
|
chunkAllowed = parameters.get("chunkAllowed", True)
|
||||||
|
mergeStrategy = parameters.get("mergeStrategy", {
|
||||||
|
"groupBy": "typeGroup",
|
||||||
|
"orderBy": "id",
|
||||||
|
"mergeType": "concatenate"
|
||||||
|
})
|
||||||
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||||
includeMetadata = parameters.get("includeMetadata", True)
|
includeMetadata = parameters.get("includeMetadata", True)
|
||||||
|
|
||||||
|
|
@ -51,9 +67,9 @@ class MethodDocument(MethodBase):
|
||||||
error="Document list reference is required"
|
error="Document list reference is required"
|
||||||
)
|
)
|
||||||
|
|
||||||
if not aiPrompt:
|
if not prompt:
|
||||||
return ActionResult.isFailure(
|
return ActionResult.isFailure(
|
||||||
error="AI prompt is required"
|
error="Prompt is required"
|
||||||
)
|
)
|
||||||
|
|
||||||
chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
|
chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
|
||||||
|
|
@ -62,120 +78,77 @@ class MethodDocument(MethodBase):
|
||||||
error="No documents found for the provided reference"
|
error="No documents found for the provided reference"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Batch extract content from all documents at once
|
# Use enhanced AI service with integrated extraction
|
||||||
all_extracted_content = []
|
|
||||||
file_infos = []
|
|
||||||
batch_docs = []
|
|
||||||
for chatDocument in chatDocuments:
|
|
||||||
file_info = self.services.workflow.getFileInfo(chatDocument.fileId)
|
|
||||||
if includeMetadata:
|
|
||||||
file_infos.append(file_info)
|
|
||||||
try:
|
|
||||||
data = self.services.workflow.getFileData(chatDocument.fileId) if hasattr(chatDocument, 'fileId') else None
|
|
||||||
except Exception:
|
|
||||||
data = None
|
|
||||||
batch_docs.append({
|
|
||||||
"id": getattr(chatDocument, 'id', None),
|
|
||||||
"bytes": data or b"",
|
|
||||||
"fileName": getattr(chatDocument, 'fileName', 'unknown'),
|
|
||||||
"mimeType": getattr(chatDocument, 'mimeType', None) or "application/octet-stream"
|
|
||||||
})
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
extracted_list = await self.services.extraction.extractContentFromDocuments(
|
# Build AI call options
|
||||||
prompt=aiPrompt,
|
ai_options = AiCallOptions(
|
||||||
documents=batch_docs,
|
operationType=operationType,
|
||||||
options={"ai": {"enabled": False}}
|
processDocumentsIndividually=processDocumentsIndividually,
|
||||||
|
compressContext=not chunkAllowed
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Add format instructions to prompt if expected formats are provided
|
||||||
|
enhanced_prompt = prompt
|
||||||
|
if expectedDocumentFormats:
|
||||||
|
format_instructions = []
|
||||||
|
for fmt in expectedDocumentFormats:
|
||||||
|
extension = fmt.get("extension", ".txt")
|
||||||
|
mime_type = fmt.get("mimeType", "text/plain")
|
||||||
|
description = fmt.get("description", "")
|
||||||
|
format_instructions.append(f"- {extension} ({mime_type}): {description}")
|
||||||
|
|
||||||
|
if format_instructions:
|
||||||
|
enhanced_prompt += f"\n\nPlease format the output as: {', '.join([fmt.get('extension', '.txt') for fmt in expectedDocumentFormats])}"
|
||||||
|
enhanced_prompt += f"\nExpected formats:\n" + "\n".join(format_instructions)
|
||||||
|
|
||||||
|
# Use enhanced AI service for extraction
|
||||||
|
ai_response = await self.services.ai.callAi(
|
||||||
|
prompt=enhanced_prompt,
|
||||||
|
documents=chatDocuments,
|
||||||
|
options=ai_options
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"AI extraction completed: {len(ai_response)} characters")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Batch extraction failed: {str(e)}")
|
logger.error(f"AI extraction failed: {str(e)}")
|
||||||
extracted_list = []
|
ai_response = ""
|
||||||
|
|
||||||
all_extracted_content = extracted_list or []
|
|
||||||
|
|
||||||
if not all_extracted_content:
|
if not ai_response or ai_response.strip() == "":
|
||||||
return ActionResult.isFailure(
|
return ActionResult.isFailure(
|
||||||
error="No content could be extracted from any documents"
|
error="No content could be extracted from any documents"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Process each document individually with its own format conversion
|
# Process each document individually with extracted content
|
||||||
output_documents = []
|
action_documents = []
|
||||||
|
|
||||||
for i, chatDocument in enumerate(chatDocuments):
|
for i, chatDocument in enumerate(chatDocuments):
|
||||||
# Extract text content from this document
|
# Use the AI response directly - it already contains processed content
|
||||||
text_content = ""
|
final_content = ai_response
|
||||||
try:
|
final_mime_type = "text/plain"
|
||||||
ec = all_extracted_content[i] if i < len(all_extracted_content) else None
|
final_extension = ".txt"
|
||||||
if ec and hasattr(ec, 'parts'):
|
|
||||||
text_parts = []
|
|
||||||
for part in getattr(ec, 'parts', []):
|
|
||||||
try:
|
|
||||||
if getattr(part, 'typeGroup', '') in ("text", "table", "structure") and getattr(part, 'data', None):
|
|
||||||
text_parts.append(part.data)
|
|
||||||
except Exception:
|
|
||||||
continue
|
|
||||||
text_content = "\n".join(text_parts)
|
|
||||||
else:
|
|
||||||
text_content = ""
|
|
||||||
except Exception:
|
|
||||||
text_content = ""
|
|
||||||
|
|
||||||
# Get the expected format for this document (or use default)
|
# Create meaningful output fileName with workflow context
|
||||||
target_format = None
|
|
||||||
if expectedDocumentFormats and i < len(expectedDocumentFormats):
|
|
||||||
target_format = expectedDocumentFormats[i]
|
|
||||||
elif expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
|
||||||
# If fewer formats than documents, use the last format for remaining documents
|
|
||||||
target_format = expectedDocumentFormats[-1]
|
|
||||||
|
|
||||||
# Determine output format and fileName
|
|
||||||
if target_format:
|
|
||||||
target_extension = target_format.get("extension", ".txt")
|
|
||||||
target_mime_type = target_format.get("mimeType", "text/plain")
|
|
||||||
|
|
||||||
# Check if format conversion is needed
|
|
||||||
if target_extension not in [".txt", ".text"] or target_mime_type != "text/plain":
|
|
||||||
logger.info(f"Converting document {i+1} to format: {target_extension} ({target_mime_type})")
|
|
||||||
# Use AI to convert format
|
|
||||||
formatted_content = await self._convertContentToFormat(text_content, target_format)
|
|
||||||
final_content = formatted_content
|
|
||||||
final_mime_type = target_mime_type
|
|
||||||
final_extension = target_extension
|
|
||||||
else:
|
|
||||||
logger.info(f"Document {i+1}: No format conversion needed, using plain text")
|
|
||||||
final_content = text_content
|
|
||||||
final_mime_type = "text/plain"
|
|
||||||
final_extension = ".txt"
|
|
||||||
else:
|
|
||||||
logger.info(f"Document {i+1}: No expected format specified, using plain text")
|
|
||||||
final_content = text_content
|
|
||||||
final_mime_type = "text/plain"
|
|
||||||
final_extension = ".txt"
|
|
||||||
|
|
||||||
# Create output fileName based on original fileName and target format
|
|
||||||
original_fileName = chatDocument.fileName
|
original_fileName = chatDocument.fileName
|
||||||
base_name = original_fileName.rsplit('.', 1)[0] if '.' in original_fileName else original_fileName
|
base_name = original_fileName.rsplit('.', 1)[0] if '.' in original_fileName else original_fileName
|
||||||
output_fileName = f"{base_name}_extracted_{self._format_timestamp_for_filename()}{final_extension}"
|
extension = final_extension.lstrip('.') # Remove leading dot for meaningful naming
|
||||||
|
output_fileName = self._generateMeaningfulFileName(
|
||||||
# Create result data for this document
|
base_name=f"{base_name}_extracted",
|
||||||
result_data = {
|
extension=extension,
|
||||||
"documentCount": 1,
|
action_name="extract"
|
||||||
"content": final_content,
|
)
|
||||||
"originalfileName": original_fileName,
|
|
||||||
"fileInfos": [file_infos[i]] if includeMetadata and i < len(file_infos) else None,
|
|
||||||
"timestamp": self.services.utils.getUtcTimestamp()
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info(f"Created output document: {output_fileName} with {len(final_content)} characters")
|
logger.info(f"Created output document: {output_fileName} with {len(final_content)} characters")
|
||||||
|
|
||||||
output_documents.append({
|
# Create proper ActionDocument object
|
||||||
"documentName": output_fileName,
|
action_documents.append(ActionDocument(
|
||||||
"documentData": result_data,
|
documentName=output_fileName,
|
||||||
"mimeType": final_mime_type
|
documentData=final_content,
|
||||||
})
|
mimeType=final_mime_type
|
||||||
|
))
|
||||||
|
|
||||||
return ActionResult.isSuccess(
|
return ActionResult.isSuccess(
|
||||||
documents=output_documents
|
documents=action_documents
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error extracting content: {str(e)}")
|
logger.error(f"Error extracting content: {str(e)}")
|
||||||
|
|
@ -183,454 +156,27 @@ class MethodDocument(MethodBase):
|
||||||
error=str(e)
|
error=str(e)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@action
|
@action
|
||||||
async def generate(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def generate(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
"""
|
||||||
Convert TEXT-ONLY documents to target formats (NO AI usage).
|
GENERAL:
|
||||||
|
- Purpose: Generate formatted documents and reports from source documents.
|
||||||
|
- Input requirements: documentList (required); prompt (required); optional title and outputFormat.
|
||||||
|
- Any output format, e.g.: html | pdf | docx | txt | md | json | csv | xlsx
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
documentList (list): TEXT-ONLY documents only
|
- documentList (list, required): Document reference(s) to include as context.
|
||||||
expectedDocumentFormats (list): Target formats
|
- prompt (str, required): Instruction describing the desired document/report.
|
||||||
originalDocuments (list, optional): Original names
|
- title (str, optional): Title for the generated document. Default: "Summary Report".
|
||||||
includeMetadata (bool, optional): Include metadata (default: True)
|
- outputFormat (str, optional): html | pdf | docx | txt | md | json | csv | xlsx. Default: html.
|
||||||
mergeDocuments (bool, optional): Merge all documents into single output (default: False)
|
- operationType (str, optional): generate_report | analyze_documents. Default: generate_report.
|
||||||
"""
|
- processDocumentsIndividually (bool, optional): Process per document. Default: True.
|
||||||
try:
|
- chunkAllowed (bool, optional): Allow chunking for large inputs. Default: True.
|
||||||
document_list = parameters.get("documentList", [])
|
- mergeStrategy (dict, optional): Merging rules for multi-part generation.
|
||||||
if isinstance(document_list, str):
|
- includeMetadata (bool, optional): Include file metadata. Default: True.
|
||||||
document_list = [document_list]
|
|
||||||
expected_document_formats = parameters.get("expectedDocumentFormats", [])
|
|
||||||
original_documents = parameters.get("originalDocuments", [])
|
|
||||||
include_metadata = parameters.get("includeMetadata", True)
|
|
||||||
merge_documents = parameters.get("mergeDocuments", False)
|
|
||||||
|
|
||||||
if not document_list:
|
|
||||||
return ActionResult.isFailure(
|
|
||||||
error="Document list is required for generation"
|
|
||||||
)
|
|
||||||
|
|
||||||
if not expected_document_formats or len(expected_document_formats) == 0:
|
|
||||||
return ActionResult.isFailure(
|
|
||||||
error="Expected document formats specification is required"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get chat documents for original documents list
|
|
||||||
chat_documents = self.services.workflow.getChatDocumentsFromDocumentList(document_list)
|
|
||||||
logger.info(f"Found {len(chat_documents)} chat documents")
|
|
||||||
|
|
||||||
if not chat_documents:
|
|
||||||
return ActionResult.isFailure(
|
|
||||||
error="No documents found for the provided documentList reference"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Update original documents list if not provided
|
|
||||||
if not original_documents:
|
|
||||||
original_documents = [doc.fileName if hasattr(doc, 'fileName') else str(doc.id) for doc in chat_documents]
|
|
||||||
|
|
||||||
# Extract content from all documents first
|
|
||||||
document_contents = []
|
|
||||||
for i, chat_document in enumerate(chat_documents):
|
|
||||||
# Extract content from this document directly - NO AI, just read the data as-is
|
|
||||||
# This ensures we get the original text content for format conversion
|
|
||||||
content = ""
|
|
||||||
if hasattr(chat_document, 'fileId') and chat_document.fileId:
|
|
||||||
try:
|
|
||||||
# Get file data directly without AI processing
|
|
||||||
file_data = self.services.workflow.getFileData(chat_document.fileId)
|
|
||||||
if file_data:
|
|
||||||
# Check if it's text data and convert to string
|
|
||||||
if isinstance(file_data, bytes):
|
|
||||||
try:
|
|
||||||
# Try to decode as UTF-8 to check if it's text
|
|
||||||
content = file_data.decode('utf-8')
|
|
||||||
logger.info(f"Document {i+1} ({chat_document.fileName}): Successfully decoded as UTF-8 text")
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
logger.info(f"Document {i+1} ({chat_document.fileName}): Binary data, not text - skipping")
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
# Already a string
|
|
||||||
content = str(file_data)
|
|
||||||
logger.info(f"Document {i+1} ({chat_document.fileName}): Already text data")
|
|
||||||
else:
|
|
||||||
logger.warning(f"Document {i+1} ({chat_document.fileName}): No file data found")
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not content.strip():
|
|
||||||
logger.info(f"Document {i+1} ({chat_document.fileName}): Empty text content, skipping")
|
|
||||||
continue
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Error reading document {i+1} ({chat_document.fileName}): {str(e)}")
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
logger.warning(f"Document {i+1} has no fileId, skipping")
|
|
||||||
continue
|
|
||||||
|
|
||||||
logger.info(f"Extracted content from document {i+1}: {len(content)} characters")
|
|
||||||
|
|
||||||
document_contents.append({
|
|
||||||
"document": chat_document,
|
|
||||||
"content": content,
|
|
||||||
"index": i,
|
|
||||||
"original_name": original_documents[i] if i < len(original_documents) else f"document_{i+1}"
|
|
||||||
})
|
|
||||||
|
|
||||||
if not document_contents:
|
|
||||||
return ActionResult.isFailure(
|
|
||||||
error="No valid text content could be extracted from any documents"
|
|
||||||
)
|
|
||||||
|
|
||||||
if merge_documents and len(document_contents) > 1:
|
|
||||||
# Merge all documents into single output
|
|
||||||
logger.info("Merging all documents into single output")
|
|
||||||
return await self._mergeDocuments(document_contents, expected_document_formats, include_metadata)
|
|
||||||
else:
|
|
||||||
# Process each document individually with its own format conversion
|
|
||||||
logger.info("Processing documents individually")
|
|
||||||
output_documents = []
|
|
||||||
|
|
||||||
for item in document_contents:
|
|
||||||
chat_document = item["document"]
|
|
||||||
content = item["content"]
|
|
||||||
i = item["index"]
|
|
||||||
original_name = item["original_name"]
|
|
||||||
|
|
||||||
# Get the expected format for this document (or use default)
|
|
||||||
target_format = None
|
|
||||||
if i < len(expected_document_formats):
|
|
||||||
target_format = expected_document_formats[i]
|
|
||||||
elif len(expected_document_formats) > 0:
|
|
||||||
# If fewer formats than documents, use the last format for remaining documents
|
|
||||||
target_format = expected_document_formats[-1]
|
|
||||||
|
|
||||||
if not target_format:
|
|
||||||
logger.warning(f"No expected format for document {i+1}, skipping")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Use AI to convert format
|
|
||||||
formatted_content = await self._convertContentToFormat(content, target_format)
|
|
||||||
if not formatted_content:
|
|
||||||
logger.warning(f"Failed to format document {i+1}, skipping")
|
|
||||||
continue
|
|
||||||
|
|
||||||
target_extension = target_format.get("extension", ".txt")
|
|
||||||
target_mime_type = target_format.get("mimeType", "text/plain")
|
|
||||||
|
|
||||||
# Create output fileName
|
|
||||||
base_name = original_name.rsplit('.', 1)[0] if '.' in original_name else original_name
|
|
||||||
output_fileName = f"{base_name}_generated_{self._format_timestamp_for_filename()}{target_extension}"
|
|
||||||
|
|
||||||
# Create result data
|
|
||||||
result_data = {
|
|
||||||
"documentCount": 1,
|
|
||||||
"content": formatted_content,
|
|
||||||
"outputFormat": target_format,
|
|
||||||
"originalDocument": original_name,
|
|
||||||
"timestamp": self.services.utils.getUtcTimestamp()
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info(f"Generated document: {output_fileName} with {len(formatted_content)} characters")
|
|
||||||
|
|
||||||
output_documents.append({
|
|
||||||
"documentName": output_fileName,
|
|
||||||
"documentData": result_data,
|
|
||||||
"mimeType": target_mime_type
|
|
||||||
})
|
|
||||||
|
|
||||||
if not output_documents:
|
|
||||||
return ActionResult.isFailure(
|
|
||||||
error="No documents could be generated"
|
|
||||||
)
|
|
||||||
|
|
||||||
return ActionResult.isSuccess(
|
|
||||||
documents=output_documents
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error generating document: {str(e)}")
|
|
||||||
return ActionResult.isFailure(
|
|
||||||
error=str(e)
|
|
||||||
)
|
|
||||||
|
|
||||||
async def _mergeDocuments(self, document_contents: List[Dict[str, Any]],
|
|
||||||
expected_document_formats: List[Dict[str, Any]],
|
|
||||||
include_metadata: bool) -> ActionResult:
|
|
||||||
"""
|
|
||||||
Merge all documents into a single output document.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Combine all document content
|
|
||||||
combined_content_parts = []
|
|
||||||
original_file_names = []
|
|
||||||
|
|
||||||
for item in document_contents:
|
|
||||||
chat_document = item["document"]
|
|
||||||
content = item["content"]
|
|
||||||
original_name = item["original_name"]
|
|
||||||
|
|
||||||
if content.strip():
|
|
||||||
combined_content_parts.append(f"=== Document: {original_name} ===\n{content}\n")
|
|
||||||
original_file_names.append(original_name)
|
|
||||||
|
|
||||||
if not combined_content_parts:
|
|
||||||
return ActionResult.isFailure(
|
|
||||||
error="No content could be extracted from any documents for merging"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Combine all content
|
|
||||||
combined_content = "\n".join(combined_content_parts)
|
|
||||||
logger.info(f"Combined content from {len(original_file_names)} documents: {len(combined_content)} characters")
|
|
||||||
|
|
||||||
# Get the expected format for the merged output
|
|
||||||
target_format = None
|
|
||||||
if expected_document_formats and len(expected_document_formats) > 0:
|
|
||||||
target_format = expected_document_formats[0] # Use first format for merged output
|
|
||||||
|
|
||||||
if not target_format:
|
|
||||||
logger.warning("No expected format specified for merged output, using plain text")
|
|
||||||
target_format = {"extension": ".txt", "mimeType": "text/plain"}
|
|
||||||
|
|
||||||
# Use AI to convert format
|
|
||||||
formatted_content = await self._convertContentToFormat(combined_content, target_format)
|
|
||||||
if not formatted_content:
|
|
||||||
logger.warning("Failed to format merged content, using raw content")
|
|
||||||
formatted_content = combined_content
|
|
||||||
|
|
||||||
target_extension = target_format.get("extension", ".txt")
|
|
||||||
target_mime_type = target_format.get("mimeType", "text/plain")
|
|
||||||
|
|
||||||
# Create output fileName for merged document
|
|
||||||
timestamp = self._format_timestamp_for_filename()
|
|
||||||
output_fileName = f"merged_documents_{timestamp}{target_extension}"
|
|
||||||
|
|
||||||
# Create result data for merged document
|
|
||||||
result_data = {
|
|
||||||
"documentCount": len(document_contents),
|
|
||||||
"content": formatted_content,
|
|
||||||
"outputFormat": target_format,
|
|
||||||
"originalDocuments": original_file_names,
|
|
||||||
"timestamp": self.services.utils.getUtcTimestamp(),
|
|
||||||
"merged": True
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info(f"Created merged document: {output_fileName} with {len(formatted_content)} characters")
|
|
||||||
|
|
||||||
return ActionResult.isSuccess(
|
|
||||||
documents=[{
|
|
||||||
"documentName": output_fileName,
|
|
||||||
"documentData": result_data,
|
|
||||||
"mimeType": target_mime_type
|
|
||||||
}]
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error merging documents: {str(e)}")
|
|
||||||
return ActionResult.isFailure(
|
|
||||||
error=f"Failed to merge documents: {str(e)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
async def _convertContentToFormat(self, content: str, target_format: Dict[str, Any]) -> str:
|
|
||||||
"""
|
|
||||||
Helper function to convert content to the specified format using AI.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
extension = target_format.get("extension", ".txt")
|
|
||||||
mime_type = target_format.get("mimeType", "text/plain")
|
|
||||||
|
|
||||||
logger.info(f"Converting content to format: {extension} ({mime_type})")
|
|
||||||
|
|
||||||
# Create AI prompt for format conversion
|
|
||||||
format_prompts = {
|
|
||||||
".csv": f"""
|
|
||||||
Convert the following content into a proper CSV format.
|
|
||||||
|
|
||||||
Requirements:
|
|
||||||
1. Output ONLY the CSV data without any markdown, code blocks, or additional text
|
|
||||||
2. Use appropriate headers based on the content
|
|
||||||
3. Ensure proper CSV formatting with commas and quotes where needed
|
|
||||||
4. Make the data easily readable and importable into spreadsheet applications
|
|
||||||
|
|
||||||
Content to convert:
|
|
||||||
{content}
|
|
||||||
|
|
||||||
Generate ONLY the CSV data:
|
|
||||||
""",
|
|
||||||
|
|
||||||
".json": f"""
|
|
||||||
Convert the following content into a proper JSON format.
|
|
||||||
|
|
||||||
Requirements:
|
|
||||||
1. Output ONLY the JSON data without any markdown, code blocks, or additional text
|
|
||||||
2. Structure the data logically with appropriate keys and values
|
|
||||||
3. Ensure valid JSON syntax
|
|
||||||
4. Make the data easily parseable and readable
|
|
||||||
|
|
||||||
Content to convert:
|
|
||||||
{content}
|
|
||||||
|
|
||||||
Generate ONLY the JSON data:
|
|
||||||
""",
|
|
||||||
|
|
||||||
".xml": f"""
|
|
||||||
Convert the following content into a proper XML format.
|
|
||||||
|
|
||||||
Requirements:
|
|
||||||
1. Output ONLY the XML data without any markdown, code blocks, or additional text
|
|
||||||
2. Use appropriate XML tags and structure
|
|
||||||
3. Ensure valid XML syntax
|
|
||||||
4. Make the data easily parseable and readable
|
|
||||||
|
|
||||||
Content to convert:
|
|
||||||
{content}
|
|
||||||
|
|
||||||
Generate ONLY the XML data:
|
|
||||||
""",
|
|
||||||
|
|
||||||
".html": f"""
|
|
||||||
Convert the following content into a proper HTML format.
|
|
||||||
|
|
||||||
Requirements:
|
|
||||||
1. Output ONLY the HTML data without any markdown, code blocks, or additional text
|
|
||||||
2. Use appropriate HTML tags and structure
|
|
||||||
3. Ensure valid HTML syntax
|
|
||||||
4. Make the data easily readable in web browsers
|
|
||||||
|
|
||||||
Content to convert:
|
|
||||||
{content}
|
|
||||||
|
|
||||||
Generate ONLY the HTML data:
|
|
||||||
""",
|
|
||||||
|
|
||||||
".md": f"""
|
|
||||||
Convert the following content into a proper Markdown format.
|
|
||||||
|
|
||||||
Requirements:
|
|
||||||
1. Output ONLY the Markdown data without any code blocks or additional text
|
|
||||||
2. Use appropriate Markdown syntax for headers, lists, emphasis, etc.
|
|
||||||
3. Structure the content logically
|
|
||||||
4. Make the data easily readable and convertible to other formats
|
|
||||||
|
|
||||||
Content to convert:
|
|
||||||
{content}
|
|
||||||
|
|
||||||
Generate ONLY the Markdown data:
|
|
||||||
"""
|
|
||||||
}
|
|
||||||
|
|
||||||
# Get the appropriate prompt for the target format
|
|
||||||
if extension in format_prompts:
|
|
||||||
ai_prompt = format_prompts[extension]
|
|
||||||
else:
|
|
||||||
# Generic format conversion
|
|
||||||
ai_prompt = f"""
|
|
||||||
Convert the following content into {extension.upper()} format.
|
|
||||||
|
|
||||||
Requirements:
|
|
||||||
1. Output ONLY the {extension.upper()} data without any markdown, code blocks, or additional text
|
|
||||||
2. Use appropriate formatting for {extension.upper()} files
|
|
||||||
3. Ensure the output is valid and usable
|
|
||||||
4. Make the data easily readable and importable
|
|
||||||
|
|
||||||
Content to convert:
|
|
||||||
{content}
|
|
||||||
|
|
||||||
Generate ONLY the {extension.upper()} data:
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Call AI to generate the formatted content
|
|
||||||
logger.info(f"Calling AI for {extension} format conversion")
|
|
||||||
formatted_content = await self.services.ai.callAi(
|
|
||||||
prompt=ai_prompt,
|
|
||||||
documents=None,
|
|
||||||
options=AiCallOptions(
|
|
||||||
operationType=OperationType.GENERATE_CONTENT,
|
|
||||||
priority=Priority.SPEED,
|
|
||||||
compressPrompt=True,
|
|
||||||
compressContext=False,
|
|
||||||
maxCost=0.02
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
if not formatted_content or formatted_content.strip() == "":
|
|
||||||
logger.warning("AI format conversion failed, using fallback")
|
|
||||||
return self._generateFallbackFormattedContent(content, extension, mime_type)
|
|
||||||
|
|
||||||
# Clean up the AI response
|
|
||||||
formatted_content = formatted_content.strip()
|
|
||||||
|
|
||||||
# Remove markdown code blocks if present
|
|
||||||
if formatted_content.startswith("```") and formatted_content.endswith("```"):
|
|
||||||
lines = formatted_content.split('\n')
|
|
||||||
if len(lines) > 2:
|
|
||||||
formatted_content = '\n'.join(lines[1:-1])
|
|
||||||
|
|
||||||
# For HTML format, check if AI returned complete HTML document
|
|
||||||
if extension == ".html" and (formatted_content.startswith('<!DOCTYPE') or formatted_content.startswith('<html')):
|
|
||||||
return formatted_content
|
|
||||||
|
|
||||||
return formatted_content
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in AI format conversion: {str(e)}")
|
|
||||||
return self._generateFallbackFormattedContent(content, extension, mime_type)
|
|
||||||
|
|
||||||
def _generateFallbackFormattedContent(self, content: str, extension: str, mime_type: str) -> str:
|
|
||||||
"""
|
|
||||||
Generate fallback formatted content when AI conversion fails.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
if extension == ".csv":
|
|
||||||
# Simple CSV fallback - split by lines and create basic CSV
|
|
||||||
lines = content.strip().split('\n')
|
|
||||||
if lines:
|
|
||||||
# Create a simple CSV with line numbers and content
|
|
||||||
csv_lines = ["Line,Content"]
|
|
||||||
for i, line in enumerate(lines, 1):
|
|
||||||
# Escape quotes and wrap in quotes if comma present
|
|
||||||
if ',' in line:
|
|
||||||
line = f'"{line.replace(chr(34), chr(34) + chr(34))}"'
|
|
||||||
csv_lines.append(f"{i},{line}")
|
|
||||||
return '\n'.join(csv_lines)
|
|
||||||
return "Line,Content\n1,No content available"
|
|
||||||
|
|
||||||
elif extension == ".json":
|
|
||||||
# Simple JSON fallback
|
|
||||||
content_escaped = content.replace('"', '\\"')
|
|
||||||
timestamp = self.services.utils.getUtcTimestamp()
|
|
||||||
return f'{{"content": "{content_escaped}", "format": "json", "timestamp": {timestamp}}}'
|
|
||||||
|
|
||||||
elif extension == ".xml":
|
|
||||||
# Simple XML fallback
|
|
||||||
timestamp = self.services.utils.getUtcTimestamp()
|
|
||||||
return f'<?xml version="1.0" encoding="UTF-8"?>\n<document>\n<content>{content}</content>\n<format>xml</format>\n<timestamp>{timestamp}</timestamp>\n</document>'
|
|
||||||
|
|
||||||
elif extension == ".html":
|
|
||||||
# Simple HTML fallback
|
|
||||||
timestamp = int(self.services.utils.getUtcTimestamp())
|
|
||||||
return f'<!DOCTYPE html>\n<html>\n<head><meta charset="UTF-8"><title>Generated Document</title></head>\n<body>\n<pre>{content}</pre>\n<p><em>Generated on {timestamp}</em></p>\n</body>\n</html>'
|
|
||||||
|
|
||||||
elif extension == ".md":
|
|
||||||
# Simple Markdown fallback
|
|
||||||
timestamp = int(self.services.utils.getUtcTimestamp())
|
|
||||||
return f"# Generated Document\n\n{content}\n\n---\n*Generated on {timestamp}*"
|
|
||||||
|
|
||||||
else:
|
|
||||||
# Generic fallback - return content as-is
|
|
||||||
return content
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in fallback format conversion: {str(e)}")
|
|
||||||
return content
|
|
||||||
|
|
||||||
@action
|
|
||||||
async def generateReport(self, parameters: Dict[str, Any]) -> ActionResult:
|
|
||||||
"""
|
|
||||||
Generate HTML report from multiple documents using AI.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
documentList (list): Document list reference(s)
|
|
||||||
prompt (str): AI prompt for report generation
|
|
||||||
title (str, optional): Report title (default: "Summary Report")
|
|
||||||
includeMetadata (bool, optional): Include metadata (default: True)
|
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
documentList = parameters.get("documentList")
|
documentList = parameters.get("documentList")
|
||||||
|
|
@ -638,6 +184,15 @@ class MethodDocument(MethodBase):
|
||||||
documentList = [documentList]
|
documentList = [documentList]
|
||||||
prompt = parameters.get("prompt")
|
prompt = parameters.get("prompt")
|
||||||
title = parameters.get("title", "Summary Report")
|
title = parameters.get("title", "Summary Report")
|
||||||
|
outputFormat = parameters.get("outputFormat", "html")
|
||||||
|
operationType = parameters.get("operationType", "generate_report")
|
||||||
|
processDocumentsIndividually = parameters.get("processDocumentsIndividually", True)
|
||||||
|
chunkAllowed = parameters.get("chunkAllowed", True)
|
||||||
|
mergeStrategy = parameters.get("mergeStrategy", {
|
||||||
|
"groupBy": "typeGroup",
|
||||||
|
"orderBy": "id",
|
||||||
|
"mergeType": "concatenate"
|
||||||
|
})
|
||||||
includeMetadata = parameters.get("includeMetadata", True)
|
includeMetadata = parameters.get("includeMetadata", True)
|
||||||
|
|
||||||
if not documentList:
|
if not documentList:
|
||||||
|
|
@ -658,179 +213,54 @@ class MethodDocument(MethodBase):
|
||||||
error="No documents found for the provided reference"
|
error="No documents found for the provided reference"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Generate HTML report
|
# Use enhanced AI service with document generation
|
||||||
html_content = await self._generateHtmlReport(chatDocuments, title, includeMetadata, prompt)
|
try:
|
||||||
|
# Build AI call options
|
||||||
|
ai_options = AiCallOptions(
|
||||||
|
operationType=operationType,
|
||||||
|
processDocumentsIndividually=processDocumentsIndividually,
|
||||||
|
compressContext=not chunkAllowed
|
||||||
|
)
|
||||||
|
|
||||||
|
# Use enhanced AI service with document generation
|
||||||
|
result = await self.services.ai.callAi(
|
||||||
|
prompt=prompt,
|
||||||
|
documents=chatDocuments,
|
||||||
|
options=ai_options,
|
||||||
|
outputFormat=outputFormat,
|
||||||
|
title=title
|
||||||
|
)
|
||||||
|
|
||||||
|
if isinstance(result, dict) and result.get("success"):
|
||||||
|
# Extract document information from result
|
||||||
|
documents = result.get("documents", [])
|
||||||
|
if documents:
|
||||||
|
# Convert to ActionDocument format
|
||||||
|
action_documents = []
|
||||||
|
for doc in documents:
|
||||||
|
action_documents.append(ActionDocument(
|
||||||
|
documentName=doc["documentName"],
|
||||||
|
documentData=doc["documentData"],
|
||||||
|
mimeType=doc["mimeType"]
|
||||||
|
))
|
||||||
|
|
||||||
|
logger.info(f"Generated {outputFormat.upper()} report: {len(action_documents)} documents")
|
||||||
|
return ActionResult.isSuccess(documents=action_documents)
|
||||||
|
else:
|
||||||
|
return ActionResult.isFailure(error="No documents generated")
|
||||||
|
else:
|
||||||
|
error_msg = result.get("error", "Unknown error") if isinstance(result, dict) else "AI generation failed"
|
||||||
|
return ActionResult.isFailure(error=error_msg)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"AI generation failed: {str(e)}")
|
||||||
|
return ActionResult.isFailure(error=str(e))
|
||||||
|
|
||||||
# Create output fileName
|
|
||||||
timestamp = int(self.services.utils.getUtcTimestamp())
|
|
||||||
output_fileName = f"report_{self._format_timestamp_for_filename()}.html"
|
|
||||||
|
|
||||||
result_data = {
|
|
||||||
"documentCount": len(chatDocuments),
|
|
||||||
"content": html_content,
|
|
||||||
"title": title,
|
|
||||||
"timestamp": self.services.utils.getUtcTimestamp()
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info(f"Generated HTML report: {output_fileName} with {len(html_content)} characters")
|
|
||||||
|
|
||||||
return ActionResult.isSuccess(
|
|
||||||
documents=[{
|
|
||||||
"documentName": output_fileName,
|
|
||||||
"documentData": result_data,
|
|
||||||
"mimeType": "text/html"
|
|
||||||
}]
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error generating report: {str(e)}")
|
logger.error(f"Error generating report: {str(e)}")
|
||||||
return ActionResult.isFailure(
|
return ActionResult.isFailure(
|
||||||
error=str(e)
|
error=str(e)
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _generateHtmlReport(self, chatDocuments: List[Any], title: str, includeMetadata: bool, prompt: str) -> str:
|
|
||||||
"""
|
|
||||||
Generate a comprehensive HTML report using AI from all input documents.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Filter out empty documents and collect content
|
|
||||||
validDocuments = []
|
|
||||||
allContent = []
|
|
||||||
|
|
||||||
for doc in chatDocuments:
|
|
||||||
content = ""
|
|
||||||
logger.info(f"Processing document: type={type(doc)}")
|
|
||||||
|
|
||||||
# Batch extraction approach: prepare one doc payload and call extractor
|
|
||||||
try:
|
|
||||||
try:
|
|
||||||
data = self.services.workflow.getFileData(doc.fileId) if hasattr(doc, 'fileId') else None
|
|
||||||
except Exception:
|
|
||||||
data = None
|
|
||||||
extracted_list = await self.services.extraction.extractContentFromDocuments(
|
|
||||||
prompt="Extract readable text content for HTML report generation",
|
|
||||||
documents=[{
|
|
||||||
"id": getattr(doc, 'id', None),
|
|
||||||
"bytes": data or b"",
|
|
||||||
"fileName": getattr(doc, 'fileName', 'unknown'),
|
|
||||||
"mimeType": getattr(doc, 'mimeType', None) or "application/octet-stream"
|
|
||||||
}],
|
|
||||||
options={"ai": {"enabled": False}}
|
|
||||||
)
|
|
||||||
ec = extracted_list[0] if extracted_list else None
|
|
||||||
if ec and hasattr(ec, 'parts'):
|
|
||||||
for part in getattr(ec, 'parts', []):
|
|
||||||
try:
|
|
||||||
if getattr(part, 'typeGroup', '') in ("text", "table", "structure") and getattr(part, 'data', None):
|
|
||||||
content += part.data + " "
|
|
||||||
except Exception:
|
|
||||||
continue
|
|
||||||
if content.strip():
|
|
||||||
logger.info(f" Retrieved content from file: {len(content)} characters")
|
|
||||||
else:
|
|
||||||
logger.info(f" No readable text content found (binary file)")
|
|
||||||
else:
|
|
||||||
logger.info(f" No content extracted (binary file)")
|
|
||||||
except Exception as e:
|
|
||||||
logger.info(f" Could not extract content (binary file): {str(e)}")
|
|
||||||
|
|
||||||
# Skip empty documents
|
|
||||||
if content and content.strip():
|
|
||||||
validDocuments.append(doc)
|
|
||||||
allContent.append(f"Document: {doc.fileName}\n{content}\n")
|
|
||||||
logger.info(f" Added document to valid documents list")
|
|
||||||
else:
|
|
||||||
logger.info(f" Skipping document with no readable text content")
|
|
||||||
|
|
||||||
if not validDocuments:
|
|
||||||
# No readable content; return a minimal valid HTML document
|
|
||||||
timestamp = int(self.services.utils.getUtcTimestamp())
|
|
||||||
return f"<!DOCTYPE html><html><head><meta charset=\"UTF-8\"><title>{title}</title></head><body><h1>{title}</h1><p>Keine auswertbaren Inhalte gefunden.</p><p>Generated: {timestamp}</p></body></html>"
|
|
||||||
|
|
||||||
# Create AI prompt for comprehensive report generation using user's prompt
|
|
||||||
combinedContent = "\n\n".join(allContent)
|
|
||||||
aiPrompt = f"""
|
|
||||||
{prompt}
|
|
||||||
|
|
||||||
Report Title: {title}
|
|
||||||
|
|
||||||
OUTPUT POLICY:
|
|
||||||
- Return ONLY a complete, raw HTML document.
|
|
||||||
- Start with: <!DOCTYPE html>
|
|
||||||
- Must include: <html>, <head> (with <meta charset="UTF-8"> and <title>), and <body>.
|
|
||||||
- The response must be valid, self-contained HTML suitable for saving as .html.
|
|
||||||
|
|
||||||
Structure:
|
|
||||||
- Title and short subtitle
|
|
||||||
- Executive summary
|
|
||||||
- Sections with clear headings
|
|
||||||
- Use tables for structured data when helpful
|
|
||||||
- Key findings and recommendations
|
|
||||||
- Generation date and number of documents
|
|
||||||
|
|
||||||
Quality and design requirements:
|
|
||||||
- Use clear, professional, and accessible styling in a <style> block
|
|
||||||
- Apply clean layout, spacing, and visual hierarchy for headings
|
|
||||||
- Keep HTML and CSS standards-compliant and lightweight
|
|
||||||
|
|
||||||
SOURCE DOCUMENT CONTENT:
|
|
||||||
---START---
|
|
||||||
{combinedContent}
|
|
||||||
---END---
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Call AI to generate the report
|
|
||||||
logger.info(f"Generating AI report for {len(validDocuments)} documents")
|
|
||||||
# Build ChatDocument list from chatDocuments
|
|
||||||
documents = []
|
|
||||||
try:
|
|
||||||
for d in validDocuments:
|
|
||||||
try:
|
|
||||||
data = self.services.workflow.getFileData(d.fileId) if hasattr(d, 'fileId') else None
|
|
||||||
if data:
|
|
||||||
documents.append(ChatDocument(fileData=data, fileName=d.fileName, mimeType=d.mimeType))
|
|
||||||
except Exception:
|
|
||||||
continue
|
|
||||||
except Exception:
|
|
||||||
documents = None
|
|
||||||
aiReport = await self.services.ai.callAi(
|
|
||||||
prompt=aiPrompt,
|
|
||||||
documents=documents or None,
|
|
||||||
options=AiCallOptions(
|
|
||||||
operationType=OperationType.GENERATE_CONTENT, # Using GENERATE_CONTENT for report generation
|
|
||||||
priority=Priority.QUALITY,
|
|
||||||
compressPrompt=False,
|
|
||||||
compressContext=True,
|
|
||||||
processDocumentsIndividually=True,
|
|
||||||
resultFormat="html",
|
|
||||||
processingMode="detailed",
|
|
||||||
maxCost=0.08,
|
|
||||||
maxProcessingTime=90
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
# If AI call fails, return error - AI is crucial for report generation
|
|
||||||
if not aiReport or aiReport.strip() == "":
|
|
||||||
logger.error("AI report generation failed - AI is crucial for this action")
|
|
||||||
raise Exception("AI report generation failed - AI is required for report generation")
|
|
||||||
|
|
||||||
# Clean up the AI response and ensure it's valid HTML
|
|
||||||
aiReport = aiReport.strip()
|
|
||||||
|
|
||||||
# Normalize: strip code fences if present
|
|
||||||
if aiReport.startswith("```") and aiReport.endswith("```"):
|
|
||||||
lines = aiReport.split('\n')
|
|
||||||
if len(lines) >= 2:
|
|
||||||
aiReport = '\n'.join(lines[1:-1]).strip()
|
|
||||||
|
|
||||||
cleaned = aiReport.strip()
|
|
||||||
|
|
||||||
# Return exactly what we have (no wrapping)
|
|
||||||
return cleaned
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error generating AI report: {str(e)}")
|
|
||||||
# Re-raise the error - AI is crucial for report generation
|
|
||||||
raise
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -14,7 +14,7 @@ import aiohttp
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
from modules.workflows.methods.methodBase import MethodBase, action
|
from modules.workflows.methods.methodBase import MethodBase, action
|
||||||
from modules.datamodels.datamodelWorkflow import ActionResult
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -443,13 +443,16 @@ class MethodSharepoint(MethodBase):
|
||||||
@action
|
@action
|
||||||
async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
"""
|
||||||
Find documents/folders by searching their NAMES across SharePoint sites.
|
GENERAL:
|
||||||
|
- Purpose: Find documents and folders by name/path across sites.
|
||||||
|
- Input requirements: connectionReference (required); searchQuery (required); optional site, maxResults.
|
||||||
|
- Output format: JSON with found items and paths.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
connectionReference (str): Microsoft connection reference
|
- connectionReference (str, required): Microsoft connection label.
|
||||||
site (str, optional): Site hint (e.g., "SSS", "KM XYZ")
|
- site (str, optional): Site hint.
|
||||||
searchQuery (str): Search query - "budget", "folders:alpha", "files:budget", "/Documents/Project1", "namepart1 namepart2 namepart3". Use "folders:" prefix when user wants to store files or find folders
|
- searchQuery (str, required): Search terms or path.
|
||||||
maxResults (int, optional): Max results (default: 100)
|
- maxResults (int, optional): Maximum items to return. Default: 100.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
|
|
@ -799,11 +802,11 @@ class MethodSharepoint(MethodBase):
|
||||||
return ActionResult(
|
return ActionResult(
|
||||||
success=True,
|
success=True,
|
||||||
documents=[
|
documents=[
|
||||||
{
|
ActionDocument(
|
||||||
"documentName": f"sharepoint_find_path_{self._format_timestamp_for_filename()}{output_extension}",
|
documentName=f"sharepoint_find_path_{self._format_timestamp_for_filename()}{output_extension}",
|
||||||
"documentData": result_data,
|
documentData=json.dumps(result_data, indent=2),
|
||||||
"mimeType": output_mime_type
|
mimeType=output_mime_type
|
||||||
}
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -814,14 +817,17 @@ class MethodSharepoint(MethodBase):
|
||||||
@action
|
@action
|
||||||
async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
"""
|
||||||
Read documents from SharePoint across all accessible sites
|
GENERAL:
|
||||||
|
- Purpose: Read documents from SharePoint and extract content/metadata.
|
||||||
|
- Input requirements: connectionReference (required); documentList (required); optional pathObject or pathQuery; includeMetadata.
|
||||||
|
- Output format: JSON with read results per document.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
documentList (list): Reference(s) to the document list to read
|
- documentList (list, required): Document list reference(s) to read.
|
||||||
connectionReference (str): Reference to the Microsoft connection
|
- connectionReference (str, required): Microsoft connection label.
|
||||||
pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
|
- pathObject (str, optional): Reference to a previous path result.
|
||||||
pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
|
- pathQuery (str, optional): Path query if no pathObject.
|
||||||
includeMetadata (bool, optional): Whether to include metadata (default: True)
|
- includeMetadata (bool, optional): Include metadata. Default: True.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
documentList = parameters.get("documentList")
|
documentList = parameters.get("documentList")
|
||||||
|
|
@ -1073,11 +1079,11 @@ class MethodSharepoint(MethodBase):
|
||||||
return ActionResult(
|
return ActionResult(
|
||||||
success=True,
|
success=True,
|
||||||
documents=[
|
documents=[
|
||||||
{
|
ActionDocument(
|
||||||
"documentName": f"sharepoint_documents_{self._format_timestamp_for_filename()}{output_extension}",
|
documentName=f"sharepoint_documents_{self._format_timestamp_for_filename()}{output_extension}",
|
||||||
"documentData": result_data,
|
documentData=json.dumps(result_data, indent=2),
|
||||||
"mimeType": output_mime_type
|
mimeType=output_mime_type
|
||||||
}
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -1090,14 +1096,17 @@ class MethodSharepoint(MethodBase):
|
||||||
@action
|
@action
|
||||||
async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
"""
|
||||||
Upload documents to SharePoint across accessible sites
|
GENERAL:
|
||||||
|
- Purpose: Upload documents to SharePoint.
|
||||||
|
- Input requirements: connectionReference (required); documentList (required); fileNames (required); optional pathObject or pathQuery.
|
||||||
|
- Output format: JSON with upload status and file info.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
connectionReference (str): Reference to the Microsoft connection
|
- connectionReference (str, required): Microsoft connection label.
|
||||||
pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
|
- pathObject (str, optional): Reference to a previous path result.
|
||||||
pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
|
- pathQuery (str, optional): Upload target path if no pathObject.
|
||||||
documentList (list): Reference(s) to the document list to upload
|
- documentList (list, required): Document reference(s) to upload.
|
||||||
fileNames (List[str]): List of names for the uploaded files
|
- fileNames (list, required): Output file names.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
|
|
@ -1424,11 +1433,11 @@ class MethodSharepoint(MethodBase):
|
||||||
return ActionResult(
|
return ActionResult(
|
||||||
success=True,
|
success=True,
|
||||||
documents=[
|
documents=[
|
||||||
{
|
ActionDocument(
|
||||||
"documentName": f"sharepoint_upload_{self._format_timestamp_for_filename()}{output_extension}",
|
documentName=f"sharepoint_upload_{self._format_timestamp_for_filename()}{output_extension}",
|
||||||
"documentData": result_data,
|
documentData=json.dumps(result_data, indent=2),
|
||||||
"mimeType": output_mime_type
|
mimeType=output_mime_type
|
||||||
}
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -1442,13 +1451,16 @@ class MethodSharepoint(MethodBase):
|
||||||
@action
|
@action
|
||||||
async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""
|
"""
|
||||||
List documents in SharePoint folders across accessible sites
|
GENERAL:
|
||||||
|
- Purpose: List documents and folders in SharePoint paths across sites.
|
||||||
|
- Input requirements: connectionReference (required); optional pathObject or pathQuery; includeSubfolders.
|
||||||
|
- Output format: JSON with folder items and metadata.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
connectionReference (str): Reference to the Microsoft connection
|
- connectionReference (str, required): Microsoft connection label.
|
||||||
pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
|
- pathObject (str, optional): Reference to a previous path result.
|
||||||
pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
|
- pathQuery (str, optional): Path query if no pathObject.
|
||||||
includeSubfolders (bool, optional): Whether to include subfolders (default: False)
|
- includeSubfolders (bool, optional): Include one level of subfolders. Default: False.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
|
|
@ -1817,11 +1829,11 @@ class MethodSharepoint(MethodBase):
|
||||||
return ActionResult(
|
return ActionResult(
|
||||||
success=True,
|
success=True,
|
||||||
documents=[
|
documents=[
|
||||||
{
|
ActionDocument(
|
||||||
"documentName": f"sharepoint_document_list_{self._format_timestamp_for_filename()}{output_extension}",
|
documentName=f"sharepoint_document_list_{self._format_timestamp_for_filename()}{output_extension}",
|
||||||
"documentData": result_data,
|
documentData=json.dumps(result_data, indent=2),
|
||||||
"mimeType": output_mime_type
|
mimeType=output_mime_type
|
||||||
}
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,437 +0,0 @@
|
||||||
import logging
|
|
||||||
import csv
|
|
||||||
import io
|
|
||||||
import json as _json
|
|
||||||
from typing import Any, Dict
|
|
||||||
from modules.workflows.methods.methodBase import MethodBase, action
|
|
||||||
from modules.datamodels.datamodelWorkflow import ActionResult, ActionDocument
|
|
||||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, Priority
|
|
||||||
from modules.datamodels.datamodelWeb import (
|
|
||||||
WebSearchRequest,
|
|
||||||
WebCrawlRequest,
|
|
||||||
WebScrapeRequest,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class MethodWeb(MethodBase):
|
|
||||||
"""Web method implementation for web operations."""
|
|
||||||
|
|
||||||
def __init__(self, services):
|
|
||||||
super().__init__(services)
|
|
||||||
self.name = "web"
|
|
||||||
self.description = "Web search, crawling, and scraping operations using Tavily"
|
|
||||||
|
|
||||||
@action
|
|
||||||
async def search(self, parameters: Dict[str, Any]) -> ActionResult:
|
|
||||||
"""Perform a web search and output a CSV with the found URLs. Each result row contains columns "url" and "title".
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
query (str, required): Search query.
|
|
||||||
maxResults (int, optional): Max number of results. Default: 10.
|
|
||||||
searchDepth ("basic"|"advanced", optional): Search depth. Default: provider default.
|
|
||||||
timeRange ("d"|"w"|"m"|"y", optional): Limit to last day/week/month/year.
|
|
||||||
topic ("general"|"news"|"academic", optional): Result domain preference.
|
|
||||||
includeDomains (list[str], optional): Only include these domains.
|
|
||||||
excludeDomains (list[str], optional): Exclude these domains.
|
|
||||||
language (str, optional): ISO code like "de", "en" to bias results.
|
|
||||||
includeAnswer (bool, optional): Ask provider to generate a short answer.
|
|
||||||
includeRawContent (bool, optional): Include raw content where possible.
|
|
||||||
"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Prepare request data (generic, no region/language bias)
|
|
||||||
raw_query = parameters.get("query")
|
|
||||||
max_results = parameters.get("maxResults", 10)
|
|
||||||
|
|
||||||
if not raw_query or not isinstance(raw_query, str):
|
|
||||||
return ActionResult(success=False, error="Search query is required")
|
|
||||||
|
|
||||||
web_search_request = WebSearchRequest(
|
|
||||||
query=raw_query.strip(),
|
|
||||||
max_results=max_results,
|
|
||||||
search_depth=parameters.get("searchDepth"),
|
|
||||||
time_range=parameters.get("timeRange"),
|
|
||||||
topic=parameters.get("topic"),
|
|
||||||
include_domains=parameters.get("includeDomains"),
|
|
||||||
exclude_domains=parameters.get("excludeDomains"),
|
|
||||||
language=parameters.get("language"),
|
|
||||||
include_answer=parameters.get("includeAnswer"),
|
|
||||||
include_raw_content=parameters.get("includeRawContent"),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Perform request via centralized service wrappers
|
|
||||||
web_search_result = await self.services.web.webSearch(web_search_request)
|
|
||||||
|
|
||||||
# Convert search results to CSV format (generic)
|
|
||||||
if web_search_result.success and web_search_result.documents:
|
|
||||||
csv_content = self._convert_web_result_to_csv(web_search_result)
|
|
||||||
csv_document = ActionDocument(
|
|
||||||
documentName=f"web_search_results.csv",
|
|
||||||
documentData=csv_content,
|
|
||||||
mimeType="text/csv"
|
|
||||||
)
|
|
||||||
return ActionResult(success=True, documents=[csv_document])
|
|
||||||
else:
|
|
||||||
return web_search_result
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
return ActionResult(success=False, error=str(e))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _read_csv_with_urls(self, csv_content: str) -> list:
|
|
||||||
"""Read CSV content and extract URLs from url,title or title,url format (both ; and , delimiters)"""
|
|
||||||
urls = []
|
|
||||||
|
|
||||||
# Try both semicolon and comma delimiters
|
|
||||||
for delimiter in [';', ',']:
|
|
||||||
try:
|
|
||||||
reader = csv.DictReader(io.StringIO(csv_content), delimiter=delimiter)
|
|
||||||
for row in reader:
|
|
||||||
# Look for url column (case insensitive)
|
|
||||||
url = None
|
|
||||||
for key in row.keys():
|
|
||||||
if key.lower() == 'url':
|
|
||||||
url = row[key].strip()
|
|
||||||
break
|
|
||||||
|
|
||||||
if url and (url.startswith('http://') or url.startswith('https://')):
|
|
||||||
urls.append(url)
|
|
||||||
|
|
||||||
# If we found URLs with this delimiter, return them
|
|
||||||
if urls:
|
|
||||||
return urls
|
|
||||||
|
|
||||||
except Exception:
|
|
||||||
# Try next delimiter
|
|
||||||
continue
|
|
||||||
|
|
||||||
# If no valid CSV found, try simple text parsing as fallback
|
|
||||||
lines = csv_content.split('\n')
|
|
||||||
for line in lines:
|
|
||||||
line = line.strip()
|
|
||||||
if line and (line.startswith('http://') or line.startswith('https://')):
|
|
||||||
urls.append(line)
|
|
||||||
|
|
||||||
return urls
|
|
||||||
|
|
||||||
@action
|
|
||||||
async def crawl(self, parameters: Dict[str, Any]) -> ActionResult:
|
|
||||||
"""Crawl a list of URLs and extract text content.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
documentList (list[str]|str, required): Reference(s) to documents containing URLs (e.g., CSV from search). Can be a single ref or list.
|
|
||||||
expectedDocumentFormats (list, optional): Hint for downstream handling.
|
|
||||||
extractDepth ("basic"|"advanced", optional): Extraction depth. Default: "advanced".
|
|
||||||
format ("text"|"markdown", optional): Output format. Default: "text".
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
document_list = parameters.get("documentList")
|
|
||||||
|
|
||||||
# Normalize to list if a single string reference is provided
|
|
||||||
if isinstance(document_list, str):
|
|
||||||
document_list = [document_list]
|
|
||||||
|
|
||||||
if not document_list:
|
|
||||||
return ActionResult(
|
|
||||||
success=False, error="No document list reference provided."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Resolve document list reference to ChatDocument objects
|
|
||||||
chat_documents = self.services.workflow.getChatDocumentsFromDocumentList(document_list)
|
|
||||||
|
|
||||||
if not chat_documents:
|
|
||||||
return ActionResult(
|
|
||||||
success=False,
|
|
||||||
error=f"No documents found for reference: {document_list}",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Extract URLs from all documents and combine them
|
|
||||||
all_urls = []
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
|
|
||||||
for i, doc in enumerate(chat_documents):
|
|
||||||
logger.info(f"Processing document {i+1}/{len(chat_documents)}: {doc.fileName}")
|
|
||||||
|
|
||||||
# Get file data using the service center
|
|
||||||
file_data = self.services.workflow.getFileData(doc.fileId)
|
|
||||||
if not file_data:
|
|
||||||
logger.warning(f"Could not retrieve file data for document: {doc.fileName}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
content = file_data.decode("utf-8")
|
|
||||||
|
|
||||||
# Try to parse as CSV first (for new CSV format)
|
|
||||||
if doc.fileName.lower().endswith('.csv') or 'csv' in doc.mimeType.lower():
|
|
||||||
logger.info(f"Processing CSV file: {doc.fileName}")
|
|
||||||
doc_urls = self._read_csv_with_urls(content)
|
|
||||||
else:
|
|
||||||
# Parse JSON to extract URLs from search results
|
|
||||||
try:
|
|
||||||
# The document structure from WebSearchActionResult
|
|
||||||
search_data = json.loads(content)
|
|
||||||
|
|
||||||
# Extract URLs from the search results structure
|
|
||||||
doc_urls = []
|
|
||||||
if isinstance(search_data, dict):
|
|
||||||
# Handle the document structure: documentData contains the actual search results
|
|
||||||
doc_data = search_data.get("documentData", search_data)
|
|
||||||
if "results" in doc_data and isinstance(doc_data["results"], list):
|
|
||||||
doc_urls = [
|
|
||||||
result["url"]
|
|
||||||
for result in doc_data["results"]
|
|
||||||
if isinstance(result, dict) and "url" in result
|
|
||||||
]
|
|
||||||
elif "urls" in doc_data and isinstance(doc_data["urls"], list):
|
|
||||||
# Fallback: if URLs are stored directly in a 'urls' field
|
|
||||||
doc_urls = [url for url in doc_data["urls"] if isinstance(url, str)]
|
|
||||||
|
|
||||||
# Fallback: try to parse as plain text with regex (for backward compatibility)
|
|
||||||
if not doc_urls:
|
|
||||||
logger.warning(
|
|
||||||
f"Could not extract URLs from JSON structure in {doc.fileName}, trying plain text parsing"
|
|
||||||
)
|
|
||||||
doc_urls = re.split(r"[\n,;]+", content)
|
|
||||||
doc_urls = [
|
|
||||||
u.strip()
|
|
||||||
for u in doc_urls
|
|
||||||
if u.strip()
|
|
||||||
and (
|
|
||||||
u.strip().startswith("http://")
|
|
||||||
or u.strip().startswith("https://")
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
# Fallback to plain text parsing if JSON parsing fails
|
|
||||||
logger.warning(f"Document {doc.fileName} is not valid JSON, trying plain text parsing")
|
|
||||||
doc_urls = re.split(r"[\n,;]+", content)
|
|
||||||
doc_urls = [
|
|
||||||
u.strip()
|
|
||||||
for u in doc_urls
|
|
||||||
if u.strip()
|
|
||||||
and (
|
|
||||||
u.strip().startswith("http://")
|
|
||||||
or u.strip().startswith("https://")
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
if doc_urls:
|
|
||||||
all_urls.extend(doc_urls)
|
|
||||||
logger.info(f"Extracted {len(doc_urls)} URLs from {doc.fileName}")
|
|
||||||
else:
|
|
||||||
logger.warning(f"No valid URLs found in document: {doc.fileName}")
|
|
||||||
|
|
||||||
if not all_urls:
|
|
||||||
return ActionResult(
|
|
||||||
success=False, error="No valid URLs found in any of the documents."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Remove duplicates while preserving order
|
|
||||||
unique_urls = list(dict.fromkeys(all_urls))
|
|
||||||
logger.info(f"Extracted {len(unique_urls)} unique URLs from {len(chat_documents)} documents")
|
|
||||||
|
|
||||||
# Prepare request data with normalization
|
|
||||||
allowed_extract_depth = {"basic", "advanced"}
|
|
||||||
allowed_formats = {"text", "markdown"}
|
|
||||||
extract_depth = parameters.get("extractDepth")
|
|
||||||
if extract_depth and extract_depth not in allowed_extract_depth:
|
|
||||||
logger.warning(f"Invalid extractDepth '{extract_depth}' provided. Falling back to 'advanced'.")
|
|
||||||
extract_depth = "advanced"
|
|
||||||
fmt = parameters.get("format")
|
|
||||||
if fmt and fmt not in allowed_formats:
|
|
||||||
logger.warning(f"Invalid format '{fmt}' provided. Falling back to 'text'.")
|
|
||||||
fmt = "text"
|
|
||||||
|
|
||||||
web_crawl_request = WebCrawlRequest(
|
|
||||||
urls=unique_urls,
|
|
||||||
extract_depth=extract_depth,
|
|
||||||
format=fmt,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Perform request via centralized service wrappers
|
|
||||||
web_crawl_result = await self.services.web.webCrawl(web_crawl_request)
|
|
||||||
|
|
||||||
# Convert and enrich with concise summaries per URL for better context
|
|
||||||
if web_crawl_result.success:
|
|
||||||
try:
|
|
||||||
doc = web_crawl_result.documents[0]
|
|
||||||
results = getattr(doc.documentData, "results", [])
|
|
||||||
enriched = []
|
|
||||||
# Summarize each result briefly using AI for added context
|
|
||||||
for item in results:
|
|
||||||
url = str(getattr(item, "url", ""))
|
|
||||||
content = str(getattr(item, "content", ""))
|
|
||||||
summary = ""
|
|
||||||
try:
|
|
||||||
if content:
|
|
||||||
prompt = (
|
|
||||||
"Summarize the following webpage content in 3-5 concise bullet points. "
|
|
||||||
"Focus on key points, figures, named entities (companies/institutions), and location context. "
|
|
||||||
"Return only bullet points without any preface."
|
|
||||||
)
|
|
||||||
context = content[:4000]
|
|
||||||
# Centralized AI summary (balanced analyse_content)
|
|
||||||
summary = await self.services.ai.callAi(
|
|
||||||
prompt=prompt,
|
|
||||||
documents=None,
|
|
||||||
options=AiCallOptions(
|
|
||||||
operationType=OperationType.ANALYSE_CONTENT,
|
|
||||||
priority=Priority.BALANCED,
|
|
||||||
compressPrompt=True,
|
|
||||||
compressContext=False,
|
|
||||||
processingMode="advanced",
|
|
||||||
maxCost=0.05,
|
|
||||||
maxProcessingTime=30
|
|
||||||
)
|
|
||||||
)
|
|
||||||
summary = summary.strip()
|
|
||||||
except Exception:
|
|
||||||
summary = ""
|
|
||||||
enriched.append({
|
|
||||||
"url": url,
|
|
||||||
"summary": summary,
|
|
||||||
"snippet": content[:500]
|
|
||||||
})
|
|
||||||
|
|
||||||
import json as _json
|
|
||||||
payload = {
|
|
||||||
"success": True,
|
|
||||||
"total_count": len(enriched),
|
|
||||||
"results": enriched,
|
|
||||||
}
|
|
||||||
json_content = _json.dumps(payload, ensure_ascii=False, indent=2)
|
|
||||||
except Exception:
|
|
||||||
# Fallback to original conversion
|
|
||||||
json_content = self._convert_web_result_to_json(web_crawl_result)
|
|
||||||
|
|
||||||
json_document = ActionDocument(
|
|
||||||
documentName=f"web_crawl_results.json",
|
|
||||||
documentData=json_content,
|
|
||||||
mimeType="application/json"
|
|
||||||
)
|
|
||||||
return ActionResult(success=True, documents=[json_document])
|
|
||||||
else:
|
|
||||||
return web_crawl_result
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in crawl method: {str(e)}")
|
|
||||||
return ActionResult(success=False, error=str(e))
|
|
||||||
|
|
||||||
@action
async def scrape(self, parameters: Dict[str, Any]) -> ActionResult:
    """Search and then crawl the found URLs in one step. To use for market analysis, web research, internet searches

    Parameters:
        query (str, required): Search query.
        maxResults (int, optional): Max number of results. Default: 10.
        searchDepth ("basic"|"advanced", optional): Search depth.
        timeRange ("d"|"w"|"m"|"y", optional): Time window.
        topic ("general"|"news"|"academic", optional): Result domain preference.
        includeDomains (list[str], optional): Only include these domains.
        excludeDomains (list[str], optional): Exclude these domains.
        language (str, optional): ISO language bias.
        includeAnswer (bool, optional): Ask provider to include an answer.
        includeRawContent (bool, optional): Include raw content where possible.
        extractDepth ("basic"|"advanced", optional): Crawl extraction depth. Default: "advanced".
        format ("text"|"markdown", optional): Crawl output format. Default: "text".

    Returns:
        ActionResult with a single JSON document ("web_scrape_results.json")
        on success, or the failed service result / an error result otherwise.
    """
    try:
        # Fail fast: the query is the only mandatory parameter, so check it
        # before doing any other parsing/normalization work.
        query = parameters.get("query")
        if not query:
            return ActionResult(success=False, error="Search query is required")

        max_results = parameters.get("maxResults", 10)

        # Normalize optional enums to avoid validation errors downstream.
        allowed_search_depth = {"basic", "advanced"}
        allowed_extract_depth = {"basic", "advanced"}
        allowed_formats = {"text", "markdown"}

        search_depth = parameters.get("searchDepth")
        if search_depth and search_depth not in allowed_search_depth:
            logger.warning(f"Invalid searchDepth '{search_depth}' provided. Falling back to None.")
            search_depth = None

        extract_depth = parameters.get("extractDepth")
        if extract_depth and extract_depth not in allowed_extract_depth:
            logger.warning(f"Invalid extractDepth '{extract_depth}' provided. Falling back to 'advanced'.")
            extract_depth = "advanced"

        fmt = parameters.get("format")
        if fmt and fmt not in allowed_formats:
            logger.warning(f"Invalid format '{fmt}' provided. Falling back to 'text'.")
            fmt = "text"

        # Prepare request data for the centralized web service wrapper.
        web_scrape_request = WebScrapeRequest(
            query=query,
            max_results=max_results,
            search_depth=search_depth,
            time_range=parameters.get("timeRange"),
            topic=parameters.get("topic"),
            include_domains=parameters.get("includeDomains"),
            exclude_domains=parameters.get("excludeDomains"),
            language=parameters.get("language"),
            include_answer=parameters.get("includeAnswer"),
            include_raw_content=parameters.get("includeRawContent"),
            extract_depth=extract_depth,
            format=fmt,
        )

        # Perform request via centralized service wrappers
        web_scrape_result = await self.services.web.webScrape(web_scrape_request)

        if not web_scrape_result.success:
            # Propagate the service failure unchanged so the caller sees
            # the original error payload.
            return web_scrape_result

        # Convert to proper JSON format and wrap it as a single document.
        json_content = self._convert_web_result_to_json(web_scrape_result)
        json_document = ActionDocument(
            documentName="web_scrape_results.json",  # plain literal; no placeholders needed
            documentData=json_content,
            mimeType="application/json"
        )
        return ActionResult(
            success=True,
            documents=[json_document]
        )

    except Exception as e:
        # Log before swallowing, consistent with the sibling crawl() handler.
        logger.error(f"Error in scrape method: {str(e)}")
        return ActionResult(success=False, error=str(e))
|
|
||||||
|
|
||||||
# Helpers
|
|
||||||
def _convert_web_result_to_json(self, web_result):
|
|
||||||
if not getattr(web_result, 'success', False) or not getattr(web_result, 'documents', None):
|
|
||||||
return _json.dumps({"success": getattr(web_result, 'success', False), "error": getattr(web_result, 'error', None)})
|
|
||||||
document_data = web_result.documents[0].documentData
|
|
||||||
result_dict = {
|
|
||||||
"success": True,
|
|
||||||
"results": [
|
|
||||||
{
|
|
||||||
"url": str(getattr(result, 'url', "")),
|
|
||||||
"content": getattr(result, 'content', "")
|
|
||||||
}
|
|
||||||
for result in getattr(document_data, 'results', [])
|
|
||||||
],
|
|
||||||
"total_count": getattr(document_data, 'total_count', 0)
|
|
||||||
}
|
|
||||||
if hasattr(document_data, 'urls'):
|
|
||||||
result_dict["urls"] = [str(url) for url in getattr(document_data, 'urls', [])]
|
|
||||||
elif hasattr(document_data, 'query'):
|
|
||||||
result_dict["query"] = getattr(document_data, 'query', None)
|
|
||||||
return _json.dumps(result_dict, indent=2, ensure_ascii=False)
|
|
||||||
|
|
||||||
def _convert_web_result_to_csv(self, web_search_result):
|
|
||||||
if not getattr(web_search_result, 'success', False) or not getattr(web_search_result, 'documents', None):
|
|
||||||
return ""
|
|
||||||
output = io.StringIO()
|
|
||||||
writer = csv.writer(output, delimiter=';')
|
|
||||||
writer.writerow(['url', 'title'])
|
|
||||||
document_data = web_search_result.documents[0].documentData
|
|
||||||
for result in getattr(document_data, 'results', []):
|
|
||||||
writer.writerow([str(getattr(result, 'url', "")), getattr(result, 'title', "")])
|
|
||||||
return output.getvalue()
|
|
||||||
9
modules/workflows/processing/adaptive/__init__.py
Normal file
9
modules/workflows/processing/adaptive/__init__.py
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
# adaptive module for React mode
|
||||||
|
# Provides adaptive learning capabilities
|
||||||
|
|
||||||
|
from .intentAnalyzer import IntentAnalyzer, DataType, ExpectedFormat
|
||||||
|
from .contentValidator import ContentValidator
|
||||||
|
from .learningEngine import LearningEngine
|
||||||
|
from .progressTracker import ProgressTracker
|
||||||
|
|
||||||
|
__all__ = ['IntentAnalyzer', 'ContentValidator', 'LearningEngine', 'ProgressTracker', 'DataType', 'ExpectedFormat']
|
||||||
308
modules/workflows/processing/adaptive/contentValidator.py
Normal file
308
modules/workflows/processing/adaptive/contentValidator.py
Normal file
|
|
@ -0,0 +1,308 @@
|
||||||
|
# contentValidator.py
|
||||||
|
# Content validation for adaptive React mode
|
||||||
|
|
||||||
|
import re
|
||||||
|
import logging
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class ContentValidator:
    """Validates delivered content against user intent.

    Heuristic, best-effort validation: each delivered document is checked
    against the analyzed intent (data type, expected format, success
    criteria) and scored in [0, 1]; per-document results are aggregated
    into an overall verdict plus improvement suggestions.
    """

    def __init__(self):
        pass

    def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
        """Validates delivered content against user intent.

        Returns a dict with keys: overallSuccess (bool), qualityScore
        (float 0..1), validationDetails (per-document dicts) and
        improvementSuggestions (deduplicated list of strings).
        """
        try:
            validationDetails = []

            for doc in documents:
                content = self._extractContent(doc)
                detail = self._validateSingleDocument(content, doc, intent)
                validationDetails.append(detail)

            # Overall success requires every criterion of every document to
            # be met. BUGFIX: "successCriteriaMet" is a list of booleans, so
            # it must be flattened with an inner all(); the previous version
            # tested the truthiness of the list itself, which made any
            # non-empty criteria list (even [False]) count as success.
            overallSuccess = all(
                all(detail.get("successCriteriaMet", [False]))
                for detail in validationDetails
            )

            # Calculate quality score (mean of per-document scores)
            qualityScore = self._calculateQualityScore(validationDetails)

            # Generate improvement suggestions
            improvementSuggestions = self._generateImprovementSuggestions(validationDetails, intent)

            return {
                "overallSuccess": overallSuccess,
                "qualityScore": qualityScore,
                "validationDetails": validationDetails,
                "improvementSuggestions": improvementSuggestions
            }

        except Exception as e:
            logger.error(f"Error validating content: {str(e)}")
            return self._createFailedValidationResult(str(e))

    def _extractContent(self, doc: Any) -> str:
        """Extracts text content from a document; returns "" on any failure."""
        try:
            if hasattr(doc, 'documentData'):
                data = doc.documentData
                # Dict payloads keep the text under 'content'; anything else
                # is stringified wholesale.
                if isinstance(data, dict) and 'content' in data:
                    return str(data['content'])
                else:
                    return str(data)
            return ""
        except Exception:
            return ""

    def _validateSingleDocument(self, content: str, doc: Any, intent: Dict[str, Any]) -> Dict[str, Any]:
        """Validates a single document against intent and returns its detail dict."""
        # Check data type match
        dataTypeMatch = self._checkDataTypeMatch(content, intent.get("dataType", "unknown"))

        # Check format match
        formatMatch = self._checkFormatMatch(content, intent.get("expectedFormat", "unknown"))

        # Calculate quality score
        qualityScore = self._calculateDocumentQualityScore(content, intent)

        # Check success criteria (one boolean per criterion)
        successCriteriaMet = self._checkSuccessCriteria(content, intent)

        # Identify specific issues
        specificIssues = self._identifySpecificIssues(content, intent)

        # Generate improvement suggestions
        improvementSuggestions = self._generateDocumentImprovementSuggestions(content, intent)

        return {
            "documentName": getattr(doc, 'documentName', 'Unknown'),
            "dataTypeMatch": dataTypeMatch,
            "formatMatch": formatMatch,
            "qualityScore": qualityScore,
            "successCriteriaMet": successCriteriaMet,
            "specificIssues": specificIssues,
            "improvementSuggestions": improvementSuggestions
        }

    def _checkDataTypeMatch(self, content: str, dataType: str) -> bool:
        """Checks if content matches the expected data type."""
        if dataType == "numbers":
            return self._containsNumbers(content)
        elif dataType == "text":
            return self._containsText(content)
        elif dataType == "documents":
            return self._containsDocumentContent(content)
        elif dataType == "analysis":
            return self._containsAnalysis(content)
        elif dataType == "code":
            return self._containsCode(content)
        else:
            return True  # Unknown type, assume match

    def _containsNumbers(self, content: str) -> bool:
        """Checks if content contains actual numbers (not code producing them)."""
        # Look for actual numbers in the content
        numbers = re.findall(r'\b\d+\b', content)

        # Check if it's code (contains function definitions, etc.)
        isCode = any(keyword in content.lower() for keyword in [
            'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
            'return', 'print(', 'console.log', 'public ', 'private '
        ])

        # Code with numeric literals does not count as "actual numbers".
        if isCode:
            return False

        return len(numbers) > 0

    def _containsText(self, content: str) -> bool:
        """Checks if content contains readable text (more than 5 words)."""
        # Remove numbers and special characters
        textContent = re.sub(r'[^\w\s]', '', content)
        words = textContent.split()

        return len(words) > 5

    def _containsDocumentContent(self, content: str) -> bool:
        """Checks if content is suitable for document creation."""
        # Check for structured content (lists, tables, line breaks, ...)
        hasStructure = any(indicator in content for indicator in [
            '\n', '\t', '|', '-', '*', '1.', '2.', '•', '◦'
        ])

        # Check for meaningful content (arbitrary 50-char threshold)
        hasMeaningfulContent = len(content.strip()) > 50

        return hasStructure and hasMeaningfulContent

    def _containsAnalysis(self, content: str) -> bool:
        """Checks if content contains analysis-style vocabulary."""
        analysisIndicators = [
            'analysis', 'findings', 'conclusion', 'summary', 'insights',
            'trends', 'patterns', 'comparison', 'evaluation', 'assessment'
        ]

        contentLower = content.lower()
        return any(indicator in contentLower for indicator in analysisIndicators)

    def _containsCode(self, content: str) -> bool:
        """Checks if content contains source-code keywords."""
        codeIndicators = [
            'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
            'return', 'print(', 'console.log', 'public ', 'private ', 'void ',
            'int ', 'string ', 'var ', 'let ', 'const '
        ]

        contentLower = content.lower()
        return any(indicator in contentLower for indicator in codeIndicators)

    def _checkFormatMatch(self, content: str, expectedFormat: str) -> bool:
        """Checks if content matches the expected output format."""
        if expectedFormat == "raw_data":
            # Raw data should be simple, not heavily formatted
            return not any(indicator in content for indicator in [
                '<html>', '<div>', '<table>', '## ', '### ', '**', '__'
            ])
        elif expectedFormat == "formatted":
            # Formatted content should have structure
            return any(indicator in content for indicator in [
                '\n', '\t', '|', '-', '*', '1.', '2.', '•'
            ])
        elif expectedFormat == "structured":
            # Structured content should have clear organization
            return any(indicator in content for indicator in [
                '{', '}', '[', ']', '|', '\t', '  '
            ])
        else:
            return True  # Unknown format, assume match

    def _checkSuccessCriteria(self, content: str, intent: Dict[str, Any]) -> List[bool]:
        """Checks each success criterion; returns one boolean per criterion."""
        criteriaMet = []
        successCriteria = intent.get("successCriteria", [])

        for criterion in successCriteria:
            if 'prime numbers' in criterion.lower():
                # Check if content contains actual prime numbers, not code
                hasNumbers = bool(re.search(r'\b\d+\b', content))
                isNotCode = not any(keyword in content.lower() for keyword in [
                    'def ', 'function', 'import ', 'class '
                ])
                criteriaMet.append(hasNumbers and isNotCode)
            elif 'document' in criterion.lower():
                # Check if content is suitable for document creation
                hasStructure = any(indicator in content for indicator in [
                    '\n', '\t', '|', '-', '*', '1.', '2.'
                ])
                criteriaMet.append(hasStructure)
            elif 'format' in criterion.lower():
                # Check if content is properly formatted
                hasFormatting = any(indicator in content for indicator in [
                    '\n', '\t', '|', '-', '*', '1.', '2.', '•'
                ])
                criteriaMet.append(hasFormatting)
            else:
                # Generic check - content should not be empty
                criteriaMet.append(len(content.strip()) > 0)

        return criteriaMet

    def _calculateDocumentQualityScore(self, content: str, intent: Dict[str, Any]) -> float:
        """Calculates a quality score in [0, 1] for a single document.

        Weighting: 0.2 non-empty, 0.3 data-type match, 0.2 format match,
        up to 0.3 scaled by the fraction of success criteria met.
        """
        score = 0.0

        # Base score for having content
        if len(content.strip()) > 0:
            score += 0.2

        # Score for data type match
        if self._checkDataTypeMatch(content, intent.get("dataType", "unknown")):
            score += 0.3

        # Score for format match
        if self._checkFormatMatch(content, intent.get("expectedFormat", "unknown")):
            score += 0.2

        # Score for success criteria (proportional to how many are met)
        successCriteriaMet = self._checkSuccessCriteria(content, intent)
        if successCriteriaMet:
            successRate = sum(successCriteriaMet) / len(successCriteriaMet)
            score += 0.3 * successRate

        return min(score, 1.0)

    def _calculateQualityScore(self, validationDetails: List[Dict[str, Any]]) -> float:
        """Calculates overall quality score as the mean of per-document scores."""
        if not validationDetails:
            return 0.0

        totalScore = sum(detail.get("qualityScore", 0) for detail in validationDetails)
        return totalScore / len(validationDetails)

    def _identifySpecificIssues(self, content: str, intent: Dict[str, Any]) -> List[str]:
        """Identifies specific, human-readable issues with the content."""
        issues = []

        # Check for common issues
        if intent.get("dataType") == "numbers" and self._containsCode(content):
            issues.append("Content contains code instead of actual numbers")

        if intent.get("expectedFormat") == "raw_data" and any(indicator in content for indicator in ['<html>', '## ', '**']):
            issues.append("Content is formatted when raw data was requested")

        if len(content.strip()) == 0:
            issues.append("Content is empty")

        return issues

    def _generateDocumentImprovementSuggestions(self, content: str, intent: Dict[str, Any]) -> List[str]:
        """Generates improvement suggestions for a single document."""
        suggestions = []

        dataType = intent.get("dataType", "unknown")
        expectedFormat = intent.get("expectedFormat", "unknown")

        if dataType == "numbers" and self._containsCode(content):
            suggestions.append("Deliver actual numbers, not code to generate them")

        if expectedFormat == "raw_data" and any(indicator in content for indicator in ['<html>', '## ']):
            suggestions.append("Provide raw data without formatting")

        if len(content.strip()) == 0:
            suggestions.append("Provide actual content")

        return suggestions

    def _generateImprovementSuggestions(self, validationDetails: List[Dict[str, Any]],
                                        intent: Dict[str, Any]) -> List[str]:
        """Generates deduplicated improvement suggestions from all documents.

        NOTE: the set-based dedup does not preserve suggestion order.
        """
        suggestions = []

        # Cross-document issues first
        if not any(detail.get("dataTypeMatch", False) for detail in validationDetails):
            dataType = intent.get("dataType", "unknown")
            suggestions.append(f"Content should contain {dataType} data, not code or other formats")

        if not any(detail.get("formatMatch", False) for detail in validationDetails):
            expectedFormat = intent.get("expectedFormat", "unknown")
            suggestions.append(f"Content should be in {expectedFormat} format")

        # Add specific suggestions from validation details
        for detail in validationDetails:
            suggestions.extend(detail.get("improvementSuggestions", []))

        return list(set(suggestions))  # Remove duplicates

    def _createFailedValidationResult(self, error: str) -> Dict[str, Any]:
        """Creates the validation result returned when validation itself fails."""
        return {
            "overallSuccess": False,
            "qualityScore": 0.0,
            "validationDetails": [],
            "improvementSuggestions": [f"Validation failed: {error}"]
        }
|
||||||
239
modules/workflows/processing/adaptive/intentAnalyzer.py
Normal file
239
modules/workflows/processing/adaptive/intentAnalyzer.py
Normal file
|
|
@ -0,0 +1,239 @@
|
||||||
|
# intentAnalyzer.py
|
||||||
|
# Intent analysis for adaptive React mode
|
||||||
|
|
||||||
|
import re
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class DataType(Enum):
    """Kind of payload a user request asks for; classified by IntentAnalyzer."""
    NUMBERS = "numbers"      # actual numeric values (not code producing them)
    TEXT = "text"            # prose: articles, summaries, descriptions
    DOCUMENTS = "documents"  # files/reports to be created (Word, Excel, PDF, ...)
    ANALYSIS = "analysis"    # insights, comparisons, evaluations
    CODE = "code"            # source code / scripts / algorithms
    UNKNOWN = "unknown"      # no pattern matched
|
||||||
|
|
||||||
|
class ExpectedFormat(Enum):
    """Expected presentation of the output; classified by IntentAnalyzer."""
    RAW_DATA = "raw_data"      # plain values, no markup or layout
    FORMATTED = "formatted"    # human-readable layout (tables, bullets, ...)
    STRUCTURED = "structured"  # machine-readable (JSON, XML, CSV, ...)
    VISUAL = "visual"          # charts/graphics (no detection pattern defined yet)
    UNKNOWN = "unknown"        # no pattern matched
|
||||||
|
|
||||||
|
class IntentAnalyzer:
    """Analyzes user intent to understand what they actually want.

    Pure-regex heuristic classifier: the prompt is matched against ordered
    pattern tables to derive a data type, an expected output format, quality
    thresholds and success criteria, plus a confidence score for the analysis.
    """

    def __init__(self):
        # Ordered pattern tables; classification returns the FIRST matching
        # entry, so dict insertion order is significant.
        self.dataTypePatterns = {
            DataType.NUMBERS: [
                r'\b(numbers?|digits?|count|list|sequence)\b',
                r'\b(prime|fibonacci|random|even|odd)\s+(numbers?)\b',
                r'\b(calculate|compute|generate)\s+(numbers?)\b',
                r'\b(first|last)\s+\d+\s+(numbers?)\b'
            ],
            DataType.TEXT: [
                r'\b(text|content|words?|sentences?|paragraphs?)\b',
                r'\b(write|create|generate)\s+(text|content)\b',
                r'\b(summary|description|explanation)\b',
                r'\b(article|essay|report)\b'
            ],
            DataType.DOCUMENTS: [
                r'\b(document|file|report|pdf|word|excel)\b',
                r'\b(create|generate|make)\s+(document|file|report)\b',
                r'\b(format|structure|organize)\s+(document)\b',
                r'\b(presentation|slides?)\b'
            ],
            DataType.ANALYSIS: [
                r'\b(analyze|analysis|examine|study|evaluate)\b',
                r'\b(insights?|findings?|results?)\b',
                r'\b(compare|contrast|evaluate)\b',
                r'\b(trends?|patterns?)\b'
            ],
            DataType.CODE: [
                r'\b(code|program|script|algorithm|function)\b',
                r'\b(write|create|develop)\s+(code|program|script)\b',
                r'\b(implement|build|construct)\b',
                r'\b(debug|fix|optimize)\s+(code)\b'
            ]
        }

        # NOTE: no pattern list for ExpectedFormat.VISUAL — it can currently
        # only be produced as a default, never detected.
        self.formatPatterns = {
            ExpectedFormat.RAW_DATA: [
                r'\b(raw|plain|simple|basic)\b',
                # negative lookahead: "numbers in a table" should NOT match raw
                r'\b(numbers?|data|list)\b(?!\s+(in|as|with))',
                r'\b(just|only)\s+(numbers?|data)\b'
            ],
            ExpectedFormat.FORMATTED: [
                r'\b(formatted|structured|organized|presented)\b',
                r'\b(table|chart|graph|visual)\b',
                r'\b(pretty|nice|clean)\s+(format|presentation)\b',
                r'\b(professional|polished)\b'
            ],
            ExpectedFormat.STRUCTURED: [
                r'\b(json|xml|csv|structured)\b',
                r'\b(organized|categorized|grouped)\b',
                r'\b(systematic|methodical)\b',
                r'\b(database|spreadsheet)\b'
            ]
        }

    def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]:
        """Analyzes user intent from prompt and context.

        Returns a dict with primaryGoal, dataType, expectedFormat,
        qualityRequirements, successCriteria and confidenceScore; falls back
        to a low-confidence default analysis on any error.
        """
        try:
            # Extract primary goal
            primaryGoal = self._extractPrimaryGoal(userPrompt)

            # Classify data type
            dataType = self._classifyDataType(userPrompt)

            # Determine expected format
            expectedFormat = self._determineExpectedFormat(userPrompt)

            # Assess quality requirements
            qualityRequirements = self._assessQualityRequirements(userPrompt, context)

            # Extract success criteria
            successCriteria = self._extractSuccessCriteria(userPrompt, context)

            # Calculate confidence score
            confidenceScore = self._calculateConfidenceScore(dataType, expectedFormat, successCriteria)

            return {
                "primaryGoal": primaryGoal,
                "dataType": dataType.value,
                "expectedFormat": expectedFormat.value,
                "qualityRequirements": qualityRequirements,
                "successCriteria": successCriteria,
                "confidenceScore": confidenceScore
            }

        except Exception as e:
            logger.error(f"Error analyzing user intent: {str(e)}")
            return self._createDefaultIntentAnalysis(userPrompt)

    def _extractPrimaryGoal(self, userPrompt: str) -> str:
        """Extracts the primary goal from user prompt."""
        # Simple extraction - can be enhanced
        return userPrompt.strip()

    def _classifyDataType(self, userPrompt: str) -> DataType:
        """Classifies the type of data the user wants (first matching pattern wins)."""
        promptLower = userPrompt.lower()

        for dataType, patterns in self.dataTypePatterns.items():
            for pattern in patterns:
                if re.search(pattern, promptLower):
                    return dataType

        return DataType.UNKNOWN

    def _determineExpectedFormat(self, userPrompt: str) -> ExpectedFormat:
        """Determines the expected format of the output (first matching pattern wins)."""
        promptLower = userPrompt.lower()

        for formatType, patterns in self.formatPatterns.items():
            for pattern in patterns:
                if re.search(pattern, promptLower):
                    return formatType

        return ExpectedFormat.UNKNOWN

    def _assessQualityRequirements(self, userPrompt: str, context: Any) -> Dict[str, Any]:
        """Assesses quality requirements (thresholds in [0, 1]) from the prompt.

        NOTE(review): `context` is currently unused here — confirm whether it
        should influence the thresholds.
        """
        promptLower = userPrompt.lower()

        # Check for accuracy requirements (keyword-tuned around a 0.8 default)
        accuracyThreshold = 0.8
        if any(word in promptLower for word in ['exact', 'precise', 'accurate', 'correct']):
            accuracyThreshold = 0.95
        elif any(word in promptLower for word in ['approximate', 'rough', 'estimate']):
            accuracyThreshold = 0.7

        # Check for completeness requirements
        completenessThreshold = 0.8
        if any(word in promptLower for word in ['complete', 'full', 'comprehensive', 'all']):
            completenessThreshold = 0.95
        elif any(word in promptLower for word in ['summary', 'brief', 'overview']):
            completenessThreshold = 0.6

        # Check for format requirements
        formatRequirement = "any"
        if any(word in promptLower for word in ['formatted', 'structured', 'organized']):
            formatRequirement = "formatted"
        elif any(word in promptLower for word in ['raw', 'plain', 'simple']):
            formatRequirement = "raw"

        return {
            "accuracyThreshold": accuracyThreshold,
            "completenessThreshold": completenessThreshold,
            "formatRequirement": formatRequirement
        }

    def _extractSuccessCriteria(self, userPrompt: str, context: Any) -> List[str]:
        """Extracts success criteria from prompt and context.

        Always returns at least one criterion (a generic fallback).
        """
        criteria = []
        promptLower = userPrompt.lower()

        # Extract explicit criteria from prompt keywords
        if 'first' in promptLower and 'numbers' in promptLower:
            criteria.append("Contains the first N numbers as requested")

        if 'prime' in promptLower:
            criteria.append("Contains actual prime numbers, not code to generate them")

        if 'document' in promptLower:
            criteria.append("Creates a properly formatted document")

        if 'format' in promptLower:
            criteria.append("Content is properly formatted as requested")

        # Add context-based criteria from the current task step's objective
        if hasattr(context, 'task_step') and context.task_step:
            taskObjective = context.task_step.objective.lower()
            if 'word' in taskObjective:
                criteria.append("Creates a Word document")
            if 'excel' in taskObjective:
                criteria.append("Creates an Excel spreadsheet")

        return criteria if criteria else ["Delivers what the user requested"]

    def _calculateConfidenceScore(self, dataType: DataType, expectedFormat: ExpectedFormat,
                                  successCriteria: List[str]) -> float:
        """Calculates confidence score for the intent analysis.

        Additive weights: 0.3 for a known data type, 0.2 for a known format,
        0.3 for having any criteria, +0.2 for multiple criteria; capped at 1.0.
        """
        score = 0.0

        # Data type confidence
        if dataType != DataType.UNKNOWN:
            score += 0.3

        # Format confidence
        if expectedFormat != ExpectedFormat.UNKNOWN:
            score += 0.2

        # Success criteria confidence
        if len(successCriteria) > 0:
            score += 0.3

        # Additional confidence for specific patterns
        if len(successCriteria) > 1:
            score += 0.2

        return min(score, 1.0)

    def _createDefaultIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]:
        """Creates a low-confidence default intent analysis when analysis fails."""
        return {
            "primaryGoal": userPrompt,
            "dataType": "unknown",
            "expectedFormat": "unknown",
            "qualityRequirements": {
                "accuracyThreshold": 0.8,
                "completenessThreshold": 0.8,
                "formatRequirement": "any"
            },
            "successCriteria": ["Delivers what the user requested"],
            "confidenceScore": 0.1
        }
|
||||||
166
modules/workflows/processing/adaptive/learningEngine.py
Normal file
166
modules/workflows/processing/adaptive/learningEngine.py
Normal file
|
|
@ -0,0 +1,166 @@
|
||||||
|
# learningEngine.py
|
||||||
|
# Learning engine for adaptive React mode
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class LearningEngine:
    """Learns from feedback and adapts future behavior.

    Strategies are kept in memory, keyed by ``"<dataType>_<expectedFormat>"``
    (see :meth:`_getStrategyKey`), and mutated in place as feedback arrives.
    Nothing is persisted between process restarts.
    """

    # Prompt templates per dataType: (recommendedPrompt, avoidPrompt).
    # The ``None`` entry is the fallback for any dataType without a
    # dedicated template.
    _PROMPT_TEMPLATES = {
        'numbers': (
            "Deliver {dataType} data in {expectedFormat} format. Provide actual numbers, not code to generate them.",
            "Do not ask AI to write code when user wants data. Deliver the data directly."
        ),
        'text': (
            "Generate {dataType} content in {expectedFormat} format.",
            "Ensure content is readable and well-structured."
        ),
        'documents': (
            "Create {dataType} in {expectedFormat} format with proper structure.",
            "Ensure document is properly formatted and organized."
        ),
        None: (
            "Deliver {dataType} content in {expectedFormat} format.",
            "Ensure content matches user requirements."
        ),
    }

    def __init__(self):
        # strategyKey -> strategy dict (shape defined by _createDefaultStrategy)
        self.strategies = {}
        # Chronological record of every feedback item received
        self.feedbackHistory = []

    def learnFromFeedback(self, feedback: Dict[str, Any], context: Any, intent: Dict[str, Any]):
        """Record *feedback* and update the strategy matching *intent*.

        Errors are logged and swallowed so learning never breaks the
        surrounding workflow.
        """
        try:
            # Store feedback with a serialized snapshot of the context
            self.feedbackHistory.append({
                "feedback": feedback,
                "context": self._serializeContext(context),
                "intent": intent,
                "timestamp": datetime.now(timezone.utc).timestamp()
            })

            # Update strategies based on feedback
            self._updateStrategies(feedback, intent)

            logger.info(f"Learning from feedback: {feedback.get('actionAttempted', 'unknown')} - "
                        f"Quality: {feedback.get('qualityScore', 0):.2f}, Intent Match: {feedback.get('intentMatchScore', 0):.2f}")

        except Exception as e:
            logger.error(f"Error learning from feedback: {str(e)}")

    def getImprovedStrategy(self, context: Any, intent: Dict[str, Any]) -> Dict[str, Any]:
        """Return the learned strategy for *intent*, creating a default one on first use.

        On any internal error a fresh default strategy is returned (and not cached).
        """
        try:
            strategyKey = self._getStrategyKey(intent)

            if strategyKey in self.strategies:
                strategy = self.strategies[strategyKey]
                logger.info(f"Using learned strategy for {strategyKey}: {strategy}")
                return strategy

            # First time we see this intent shape: seed a default strategy
            defaultStrategy = self._createDefaultStrategy(intent)
            self.strategies[strategyKey] = defaultStrategy
            logger.info(f"Created default strategy for {strategyKey}")
            return defaultStrategy

        except Exception as e:
            logger.error(f"Error getting improved strategy: {str(e)}")
            return self._createDefaultStrategy(intent)

    def _updateStrategies(self, feedback: Dict[str, Any], intent: Dict[str, Any]):
        """Reinforce or penalize the strategy for *intent* based on *feedback* scores."""
        strategyKey = self._getStrategyKey(intent)
        actionAttempted = feedback.get('actionAttempted', 'unknown')
        qualityScore = feedback.get('qualityScore', 0)
        intentMatchScore = feedback.get('intentMatchScore', 0)

        # Get or create strategy
        if strategyKey not in self.strategies:
            self.strategies[strategyKey] = self._createDefaultStrategy(intent)
        strategy = self.strategies[strategyKey]

        if qualityScore > 0.7 and intentMatchScore > 0.7:
            # Successful action - reinforce it (success rate capped at 1.0)
            strategy.setdefault('successfulActions', [])
            if actionAttempted not in strategy['successfulActions']:
                strategy['successfulActions'].append(actionAttempted)
            strategy['successRate'] = min(strategy.get('successRate', 0.5) + 0.1, 1.0)
            logger.info(f"Reinforced successful action: {actionAttempted}")

        elif qualityScore < 0.3 or intentMatchScore < 0.3:
            # Failed action - remember to avoid it (success rate floored at 0.0)
            strategy.setdefault('failedActions', [])
            if actionAttempted not in strategy['failedActions']:
                strategy['failedActions'].append(actionAttempted)
            strategy['successRate'] = max(strategy.get('successRate', 0.5) - 0.1, 0.0)
            logger.info(f"Marked failed action to avoid: {actionAttempted}")

        # Scores between the thresholds leave the strategy unchanged except
        # for the modification timestamp.
        strategy['lastModified'] = datetime.now(timezone.utc).timestamp()

    def _getStrategyKey(self, intent: Dict[str, Any]) -> str:
        """Build the strategy lookup key ``"<dataType>_<expectedFormat>"``."""
        dataType = intent.get('dataType', 'unknown')
        expectedFormat = intent.get('expectedFormat', 'unknown')
        return f"{dataType}_{expectedFormat}"

    def _createDefaultStrategy(self, intent: Dict[str, Any]) -> Dict[str, Any]:
        """Create a default strategy for the intent.

        Uses the per-dataType prompt templates; unknown dataTypes fall back
        to the generic template.
        """
        dataType = intent.get('dataType', 'unknown')
        expectedFormat = intent.get('expectedFormat', 'unknown')
        recommended, avoid = self._PROMPT_TEMPLATES.get(dataType, self._PROMPT_TEMPLATES[None])
        return {
            # Bug fix: the fallback branch previously hard-coded
            # "unknown_<fmt>" even for real dataTypes, diverging from
            # _getStrategyKey; the id now always mirrors the strategy key.
            'strategyId': f"{dataType}_{expectedFormat}",
            'successfulActions': [],
            'failedActions': [],
            'successRate': 0.5,
            'lastModified': datetime.now(timezone.utc).timestamp(),
            'recommendedPrompt': recommended.format(dataType=dataType, expectedFormat=expectedFormat),
            'avoidPrompt': avoid,
        }

    def _serializeContext(self, context: Any) -> Dict[str, Any]:
        """Serialize a context object into a plain dict for storage.

        Best effort: any attribute access failure yields an empty dict.
        """
        try:
            return {
                "taskObjective": getattr(context, 'task_step', {}).get('objective', '') if hasattr(context, 'task_step') else '',
                "workflowId": getattr(context, 'workflow_id', ''),
                "availableDocuments": getattr(context, 'available_documents', [])
            }
        except Exception:
            return {}

    def getLearningSummary(self) -> Dict[str, Any]:
        """Return aggregate statistics about what has been learned so far."""
        return {
            "totalStrategies": len(self.strategies),
            "totalFeedback": len(self.feedbackHistory),
            "strategies": list(self.strategies.keys()),
            # Guard against division by zero when no strategies exist yet
            "averageSuccessRate": sum(s.get('successRate', 0) for s in self.strategies.values()) / max(len(self.strategies), 1)
        }
|
||||||
142
modules/workflows/processing/adaptive/progressTracker.py
Normal file
142
modules/workflows/processing/adaptive/progressTracker.py
Normal file
|
|
@ -0,0 +1,142 @@
|
||||||
|
# progressTracker.py
|
||||||
|
# Progress tracking for adaptive React mode
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class ProgressTracker:
    """Tracks what has been accomplished and what's still needed"""

    def __init__(self):
        # Outcome buckets, each a list of record dicts
        self.completedObjectives = []
        self.partialAchievements = []
        self.failedAttempts = []
        # De-duplicated improvement suggestions gathered from validations
        self.learningInsights = []
        # One of: planning / completed / partial / failed
        self.currentPhase = "planning"

    def updateProgress(self, result: Any, validation: Dict[str, Any], intent: Dict[str, Any]):
        """Classify one action outcome into completed / partial / failed and record it."""
        try:
            succeeded = validation.get('overallSuccess', False)
            score = validation.get('qualityScore', 0)
            suggestions = validation.get('improvementSuggestions', [])
            goal = intent.get('primaryGoal', 'Unknown')
            stamp = datetime.now(timezone.utc).timestamp()

            if succeeded and score > 0.7:
                # Fully successful outcome
                self.completedObjectives.append({
                    "objective": goal,
                    "achievement": f"Quality score: {score:.2f}",
                    "qualityScore": score,
                    "timestamp": stamp
                })
                self.currentPhase = "completed"
                logger.info(f"Objective completed: {goal}")
            elif score > 0.3:
                # Partial achievement - keep the gaps for later
                self.partialAchievements.append({
                    "objective": goal,
                    "partialAchievement": f"Quality score: {score:.2f}",
                    "missingParts": suggestions,
                    "timestamp": stamp
                })
                self.currentPhase = "partial"
                logger.info(f"Partial achievement: {goal}")
            else:
                # Failure - record what to learn from it
                self.failedAttempts.append({
                    "objective": goal,
                    "failureReason": f"Quality score: {score:.2f}",
                    "learningOpportunity": suggestions,
                    "timestamp": stamp
                })
                self.currentPhase = "failed"
                logger.info(f"Failed attempt: {goal}")

            # Collect new insights without duplicating existing ones
            for suggestion in suggestions:
                if suggestion not in self.learningInsights:
                    self.learningInsights.append(suggestion)

        except Exception as e:
            logger.error(f"Error updating progress: {str(e)}")

    def getCurrentProgress(self) -> Dict[str, Any]:
        """Snapshot of every bucket plus suggested next actions."""
        return {
            "completedObjectives": self.completedObjectives,
            "partialAchievements": self.partialAchievements,
            "failedAttempts": self.failedAttempts,
            "learningInsights": self.learningInsights,
            "currentPhase": self.currentPhase,
            "nextActionsSuggested": self._getNextActionSuggestions()
        }

    def shouldContinue(self, progress: Dict[str, Any], validation: Dict[str, Any]) -> bool:
        """Decide whether the task should keep running.

        Stops when anything is completed, validation reports success, or
        three or more attempts have failed; defaults to continuing on error.
        """
        try:
            done = bool(progress.get('completedObjectives'))
            exhausted = len(progress.get('failedAttempts', [])) >= 3
            validated = validation.get('overallSuccess', False)
            return not (done or exhausted or validated)
        except Exception as e:
            logger.error(f"Error checking if should continue: {str(e)}")
            return True  # Default to continue on error

    def _getNextActionSuggestions(self) -> List[str]:
        """Derive next-action hints from the current buckets."""
        hints: List[str] = []
        if self.failedAttempts:
            hints.append("Avoid actions that have failed before")
        if self.partialAchievements:
            hints.append("Build on partial achievements")
        if self.learningInsights:
            hints.extend(self.learningInsights[:3])  # Top 3 insights
        # Generic fallbacks when nothing has been learned yet
        return hints or ["Try a different approach", "Focus on user intent"]

    def getProgressSummary(self) -> Dict[str, Any]:
        """Aggregate counters and the completed-vs-failed success rate."""
        attempts = len(self.completedObjectives) + len(self.failedAttempts)
        return {
            "totalCompleted": len(self.completedObjectives),
            "totalPartial": len(self.partialAchievements),
            "totalFailed": len(self.failedAttempts),
            "totalInsights": len(self.learningInsights),
            "currentPhase": self.currentPhase,
            "successRate": len(self.completedObjectives) / max(attempts, 1)
        }

    def reset(self):
        """Clear every bucket and return to the planning phase."""
        self.completedObjectives = []
        self.partialAchievements = []
        self.failedAttempts = []
        self.learningInsights = []
        self.currentPhase = "planning"
|
||||||
1
modules/workflows/processing/core/__init__.py
Normal file
1
modules/workflows/processing/core/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
# Core workflow processing modules
|
||||||
302
modules/workflows/processing/core/actionExecutor.py
Normal file
302
modules/workflows/processing/core/actionExecutor.py
Normal file
|
|
@ -0,0 +1,302 @@
|
||||||
|
# actionExecutor.py
|
||||||
|
# Action execution functionality for workflows
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
from modules.datamodels.datamodelChat import ActionResult, ActionItem, TaskStep
|
||||||
|
from modules.datamodels.datamodelChat import ChatWorkflow
|
||||||
|
from modules.workflows.processing.shared.methodDiscovery import methods
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class ActionExecutor:
    """Handles execution of workflow actions.

    Resolves method/action names against the shared ``methods`` registry,
    runs them, traces results, and converts raw outcomes into ActionResult
    objects plus user-facing chat messages.
    """

    def __init__(self, services):
        # Shared services container (db interface, utils, generation, workflow, ...)
        self.services = services

    def _checkWorkflowStopped(self, workflow):
        """Check if workflow has been stopped by user and raise exception if so.

        Prefers the fresh status from the database; falls back to the
        in-memory object only when the database lookup itself fails.
        """
        try:
            # Get the current workflow status from the database to avoid stale data
            current_workflow = self.services.interfaceDbChat.getWorkflow(workflow.id)
        except Exception as e:
            # Bug fix: previously the stop-Exception was raised inside this same
            # try block and swallowed by this except, so the database-based stop
            # detection never propagated. Only the DB lookup is guarded now.
            logger.warning(f"Could not check current workflow status from database: {str(e)}")
            if workflow and workflow.status == "stopped":
                logger.info("Workflow stopped by user (from in-memory object), aborting action execution")
                raise Exception("Workflow was stopped by user")
            return
        if current_workflow and current_workflow.status == "stopped":
            logger.info("Workflow stopped by user, aborting action execution")
            raise Exception("Workflow was stopped by user")

    async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
        """Execute a method action from the ``methods`` registry.

        Raises ValueError for unknown method/action names; re-raises any
        execution error after logging it.
        """
        try:
            if methodName not in methods:
                raise ValueError(f"Unknown method: {methodName}")

            method = methods[methodName]
            if actionName not in method['actions']:
                raise ValueError(f"Unknown action: {actionName} for method {methodName}")

            action = method['actions'][actionName]

            # Execute the action coroutine registered for this name
            return await action['method'](parameters)

        except Exception as e:
            logger.error(f"Error executing method {methodName}.{actionName}: {str(e)}")
            raise

    async def executeCompoundAction(self, compoundActionName: str, parameters: Dict[str, Any]) -> ActionResult:
        """Execute a compound action given as ``"method.action"``."""
        try:
            # Split only on the first dot so action names may contain dots
            if '.' not in compoundActionName:
                raise ValueError(f"Invalid compound action name: {compoundActionName}. Expected format: method.action")

            methodName, actionName = compoundActionName.split('.', 1)
            return await self.executeAction(methodName, actionName, parameters)

        except Exception as e:
            logger.error(f"Error executing compound action {compoundActionName}: {str(e)}")
            raise

    async def executeSingleAction(self, action: ActionItem, workflow: ChatWorkflow, taskStep: TaskStep,
                                  taskIndex: int = None, actionIndex: int = None, totalActions: int = None) -> ActionResult:
        """Execute a single action and return ActionResult with enhanced document processing.

        Never raises: any failure (including a user stop) is converted into a
        failed ActionResult and recorded on *action*.
        """
        try:
            # Abort early if the user stopped the workflow
            self._checkWorkflowStopped(workflow)

            # Use passed indices or fallback to '?'
            taskNum = taskIndex if taskIndex is not None else '?'
            actionNum = actionIndex if actionIndex is not None else '?'

            logger.info(f"=== TASK {taskNum} ACTION {actionNum}: {action.execMethod}.{action.execAction} ===")

            # Log input parameters for debugging
            inputDocs = action.execParameters.get('documentList', [])
            inputConnections = action.execParameters.get('connections', [])
            logger.info(f"Input documents: {inputDocs} (type: {type(inputDocs)})")
            if inputConnections:
                logger.info(f"Input connections: {inputConnections}")
            logger.info(f"All action parameters: {action.execParameters}")

            # Copy parameters so the original ActionItem stays untouched
            enhancedParameters = action.execParameters.copy()
            if action.expectedDocumentFormats:
                enhancedParameters['expectedDocumentFormats'] = action.expectedDocumentFormats
                logger.info(f"Expected formats: {action.expectedDocumentFormats}")

            # Re-check right before the (potentially long) execution
            self._checkWorkflowStopped(workflow)

            result = await self.executeAction(
                methodName=action.execMethod,
                actionName=action.execAction,
                parameters=enhancedParameters
            )
            resultLabel = action.execResultLabel

            # Trace action result with full document metadata
            actionResultTrace = {
                "method": action.execMethod,
                "action": action.execAction,
                "success": result.success,
                "error": result.error,
                "resultLabel": resultLabel,
                "documentsCount": len(result.documents) if result.documents else 0
            }
            if result.documents:
                actionResultTrace["documents"] = []
                for doc in result.documents:
                    docMetadata = {
                        "name": getattr(doc, 'documentName', 'Unknown'),
                        "mimeType": getattr(doc, 'mimeType', 'Unknown'),
                        "size": getattr(doc, 'size', 'Unknown'),
                        "created": getattr(doc, 'created', 'Unknown'),
                        "modified": getattr(doc, 'modified', 'Unknown'),
                        "typeGroup": getattr(doc, 'typeGroup', 'Unknown'),
                        "documentId": getattr(doc, 'documentId', 'Unknown'),
                        "reference": getattr(doc, 'reference', 'Unknown')
                    }
                    # Drop 'Unknown' placeholders to keep the trace clean
                    docMetadata = {k: v for k, v in docMetadata.items() if v != 'Unknown'}
                    actionResultTrace["documents"].append(docMetadata)

            self._writeTraceLog("Action Result", actionResultTrace)

            # Process action result
            if result.success:
                action.setSuccess()
                # Extract result text from ALL documents
                action.result = self._extractResultText(result)
                # Preserve the action's execResultLabel for document routing;
                # action methods should NOT supply their own resultLabel.
                if not action.execResultLabel:
                    logger.warning(f"Action {action.execMethod}.{action.execAction} has no execResultLabel set")

                logger.info("Action completed successfully")
                if result.documents:
                    logger.info(f"Output documents ({len(result.documents)}):")
                    for i, doc in enumerate(result.documents):
                        logger.info(f"  {i+1}. {doc.documentName}")
                else:
                    logger.info("Output: No documents created")
            else:
                action.setError(result.error or "Action execution failed")
                logger.error(f"Action failed: {result.error}")

                # Create database log entry for action failure
                self.services.interfaceDbChat.createLog({
                    "workflowId": workflow.id,
                    "message": f"❌ **Task {taskNum}**\n\n❌ **Action {actionNum}/{totalActions}** failed: {result.error}",
                    "type": "error"
                })

            logger.info(f"=== TASK {taskNum} ACTION {actionNum} COMPLETED ===")

            # Create action completion message with documents (generic)
            await self._createActionCompletionMessage(action, result, workflow, taskStep, taskIndex, actionIndex, totalActions)

            return ActionResult(
                success=result.success,
                documents=result.documents,  # Return original ActionDocument objects
                resultLabel=action.execResultLabel,  # Always use action's execResultLabel
                error=result.error or ""
            )
        except Exception as e:
            logger.error(f"Error executing single action: {str(e)}")
            action.setError(str(e))
            return ActionResult(
                success=False,
                documents=[],  # Empty documents for error case
                resultLabel=action.execResultLabel,
                error=str(e)
            )

    def _extractResultText(self, result: ActionResult) -> str:
        """Concatenate the documentData of every result document.

        Returns an empty string for failed results or results without documents.
        """
        if not result.success or not result.documents:
            return ""

        resultParts = []
        for doc in result.documents:
            if hasattr(doc, 'documentData') and doc.documentData:
                resultParts.append(str(doc.documentData))

        # Join all document results with separators
        return "\n\n---\n\n".join(resultParts) if resultParts else ""

    async def _createActionCompletionMessage(self, action: ActionItem, result: ActionResult, workflow: ChatWorkflow,
                                             taskStep: TaskStep, taskIndex: int, actionIndex: int, totalActions: int):
        """Create action completion message with documents (generic); errors are logged, never raised."""
        try:
            # Convert ActionDocument objects to ChatDocument objects for message creation
            createdDocuments = []
            if result.documents:
                createdDocuments = self.services.generation.createDocumentsFromActionResult(result, action, workflow, None)

            # Imported here to avoid a circular import at module load time
            from modules.workflows.processing.core.messageCreator import MessageCreator
            messageCreator = MessageCreator(self.services)

            await messageCreator.createActionMessage(
                action=action,
                result=result,
                workflow=workflow,
                resultLabel=action.execResultLabel,
                createdDocuments=createdDocuments,
                taskStep=taskStep,
                taskIndex=taskIndex,
                actionIndex=actionIndex,
                totalActions=totalActions
            )
        except Exception as e:
            logger.error(f"Error creating action completion message: {str(e)}")

    def _writeTraceLog(self, contextText: str, data: Any) -> None:
        """Write trace data to the configured trace file when debug logging is enabled.

        Best effort: any failure is swallowed to avoid recursive logging.
        """
        try:
            import os
            import json
            from datetime import datetime, UTC

            # Bug fix: `logger.level > logging.DEBUG` was ineffective because an
            # unconfigured logger has level NOTSET (0); isEnabledFor walks the
            # logger hierarchy and reflects the effective level.
            if not logger.isEnabledFor(logging.DEBUG):
                return

            # Get log directory from configuration
            logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
            if not os.path.isabs(logDir):
                # If relative path, make it relative to the gateway directory
                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
                logDir = os.path.join(gatewayDir, logDir)

            os.makedirs(logDir, exist_ok=True)
            traceFile = os.path.join(logDir, "log_trace.log")

            # Format the trace entry header
            timestamp = datetime.fromtimestamp(self.services.utils.getUtcTimestamp(), UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
            traceEntry = f"[{timestamp}] {contextText}\n"
            traceEntry += "=" * 80 + "\n"

            # Render the payload: dict/list as JSON, strings/objects are
            # JSON-parsed when possible and shown as text otherwise
            if data is not None:
                try:
                    if isinstance(data, (dict, list)):
                        jsonStr = json.dumps(data, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                        traceEntry += f"JSON Data:\n{jsonStr}\n"
                    elif isinstance(data, str):
                        try:
                            parsed = json.loads(data)
                            jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                            traceEntry += f"JSON Data (parsed from string):\n{jsonStr}\n"
                        except (json.JSONDecodeError, TypeError):
                            # Not valid JSON: show as plain text with real line breaks
                            formatted_data = data.replace('\\n', '\n')
                            traceEntry += f"Text Data:\n{formatted_data}\n"
                    else:
                        dataStr = str(data)
                        try:
                            parsed = json.loads(dataStr)
                            jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                            traceEntry += f"JSON Data (parsed from object):\n{jsonStr}\n"
                        except (json.JSONDecodeError, TypeError):
                            formatted_data = dataStr.replace('\\n', '\n')
                            traceEntry += f"Object Data:\n{formatted_data}\n"
                except Exception:
                    # Fallback to simple string representation
                    traceEntry += f"Data (fallback): {str(data)}\n"
            else:
                traceEntry += "No data provided\n"

            traceEntry += "=" * 80 + "\n\n"

            with open(traceFile, "a", encoding="utf-8") as f:
                f.write(traceEntry)

        except Exception:
            # Don't log trace errors to avoid recursion
            pass
|
||||||
368
modules/workflows/processing/core/messageCreator.py
Normal file
368
modules/workflows/processing/core/messageCreator.py
Normal file
|
|
@ -0,0 +1,368 @@
|
||||||
|
# messageCreator.py
|
||||||
|
# Generic message creation for all workflow phases
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, Optional, List
|
||||||
|
from modules.datamodels.datamodelChat import TaskPlan, TaskStep, ActionResult, ReviewResult
|
||||||
|
from modules.datamodels.datamodelChat import ChatWorkflow
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class MessageCreator:
|
||||||
|
"""Handles creation of all workflow messages"""
|
||||||
|
|
||||||
|
def __init__(self, services):
    """Keep a reference to the shared services container used by all message builders."""
    self.services = services
|
||||||
|
|
||||||
|
def _checkWorkflowStopped(self, workflow):
|
||||||
|
"""Check if workflow has been stopped by user and raise exception if so"""
|
||||||
|
try:
|
||||||
|
# Get the current workflow status from the database to avoid stale data
|
||||||
|
current_workflow = self.services.interfaceDbChat.getWorkflow(workflow.id)
|
||||||
|
if current_workflow and current_workflow.status == "stopped":
|
||||||
|
logger.info("Workflow stopped by user, aborting message creation")
|
||||||
|
raise Exception("Workflow was stopped by user")
|
||||||
|
except Exception as e:
|
||||||
|
# If we can't get the current status due to other database issues, fall back to the in-memory object
|
||||||
|
logger.warning(f"Could not check current workflow status from database: {str(e)}")
|
||||||
|
if workflow and workflow.status == "stopped":
|
||||||
|
logger.info("Workflow stopped by user (from in-memory object), aborting message creation")
|
||||||
|
raise Exception("Workflow was stopped by user")
|
||||||
|
|
||||||
|
async def createTaskPlanMessage(self, taskPlan: TaskPlan, workflow: ChatWorkflow):
    """Create a chat message containing the task plan with user-friendly messages.

    Builds a markdown summary from the plan's overall message and each task's
    user message, stores it as an assistant "step" message, and appends it to
    the workflow's message list. Errors are logged, never raised.
    """
    try:
        # Abort early if the user stopped the workflow
        self._checkWorkflowStopped(workflow)

        # Build task plan summary (plain string; the previous f-string had no placeholders)
        taskSummary = "📋 **Task Plan**\n\n"

        # Get overall user message from task plan if available
        overallMessage = taskPlan.userMessage
        if overallMessage:
            taskSummary += f"{overallMessage}\n\n"

        # Add each task's user message (index was unused, so iterate directly)
        for task in taskPlan.tasks:
            if task.userMessage:
                taskSummary += f"💬 {task.userMessage}\n"
            taskSummary += "\n"

        # Create workflow message
        messageData = {
            "workflowId": workflow.id,
            "role": "assistant",
            "message": taskSummary,
            "status": "step",
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": self.services.utils.getUtcTimestamp(),
            "documentsLabel": "task_plan",
            "documents": [],
            # Workflow context fields - use current workflow round instead of hardcoded 1
            "roundNumber": workflow.currentRound,
            "taskNumber": 1,  # Task plan precedes individual tasks; kept at 1 so the UI does not filter the message
            "actionNumber": 0,
            # Task progress status
            "taskProgress": "pending"
        }

        message = self.services.interfaceDbChat.createMessage(messageData)
        if message:
            workflow.messages.append(message)
            logger.info("Task plan message created successfully")
    except Exception as e:
        logger.error(f"Error creating task plan message: {str(e)}")
|
||||||
|
|
||||||
|
async def createTaskStartMessage(self, taskStep: TaskStep, workflow: ChatWorkflow, taskIndex: int, totalTasks: int):
    """Create a task start message for the user.

    Stores an assistant "step" message announcing the task (with progress
    like "2/5") and appends it to the workflow's messages. Errors are
    logged, never raised.
    """
    try:
        # Abort early if the user stopped the workflow
        self._checkWorkflowStopped(workflow)

        # Progress indicator: "index/total" when the total is known
        taskProgress = f"{taskIndex}/{totalTasks}" if totalTasks is not None else str(taskIndex)

        # Compose the visible text first, then the message payload
        messageText = f"🚀 **Task {taskProgress}**"
        if taskStep.userMessage:
            messageText += f"\n\n💬 {taskStep.userMessage}"

        payload = {
            "workflowId": workflow.id,
            "role": "assistant",
            "message": messageText,
            "status": "step",
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": self.services.utils.getUtcTimestamp(),
            "documentsLabel": f"task_{taskIndex}_start",
            "documents": [],
            # Workflow context fields
            "roundNumber": workflow.currentRound,  # Use current workflow round
            "taskNumber": taskIndex,
            "actionNumber": 0,
            # Task progress status
            "taskProgress": "running"
        }

        stored = self.services.interfaceDbChat.createMessage(payload)
        if stored:
            workflow.messages.append(stored)
            logger.info(f"Task start message created for task {taskIndex}")
    except Exception as e:
        logger.error(f"Error creating task start message: {str(e)}")
|
||||||
|
|
||||||
|
    async def createActionMessage(self, action, result: ActionResult, workflow: ChatWorkflow, resultLabel: str = None,
                                  createdDocuments: List = None, taskStep: TaskStep = None,
                                  taskIndex: int = None, actionIndex: int = None, totalActions: int = None):
        """Create and store a message for the action result in the workflow with enhanced document processing.

        Builds a "step" chat message summarizing a single executed action
        (success ✅ or failure ❌ with error details), attaches any produced
        documents, resolves round/task/action numbers primarily from
        ``resultLabel`` (falling back to explicit indexes, then to the
        workflow context), persists the message and appends it to
        ``workflow.messages``.

        Args:
            action: Executed action; its ``execMethod``/``execAction``/``id``/
                ``execResultLabel`` attributes are read here.
            result: Outcome of the action (``success`` flag and ``error`` text).
            workflow: Owning chat workflow; receives the created message.
            resultLabel: Document label of the result; defaults to
                ``action.execResultLabel`` when None.
            createdDocuments: Documents produced by the action, if any.
            taskStep: Task the action belongs to (used for the objective text).
            taskIndex / actionIndex / totalActions: Optional explicit
                positions, used only as fallbacks when ``resultLabel`` is None.

        Returns:
            The created message object, or None when creation failed or an
            exception occurred (errors are logged, not raised).
        """
        try:
            # Check workflow status before creating action message
            self._checkWorkflowStopped(workflow)

            if resultLabel is None:
                resultLabel = action.execResultLabel

            # Log delivered documents
            if createdDocuments:
                logger.info(f"Result label: {resultLabel} - {len(createdDocuments)} documents")
            else:
                logger.info(f"Result label: {resultLabel} - No documents")

            # Get current workflow context and stats
            workflowContext = self.services.workflow.getWorkflowContext()
            workflowStats = self.services.workflow.getWorkflowStats()

            # Create a more meaningful message that includes task context
            taskObjective = taskStep.objective if taskStep else 'Unknown task'

            # Extract round, task, and action numbers from resultLabel first, then fallback to workflow context.
            # The label encodes "round{r}_task{t}_action{a}_..." so it is the most precise source.
            currentRound = self._extractRoundNumberFromLabel(resultLabel) if resultLabel else workflowContext.get('currentRound', 0)
            currentTask = self._extractTaskNumberFromLabel(resultLabel) if resultLabel else (taskIndex if taskIndex is not None else workflowContext.get('currentTask', 0))
            totalTasks = workflowStats.get('totalTasks', 0)
            currentAction = self._extractActionNumberFromLabel(resultLabel) if resultLabel else (actionIndex if actionIndex is not None else workflowContext.get('currentAction', 0))
            totalActions = totalActions if totalActions is not None else workflowStats.get('totalActions', 0)

            # Debug logging for round number extraction
            logger.info(f"Action message round number extraction: resultLabel='{resultLabel}', extractedRound={currentRound}, workflowRound={workflowContext.get('currentRound', 0)}")

            # Build a user-friendly message based on success/failure
            if result.success:
                messageText = f"**Action {currentAction}/{totalActions} ({action.execMethod}.{action.execAction})**\n\n"
                messageText += f"✅ {taskObjective}\n\n"
            else:
                # ⚠️ FAILURE MESSAGE - Show error details to user
                errorDetails = result.error if result.error else "Unknown error occurred"
                messageText = f"**Action {currentAction}/{totalActions} ({action.execMethod}.{action.execAction})**\n\n"
                messageText += f"❌ {taskObjective}\n\n"
                messageText += f"{errorDetails}\n\n"

            # Build concise summary to persist for history context
            doc_count = len(createdDocuments) if createdDocuments else 0
            # Collapse newlines and cap at 160 chars so history rows stay short.
            trimmed_msg = (messageText or "").strip().replace("\n", " ")
            if len(trimmed_msg) > 160:
                trimmed_msg = trimmed_msg[:157] + "..."

            messageData = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": messageText,
                "status": "step",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": self.services.utils.getUtcTimestamp(),
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "documentsLabel": resultLabel,
                "documents": createdDocuments,
                # Add workflow context fields - extract from resultLabel to match document reference
                "roundNumber": currentRound,
                "taskNumber": currentTask,
                "actionNumber": currentAction,
                "actionProgress": "success" if result.success else "fail",
                "summary": f"{action.execMethod}.{action.execAction}: {doc_count} docs | msg='{trimmed_msg}'"
            }

            # Add debugging for error messages
            if not result.success:
                logger.info(f"Creating ERROR message: {messageText}")
                logger.info(f"Message data: {messageData}")

            message = self.services.interfaceDbChat.createMessage(messageData)
            if message:
                workflow.messages.append(message)
                logger.info(f"Message created: {action.execMethod}.{action.execAction}")
                return message
            else:
                logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}")
                return None
        except Exception as e:
            logger.error(f"Error creating action message: {str(e)}")
            return None
|
||||||
|
|
||||||
|
async def createTaskCompletionMessage(self, taskStep: TaskStep, workflow: ChatWorkflow, taskIndex: int,
|
||||||
|
totalTasks: int, reviewResult: ReviewResult):
|
||||||
|
"""Create a task completion message for the user"""
|
||||||
|
try:
|
||||||
|
# Check workflow status before creating message
|
||||||
|
self._checkWorkflowStopped(workflow)
|
||||||
|
|
||||||
|
# Create a task completion message for the user
|
||||||
|
taskProgress = f"{taskIndex}/{totalTasks}" if totalTasks is not None else str(taskIndex)
|
||||||
|
|
||||||
|
# Enhanced completion message with criteria details
|
||||||
|
completionMessage = f"🎯 **Task {taskProgress}**\n\n✅ {reviewResult.reason or 'Task completed successfully'}"
|
||||||
|
|
||||||
|
# Add criteria status if available
|
||||||
|
if hasattr(reviewResult, 'met_criteria') and reviewResult.met_criteria:
|
||||||
|
for criterion in reviewResult.met_criteria:
|
||||||
|
completionMessage += f"\n• {criterion}"
|
||||||
|
|
||||||
|
if hasattr(reviewResult, 'quality_score'):
|
||||||
|
completionMessage += f"\n📊 Score {reviewResult.quality_score}/10"
|
||||||
|
|
||||||
|
taskCompletionMessage = {
|
||||||
|
"workflowId": workflow.id,
|
||||||
|
"role": "assistant",
|
||||||
|
"message": completionMessage,
|
||||||
|
"status": "step",
|
||||||
|
"sequenceNr": len(workflow.messages) + 1,
|
||||||
|
"publishedAt": self.services.utils.getUtcTimestamp(),
|
||||||
|
"documentsLabel": f"task_{taskIndex}_completion",
|
||||||
|
"documents": [],
|
||||||
|
# Add workflow context fields
|
||||||
|
"roundNumber": workflow.currentRound, # Use current workflow round
|
||||||
|
"taskNumber": taskIndex,
|
||||||
|
"actionNumber": 0,
|
||||||
|
# Add task progress status
|
||||||
|
"taskProgress": "success"
|
||||||
|
}
|
||||||
|
|
||||||
|
message = self.services.interfaceDbChat.createMessage(taskCompletionMessage)
|
||||||
|
if message:
|
||||||
|
workflow.messages.append(message)
|
||||||
|
logger.info(f"Task completion message created for task {taskIndex}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error creating task completion message: {str(e)}")
|
||||||
|
|
||||||
|
async def createRetryMessage(self, taskStep: TaskStep, workflow: ChatWorkflow, taskIndex: int, reviewResult: ReviewResult):
|
||||||
|
"""Create a retry message for the user"""
|
||||||
|
try:
|
||||||
|
# Check workflow status before creating message
|
||||||
|
self._checkWorkflowStopped(workflow)
|
||||||
|
|
||||||
|
# Create retry message for user
|
||||||
|
retryMessage = {
|
||||||
|
"workflowId": workflow.id,
|
||||||
|
"role": "assistant",
|
||||||
|
"message": f"🔄 **Task {taskIndex}** needs retry: {reviewResult.improvements}",
|
||||||
|
"status": "step",
|
||||||
|
"sequenceNr": len(workflow.messages) + 1,
|
||||||
|
"publishedAt": self.services.utils.getUtcTimestamp(),
|
||||||
|
"documentsLabel": f"task_{taskIndex}_retry",
|
||||||
|
"documents": [],
|
||||||
|
"roundNumber": workflow.currentRound,
|
||||||
|
"taskNumber": taskIndex,
|
||||||
|
"actionNumber": 0,
|
||||||
|
"taskProgress": "retry"
|
||||||
|
}
|
||||||
|
|
||||||
|
message = self.services.interfaceDbChat.createMessage(retryMessage)
|
||||||
|
if message:
|
||||||
|
workflow.messages.append(message)
|
||||||
|
logger.info(f"Retry message created for task {taskIndex}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error creating retry message: {str(e)}")
|
||||||
|
|
||||||
|
async def createErrorMessage(self, taskStep: TaskStep, workflow: ChatWorkflow, taskIndex: int, errorDetails: str):
|
||||||
|
"""Create an error message for the user"""
|
||||||
|
try:
|
||||||
|
# Check workflow status before creating message
|
||||||
|
self._checkWorkflowStopped(workflow)
|
||||||
|
|
||||||
|
# Create user-facing error message for task failure
|
||||||
|
errorMessage = f"**Task {taskIndex}**\n\n❌ '{taskStep.objective}' failed\n\n"
|
||||||
|
|
||||||
|
# Add specific error details if available
|
||||||
|
if errorDetails:
|
||||||
|
errorMessage += f"{errorDetails}\n\n"
|
||||||
|
|
||||||
|
# Create workflow message for user
|
||||||
|
messageData = {
|
||||||
|
"workflowId": workflow.id,
|
||||||
|
"role": "assistant",
|
||||||
|
"message": errorMessage,
|
||||||
|
"status": "step",
|
||||||
|
"sequenceNr": len(workflow.messages) + 1,
|
||||||
|
"publishedAt": self.services.utils.getUtcTimestamp(),
|
||||||
|
"actionId": None,
|
||||||
|
"actionMethod": "task",
|
||||||
|
"actionName": "task_error",
|
||||||
|
"documentsLabel": None,
|
||||||
|
"documents": [],
|
||||||
|
# Add workflow context fields
|
||||||
|
"roundNumber": workflow.currentRound, # Use current workflow round
|
||||||
|
"taskNumber": taskIndex,
|
||||||
|
"actionNumber": 0,
|
||||||
|
# Add task progress status
|
||||||
|
"taskProgress": "fail"
|
||||||
|
}
|
||||||
|
|
||||||
|
message = self.services.interfaceDbChat.createMessage(messageData)
|
||||||
|
if message:
|
||||||
|
workflow.messages.append(message)
|
||||||
|
logger.info(f"Error message created for task {taskIndex}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error creating error message: {str(e)}")
|
||||||
|
|
||||||
|
def _extractRoundNumberFromLabel(self, label: str) -> int:
|
||||||
|
"""Extract round number from a document label like 'round1_task1_action1_diagram_analysis'"""
|
||||||
|
try:
|
||||||
|
if not label or not isinstance(label, str):
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Parse label format: round{round}_task{task}_action{action}_{context}
|
||||||
|
if label.startswith('round'):
|
||||||
|
roundPart = label.split('_')[0] # Get 'round1' part
|
||||||
|
if roundPart.startswith('round'):
|
||||||
|
roundNumber = roundPart[5:] # Remove 'round' prefix
|
||||||
|
return int(roundNumber)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Could not extract round number from label '{label}': {str(e)}")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def _extractTaskNumberFromLabel(self, label: str) -> int:
|
||||||
|
"""Extract task number from a document label like 'round1_task1_action1_diagram_analysis'"""
|
||||||
|
try:
|
||||||
|
if not label or not isinstance(label, str):
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Parse label format: round{round}_task{task}_action{action}_{context}
|
||||||
|
if '_task' in label:
|
||||||
|
taskPart = label.split('_task')[1]
|
||||||
|
if taskPart and '_' in taskPart:
|
||||||
|
taskNumber = taskPart.split('_')[0]
|
||||||
|
return int(taskNumber)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Could not extract task number from label '{label}': {str(e)}")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def _extractActionNumberFromLabel(self, label: str) -> int:
|
||||||
|
"""Extract action number from a document label like 'round1_task1_action1_diagram_analysis'"""
|
||||||
|
try:
|
||||||
|
if not label or not isinstance(label, str):
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Parse label format: round{round}_task{task}_action{action}_{context}
|
||||||
|
if '_action' in label:
|
||||||
|
actionPart = label.split('_action')[1]
|
||||||
|
if actionPart and '_' in actionPart:
|
||||||
|
actionNumber = actionPart.split('_')[0]
|
||||||
|
return int(actionNumber)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Could not extract action number from label '{label}': {str(e)}")
|
||||||
|
return 0
|
||||||
333
modules/workflows/processing/core/taskPlanner.py
Normal file
333
modules/workflows/processing/core/taskPlanner.py
Normal file
|
|
@ -0,0 +1,333 @@
|
||||||
|
# taskPlanner.py
|
||||||
|
# Task planning functionality for workflows
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any
|
||||||
|
from modules.datamodels.datamodelChat import TaskStep, TaskContext, TaskPlan
|
||||||
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
|
||||||
|
from modules.workflows.processing.shared.promptGenerationTaskplan import (
|
||||||
|
generateTaskPlanningPrompt
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class TaskPlanner:
    """Handles task planning for workflows.

    Turns a user request into a validated :class:`TaskPlan` via an AI call
    (see :meth:`generateTaskPlan`), checking the stop flag along the way.
    """

    def __init__(self, services):
        # Shared service locator: AI gateway, chat DB interface, utils and
        # the current user object are all reached through this handle.
        self.services = services
|
||||||
|
|
||||||
|
def _checkWorkflowStopped(self, workflow):
|
||||||
|
"""Check if workflow has been stopped by user and raise exception if so"""
|
||||||
|
try:
|
||||||
|
# Get the current workflow status from the database to avoid stale data
|
||||||
|
current_workflow = self.services.interfaceDbChat.getWorkflow(workflow.id)
|
||||||
|
if current_workflow and current_workflow.status == "stopped":
|
||||||
|
logger.info("Workflow stopped by user, aborting task planning")
|
||||||
|
raise Exception("Workflow was stopped by user")
|
||||||
|
except Exception as e:
|
||||||
|
# If we can't get the current status due to other database issues, fall back to the in-memory object
|
||||||
|
logger.warning(f"Could not check current workflow status from database: {str(e)}")
|
||||||
|
if workflow and workflow.status == "stopped":
|
||||||
|
logger.info("Workflow stopped by user (from in-memory object), aborting task planning")
|
||||||
|
raise Exception("Workflow was stopped by user")
|
||||||
|
|
||||||
|
    async def generateTaskPlan(self, userInput: str, workflow) -> TaskPlan:
        """Generate a high-level task plan for the workflow.

        Pipeline: check the stop flag, build a minimal planning
        :class:`TaskContext`, render the planning prompt, call the AI
        service, extract and validate the JSON plan, resolve the user's
        language, and materialize :class:`TaskStep` objects into a
        :class:`TaskPlan`.

        Args:
            userInput: Raw user request; overridden by
                ``services.currentUserPrompt`` when that is set.
            workflow: Workflow the plan belongs to (``id`` and stop status
                are read here).

        Returns:
            The validated TaskPlan.

        Raises:
            ValueError: empty AI response, no tasks in the plan, or no valid
                TaskStep could be constructed.
            Exception: stop requested by the user, or plan validation failed.
        """
        try:
            # Check workflow status before generating task plan
            self._checkWorkflowStopped(workflow)

            logger.info(f"=== STARTING TASK PLAN GENERATION ===")
            logger.info(f"Workflow ID: {workflow.id}")
            logger.info(f"User Input: {userInput}")

            # Use stored user prompt if available, otherwise use the input
            actualUserPrompt = self.services.currentUserPrompt if self.services and hasattr(self.services, 'currentUserPrompt') and self.services.currentUserPrompt else userInput
            logger.info(f"Actual User Prompt: {actualUserPrompt}")

            # Check workflow status before calling AI service
            self._checkWorkflowStopped(workflow)

            # Create proper context object for task planning
            # For task planning, we need to create a minimal TaskStep since TaskContext requires it
            planningTaskStep = TaskStep(
                id="planning",
                objective=actualUserPrompt,
                dependencies=[],
                success_criteria=[],
                estimated_complexity="medium"
            )

            taskPlanningContext = TaskContext(
                task_step=planningTaskStep,
                workflow=workflow,
                workflow_id=workflow.id,
                available_documents=None,
                available_connections=None,
                previous_results=[],
                previous_handover=None,
                improvements=[],
                retry_count=0,
                previous_action_results=[],
                previous_review_result=None,
                is_regeneration=False,
                failure_patterns=[],
                failed_actions=[],
                successful_actions=[],
                criteria_progress={
                    'met_criteria': set(),
                    'unmet_criteria': set(),
                    'attempt_history': []
                }
            )

            # Build prompt bundle (template + placeholders) using new API
            bundle = generateTaskPlanningPrompt(self.services, taskPlanningContext)
            taskPlanningPromptTemplate = bundle.prompt
            placeholders = bundle.placeholders

            # Log task planning prompt sent to AI
            logger.info("=== TASK PLANNING PROMPT SENT TO AI ===")
            # Trace task planning prompt
            self._writeTraceLog("Task Plan Prompt", taskPlanningPromptTemplate)
            self._writeTraceLog("Task Plan Placeholders", placeholders)

            # Centralized AI call: Task planning (quality, detailed) with placeholders
            options = AiCallOptions(
                operationType=OperationType.GENERATE_PLAN,
                priority=Priority.QUALITY,
                compressPrompt=False,
                compressContext=False,
                processingMode=ProcessingMode.DETAILED,
                maxCost=0.10,
                maxProcessingTime=30
            )

            # NOTE: despite the name, 'prompt' holds the AI RESPONSE text.
            prompt = await self.services.ai.callAi(
                prompt=taskPlanningPromptTemplate,
                placeholders=placeholders,
                options=options
            )

            # Check if AI response is valid
            if not prompt:
                raise ValueError("AI service returned no response for task planning")

            # Log task planning response received
            logger.info("=== TASK PLANNING AI RESPONSE RECEIVED ===")
            logger.info(f"Response length: {len(prompt) if prompt else 0}")
            # Trace task planning response
            self._writeTraceLog("Task Plan Response", prompt)

            # Parse task plan response: take the outermost {...} span, since the
            # model may wrap the JSON in prose or code fences.
            try:
                jsonStart = prompt.find('{')
                jsonEnd = prompt.rfind('}') + 1
                if jsonStart == -1 or jsonEnd == 0:
                    raise ValueError("No JSON found in response")
                jsonStr = prompt[jsonStart:jsonEnd]
                taskPlanDict = json.loads(jsonStr)

                if 'tasks' not in taskPlanDict:
                    raise ValueError("Task plan missing 'tasks' field")
            except Exception as e:
                # Fall through with an empty plan; validation below rejects it.
                logger.error(f"Error parsing task plan response: {str(e)}")
                taskPlanDict = {'tasks': []}

            if not self._validateTaskPlan(taskPlanDict):
                logger.error("Generated task plan failed validation")
                logger.error(f"AI Response: {prompt}")
                logger.error(f"Parsed Task Plan: {json.dumps(taskPlanDict, indent=2)}")
                raise Exception("AI-generated task plan failed validation - AI is required for task planning")

            if not taskPlanDict.get('tasks'):
                raise ValueError("Task plan contains no tasks")

            # LANGUAGE DETECTION: Determine user language once for the entire workflow
            # Priority: 1. languageUserDetected from AI response, 2. service.user.language, 3. "en"
            detectedLanguage = taskPlanDict.get('languageUserDetected', '').strip()
            serviceUserLanguage = getattr(self.services.user, 'language', '') if self.services and self.services.user else ''

            if detectedLanguage and len(detectedLanguage) == 2:  # Valid language code like "en", "de", "fr"
                userLanguage = detectedLanguage
                logger.info(f"Using detected language from AI response: {userLanguage}")
            elif serviceUserLanguage and len(serviceUserLanguage) == 2:
                userLanguage = serviceUserLanguage
                logger.info(f"Using language from service user object: {userLanguage}")
            else:
                userLanguage = "en"
                logger.info(f"Using default language: {userLanguage}")

            # Set the detected language in the service for use throughout the workflow
            if self.services and self.services.user:
                self.services.user.language = userLanguage
                logger.info(f"Set workflow user language to: {userLanguage}")

            tasks = []
            for i, taskDict in enumerate(taskPlanDict.get('tasks', [])):
                if not isinstance(taskDict, dict):
                    logger.warning(f"Skipping invalid task {i+1}: not a dictionary")
                    continue

                # Map old 'description' field to new 'objective' field
                if 'description' in taskDict and 'objective' not in taskDict:
                    taskDict['objective'] = taskDict.pop('description')

                try:
                    task = TaskStep(**taskDict)
                    tasks.append(task)
                except Exception as e:
                    # Skip tasks whose fields do not match the TaskStep model.
                    logger.warning(f"Skipping invalid task {i+1}: {str(e)}")
                    continue

            if not tasks:
                raise ValueError("No valid tasks could be created from AI response")

            taskPlan = TaskPlan(
                overview=taskPlanDict.get('overview', ''),
                tasks=tasks,
                userMessage=taskPlanDict.get('userMessage', '')
            )

            logger.info(f"Task plan generated successfully with {len(tasks)} tasks")
            logger.info(f"Workflow user language set to: {userLanguage}")

            return taskPlan
        except Exception as e:
            logger.error(f"Error in generateTaskPlan: {str(e)}")
            raise
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _validateTaskPlan(self, taskPlan: Dict[str, Any]) -> bool:
|
||||||
|
"""Validate task plan structure"""
|
||||||
|
try:
|
||||||
|
if not isinstance(taskPlan, dict):
|
||||||
|
logger.error("Task plan is not a dictionary")
|
||||||
|
return False
|
||||||
|
|
||||||
|
if 'tasks' not in taskPlan or not isinstance(taskPlan['tasks'], list):
|
||||||
|
logger.error(f"Task plan missing 'tasks' field or not a list. Found: {type(taskPlan.get('tasks', 'MISSING'))}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# First pass: collect all task IDs to validate dependencies
|
||||||
|
taskIds = set()
|
||||||
|
for task in taskPlan['tasks']:
|
||||||
|
if not isinstance(task, dict):
|
||||||
|
logger.error(f"Task is not a dictionary: {type(task)}")
|
||||||
|
return False
|
||||||
|
if 'id' not in task:
|
||||||
|
logger.error(f"Task missing 'id' field: {task}")
|
||||||
|
return False
|
||||||
|
taskIds.add(task['id'])
|
||||||
|
|
||||||
|
# Second pass: validate each task
|
||||||
|
for i, task in enumerate(taskPlan['tasks']):
|
||||||
|
if not isinstance(task, dict):
|
||||||
|
logger.error(f"Task {i} is not a dictionary: {type(task)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
requiredFields = ['id', 'objective', 'success_criteria']
|
||||||
|
missingFields = [field for field in requiredFields if field not in task]
|
||||||
|
if missingFields:
|
||||||
|
logger.error(f"Task {i} missing required fields: {missingFields}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Check for duplicate IDs (shouldn't happen after first pass, but safety check)
|
||||||
|
if task['id'] in taskIds and list(taskPlan['tasks']).count(task['id']) > 1:
|
||||||
|
logger.error(f"Task {i} has duplicate ID: {task['id']}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
dependencies = task.get('dependencies', [])
|
||||||
|
if not isinstance(dependencies, list):
|
||||||
|
logger.error(f"Task {i} dependencies is not a list: {type(dependencies)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
for dep in dependencies:
|
||||||
|
if dep not in taskIds and dep != 'task_0':
|
||||||
|
logger.error(f"Task {i} has invalid dependency: {dep} (available: {list(taskIds) + ['task_0']})")
|
||||||
|
return False
|
||||||
|
|
||||||
|
logger.info(f"Task plan validation successful with {len(taskIds)} tasks")
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error validating task plan: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
    def _writeTraceLog(self, contextText: str, data: Any) -> None:
        """Write trace data to the configured trace file if in debug mode.

        Appends a timestamped, "="-framed entry to ``log_trace.log`` inside
        the configured log directory. ``data`` is rendered as pretty-printed
        JSON when possible (dicts/lists directly; strings and other objects
        are first attempted as JSON text), otherwise as plain text. Every
        failure is deliberately swallowed so tracing can never break or
        recursively re-enter the workflow.

        Args:
            contextText: Short label describing what is being traced.
            data: Arbitrary payload to render; may be None.
        """
        try:
            import os
            import json
            from datetime import datetime, UTC

            # Only write if logger is in debug mode
            if logger.level > logging.DEBUG:
                return

            # Get log directory from configuration
            logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
            if not os.path.isabs(logDir):
                # If relative path, make it relative to the gateway directory
                # (four directory levels above this module file).
                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
                logDir = os.path.join(gatewayDir, logDir)

            # Ensure log directory exists
            os.makedirs(logDir, exist_ok=True)

            # Create trace file path
            traceFile = os.path.join(logDir, "log_trace.log")

            # Format the trace entry with better structure.
            # Millisecond precision: strftime gives microseconds, [:-3] trims to ms.
            timestamp = datetime.fromtimestamp(self.services.utils.getUtcTimestamp(), UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]

            # Create a structured trace entry
            traceEntry = f"[{timestamp}] {contextText}\n"
            traceEntry += "=" * 80 + "\n"

            # Add data if provided with improved formatting
            if data is not None:
                try:
                    if isinstance(data, (dict, list)):
                        # Format as pretty JSON with better settings
                        jsonStr = json.dumps(data, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                        traceEntry += f"JSON Data:\n{jsonStr}\n"
                    elif isinstance(data, str):
                        # For string data, try to parse as JSON first, then fall back to plain text
                        try:
                            parsed = json.loads(data)
                            jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                            traceEntry += f"JSON Data (parsed from string):\n{jsonStr}\n"
                        except (json.JSONDecodeError, TypeError):
                            # Not valid JSON, show as plain text with proper line breaks
                            # (un-escapes literal "\n" sequences embedded in the string).
                            formatted_data = data.replace('\\n', '\n')
                            traceEntry += f"Text Data:\n{formatted_data}\n"
                    else:
                        # For other types, convert to string and try to parse as JSON
                        dataStr = str(data)
                        try:
                            parsed = json.loads(dataStr)
                            jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                            traceEntry += f"JSON Data (parsed from object):\n{jsonStr}\n"
                        except (json.JSONDecodeError, TypeError):
                            # Not valid JSON, show as plain text with proper line breaks
                            formatted_data = dataStr.replace('\\n', '\n')
                            traceEntry += f"Object Data:\n{formatted_data}\n"
                except Exception as e:
                    # Fallback to simple string representation
                    traceEntry += f"Data (fallback): {str(data)}\n"
            else:
                traceEntry += "No data provided\n"

            traceEntry += "=" * 80 + "\n\n"

            # Write to trace file
            with open(traceFile, "a", encoding="utf-8") as f:
                f.write(traceEntry)

        except Exception as e:
            # Don't log trace errors to avoid recursion
            pass
|
||||||
111
modules/workflows/processing/core/validator.py
Normal file
111
modules/workflows/processing/core/validator.py
Normal file
|
|
@ -0,0 +1,111 @@
|
||||||
|
# validator.py
|
||||||
|
# Validation logic for workflows
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class WorkflowValidator:
    """Handles validation of workflow components.

    Provides structural checks for AI-generated task plans
    (:meth:`validateTask`) and action lists (:meth:`validateAction`).
    """

    def __init__(self, services):
        # Service locator; kept for parity with the other workflow helpers.
        # The validation methods visible here only use module-level logging.
        self.services = services
|
||||||
|
|
||||||
|
def validateTask(self, taskPlan: Dict[str, Any]) -> bool:
|
||||||
|
"""Validate task plan structure"""
|
||||||
|
try:
|
||||||
|
if not isinstance(taskPlan, dict):
|
||||||
|
logger.error("Task plan is not a dictionary")
|
||||||
|
return False
|
||||||
|
|
||||||
|
if 'tasks' not in taskPlan or not isinstance(taskPlan['tasks'], list):
|
||||||
|
logger.error(f"Task plan missing 'tasks' field or not a list. Found: {type(taskPlan.get('tasks', 'MISSING'))}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# First pass: collect all task IDs to validate dependencies
|
||||||
|
taskIds = set()
|
||||||
|
for task in taskPlan['tasks']:
|
||||||
|
if not isinstance(task, dict):
|
||||||
|
logger.error(f"Task is not a dictionary: {type(task)}")
|
||||||
|
return False
|
||||||
|
if 'id' not in task:
|
||||||
|
logger.error(f"Task missing 'id' field: {task}")
|
||||||
|
return False
|
||||||
|
taskIds.add(task['id'])
|
||||||
|
|
||||||
|
# Second pass: validate each task
|
||||||
|
for i, task in enumerate(taskPlan['tasks']):
|
||||||
|
if not isinstance(task, dict):
|
||||||
|
logger.error(f"Task {i} is not a dictionary: {type(task)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
requiredFields = ['id', 'objective', 'success_criteria']
|
||||||
|
missingFields = [field for field in requiredFields if field not in task]
|
||||||
|
if missingFields:
|
||||||
|
logger.error(f"Task {i} missing required fields: {missingFields}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Check for duplicate IDs (shouldn't happen after first pass, but safety check)
|
||||||
|
if task['id'] in taskIds and list(taskPlan['tasks']).count(task['id']) > 1:
|
||||||
|
logger.error(f"Task {i} has duplicate ID: {task['id']}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
dependencies = task.get('dependencies', [])
|
||||||
|
if not isinstance(dependencies, list):
|
||||||
|
logger.error(f"Task {i} dependencies is not a list: {type(dependencies)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
for dep in dependencies:
|
||||||
|
if dep not in taskIds and dep != 'task_0':
|
||||||
|
logger.error(f"Task {i} has invalid dependency: {dep} (available: {list(taskIds) + ['task_0']})")
|
||||||
|
return False
|
||||||
|
|
||||||
|
logger.info(f"Task plan validation successful with {len(taskIds)} tasks")
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error validating task plan: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def validateAction(self, actions: List[Dict[str, Any]], context) -> bool:
|
||||||
|
"""Validate action structure"""
|
||||||
|
try:
|
||||||
|
if not isinstance(actions, list):
|
||||||
|
logger.error("Actions must be a list")
|
||||||
|
return False
|
||||||
|
if len(actions) == 0:
|
||||||
|
logger.warning("No actions generated")
|
||||||
|
return False
|
||||||
|
for i, action in enumerate(actions):
|
||||||
|
if not isinstance(action, dict):
|
||||||
|
logger.error(f"Action {i} must be a dictionary")
|
||||||
|
return False
|
||||||
|
# Check for compound action format (new) or separate method/action format (old)
|
||||||
|
if 'action' in action and '.' in str(action.get('action', '')):
|
||||||
|
# New compound action format: "method.action"
|
||||||
|
requiredFields = ['action', 'parameters', 'resultLabel']
|
||||||
|
else:
|
||||||
|
# Old separate format: method + action fields
|
||||||
|
requiredFields = ['method', 'action', 'parameters', 'resultLabel']
|
||||||
|
|
||||||
|
missingFields = []
|
||||||
|
for field in requiredFields:
|
||||||
|
if field not in action or not action[field]:
|
||||||
|
missingFields.append(field)
|
||||||
|
if missingFields:
|
||||||
|
logger.error(f"Action {i} missing required fields: {missingFields}")
|
||||||
|
return False
|
||||||
|
resultLabel = action.get('resultLabel', '')
|
||||||
|
if not resultLabel.startswith('round'):
|
||||||
|
logger.error(f"Action {i} result label must start with 'round': {resultLabel}")
|
||||||
|
return False
|
||||||
|
parameters = action.get('parameters', {})
|
||||||
|
if not isinstance(parameters, dict):
|
||||||
|
logger.error(f"Action {i} parameters must be a dictionary")
|
||||||
|
return False
|
||||||
|
logger.info(f"Successfully validated {len(actions)} actions")
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error validating actions: {str(e)}")
|
||||||
|
return False
|
||||||
File diff suppressed because it is too large
Load diff
1
modules/workflows/processing/modes/__init__.py
Normal file
1
modules/workflows/processing/modes/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
# Workflow mode implementations
|
||||||
833
modules/workflows/processing/modes/modeActionplan.py
Normal file
833
modules/workflows/processing/modes/modeActionplan.py
Normal file
|
|
@ -0,0 +1,833 @@
|
||||||
|
# modeActionplan.py
|
||||||
|
# Actionplan mode implementation for workflows
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import uuid
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
from modules.datamodels.datamodelChat import (
|
||||||
|
TaskStep, TaskContext, TaskResult, ActionItem, TaskStatus,
|
||||||
|
ActionResult, ReviewResult, ReviewContext
|
||||||
|
)
|
||||||
|
from modules.datamodels.datamodelChat import ChatWorkflow
|
||||||
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
|
||||||
|
from modules.workflows.processing.modes.modeBase import BaseMode
|
||||||
|
from modules.workflows.processing.shared.executionState import TaskExecutionState
|
||||||
|
from modules.workflows.processing.shared.promptGenerationActionsActionplan import (
|
||||||
|
generateActionDefinitionPrompt,
|
||||||
|
generateResultReviewPrompt
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class ActionplanMode(BaseMode):
|
||||||
|
"""Actionplan mode implementation - batch planning and sequential execution"""
|
||||||
|
|
||||||
|
def __init__(self, services, workflow):
|
||||||
|
super().__init__(services, workflow)
|
||||||
|
|
||||||
|
    async def generateActionItems(self, taskStep: TaskStep, workflow: ChatWorkflow,
                                  previousResults: List = None, enhancedContext: TaskContext = None) -> List[ActionItem]:
        """Generate the batch of actions for one task step via a single AI planning call.

        Args:
            taskStep: Task step whose objective the actions must fulfil.
            workflow: Owning chat workflow; checked for stop requests before
                each expensive step.
            previousResults: Results from earlier steps, used as planning context.
            enhancedContext: Optional pre-built TaskContext carrying retry state
                (retry_count, improvements, criteria_progress, ...).

        Returns:
            List of persisted ActionItem objects, or an empty list on any error
            (all failures are logged and swallowed here).
        """
        try:
            # Abort early if the user stopped the workflow.
            self._checkWorkflowStopped(workflow)

            retryInfo = f" (Retry #{enhancedContext.retry_count})" if enhancedContext and enhancedContext.retry_count > 0 else ""
            logger.info(f"Generating actions for task: {taskStep.objective}{retryInfo}")

            # On retries, log which acceptance criteria are already met/unmet.
            if enhancedContext and hasattr(enhancedContext, 'criteria_progress') and enhancedContext.criteria_progress is not None:
                progress = enhancedContext.criteria_progress
                logger.info(f"Retry attempt {enhancedContext.retry_count} - Criteria progress:")
                if progress.get('met_criteria'):
                    logger.info(f"  Met criteria: {', '.join(progress['met_criteria'])}")
                if progress.get('unmet_criteria'):
                    logger.warning(f"  Unmet criteria: {', '.join(progress['unmet_criteria'])}")

                # Show improvement trend over the last two attempts.
                if progress.get('attempt_history'):
                    recentAttempts = progress['attempt_history'][-2:]  # Last 2 attempts
                    if len(recentAttempts) >= 2:
                        prevScore = recentAttempts[0].get('quality_score', 0)
                        currScore = recentAttempts[1].get('quality_score', 0)
                        if currScore > prevScore:
                            logger.info(f"  Quality improving: {prevScore} -> {currScore}")
                        elif currScore < prevScore:
                            logger.warning(f"  Quality declining: {prevScore} -> {currScore}")
                        else:
                            logger.info(f"  Quality stable: {currScore}")

            # Dump the full retry context at debug level for troubleshooting.
            if enhancedContext and enhancedContext.retry_count > 0:
                logger.info("=== RETRY CONTEXT FOR ACTION GENERATION ===")
                logger.info(f"Retry Count: {enhancedContext.retry_count}")
                logger.debug(f"Previous Improvements: {enhancedContext.improvements}")
                logger.debug(f"Previous Review Result: {enhancedContext.previous_review_result}")
                logger.debug(f"Failure Patterns: {enhancedContext.failure_patterns}")
                logger.debug(f"Failed Actions: {enhancedContext.failed_actions}")
                logger.debug(f"Successful Actions: {enhancedContext.successful_actions}")
                logger.info("=== END RETRY CONTEXT ===")

            logger.info("=== STARTING ACTION GENERATION ===")

            # Build the TaskContext handed to the prompt generator: either a
            # copy of the supplied retry context or a fresh first-attempt one.
            if enhancedContext and isinstance(enhancedContext, TaskContext):
                # Use existing TaskContext if provided
                actionContext = TaskContext(
                    task_step=enhancedContext.task_step,
                    workflow=enhancedContext.workflow,
                    workflow_id=enhancedContext.workflow_id,
                    available_documents=enhancedContext.available_documents,
                    available_connections=enhancedContext.available_connections,
                    previous_results=enhancedContext.previous_results or previousResults or [],
                    previous_handover=enhancedContext.previous_handover,
                    improvements=enhancedContext.improvements or [],
                    retry_count=enhancedContext.retry_count or 0,
                    previous_action_results=enhancedContext.previous_action_results or [],
                    previous_review_result=enhancedContext.previous_review_result,
                    is_regeneration=enhancedContext.is_regeneration or False,
                    failure_patterns=enhancedContext.failure_patterns or [],
                    failed_actions=enhancedContext.failed_actions or [],
                    successful_actions=enhancedContext.successful_actions or [],
                    criteria_progress=enhancedContext.criteria_progress
                )
            else:
                # Create new context from scratch
                actionContext = TaskContext(
                    task_step=taskStep,
                    workflow=workflow,
                    workflow_id=workflow.id,
                    available_documents=None,
                    available_connections=None,
                    previous_results=previousResults or [],
                    previous_handover=None,
                    improvements=[],
                    retry_count=0,
                    previous_action_results=[],
                    previous_review_result=None,
                    is_regeneration=False,
                    failure_patterns=[],
                    failed_actions=[],
                    successful_actions=[],
                    criteria_progress=None
                )

            # Check workflow status again before the (slow, costly) AI call.
            self._checkWorkflowStopped(workflow)

            # Build prompt bundle (template + placeholders).
            bundle = generateActionDefinitionPrompt(self.services, actionContext)
            actionPromptTemplate = bundle.prompt
            placeholders = bundle.placeholders

            # Trace action planning prompt
            self._writeTraceLog("Action Plan Prompt", actionPromptTemplate)
            self._writeTraceLog("Action Plan Placeholders", placeholders)

            # Centralized AI call: action planning (quality priority, detailed mode).
            options = AiCallOptions(
                operationType=OperationType.GENERATE_PLAN,
                priority=Priority.QUALITY,
                compressPrompt=False,
                compressContext=False,
                processingMode=ProcessingMode.DETAILED,
                maxCost=0.10,
                maxProcessingTime=30
            )

            # NOTE: despite its name, `prompt` holds the AI *response* from here on.
            prompt = await self.services.ai.callAi(prompt=actionPromptTemplate, placeholders=placeholders, options=options)

            if not prompt:
                raise ValueError("AI service returned no response")

            logger.info("=== ACTION PLAN AI RESPONSE RECEIVED ===")
            logger.info(f"Response length: {len(prompt) if prompt else 0}")
            # Trace action planning response
            self._writeTraceLog("Action Plan Response", prompt)

            # Extract the outermost JSON object from the (possibly chatty) response.
            jsonStart = prompt.find('{')
            jsonEnd = prompt.rfind('}') + 1
            if jsonStart == -1 or jsonEnd == 0:
                raise ValueError("No JSON found in response")
            jsonStr = prompt[jsonStart:jsonEnd]

            try:
                actionData = json.loads(jsonStr)
            except Exception as e:
                # Parse failure falls through to the 'actions' check below,
                # which raises and is caught by the outer handler.
                logger.error(f"Error parsing action response JSON: {str(e)}")
                actionData = {}

            if 'actions' not in actionData:
                raise ValueError("Action response missing 'actions' field")

            actions = actionData['actions']
            if not actions:
                raise ValueError("Action response contains empty actions list")

            if not isinstance(actions, list):
                raise ValueError(f"Action response 'actions' field is not a list: {type(actions)}")

            # Structural validation of the planned actions (see validator).
            if not self.validator.validateAction(actions, actionContext):
                logger.error("Generated actions failed validation")
                raise Exception("AI-generated actions failed validation - AI is required for action generation")

            # Convert raw dicts to persisted ActionItem objects.
            taskActions = []
            for i, a in enumerate(actions):
                if not isinstance(a, dict):
                    logger.warning(f"Skipping invalid action {i+1}: not a dictionary")
                    continue

                # Handle compound action format (new, "method.action") or
                # separate method/action format (old).
                action_name = a.get('action', 'unknown')
                if '.' in action_name:
                    # New compound action format: "method.action"
                    method_name, action_name = action_name.split('.', 1)
                else:
                    # Old separate format: method + action fields
                    method_name = a.get('method', 'unknown')

                taskAction = self._createActionItem({
                    "execMethod": method_name,
                    "execAction": action_name,
                    "execParameters": a.get('parameters', {}),
                    "execResultLabel": a.get('resultLabel', ''),
                    "expectedDocumentFormats": a.get('expectedDocumentFormats', None),
                    "status": TaskStatus.PENDING,
                    # Extract user-friendly message if available
                    "userMessage": a.get('userMessage', None)
                })

                if taskAction:
                    taskActions.append(taskAction)
                else:
                    logger.warning(f"Skipping invalid action {i+1}: failed to create ActionItem")

            # NOTE(review): only truthy items are ever appended above, so this
            # filter is redundant but harmless.
            validActions = [ta for ta in taskActions if ta]

            if not validActions:
                raise ValueError("No valid actions could be created from AI response")

            return validActions
        except Exception as e:
            logger.error(f"Error in generateActionItems: {str(e)}")
            return []
|
|
||||||
|
async def executeTask(self, taskStep: TaskStep, workflow: ChatWorkflow, context: TaskContext,
|
||||||
|
taskIndex: int = None, totalTasks: int = None) -> TaskResult:
|
||||||
|
"""Execute all actions for a task step using Actionplan mode"""
|
||||||
|
logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===")
|
||||||
|
|
||||||
|
# Update workflow object before executing task
|
||||||
|
if taskIndex is not None:
|
||||||
|
self._updateWorkflowBeforeExecutingTask(taskIndex)
|
||||||
|
|
||||||
|
# Update workflow context for this task
|
||||||
|
if taskIndex is not None:
|
||||||
|
self.services.workflow.setWorkflowContext(task_number=taskIndex)
|
||||||
|
|
||||||
|
# Create task start message
|
||||||
|
await self.messageCreator.createTaskStartMessage(taskStep, workflow, taskIndex, totalTasks)
|
||||||
|
|
||||||
|
state = TaskExecutionState(taskStep)
|
||||||
|
retryContext = context
|
||||||
|
maxRetries = state.max_retries
|
||||||
|
|
||||||
|
for attempt in range(maxRetries):
|
||||||
|
logger.info(f"Task execution attempt {attempt+1}/{maxRetries}")
|
||||||
|
|
||||||
|
# Check workflow status before starting task execution
|
||||||
|
self._checkWorkflowStopped(workflow)
|
||||||
|
|
||||||
|
# Update retry context with current attempt information
|
||||||
|
if retryContext:
|
||||||
|
retryContext.retry_count = attempt + 1
|
||||||
|
|
||||||
|
actions = await self.generateActionItems(taskStep, workflow,
|
||||||
|
previousResults=retryContext.previous_results,
|
||||||
|
enhancedContext=retryContext)
|
||||||
|
|
||||||
|
# Log total actions count for this task
|
||||||
|
totalActions = len(actions) if actions else 0
|
||||||
|
logger.info(f"Task {taskIndex or '?'} has {totalActions} actions")
|
||||||
|
|
||||||
|
# Update workflow object after action planning
|
||||||
|
self._updateWorkflowAfterActionPlanning(totalActions)
|
||||||
|
self._setWorkflowTotals(totalActions=totalActions)
|
||||||
|
|
||||||
|
if not actions:
|
||||||
|
logger.error("No actions defined for task step, aborting task execution")
|
||||||
|
break
|
||||||
|
|
||||||
|
actionResults = []
|
||||||
|
for actionIdx, action in enumerate(actions):
|
||||||
|
# Check workflow status before each action execution
|
||||||
|
self._checkWorkflowStopped(workflow)
|
||||||
|
|
||||||
|
# Update workflow object before executing action
|
||||||
|
actionNumber = actionIdx + 1
|
||||||
|
self._updateWorkflowBeforeExecutingAction(actionNumber)
|
||||||
|
|
||||||
|
# Update workflow context for this action
|
||||||
|
self.services.workflow.setWorkflowContext(action_number=actionNumber)
|
||||||
|
|
||||||
|
# Log action start
|
||||||
|
logger.info(f"Task {taskIndex} - Starting action {actionNumber}/{totalActions}")
|
||||||
|
|
||||||
|
# Create action start message
|
||||||
|
actionStartMessage = {
|
||||||
|
"workflowId": workflow.id,
|
||||||
|
"role": "assistant",
|
||||||
|
"message": f"⚡ **Action {actionNumber}/{totalActions}** (Method {action.execMethod}.{action.execAction})",
|
||||||
|
"status": "step",
|
||||||
|
"sequenceNr": len(workflow.messages) + 1,
|
||||||
|
"publishedAt": self.services.utils.getUtcTimestamp(),
|
||||||
|
"documentsLabel": f"action_{actionNumber}_start",
|
||||||
|
"documents": [],
|
||||||
|
"actionProgress": "running",
|
||||||
|
"roundNumber": workflow.currentRound,
|
||||||
|
"taskNumber": taskIndex,
|
||||||
|
"actionNumber": actionNumber
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add user-friendly message if available
|
||||||
|
if action.userMessage:
|
||||||
|
actionStartMessage["message"] += f"\n\n💬 {action.userMessage}"
|
||||||
|
|
||||||
|
message = self.services.interfaceDbChat.createMessage(actionStartMessage)
|
||||||
|
if message:
|
||||||
|
workflow.messages.append(message)
|
||||||
|
logger.info(f"Action start message created for action {actionNumber}")
|
||||||
|
|
||||||
|
# Execute single action
|
||||||
|
result = await self.actionExecutor.executeSingleAction(action, workflow, taskStep,
|
||||||
|
taskIndex, actionNumber, totalActions)
|
||||||
|
actionResults.append(result)
|
||||||
|
|
||||||
|
if result.success:
|
||||||
|
state.addSuccessfulAction(result)
|
||||||
|
else:
|
||||||
|
state.addFailedAction(result)
|
||||||
|
|
||||||
|
# Check workflow status before review
|
||||||
|
self._checkWorkflowStopped(workflow)
|
||||||
|
|
||||||
|
reviewResult = await self._reviewTaskCompletion(taskStep, actions, actionResults, workflow)
|
||||||
|
success = reviewResult.status == 'success'
|
||||||
|
feedback = reviewResult.reason
|
||||||
|
error = None if success else reviewResult.reason
|
||||||
|
|
||||||
|
if success:
|
||||||
|
logger.info(f"=== TASK {taskIndex or '?'} COMPLETED SUCCESSFULLY: {taskStep.objective} ===")
|
||||||
|
|
||||||
|
# Create task completion message
|
||||||
|
await self.messageCreator.createTaskCompletionMessage(taskStep, workflow, taskIndex, totalTasks, reviewResult)
|
||||||
|
|
||||||
|
return TaskResult(
|
||||||
|
taskId=taskStep.id,
|
||||||
|
status=TaskStatus.COMPLETED,
|
||||||
|
success=True,
|
||||||
|
feedback=feedback,
|
||||||
|
error=None
|
||||||
|
)
|
||||||
|
|
||||||
|
elif reviewResult.status == 'retry' and state.canRetry():
|
||||||
|
logger.warning(f"Task step '{taskStep.objective}' requires retry: {reviewResult.improvements}")
|
||||||
|
|
||||||
|
# Enhanced logging of criteria status
|
||||||
|
if reviewResult.met_criteria:
|
||||||
|
logger.info(f"Met criteria: {', '.join(reviewResult.met_criteria)}")
|
||||||
|
if reviewResult.unmet_criteria:
|
||||||
|
logger.warning(f"Unmet criteria: {', '.join(reviewResult.unmet_criteria)}")
|
||||||
|
|
||||||
|
state.incrementRetryCount()
|
||||||
|
|
||||||
|
# Update retry context with retry information and criteria tracking
|
||||||
|
if retryContext:
|
||||||
|
retryContext.retry_count = state.retry_count
|
||||||
|
retryContext.improvements = reviewResult.improvements
|
||||||
|
retryContext.previous_action_results = actionResults
|
||||||
|
retryContext.previous_review_result = reviewResult
|
||||||
|
retryContext.is_regeneration = True
|
||||||
|
retryContext.failure_patterns = state.getFailurePatterns()
|
||||||
|
retryContext.failed_actions = state.failed_actions
|
||||||
|
retryContext.successful_actions = state.successful_actions
|
||||||
|
|
||||||
|
# Track criteria progress across retries
|
||||||
|
if not hasattr(retryContext, 'criteria_progress'):
|
||||||
|
retryContext.criteria_progress = {
|
||||||
|
'met_criteria': set(),
|
||||||
|
'unmet_criteria': set(),
|
||||||
|
'attempt_history': []
|
||||||
|
}
|
||||||
|
|
||||||
|
# Update criteria progress
|
||||||
|
if reviewResult.met_criteria:
|
||||||
|
retryContext.criteria_progress['met_criteria'].update(reviewResult.met_criteria)
|
||||||
|
if reviewResult.unmet_criteria:
|
||||||
|
retryContext.criteria_progress['unmet_criteria'].update(reviewResult.unmet_criteria)
|
||||||
|
|
||||||
|
# Record this attempt's criteria status
|
||||||
|
attemptRecord = {
|
||||||
|
'attempt': state.retry_count,
|
||||||
|
'met_criteria': reviewResult.met_criteria or [],
|
||||||
|
'unmet_criteria': reviewResult.unmet_criteria or [],
|
||||||
|
'quality_score': reviewResult.quality_score,
|
||||||
|
'improvements': reviewResult.improvements or []
|
||||||
|
}
|
||||||
|
retryContext.criteria_progress['attempt_history'].append(attemptRecord)
|
||||||
|
|
||||||
|
# Create retry message
|
||||||
|
await self.messageCreator.createRetryMessage(taskStep, workflow, taskIndex, reviewResult)
|
||||||
|
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
logger.error(f"=== TASK {taskIndex or '?'} FAILED: {taskStep.objective} after {attempt+1} attempts ===")
|
||||||
|
|
||||||
|
# Create error message
|
||||||
|
await self.messageCreator.createErrorMessage(taskStep, workflow, taskIndex, reviewResult.reason)
|
||||||
|
|
||||||
|
return TaskResult(
|
||||||
|
taskId=taskStep.id,
|
||||||
|
status=TaskStatus.FAILED,
|
||||||
|
success=False,
|
||||||
|
feedback=feedback,
|
||||||
|
error=reviewResult.reason if reviewResult and hasattr(reviewResult, 'reason') else "Task failed after retry attempts"
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.error(f"=== TASK {taskIndex or '?'} FAILED AFTER ALL RETRIES: {taskStep.objective} ===")
|
||||||
|
|
||||||
|
# Create final error message
|
||||||
|
await self.messageCreator.createErrorMessage(taskStep, workflow, taskIndex, "Task failed after all retries")
|
||||||
|
|
||||||
|
return TaskResult(
|
||||||
|
taskId=taskStep.id,
|
||||||
|
status=TaskStatus.FAILED,
|
||||||
|
success=False,
|
||||||
|
feedback="Task failed after all retries.",
|
||||||
|
error="Task failed after all retries."
|
||||||
|
)
|
||||||
|
|
||||||
|
    async def _reviewTaskCompletion(self, taskStep: TaskStep, taskActions: List[ActionItem],
                                    actionResults: List[ActionResult], workflow: ChatWorkflow) -> ReviewResult:
        """Ask the AI to review the executed actions and classify the task outcome.

        Args:
            taskStep: The task step whose completion is being judged.
            taskActions: The actions that were planned for the step.
            actionResults: One result per executed action.
            workflow: Owning chat workflow; checked for stop requests.

        Returns:
            ReviewResult with status 'success', 'retry', or a failure status;
            on any internal error a 'failed' ReviewResult carrying the
            exception text is returned instead of raising.
        """
        try:
            # Check workflow status before reviewing task completion.
            self._checkWorkflowStopped(workflow)

            logger.info(f"=== STARTING TASK COMPLETION REVIEW ===")
            logger.info(f"Task: {taskStep.objective}")
            logger.info(f"Actions executed: {len(taskActions) if taskActions else 0}")
            logger.info(f"Action results: {len(actionResults) if actionResults else 0}")

            # Aggregate the execution outcome into the review context the
            # prompt generator expects (counts, texts, errors, documents).
            reviewContext = ReviewContext(
                task_step=taskStep,
                task_actions=taskActions,
                action_results=actionResults,
                step_result={
                    'successful_actions': sum(1 for result in actionResults if result.success),
                    'total_actions': len(actionResults),
                    'results': [self._extractResultText(result) for result in actionResults if result.success],
                    'errors': [result.error for result in actionResults if not result.success],
                    'documents': [
                        {
                            'action_index': i,
                            'documents_count': len(result.documents) if result.documents else 0,
                            'documents': result.documents if result.documents else []
                        }
                        for i, result in enumerate(actionResults)
                    ]
                },
                workflow_id=workflow.id,
                previous_results=[]
            )

            # Check workflow status again before the (slow, costly) AI call.
            self._checkWorkflowStopped(workflow)

            # Build prompt bundle for result review.
            bundle = generateResultReviewPrompt(reviewContext)
            promptTemplate = bundle.prompt
            placeholders = bundle.placeholders

            logger.info("=== RESULT REVIEW PROMPT SENT TO AI ===")
            logger.info(f"Task: {taskStep.objective}")
            logger.info(f"Action Results Count: {len(reviewContext.action_results) if reviewContext.action_results else 0}")
            logger.info(f"Task Actions Count: {len(reviewContext.task_actions) if reviewContext.task_actions else 0}")
            # Trace result review prompt
            self._writeTraceLog("Result Review Prompt", promptTemplate)
            self._writeTraceLog("Result Review Placeholders", placeholders)

            # Centralized AI call: result validation (balanced priority).
            options = AiCallOptions(
                operationType=OperationType.ANALYSE_CONTENT,
                priority=Priority.BALANCED,
                compressPrompt=True,
                compressContext=False,
                processingMode=ProcessingMode.ADVANCED,
                maxCost=0.05,
                maxProcessingTime=30
            )

            response = await self.services.ai.callAi(prompt=promptTemplate, placeholders=placeholders, options=options)

            logger.info("=== RESULT REVIEW AI RESPONSE RECEIVED ===")
            logger.info(f"Response length: {len(response) if response else 0}")
            # Trace result review response
            self._writeTraceLog("Result Review Response", response)

            # Extract the outermost JSON object from the response text.
            jsonStart = response.find('{')
            jsonEnd = response.rfind('}') + 1
            if jsonStart == -1 or jsonEnd == 0:
                raise ValueError("No JSON found in review response")
            jsonStr = response[jsonStart:jsonEnd]

            try:
                review = json.loads(jsonStr)
            except Exception as e:
                # Parse failure leaves review empty; the 'status' check below
                # raises, and the outer handler converts that to a failed result.
                logger.error(f"Error parsing review response JSON: {str(e)}")
                review = {}
            if 'status' not in review:
                raise ValueError("Review response missing 'status' field")
            # NOTE(review): the 'status' setdefault below is dead code — the
            # check above already raised when the key was absent.
            review.setdefault('status', 'unknown')
            review.setdefault('reason', 'No reason provided')
            review.setdefault('quality_score', 5)

            # Ensure improvements is a list (the model sometimes returns a string).
            improvements = review.get('improvements', [])
            if isinstance(improvements, str):
                # Split string into list if it's a single improvement
                improvements = [improvements.strip()] if improvements.strip() else []
            elif not isinstance(improvements, list):
                improvements = []

            # Ensure all criteria fields are properly typed lists.
            metCriteria = review.get('met_criteria', [])
            if not isinstance(metCriteria, list):
                metCriteria = []

            unmetCriteria = review.get('unmet_criteria', [])
            if not isinstance(unmetCriteria, list):
                unmetCriteria = []

            reviewResult = ReviewResult(
                status=review.get('status', 'unknown'),
                reason=review.get('reason', 'No reason provided'),
                improvements=improvements,
                quality_score=review.get('quality_score', 5),
                missing_outputs=[],
                met_criteria=metCriteria,
                unmet_criteria=unmetCriteria,
                confidence=review.get('confidence', 0.5),
                # Extract user-friendly message if available
                userMessage=review.get('userMessage', None)
            )

            # Enhanced validation logging.
            logger.info(f"VALIDATION RESULT - Task: '{taskStep.objective}' - Status: {reviewResult.status.upper()}, Quality: {reviewResult.quality_score}/10")
            if reviewResult.status == 'success':
                logger.info(f"VALIDATION SUCCESS - Task completed successfully")
                if reviewResult.met_criteria:
                    logger.info(f"Met criteria: {', '.join(reviewResult.met_criteria)}")
            elif reviewResult.status == 'retry':
                logger.warning(f"VALIDATION RETRY - Task requires retry: {reviewResult.improvements}")
                if reviewResult.unmet_criteria:
                    logger.warning(f"Unmet criteria: {', '.join(reviewResult.unmet_criteria)}")
            else:
                logger.error(f"VALIDATION FAILED - Task failed: {reviewResult.reason}")

            logger.info(f"=== TASK COMPLETION REVIEW FINISHED ===")
            logger.info(f"Final Status: {reviewResult.status}")
            logger.info(f"Quality Score: {reviewResult.quality_score}/10")
            logger.info(f"Improvements: {reviewResult.improvements}")
            logger.info("=== END REVIEW ===")

            return reviewResult
        except Exception as e:
            logger.error(f"Error in reviewTaskCompletion: {str(e)}")
            return ReviewResult(
                status='failed',
                reason=str(e),
                quality_score=0
            )
||||||
|
    def _createActionItem(self, actionData: Dict[str, Any]) -> ActionItem:
        """Create and persist a new task action.

        NOTE(review): a second `_createActionItem` definition appears later in
        this class and shadows this one at class-creation time — one of the
        two copies should be removed.

        Args:
            actionData: Raw field dict for the action; missing `id`, `status`
                and `execParameters` are filled with defaults.

        Returns:
            The persisted ActionItem, or None when required fields are missing
            or the database write fails (errors are logged, not raised).
        """
        try:
            # Ensure ID is present
            if "id" not in actionData or not actionData["id"]:
                actionData["id"] = f"action_{uuid.uuid4()}"

            # Ensure required fields
            if "status" not in actionData:
                actionData["status"] = TaskStatus.PENDING

            if "execMethod" not in actionData:
                logger.error("execMethod is required for task action")
                return None

            if "execAction" not in actionData:
                logger.error("execAction is required for task action")
                return None

            if "execParameters" not in actionData:
                actionData["execParameters"] = {}

            # Use generic field separation based on ActionItem model.
            # (objectFields is intentionally unused — only the simple fields
            # are written to the record below.)
            simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)

            # Create action in database
            createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)

            # Convert the stored record back into an ActionItem model.
            return ActionItem(
                id=createdAction["id"],
                execMethod=createdAction["execMethod"],
                execAction=createdAction["execAction"],
                execParameters=createdAction.get("execParameters", {}),
                execResultLabel=createdAction.get("execResultLabel"),
                expectedDocumentFormats=createdAction.get("expectedDocumentFormats"),
                status=createdAction.get("status", TaskStatus.PENDING),
                error=createdAction.get("error"),
                retryCount=createdAction.get("retryCount", 0),
                retryMax=createdAction.get("retryMax", 3),
                processingTime=createdAction.get("processingTime"),
                timestamp=float(createdAction.get("timestamp", self.services.utils.getUtcTimestamp())),
                result=createdAction.get("result"),
                resultDocuments=createdAction.get("resultDocuments", []),
                userMessage=createdAction.get("userMessage")
            )

        except Exception as e:
            logger.error(f"Error creating task action: {str(e)}")
            return None
||||||
|
def _extractResultText(self, result: ActionResult) -> str:
|
||||||
|
"""Extract result text from ActionResult documents"""
|
||||||
|
if not result.success or not result.documents:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
# Extract text directly from ActionDocument objects
|
||||||
|
resultParts = []
|
||||||
|
for doc in result.documents:
|
||||||
|
if hasattr(doc, 'documentData') and doc.documentData:
|
||||||
|
resultParts.append(str(doc.documentData))
|
||||||
|
|
||||||
|
# Join all document results with separators
|
||||||
|
return "\n\n---\n\n".join(resultParts) if resultParts else ""
|
||||||
|
|
||||||
|
def _updateWorkflowBeforeExecutingTask(self, taskNumber: int):
|
||||||
|
"""Update workflow object before executing a task"""
|
||||||
|
try:
|
||||||
|
updateData = {
|
||||||
|
"currentTask": taskNumber,
|
||||||
|
"currentAction": 0,
|
||||||
|
"totalActions": 0
|
||||||
|
}
|
||||||
|
|
||||||
|
# Update workflow object
|
||||||
|
self.workflow.currentTask = taskNumber
|
||||||
|
self.workflow.currentAction = 0
|
||||||
|
self.workflow.totalActions = 0
|
||||||
|
|
||||||
|
# Update in database
|
||||||
|
self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
|
||||||
|
logger.info(f"Updated workflow {self.workflow.id} before executing task {taskNumber}: {updateData}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error updating workflow before executing task: {str(e)}")
|
||||||
|
|
||||||
|
def _updateWorkflowAfterActionPlanning(self, totalActions: int):
|
||||||
|
"""Update workflow object after action planning for current task"""
|
||||||
|
try:
|
||||||
|
updateData = {
|
||||||
|
"totalActions": totalActions
|
||||||
|
}
|
||||||
|
|
||||||
|
# Update workflow object
|
||||||
|
self.workflow.totalActions = totalActions
|
||||||
|
|
||||||
|
# Update in database
|
||||||
|
self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
|
||||||
|
logger.info(f"Updated workflow {self.workflow.id} after action planning: {updateData}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error updating workflow after action planning: {str(e)}")
|
||||||
|
|
||||||
|
def _updateWorkflowBeforeExecutingAction(self, actionNumber: int):
|
||||||
|
"""Update workflow object before executing an action"""
|
||||||
|
try:
|
||||||
|
updateData = {
|
||||||
|
"currentAction": actionNumber
|
||||||
|
}
|
||||||
|
|
||||||
|
# Update workflow object
|
||||||
|
self.workflow.currentAction = actionNumber
|
||||||
|
|
||||||
|
# Update in database
|
||||||
|
self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
|
||||||
|
logger.info(f"Updated workflow {self.workflow.id} before executing action {actionNumber}: {updateData}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error updating workflow before executing action: {str(e)}")
|
||||||
|
|
||||||
|
def _setWorkflowTotals(self, totalTasks: int = None, totalActions: int = None):
    """Record total task/action counts on the workflow and persist changes.

    Only the counts that are explicitly passed (not None) are updated; the
    database write is skipped entirely when nothing changed. Errors are
    logged and suppressed.
    """
    try:
        changes = {}

        if totalTasks is not None:
            self.workflow.totalTasks = totalTasks
            changes["totalTasks"] = totalTasks

        if totalActions is not None:
            self.workflow.totalActions = totalActions
            changes["totalActions"] = totalActions

        # Persist only when at least one total actually changed.
        if changes:
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, changes)
            logger.info(f"Updated workflow {self.workflow.id} totals in database: {changes}")

        # The workflow may not carry either attribute yet - show N/A then.
        tasksTotal = self.workflow.totalTasks if hasattr(self.workflow, 'totalTasks') else 'N/A'
        actionsTotal = self.workflow.totalActions if hasattr(self.workflow, 'totalActions') else 'N/A'
        logger.debug(f"Updated workflow totals: Tasks {tasksTotal}, Actions {actionsTotal}")
    except Exception as e:
        logger.error(f"Error setting workflow totals: {str(e)}")
|
def _createActionItem(self, actionData: Dict[str, Any]) -> ActionItem:
    """Create and persist a new task action from a raw field dict.

    Mutates *actionData* in place to fill defaults (id, status,
    execParameters), writes the record via the chat DB interface, and
    returns the stored row rehydrated as an ActionItem. Returns None when
    a mandatory field is missing or any step raises.
    """
    try:
        import uuid

        # Ensure ID is present (generate a UUID-based one when missing or empty)
        if "id" not in actionData or not actionData["id"]:
            actionData["id"] = f"action_{uuid.uuid4()}"

        # Ensure required fields
        if "status" not in actionData:
            actionData["status"] = TaskStatus.PENDING

        # execMethod/execAction are mandatory routing fields - bail out without them
        if "execMethod" not in actionData:
            logger.error("execMethod is required for task action")
            return None

        if "execAction" not in actionData:
            logger.error("execAction is required for task action")
            return None

        if "execParameters" not in actionData:
            actionData["execParameters"] = {}

        # Use generic field separation based on ActionItem model
        # (objectFields is intentionally unused - only simple fields are stored here)
        simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)

        # Create action in database
        createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)

        # Convert the stored record back into an ActionItem model, applying
        # defaults for optional fields absent from the created row.
        # NOTE(review): assumes recordCreate returns a mapping keyed by the
        # same field names as ActionItem - confirm against the DB layer.
        return ActionItem(
            id=createdAction["id"],
            execMethod=createdAction["execMethod"],
            execAction=createdAction["execAction"],
            execParameters=createdAction.get("execParameters", {}),
            execResultLabel=createdAction.get("execResultLabel"),
            expectedDocumentFormats=createdAction.get("expectedDocumentFormats"),
            status=createdAction.get("status", TaskStatus.PENDING),
            error=createdAction.get("error"),
            retryCount=createdAction.get("retryCount", 0),
            retryMax=createdAction.get("retryMax", 3),
            processingTime=createdAction.get("processingTime"),
            timestamp=float(createdAction.get("timestamp", self.services.utils.getUtcTimestamp())),
            result=createdAction.get("result"),
            resultDocuments=createdAction.get("resultDocuments", []),
            userMessage=createdAction.get("userMessage")
        )

    except Exception as e:
        logger.error(f"Error creating task action: {str(e)}")
        return None
|
def _writeTraceLog(self, contextText: str, data: Any) -> None:
|
||||||
|
"""Write trace data to configured trace file if in debug mode with improved JSON formatting"""
|
||||||
|
try:
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
|
||||||
|
# Only write if logger is in debug mode
|
||||||
|
if logger.level > logging.DEBUG:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Get log directory from configuration
|
||||||
|
logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
|
||||||
|
if not os.path.isabs(logDir):
|
||||||
|
# If relative path, make it relative to the gateway directory
|
||||||
|
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
|
logDir = os.path.join(gatewayDir, logDir)
|
||||||
|
|
||||||
|
# Ensure log directory exists
|
||||||
|
os.makedirs(logDir, exist_ok=True)
|
||||||
|
|
||||||
|
# Create trace file path
|
||||||
|
traceFile = os.path.join(logDir, "log_trace.log")
|
||||||
|
|
||||||
|
# Format the trace entry with better structure
|
||||||
|
timestamp = datetime.fromtimestamp(self.services.utils.getUtcTimestamp(), UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
||||||
|
|
||||||
|
# Create a structured trace entry
|
||||||
|
traceEntry = f"[{timestamp}] {contextText}\n"
|
||||||
|
traceEntry += "=" * 80 + "\n"
|
||||||
|
|
||||||
|
# Add data if provided with improved formatting
|
||||||
|
if data is not None:
|
||||||
|
try:
|
||||||
|
if isinstance(data, (dict, list)):
|
||||||
|
# Format as pretty JSON with better settings
|
||||||
|
jsonStr = json.dumps(data, indent=2, default=str, ensure_ascii=False, sort_keys=False)
|
||||||
|
traceEntry += f"JSON Data:\n{jsonStr}\n"
|
||||||
|
elif isinstance(data, str):
|
||||||
|
# For string data, try to parse as JSON first, then fall back to plain text
|
||||||
|
try:
|
||||||
|
parsed = json.loads(data)
|
||||||
|
jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
|
||||||
|
traceEntry += f"JSON Data (parsed from string):\n{jsonStr}\n"
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
# Not valid JSON, show as plain text with proper line breaks
|
||||||
|
formatted_data = data.replace('\\n', '\n')
|
||||||
|
traceEntry += f"Text Data:\n{formatted_data}\n"
|
||||||
|
else:
|
||||||
|
# For other types, convert to string and try to parse as JSON
|
||||||
|
dataStr = str(data)
|
||||||
|
try:
|
||||||
|
parsed = json.loads(dataStr)
|
||||||
|
jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
|
||||||
|
traceEntry += f"JSON Data (parsed from object):\n{jsonStr}\n"
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
# Not valid JSON, show as plain text with proper line breaks
|
||||||
|
formatted_data = dataStr.replace('\\n', '\n')
|
||||||
|
traceEntry += f"Object Data:\n{formatted_data}\n"
|
||||||
|
except Exception as e:
|
||||||
|
# Fallback to simple string representation
|
||||||
|
traceEntry += f"Data (fallback): {str(data)}\n"
|
||||||
|
else:
|
||||||
|
traceEntry += "No data provided\n"
|
||||||
|
|
||||||
|
traceEntry += "=" * 80 + "\n\n"
|
||||||
|
|
||||||
|
# Write to trace file
|
||||||
|
with open(traceFile, "a", encoding="utf-8") as f:
|
||||||
|
f.write(traceEntry)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
# Don't log trace errors to avoid recursion
|
||||||
|
pass
|
||||||
63
modules/workflows/processing/modes/modeBase.py
Normal file
63
modules/workflows/processing/modes/modeBase.py
Normal file
|
|
@ -0,0 +1,63 @@
|
||||||
|
# modeBase.py
|
||||||
|
# Abstract base class for workflow modes
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
import logging
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
from modules.datamodels.datamodelChat import TaskStep, TaskContext, TaskResult, ActionItem
|
||||||
|
from modules.datamodels.datamodelChat import ChatWorkflow
|
||||||
|
from modules.workflows.processing.core.taskPlanner import TaskPlanner
|
||||||
|
from modules.workflows.processing.core.actionExecutor import ActionExecutor
|
||||||
|
from modules.workflows.processing.core.messageCreator import MessageCreator
|
||||||
|
from modules.workflows.processing.core.validator import WorkflowValidator
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class BaseMode(ABC):
    """Abstract base class for workflow execution modes.

    Concrete modes implement executeTask/generateActionItems; the shared
    planner, executor, message creator and validator helpers are wired up
    here so subclasses delegate common steps through them.
    """

    def __init__(self, services, workflow):
        # Shared service container and the workflow instance being executed.
        self.services = services
        self.workflow = workflow
        # Common collaborators available to every mode implementation.
        self.taskPlanner = TaskPlanner(services)
        self.actionExecutor = ActionExecutor(services)
        self.messageCreator = MessageCreator(services)
        self.validator = WorkflowValidator(services)

    def _checkWorkflowStopped(self, workflow):
        """Raise if the user has stopped the workflow; otherwise return.

        Prefers the freshest status from the database; on DB trouble it
        falls back to the in-memory workflow object.
        NOTE(review): the stop signal is a generic Exception matched by its
        message string - callers depend on that exact text.
        """
        try:
            # Get the current workflow status from the database to avoid stale data
            current_workflow = self.services.interfaceDbChat.getWorkflow(workflow.id)
            if current_workflow and current_workflow.status == "stopped":
                logger.info("Workflow stopped by user, aborting execution")
                raise Exception("Workflow was stopped by user")
        except Exception as e:
            # If this was the explicit stop signal, re-raise to abort immediately
            if str(e) == "Workflow was stopped by user":
                raise
            # If we can't get the current status due to other database issues, fall back to the in-memory object
            logger.warning(f"Could not check current workflow status from database: {str(e)}")
            if workflow and workflow.status == "stopped":
                logger.info("Workflow stopped by user (from in-memory object), aborting execution")
                raise Exception("Workflow was stopped by user")

    @abstractmethod
    async def executeTask(self, taskStep: TaskStep, workflow: ChatWorkflow, context: TaskContext,
                          taskIndex: int = None, totalTasks: int = None) -> TaskResult:
        """Execute a task step - must be implemented by concrete modes"""
        pass

    @abstractmethod
    async def generateActionItems(self, taskStep: TaskStep, workflow: ChatWorkflow,
                                  previousResults: List = None, enhancedContext: TaskContext = None) -> List[ActionItem]:
        """Generate actions for a task step - must be implemented by concrete modes"""
        pass

    async def generateTaskPlan(self, userInput: str, workflow: ChatWorkflow):
        """Generate task plan - common to all modes (delegates to TaskPlanner)."""
        return await self.taskPlanner.generateTaskPlan(userInput, workflow)

    async def createTaskPlanMessage(self, taskPlan, workflow: ChatWorkflow):
        """Create task plan message - common to all modes (delegates to MessageCreator)."""
        return await self.messageCreator.createTaskPlanMessage(taskPlan, workflow)
938
modules/workflows/processing/modes/modeReact.py
Normal file
938
modules/workflows/processing/modes/modeReact.py
Normal file
|
|
@ -0,0 +1,938 @@
|
||||||
|
# modeReact.py
|
||||||
|
# React mode implementation for workflows
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
from modules.datamodels.datamodelChat import (
|
||||||
|
TaskStep, TaskContext, TaskResult, ActionItem, TaskStatus,
|
||||||
|
ActionResult
|
||||||
|
)
|
||||||
|
from modules.datamodels.datamodelChat import ChatWorkflow
|
||||||
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
|
||||||
|
from modules.workflows.processing.modes.modeBase import BaseMode
|
||||||
|
from modules.workflows.processing.shared.executionState import TaskExecutionState, shouldContinue
|
||||||
|
from modules.workflows.processing.shared.promptGenerationActionsReact import (
|
||||||
|
generateReactPlanSelectionPrompt,
|
||||||
|
generateReactParametersPrompt,
|
||||||
|
generateReactRefinementPrompt
|
||||||
|
)
|
||||||
|
from modules.workflows.processing.shared.placeholderFactory import extractReviewContent
|
||||||
|
from modules.workflows.processing.adaptive import IntentAnalyzer, ContentValidator, LearningEngine, ProgressTracker
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class ReactMode(BaseMode):
|
||||||
|
"""React mode implementation - iterative plan-act-observe-refine loop"""
|
||||||
|
|
||||||
|
def __init__(self, services, workflow):
    """Set up React mode and its adaptive helper components."""
    super().__init__(services, workflow)

    # Adaptive components driving the plan-act-observe-refine loop.
    self.intentAnalyzer = IntentAnalyzer()
    self.contentValidator = ContentValidator()
    self.learningEngine = LearningEngine()
    self.progressTracker = ProgressTracker()

    # Populated per task by executeTask(); None until the first analysis.
    self.currentIntent = None
    # NOTE: the former placeholder service is gone; prompts are built directly.
|
async def generateActionItems(self, taskStep: TaskStep, workflow: ChatWorkflow,
                              previousResults: List = None, enhancedContext: TaskContext = None) -> List[ActionItem]:
    """Batch action generation is intentionally a no-op in React mode.

    Actions are planned one at a time inside the executeTask loop, so this
    hook always yields an empty list.
    """
    return []
|
async def executeTask(self, taskStep: TaskStep, workflow: ChatWorkflow, context: TaskContext,
                      taskIndex: int = None, totalTasks: int = None) -> TaskResult:
    """Execute a task via the React loop: plan -> act -> observe -> refine.

    Each iteration selects one action, executes it, builds an observation,
    optionally validates/learns from the result, and asks the refiner
    whether to continue. The loop is bounded by workflow.maxSteps and can
    be cut short by either the progress tracker or the review decision.
    Always reports the task as COMPLETED/successful at the end.
    """
    logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===")

    # NEW: Analyze user intent with both original prompt and task objective
    # Get original user prompt from services (clean and reliable)
    original_prompt = self.services.currentUserPrompt if self.services and hasattr(self.services, 'currentUserPrompt') else taskStep.objective
    combined_context = f"Original request: {original_prompt}\n\nCurrent task: {taskStep.objective}"

    self.currentIntent = self.intentAnalyzer.analyzeUserIntent(combined_context, context)
    logger.info(f"Intent analysis (original + task): {self.currentIntent}")

    # NEW: Reset progress tracking for new task
    self.progressTracker.reset()

    # Update workflow object before executing task
    if taskIndex is not None:
        self._updateWorkflowBeforeExecutingTask(taskIndex)

    # Update workflow context for this task
    if taskIndex is not None:
        self.services.workflow.setWorkflowContext(task_number=taskIndex)

    # Create task start message
    await self.messageCreator.createTaskStartMessage(taskStep, workflow, taskIndex, totalTasks)

    state = TaskExecutionState(taskStep)
    # React mode uses max_steps instead of max_retries (minimum of 1 step)
    state.max_steps = max(1, int(getattr(workflow, 'maxSteps', 5)))
    logger.info(f"Using React mode execution with max_steps: {state.max_steps}")

    step = 1
    lastReviewDict = None

    while step <= state.max_steps:
        # Raises if the user stopped the workflow - aborts the whole task.
        self._checkWorkflowStopped(workflow)

        # Update workflow[currentAction] for UI
        self._updateWorkflowBeforeExecutingAction(step)
        self.services.workflow.setWorkflowContext(action_number=step)

        try:
            t0 = time.time()
            selection = await self._planSelect(context)
            logger.info(f"React step {step}: Selected action: {selection}")

            # Create user-friendly message BEFORE action execution
            # Action intention message is now handled by the standard message creator in _actExecute

            result = await self._actExecute(context, selection, taskStep, workflow, step)
            observation = self._observeBuild(result)
            # Attach deterministic label for clarity
            observation['resultLabel'] = result.resultLabel

            # NEW: Add content validation (only when an intent exists and the
            # action produced documents; validation feeds learning + progress)
            if self.currentIntent and result.documents:
                validationResult = self.contentValidator.validateContent(result.documents, self.currentIntent)
                observation['contentValidation'] = validationResult
                logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {validationResult['qualityScore']:.2f})")

                # NEW: Learn from feedback
                feedback = self._collectFeedback(result, validationResult, self.currentIntent)
                self.learningEngine.learnFromFeedback(feedback, context, self.currentIntent)

                # NEW: Update progress
                self.progressTracker.updateProgress(result, validationResult, self.currentIntent)

            decision = await self._refineDecide(context, observation)

            # Store refinement decision in context for next iteration
            if not hasattr(context, 'previous_review_result') or context.previous_review_result is None:
                context.previous_review_result = []
            if decision:  # Only append if decision is not None
                context.previous_review_result.append(decision)

            # Update context with learnings from this step
            if decision and decision.get('reason'):
                if not hasattr(context, 'improvements'):
                    context.improvements = []
                context.improvements.append(f"Step {step}: {decision.get('reason')}")

            # Telemetry: simple duration per step
            duration = time.time() - t0
            self.services.interfaceDbChat.createLog({
                "workflowId": workflow.id,
                "message": f"react_step_duration_sec={duration:.3f}",
                "type": "info"
            })
            lastReviewDict = decision

            # Create user-friendly message AFTER action execution
            # Action completion message is now handled by the standard message creator in _actExecute

        except Exception as e:
            # Any step failure ends the loop; the task is still reported as completed below.
            logger.error(f"React step {step} error: {e}")
            break

        # NEW: Use adaptive stopping logic (both trackers must agree to continue)
        progressState = self.progressTracker.getCurrentProgress()
        continueByProgress = self.progressTracker.shouldContinue(progressState, observation.get('contentValidation', {}))
        continueByReview = shouldContinue(observation, lastReviewDict, step, state.max_steps)

        if not continueByProgress or not continueByReview:
            logger.info(f"Stopping at step {step}: progress={continueByProgress}, review={continueByReview}")
            break
        step += 1

    # Summarize task result for react mode
    status = TaskStatus.COMPLETED
    success = True
    feedback = lastReviewDict.get('reason') if lastReviewDict and isinstance(lastReviewDict, dict) else 'Completed'
    # NOTE(review): this branch is a no-op (success is already True) - looks
    # like a leftover from an earlier success/failure distinction.
    if lastReviewDict and isinstance(lastReviewDict, dict) and lastReviewDict.get('decision') == 'stop':
        success = True

    # Create task completion message (ad-hoc ReviewResult stand-in object)
    await self.messageCreator.createTaskCompletionMessage(taskStep, workflow, taskIndex, totalTasks,
                                                          type('ReviewResult', (), {'reason': feedback, 'met_criteria': [], 'quality_score': 8})())

    return TaskResult(
        taskId=taskStep.id,
        status=status,
        success=success,
        feedback=feedback,
        error=None if success else feedback
    )
|
async def _planSelect(self, context: TaskContext) -> Dict[str, Any]:
    """Plan stage: ask the AI to select exactly one action.

    Returns the parsed selection dict whose 'action' value is a compound
    "method.action" string (enforced below). Stage 1 must never carry
    'parameters' - any that slip through are stripped here.

    Raises ValueError when the AI response contains no JSON object or the
    selection lacks a string 'action' field.
    """
    bundle = generateReactPlanSelectionPrompt(self.services, context)
    promptTemplate = bundle.prompt
    placeholders = bundle.placeholders

    self._writeTraceLog("React Plan Selection Prompt", promptTemplate)
    self._writeTraceLog("React Plan Selection Placeholders", placeholders)

    # Centralized AI call for plan selection (use plan generation quality)
    options = AiCallOptions(
        operationType=OperationType.GENERATE_PLAN,
        priority=Priority.QUALITY,
        compressPrompt=False,
        compressContext=False,
        processingMode=ProcessingMode.DETAILED,
        maxCost=0.10,
        maxProcessingTime=30
    )

    response = await self.services.ai.callAi(
        prompt=promptTemplate,
        placeholders=placeholders,
        options=options
    )
    self._writeTraceLog("React Plan Selection Response", response)

    # Extract the outermost JSON object from the (possibly noisy) response.
    jsonStart = response.find('{') if response else -1
    jsonEnd = response.rfind('}') + 1 if response else 0
    if jsonStart == -1 or jsonEnd == 0:
        raise ValueError("No JSON in selection response")
    selection = json.loads(response[jsonStart:jsonEnd])
    if 'action' not in selection or not isinstance(selection['action'], str):
        raise ValueError("Selection missing 'action' as string")
    # Enforce spec: Stage 1 must NOT include 'parameters'
    if 'parameters' in selection:
        # Remove to avoid accidental carryover
        try:
            del selection['parameters']
        except Exception:
            selection['parameters'] = None
    return selection
|
async def _actExecute(self, context: TaskContext, selection: Dict[str, Any], taskStep: TaskStep,
                      workflow: ChatWorkflow, stepIndex: int) -> ActionResult:
    """Act stage: obtain parameters for the selected action and execute it.

    Splits the Stage 1 compound action name ("method.action"), runs a
    Stage 2 AI call to fill in parameters, merges Stage 1 resource
    selections (documents/connection) when the target action accepts them,
    builds a synthetic ActionItem, and executes it via the shared
    single-action flow.

    Raises ValueError when the action name is not in "method.action" form.
    """
    compoundActionName = selection.get('action', '')

    # Parse compound action name (e.g., "ai.webResearch" -> method="ai", action="webResearch")
    if '.' not in compoundActionName:
        raise ValueError(f"Invalid compound action name: {compoundActionName}. Expected format: method.action")

    methodName, actionName = compoundActionName.split('.', 1)

    # Always request parameters in Stage 2 (spec: Stage 1 must not provide them)
    logger.info("Requesting parameters in Stage 2 based on Stage 1 outputs")

    # Create a permissive Stage 2 context to avoid TaskContext attribute restrictions
    from types import SimpleNamespace
    stage2Context = SimpleNamespace()

    # Copy essential fields from original context for fallbacks (snake_case for placeholderFactory compatibility)
    stage2Context.task_step = getattr(context, 'task_step', None)
    stage2Context.workflow_id = getattr(context, 'workflow_id', None)

    # Set Stage 1 data directly on the permissive context (snake_case for promptGenerationActionsReact compatibility)
    if isinstance(selection, dict):
        stage2Context.action_objective = selection.get('actionObjective', '')
        stage2Context.parameters_context = selection.get('parametersContext', '')
        stage2Context.learnings = selection.get('learnings', [])
    else:
        stage2Context.action_objective = ''
        stage2Context.parameters_context = ''
        stage2Context.learnings = []

    # Build and send the Stage 2 parameters prompt (always)
    bundle = generateReactParametersPrompt(self.services, stage2Context, compoundActionName)
    promptTemplate = bundle.prompt
    placeholders = bundle.placeholders

    self._writeTraceLog("React Parameters Prompt", promptTemplate)
    self._writeTraceLog("React Parameters Placeholders", placeholders)

    # Centralized AI call for parameter suggestion (balanced analysis)
    options = AiCallOptions(
        operationType=OperationType.ANALYSE_CONTENT,
        priority=Priority.BALANCED,
        compressPrompt=True,
        compressContext=False,
        processingMode=ProcessingMode.ADVANCED,
        maxCost=0.05,
        maxProcessingTime=30,
        temperature=0.3,  # Slightly higher temperature for better instruction following
        # maxTokens not set - use model's maximum for big JSON responses
        resultFormat="json"  # Explicitly request JSON format
    )

    paramsResp = await self.services.ai.callAi(
        prompt=promptTemplate,
        placeholders=placeholders,
        options=options
    )
    # Parse JSON response (outermost braces only; empty object on no response)
    js = paramsResp[paramsResp.find('{'):paramsResp.rfind('}')+1] if paramsResp else '{}'
    try:
        paramObj = json.loads(js)
        parameters = paramObj.get('parameters', {}) if isinstance(paramObj, dict) else {}
    except Exception as e:
        # NOTE(review): when this fails, paramObj stays unbound; the trace
        # block below then raises NameError, which its own try/except hides.
        logger.error(f"Failed to parse AI parameters response as JSON: {str(e)}")
        logger.error(f"Response was: {paramsResp}")
        parameters = {}

    # Merge Stage 1 resource selections into Stage 2 parameters (only if action expects them)
    try:
        requiredDocs = selection.get('requiredInputDocuments')
        if requiredDocs:
            # Ensure list
            if isinstance(requiredDocs, list):
                # Only attach if target action defines 'documentList'
                methodName, actionName = compoundActionName.split('.', 1)
                from modules.workflows.processing.shared.methodDiscovery import getActionParameterList, methods as _methods
                expectedParams = getActionParameterList(methodName, actionName, _methods)
                if 'documentList' in expectedParams:
                    parameters['documentList'] = requiredDocs
        requiredConn = selection.get('requiredConnection')
        if requiredConn:
            # Only attach if target action defines 'connectionReference'
            methodName, actionName = compoundActionName.split('.', 1)
            from modules.workflows.processing.shared.methodDiscovery import getActionParameterList, methods as _methods
            expectedParams = getActionParameterList(methodName, actionName, _methods)
            if 'connectionReference' in expectedParams:
                parameters['connectionReference'] = requiredConn
    except Exception:
        pass

    # Apply minimal defaults in-code (language)
    if 'language' not in parameters and hasattr(self.services, 'user') and getattr(self.services.user, 'language', None):
        parameters['language'] = self.services.user.language

    # Write merged parameters to trace BEFORE continuing
    try:
        mergedParamObj = {
            "schema": (paramObj.get('schema') if isinstance(paramObj, dict) else 'parameters_v1'),
            "parameters": parameters
        }
        self._writeTraceLog("React Parameters Response", mergedParamObj)
    except Exception:
        pass

    # Build a synthetic ActionItem for execution routing and labels
    currentRound = getattr(self.workflow, 'currentRound', 0)
    currentTask = getattr(self.workflow, 'currentTask', 0)
    resultLabel = f"round{currentRound}_task{currentTask}_action{stepIndex}_results"

    taskAction = self._createActionItem({
        "execMethod": methodName,
        "execAction": actionName,
        "execParameters": parameters,
        "execResultLabel": resultLabel,
        "status": TaskStatus.PENDING
    })

    # Execute using existing single action flow (message creation is handled internally)
    result = await self.actionExecutor.executeSingleAction(taskAction, workflow, taskStep, currentTask, stepIndex, 1)

    return result
|
def _observeBuild(self, actionResult: ActionResult) -> Dict[str, Any]:
    """Observe stage: build a compact observation dict from an ActionResult.

    Collects per-document metadata (never the content itself - only a
    character-count indicator), surfaces any document comments as notes,
    and, when an intent has been analyzed, attaches a content analysis.
    """
    previews = []
    notes = []
    if actionResult and actionResult.documents:
        # Process all documents and show full metadata
        for doc in actionResult.documents:
            # Extract all available metadata without content
            docMetadata = {
                "name": getattr(doc, 'fileName', None) or getattr(doc, 'documentName', 'Unknown'),
                "mimeType": getattr(doc, 'mimeType', 'Unknown'),
                "size": getattr(doc, 'size', 'Unknown'),
                "created": getattr(doc, 'created', 'Unknown'),
                "modified": getattr(doc, 'modified', 'Unknown'),
                "typeGroup": getattr(doc, 'typeGroup', 'Unknown'),
                "documentId": getattr(doc, 'documentId', 'Unknown'),
                "reference": getattr(doc, 'reference', 'Unknown')
            }
            # Remove 'Unknown' values to keep it clean
            docMetadata = {k: v for k, v in docMetadata.items() if v != 'Unknown'}

            # Single guard for both content-size and comment extraction
            # (was previously checked twice with a redundant getattr).
            data = getattr(doc, 'documentData', None)
            if data:
                # Add content size indicator instead of actual content
                if isinstance(data, dict) and 'content' in data:
                    contentLength = len(str(data['content']))
                else:
                    contentLength = len(str(data))
                docMetadata['contentSize'] = f"{contentLength} characters"

                # Extract comment if available
                if isinstance(data, dict):
                    comment = data.get("comment", "")
                    if comment:
                        notes.append(f"Document '{docMetadata.get('name', 'Unknown')}': {comment}")

            previews.append(docMetadata)

    observation = {
        "success": bool(actionResult.success),
        "resultLabel": actionResult.resultLabel or "",
        "documentsCount": len(actionResult.documents) if actionResult.documents else 0,
        "previews": previews,
        "notes": notes
    }

    # NEW: Add content analysis if intent is available
    if self.currentIntent and actionResult.documents:
        contentAnalysis = self._analyzeContent(actionResult.documents)
        observation['contentAnalysis'] = contentAnalysis

    return observation
|
def _analyzeContent(self, documents: List[Any]) -> Dict[str, Any]:
    """Inspect the first document's content and summarize it for learning.

    Returns a dict with the detected content type, a snippet capped at 200
    characters (ellipsis-marked when truncated), and whether the content
    matches the current user intent. Empty input or any failure yields a
    sentinel dict instead of raising.
    """
    try:
        if not documents:
            return {"contentType": "none", "contentSnippet": "", "intentMatch": False}

        # Only the first document is sampled for analysis.
        sample = documents[0]
        text = ""
        if hasattr(sample, 'documentData'):
            payload = sample.documentData
            if isinstance(payload, dict) and 'content' in payload:
                text = str(payload['content'])
            else:
                text = str(payload)

        # Classify before building the snippet (matches call ordering).
        detectedType = self._classifyContent(text)

        # Cap the snippet at 200 characters, marking truncation.
        snippet = text[:200] + "..." if len(text) > 200 else text

        matches = self._assessIntentMatch(text, self.currentIntent)

        return {
            "contentType": detectedType,
            "contentSnippet": snippet,
            "intentMatch": matches
        }

    except Exception as e:
        logger.error(f"Error analyzing content: {str(e)}")
        return {"contentType": "error", "contentSnippet": "", "intentMatch": False}
|
def _classifyContent(self, content: str) -> str:
|
||||||
|
"""Classifies the type of content"""
|
||||||
|
if not content:
|
||||||
|
return "empty"
|
||||||
|
|
||||||
|
# Check for code
|
||||||
|
codeIndicators = ['def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ']
|
||||||
|
if any(indicator in content.lower() for indicator in codeIndicators):
|
||||||
|
return "code"
|
||||||
|
|
||||||
|
# Check for numbers
|
||||||
|
if re.search(r'\b\d+\b', content):
|
||||||
|
return "numbers"
|
||||||
|
|
||||||
|
# Check for structured content
|
||||||
|
if any(indicator in content for indicator in ['\n', '\t', '|', '-', '*', '1.', '2.']):
|
||||||
|
return "structured"
|
||||||
|
|
||||||
|
# Default to text
|
||||||
|
return "text"
|
||||||
|
|
||||||
|
def _assessIntentMatch(self, content: str, intent: Dict[str, Any]) -> bool:
|
||||||
|
"""Assesses if content matches the user intent"""
|
||||||
|
if not intent:
|
||||||
|
return False
|
||||||
|
|
||||||
|
dataType = intent.get("dataType", "unknown")
|
||||||
|
|
||||||
|
if dataType == "numbers":
|
||||||
|
# Check if content contains actual numbers, not code
|
||||||
|
hasNumbers = bool(re.search(r'\b\d+\b', content))
|
||||||
|
isNotCode = not any(keyword in content.lower() for keyword in ['def ', 'function', 'import '])
|
||||||
|
return hasNumbers and isNotCode
|
||||||
|
|
||||||
|
elif dataType == "text":
|
||||||
|
# Check if content is readable text
|
||||||
|
words = re.findall(r'\b\w+\b', content)
|
||||||
|
return len(words) > 5
|
||||||
|
|
||||||
|
elif dataType == "documents":
|
||||||
|
# Check if content is suitable for document creation
|
||||||
|
hasStructure = any(indicator in content for indicator in ['\n', '\t', '|', '-', '*'])
|
||||||
|
hasContent = len(content.strip()) > 50
|
||||||
|
return hasStructure and hasContent
|
||||||
|
|
||||||
|
return True # Default to match for unknown types
|
||||||
|
|
||||||
|
def _collectFeedback(self, result: Any, validation: Dict[str, Any], intent: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Collects comprehensive feedback from action execution"""
|
||||||
|
try:
|
||||||
|
# Extract content summary
|
||||||
|
contentDelivered = ""
|
||||||
|
if result.documents:
|
||||||
|
firstDoc = result.documents[0]
|
||||||
|
if hasattr(firstDoc, 'documentData'):
|
||||||
|
data = firstDoc.documentData
|
||||||
|
if isinstance(data, dict) and 'content' in data:
|
||||||
|
content = str(data['content'])
|
||||||
|
contentDelivered = content[:100] + "..." if len(content) > 100 else content
|
||||||
|
else:
|
||||||
|
contentDelivered = str(data)[:100] + "..." if len(str(data)) > 100 else str(data)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"actionAttempted": result.resultLabel or "unknown",
|
||||||
|
"parametersUsed": {}, # Would be extracted from action context
|
||||||
|
"contentDelivered": contentDelivered,
|
||||||
|
"intentMatchScore": validation.get('qualityScore', 0),
|
||||||
|
"qualityScore": validation.get('qualityScore', 0),
|
||||||
|
"issuesFound": validation.get('improvementSuggestions', []),
|
||||||
|
"learningOpportunities": validation.get('improvementSuggestions', []),
|
||||||
|
"userSatisfaction": None, # Would be collected from user feedback
|
||||||
|
"timestamp": datetime.now(timezone.utc).timestamp()
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error collecting feedback: {str(e)}")
|
||||||
|
return {
|
||||||
|
"actionAttempted": "unknown",
|
||||||
|
"parametersUsed": {},
|
||||||
|
"contentDelivered": "",
|
||||||
|
"intentMatchScore": 0,
|
||||||
|
"qualityScore": 0,
|
||||||
|
"issuesFound": [],
|
||||||
|
"learningOpportunities": [],
|
||||||
|
"userSatisfaction": None,
|
||||||
|
"timestamp": datetime.now(timezone.utc).timestamp()
|
||||||
|
}
|
||||||
|
|
||||||
|
async def _refineDecide(self, context: TaskContext, observation: Dict[str, Any]) -> Dict[str, Any]:
    """Refine phase of the React loop: decide whether to continue or stop.

    Builds an enriched review text from the observation (content
    validation, content analysis, progress-tracker state), asks the AI
    for a refinement decision, and returns the parsed JSON decision.
    Falls back to ``{"decision": "continue", "reason": "default"}``
    when the AI response cannot be parsed as JSON.
    """
    # Create proper ReviewContext for extractReviewContent
    from modules.datamodels.datamodelChat import ReviewContext
    reviewContext = ReviewContext(
        task_step=context.task_step,
        task_actions=[],
        action_results=[],  # React mode doesn't have action results in this context
        step_result={'observation': observation},
        workflow_id=context.workflow_id,
        previous_results=[]
    )

    baseReviewContent = extractReviewContent(reviewContext)
    placeholders = {"REVIEW_CONTENT": baseReviewContent}

    # NEW: Add content validation to review content
    enhancedReviewContent = placeholders.get("REVIEW_CONTENT", "")
    if 'contentValidation' in observation:
        validation = observation['contentValidation']
        enhancedReviewContent += f"\n\nCONTENT VALIDATION:\n"
        enhancedReviewContent += f"Overall Success: {validation['overallSuccess']}\n"
        enhancedReviewContent += f"Quality Score: {validation['qualityScore']:.2f}\n"
        if validation['improvementSuggestions']:
            enhancedReviewContent += f"Improvement Suggestions: {', '.join(validation['improvementSuggestions'])}\n"

    # NEW: Add content analysis to review content
    if 'contentAnalysis' in observation:
        analysis = observation['contentAnalysis']
        enhancedReviewContent += f"\nCONTENT ANALYSIS:\n"
        enhancedReviewContent += f"Content Type: {analysis['contentType']}\n"
        enhancedReviewContent += f"Intent Match: {analysis['intentMatch']}\n"
        if analysis['contentSnippet']:
            enhancedReviewContent += f"Content Preview: {analysis['contentSnippet']}\n"

    # NEW: Add progress state to review content
    progressState = self.progressTracker.getCurrentProgress()
    enhancedReviewContent += f"\nPROGRESS STATE:\n"
    enhancedReviewContent += f"Completed Objectives: {len(progressState['completedObjectives'])}\n"
    enhancedReviewContent += f"Partial Achievements: {len(progressState['partialAchievements'])}\n"
    enhancedReviewContent += f"Failed Attempts: {len(progressState['failedAttempts'])}\n"
    enhancedReviewContent += f"Current Phase: {progressState['currentPhase']}\n"
    if progressState['nextActionsSuggested']:
        enhancedReviewContent += f"Next Action Suggestions: {', '.join(progressState['nextActionsSuggested'])}\n"

    # Update placeholders with enhanced review content
    # NOTE(review): this assignment has no lasting effect — `placeholders`
    # is reassigned to `bundle.placeholders` a few lines below. The
    # enriched text only reaches the prompt via the
    # generateReactRefinementPrompt(...) argument.
    placeholders["REVIEW_CONTENT"] = enhancedReviewContent

    bundle = generateReactRefinementPrompt(self.services, context, enhancedReviewContent)
    promptTemplate = bundle.prompt
    placeholders = bundle.placeholders

    self._writeTraceLog("React Refinement Prompt", promptTemplate)
    self._writeTraceLog("React Refinement Placeholders", placeholders)

    # Centralized AI call for refinement decision (balanced analysis)
    options = AiCallOptions(
        operationType=OperationType.ANALYSE_CONTENT,
        priority=Priority.BALANCED,
        compressPrompt=True,
        compressContext=False,
        processingMode=ProcessingMode.ADVANCED,
        maxCost=0.05,
        maxProcessingTime=30
    )

    resp = await self.services.ai.callAi(
        prompt=promptTemplate,
        placeholders=placeholders,
        options=options
    )
    self._writeTraceLog("React Refinement Response", resp)
    # Extract the outermost {...} span from the raw model output before
    # parsing; an empty/None response degrades to '{}'.
    js = resp[resp.find('{'):resp.rfind('}')+1] if resp else '{}'
    try:
        decision = json.loads(js)
    except Exception:
        decision = {"decision": "continue", "reason": "default"}
    return decision
|
||||||
|
|
||||||
|
async def _createReactActionMessage(self, workflow: ChatWorkflow, selection: Dict[str, Any],
                                    step: int, maxSteps: int, taskIndex: int, messageType: str,
                                    result: ActionResult = None, observation: Dict[str, Any] = None):
    """Create user-friendly messages for React workflow actions.

    Posts a chat message either before an action runs
    (``messageType == "before"``) or after it finishes
    (``messageType == "after"``); any other messageType is a no-op.
    The message text is AI-generated in the user's language, the
    record is persisted via interfaceDbChat, and on success appended
    to ``workflow.messages``. Errors are logged, never raised.
    """
    try:
        action = selection.get('action', {})
        method = action.get('method', '')
        actionName = action.get('name', '')

        # Get user language; default to English when no user context exists.
        userLanguage = self.services.user.language if self.services and self.services.user else 'en'

        if messageType == "before":
            # Message BEFORE action execution
            userMessage = await self._generateActionIntentionMessage(method, actionName, userLanguage)
            messageContent = f"🔄 **Step {step}/{maxSteps}**\n\n{userMessage}"
            status = "step"
            actionProgress = "pending"
            documentsLabel = f"action_{step}_intention"

        elif messageType == "after":
            # Message AFTER action execution
            userMessage = await self._generateActionResultMessage(method, actionName, result, observation, userLanguage)
            successIcon = "✅" if result and result.success else "❌"
            messageContent = f"{successIcon} **Step {step}/{maxSteps} Complete**\n\n{userMessage}"
            status = "step"
            actionProgress = "success" if result and result.success else "fail"
            documentsLabel = observation.get('resultLabel') if observation else f"action_{step}_result"
        else:
            # Unknown message type: silently skip.
            return

        # Create workflow message record for persistence.
        messageData = {
            "workflowId": workflow.id,
            "role": "assistant",
            "message": messageContent,
            "status": status,
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": self.services.utils.getUtcTimestamp(),
            "documentsLabel": documentsLabel,
            "documents": [],
            "roundNumber": workflow.currentRound,
            "taskNumber": taskIndex,
            "actionNumber": step,
            "actionProgress": actionProgress
        }

        message = self.services.interfaceDbChat.createMessage(messageData)
        if message:
            workflow.messages.append(message)

    except Exception as e:
        logger.error(f"Error creating React action message: {str(e)}")
|
||||||
|
|
||||||
|
async def _generateActionIntentionMessage(self, method: str, actionName: str, userLanguage: str):
    """Generate a user-friendly message explaining what an action will do.

    Makes a cheap, speed-prioritized AI call to phrase the upcoming
    ``{method}.{actionName}`` action for the user in *userLanguage*.
    Falls back to a generic English "Executing ..." string when the AI
    returns nothing or raises.
    """
    try:
        # Create a simple AI prompt to generate user-friendly action descriptions
        prompt = f"""Generate a brief, user-friendly message explaining what the {method}.{actionName} action will do.

User language: {userLanguage}


Return only the user-friendly message, no technical details."""

        # Call AI to generate user-friendly message (low cost, short timeout).
        response = await self.services.ai.callAi(
            prompt=prompt,
            options=AiCallOptions(
                operationType=OperationType.GENERATE_CONTENT,
                priority=Priority.SPEED,
                compressPrompt=True,
                maxCost=0.01,
                maxProcessingTime=5
            )
        )

        return response.strip() if response else f"Executing {method}.{actionName} action..."

    except Exception as e:
        logger.error(f"Error generating action intention message: {str(e)}")
        return f"Executing {method}.{actionName} action..."
|
||||||
|
|
||||||
|
async def _generateActionResultMessage(self, method: str, actionName: str, result: ActionResult,
                                       observation: Dict[str, Any], userLanguage: str):
    """Generate a user-friendly message explaining an action's results.

    Summarizes how many documents the action produced (from *result*
    or, failing that, the observation's ``documentsCount``), then asks
    the AI to phrase the outcome in *userLanguage*. Falls back to a
    generic English "completed" string on empty response or error.
    """
    try:
        # Build result context describing produced documents, if any.
        resultContext = ""
        if result and result.documents:
            docCount = len(result.documents)
            resultContext = f"Generated {docCount} document(s)"
        elif observation and observation.get('documentsCount', 0) > 0:
            docCount = observation.get('documentsCount', 0)
            resultContext = f"Generated {docCount} document(s)"

        # Create AI prompt for result message
        prompt = f"""Generate a brief, user-friendly message explaining the result of the {method}.{actionName} action.

User language: {userLanguage}
Success: {result.success if result else 'Unknown'}
Result context: {resultContext}

Return only the user-friendly message, no technical details."""

        # Call AI to generate user-friendly result message (low cost, short timeout).
        response = await self.services.ai.callAi(
            prompt=prompt,
            options=AiCallOptions(
                operationType=OperationType.GENERATE_CONTENT,
                priority=Priority.SPEED,
                compressPrompt=True,
                maxCost=0.01,
                maxProcessingTime=5
            )
        )

        return response.strip() if response else f"{method}.{actionName} action completed"

    except Exception as e:
        logger.error(f"Error generating action result message: {str(e)}")
        return f"{method}.{actionName} action completed"
|
||||||
|
|
||||||
|
def _createActionItem(self, actionData: Dict[str, Any]) -> ActionItem | None:
    """Create and persist a new task action for React mode.

    Fills in defaults (generated ``id``, ``PENDING`` status, empty
    ``execParameters``), persists the simple fields via the chat DB
    interface, and returns the stored record wrapped as an
    ``ActionItem`` model.

    Returns ``None`` when a required field (``execMethod`` or
    ``execAction``) is missing or persistence fails — the annotation
    now reflects that.

    NOTE(review): an identical definition of this method appears again
    later in this class and, being defined later, shadows this one;
    the duplicate should be removed.
    """
    try:
        import uuid

        # Ensure ID is present
        if "id" not in actionData or not actionData["id"]:
            actionData["id"] = f"action_{uuid.uuid4()}"

        # Default status for new actions
        if "status" not in actionData:
            actionData["status"] = TaskStatus.PENDING

        # Hard requirements — bail out early when absent
        # (error strings unchanged from the original per-field checks).
        for requiredField in ("execMethod", "execAction"):
            if requiredField not in actionData:
                logger.error(f"{requiredField} is required for task action")
                return None

        if "execParameters" not in actionData:
            actionData["execParameters"] = {}

        # Use generic field separation based on ActionItem model.
        # NOTE(review): objectFields is currently discarded — nested
        # object fields are never persisted here; confirm this is intended.
        simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)

        # Create action in database
        createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)

        # Convert the stored record back to an ActionItem model.
        return ActionItem(
            id=createdAction["id"],
            execMethod=createdAction["execMethod"],
            execAction=createdAction["execAction"],
            execParameters=createdAction.get("execParameters", {}),
            execResultLabel=createdAction.get("execResultLabel"),
            expectedDocumentFormats=createdAction.get("expectedDocumentFormats"),
            status=createdAction.get("status", TaskStatus.PENDING),
            error=createdAction.get("error"),
            retryCount=createdAction.get("retryCount", 0),
            retryMax=createdAction.get("retryMax", 3),
            processingTime=createdAction.get("processingTime"),
            timestamp=float(createdAction.get("timestamp", self.services.utils.getUtcTimestamp())),
            result=createdAction.get("result"),
            resultDocuments=createdAction.get("resultDocuments", []),
            userMessage=createdAction.get("userMessage")
        )

    except Exception as e:
        logger.error(f"Error creating task action: {str(e)}")
        return None
|
||||||
|
|
||||||
|
def _updateWorkflowBeforeExecutingTask(self, taskNumber: int):
    """Reset per-task counters on the workflow and persist them.

    Sets ``currentTask`` to *taskNumber* and zeroes ``currentAction``
    and ``totalActions``, both on the in-memory workflow object and in
    the database record. Errors are logged, never raised.
    """
    try:
        updateData = {
            "currentTask": taskNumber,
            "currentAction": 0,
            "totalActions": 0,
        }

        # Mirror the payload onto the in-memory workflow object first.
        for fieldName, fieldValue in updateData.items():
            setattr(self.workflow, fieldName, fieldValue)

        # Then persist the same payload to the database.
        self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
        logger.info(f"Updated workflow {self.workflow.id} before executing task {taskNumber}: {updateData}")

    except Exception as e:
        logger.error(f"Error updating workflow before executing task: {str(e)}")
|
||||||
|
|
||||||
|
def _updateWorkflowBeforeExecutingAction(self, actionNumber: int):
    """Record the action about to run on the workflow and persist it.

    Updates ``currentAction`` on the in-memory workflow object and in
    the database record. Errors are logged, never raised.
    """
    try:
        updateData = {"currentAction": actionNumber}

        # Keep the in-memory object in sync before hitting the database.
        self.workflow.currentAction = actionNumber

        self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
        logger.info(f"Updated workflow {self.workflow.id} before executing action {actionNumber}: {updateData}")

    except Exception as e:
        logger.error(f"Error updating workflow before executing action: {str(e)}")
|
||||||
|
|
||||||
|
def _createActionItem(self, actionData: Dict[str, Any]) -> ActionItem:
    """Creates a new task action for React mode"""
    # NOTE(review): this method is an exact, byte-for-byte duplicate of
    # the _createActionItem definition earlier in this class. Being
    # defined later, THIS copy is the one Python keeps; the duplication
    # should be resolved by deleting one of the two.
    # NOTE(review): despite the -> ActionItem annotation, None is
    # returned on validation failure and on any exception.
    try:
        import uuid

        # Ensure ID is present
        if "id" not in actionData or not actionData["id"]:
            actionData["id"] = f"action_{uuid.uuid4()}"

        # Ensure required fields
        if "status" not in actionData:
            actionData["status"] = TaskStatus.PENDING

        if "execMethod" not in actionData:
            logger.error("execMethod is required for task action")
            return None

        if "execAction" not in actionData:
            logger.error("execAction is required for task action")
            return None

        if "execParameters" not in actionData:
            actionData["execParameters"] = {}

        # Use generic field separation based on ActionItem model
        # NOTE(review): objectFields is computed but never used here —
        # nested object fields are not persisted; confirm intended.
        simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)

        # Create action in database
        createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)

        # Convert to ActionItem model
        return ActionItem(
            id=createdAction["id"],
            execMethod=createdAction["execMethod"],
            execAction=createdAction["execAction"],
            execParameters=createdAction.get("execParameters", {}),
            execResultLabel=createdAction.get("execResultLabel"),
            expectedDocumentFormats=createdAction.get("expectedDocumentFormats"),
            status=createdAction.get("status", TaskStatus.PENDING),
            error=createdAction.get("error"),
            retryCount=createdAction.get("retryCount", 0),
            retryMax=createdAction.get("retryMax", 3),
            processingTime=createdAction.get("processingTime"),
            timestamp=float(createdAction.get("timestamp", self.services.utils.getUtcTimestamp())),
            result=createdAction.get("result"),
            resultDocuments=createdAction.get("resultDocuments", []),
            userMessage=createdAction.get("userMessage")
        )

    except Exception as e:
        logger.error(f"Error creating task action: {str(e)}")
        return None
|
||||||
|
|
||||||
|
def _writeTraceLog(self, contextText: str, data: Any) -> None:
    """Write trace data to configured trace file if in debug mode with improved JSON formatting.

    Appends a timestamped, delimited entry to ``log_trace.log`` in the
    configured log directory. ``data`` is pretty-printed as JSON when
    possible (dict/list directly; strings and other objects via a
    best-effort json.loads round-trip), otherwise dumped as text.
    All failures are swallowed silently so tracing can never break the
    workflow.
    """
    try:
        import os
        import json
        # NOTE(review): `UTC` was added to the datetime module in
        # Python 3.11 — this method requires 3.11+.
        from datetime import datetime, UTC

        # Only write if logger is in debug mode
        if logger.level > logging.DEBUG:
            return

        # Get log directory from configuration
        logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
        if not os.path.isabs(logDir):
            # If relative path, make it relative to the gateway directory
            # (four levels up from this module's file).
            gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
            logDir = os.path.join(gatewayDir, logDir)

        # Ensure log directory exists
        os.makedirs(logDir, exist_ok=True)

        # Create trace file path
        traceFile = os.path.join(logDir, "log_trace.log")

        # Format the trace entry with better structure
        # (millisecond precision: strftime %f gives microseconds, trimmed by 3).
        timestamp = datetime.fromtimestamp(self.services.utils.getUtcTimestamp(), UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]

        # Create a structured trace entry
        traceEntry = f"[{timestamp}] {contextText}\n"
        traceEntry += "=" * 80 + "\n"

        # Add data if provided with improved formatting
        if data is not None:
            try:
                if isinstance(data, (dict, list)):
                    # Format as pretty JSON with better settings
                    jsonStr = json.dumps(data, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                    traceEntry += f"JSON Data:\n{jsonStr}\n"
                elif isinstance(data, str):
                    # For string data, try to parse as JSON first, then fall back to plain text
                    try:
                        parsed = json.loads(data)
                        jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                        traceEntry += f"JSON Data (parsed from string):\n{jsonStr}\n"
                    except (json.JSONDecodeError, TypeError):
                        # Not valid JSON, show as plain text with proper line breaks
                        # (escaped "\n" sequences are expanded into real newlines).
                        formatted_data = data.replace('\\n', '\n')
                        traceEntry += f"Text Data:\n{formatted_data}\n"
                else:
                    # For other types, convert to string and try to parse as JSON
                    dataStr = str(data)
                    try:
                        parsed = json.loads(dataStr)
                        jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                        traceEntry += f"JSON Data (parsed from object):\n{jsonStr}\n"
                    except (json.JSONDecodeError, TypeError):
                        # Not valid JSON, show as plain text with proper line breaks
                        formatted_data = dataStr.replace('\\n', '\n')
                        traceEntry += f"Object Data:\n{formatted_data}\n"
            except Exception as e:
                # Fallback to simple string representation
                traceEntry += f"Data (fallback): {str(data)}\n"
        else:
            traceEntry += "No data provided\n"

        traceEntry += "=" * 80 + "\n\n"

        # Write to trace file
        with open(traceFile, "a", encoding="utf-8") as f:
            f.write(traceEntry)

    except Exception as e:
        # Don't log trace errors to avoid recursion
        pass
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,418 +0,0 @@
|
||||||
"""
|
|
||||||
Placeholder-based prompt factory for dynamic AI calls.
|
|
||||||
This module provides prompt templates with placeholders that can be filled dynamically.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
from typing import Dict, Any
|
|
||||||
from modules.workflows.processing.promptFactory import (
|
|
||||||
_getAvailableDocuments,
|
|
||||||
_getPreviousRoundContext,
|
|
||||||
getMethodsList,
|
|
||||||
getEnhancedDocumentContext,
|
|
||||||
_getConnectionReferenceList,
|
|
||||||
methods
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def createTaskPlanningPromptTemplate() -> str:
|
|
||||||
"""Create task planning prompt template with placeholders."""
|
|
||||||
return """You are a task planning AI that analyzes user requests and creates structured, self-contained task plans with user-friendly feedback messages.
|
|
||||||
|
|
||||||
USER REQUEST: {{KEY:USER_PROMPT}}
|
|
||||||
|
|
||||||
AVAILABLE DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS}}
|
|
||||||
|
|
||||||
PREVIOUS WORKFLOW ROUNDS CONTEXT:
|
|
||||||
{{KEY:WORKFLOW_HISTORY}}
|
|
||||||
|
|
||||||
INSTRUCTIONS:
|
|
||||||
1. Analyze the user request, available documents, and previous workflow rounds context
|
|
||||||
2. If the user request appears to be a follow-up (like "try again", "versuche es nochmals", "retry", etc.),
|
|
||||||
use the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what the user wants to retry or continue
|
|
||||||
3. Group related topics and sequential steps into single, comprehensive tasks
|
|
||||||
4. Focus on business outcomes, not technical operations
|
|
||||||
5. Make each task self-contained: clearly state what to do and what outputs are expected
|
|
||||||
6. Ensure proper handover between tasks (later actions will use your task outputs)
|
|
||||||
7. Detect the language of the user request and include it in languageUserDetected
|
|
||||||
8. Generate user-friendly messages for each task in the user's request language
|
|
||||||
9. Return a JSON object with the exact structure shown below
|
|
||||||
|
|
||||||
TASK GROUPING PRINCIPLES:
|
|
||||||
- COMBINE RELATED TOPICS: Group related subjects, sequential steps, or workflow-structured activities into single tasks
|
|
||||||
- SEQUENTIAL WORKFLOWS: If the user says "first do this, then that, then that" → create ONE task that handles the entire sequence
|
|
||||||
- SIMILAR CONTENT: If multiple items deal with the same subject matter → combine into ONE comprehensive task
|
|
||||||
- ONLY SPLIT WHEN DIFFERENT: Create separate tasks ONLY when the user explicitly wants different, independent things
|
|
||||||
|
|
||||||
EXAMPLES OF GOOD TASK GROUPING:
|
|
||||||
|
|
||||||
COMBINE INTO ONE TASK:
|
|
||||||
- "Analyze the documents, extract key insights, and create a summary report" → ONE task: "Analyze documents and create comprehensive summary report"
|
|
||||||
- "First check my emails, then respond to urgent ones, then organize my inbox" → ONE task: "Process and organize email inbox with priority responses"
|
|
||||||
- "Review the budget, analyze spending patterns, and suggest cost-cutting measures" → ONE task: "Comprehensive budget analysis with optimization recommendations"
|
|
||||||
- "Create a business strategy, develop marketing plan, and prepare presentation" → ONE task: "Develop complete business strategy with marketing plan and presentation"
|
|
||||||
|
|
||||||
SPLIT INTO MULTIPLE TASKS:
|
|
||||||
- "Create a business strategy for Q4" AND "Check my emails for messages from my assistant" → TWO separate tasks (different subjects)
|
|
||||||
- "Analyze customer feedback" AND "Prepare quarterly financial report" → TWO separate tasks (different business areas)
|
|
||||||
- "Review project timeline" AND "Update employee handbook" → TWO separate tasks (unrelated activities)
|
|
||||||
|
|
||||||
TASK PLANNING PRINCIPLES:
|
|
||||||
- Break down complex requests into logical, sequential steps
|
|
||||||
- Focus on business value and outcomes
|
|
||||||
- Keep tasks at a meaningful level of abstraction (not implementation details)
|
|
||||||
- Each task should produce results that can be used by subsequent tasks
|
|
||||||
- Ensure clear dependencies and handovers between tasks
|
|
||||||
- Provide clear, actionable user messages in the user's request language
|
|
||||||
- Group related activities to minimize task fragmentation
|
|
||||||
- Only create multiple tasks when dealing with truly different, independent objectives
|
|
||||||
- Make task objectives action-oriented and specific (include scope, data sources to consider, and output intent at high level)
|
|
||||||
- Write success_criteria as measurable acceptance criteria focusing on outputs (what artifacts or insights will exist and how they are validated)
|
|
||||||
|
|
||||||
FOLLOW-UP PROMPT HANDLING:
|
|
||||||
- If the user request is a follow-up (e.g., "try again", "versuche es nochmals", "retry", "continue", "proceed"),
|
|
||||||
analyze the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what failed or was incomplete
|
|
||||||
- Use the previous round's user requests and task outcomes to determine what the user wants to retry
|
|
||||||
- If previous rounds failed due to missing documents, and documents are now available,
|
|
||||||
create tasks that use the newly available documents to accomplish the original request
|
|
||||||
- Maintain the same business objective from previous rounds but adapt to current available resources
|
|
||||||
|
|
||||||
SPECIFIC SCENARIO HANDLING:
|
|
||||||
- If previous round failed with "documents missing" error and current round has documents available,
|
|
||||||
the user likely wants to retry the same operation with the newly provided documents
|
|
||||||
- Example: Previous round "speichere mir die 3 dokumente im sharepoint unter xxx" failed due to missing documents,
|
|
||||||
current round "versuche es nochmals" with documents should retry the SharePoint save operation
|
|
||||||
- Always check if the current request is a retry by looking for retry keywords and previous round context
|
|
||||||
|
|
||||||
REQUIRED JSON STRUCTURE:
|
|
||||||
{{
|
|
||||||
"overview": "Brief description of the overall plan",
|
|
||||||
"languageUserDetected": "en", // Language code detected from user request (en, de, fr, it, es, etc.)
|
|
||||||
"userMessage": "User-friendly message explaining the task plan in user's request language",
|
|
||||||
"tasks": [
|
|
||||||
{{
|
|
||||||
"id": "task_1",
|
|
||||||
"objective": "Clear business objective this task accomplishes (combining related activities)",
|
|
||||||
"dependencies": ["task_0"], // IDs of tasks that must complete first
|
|
||||||
"success_criteria": ["criteria1", "criteria2"],
|
|
||||||
"estimated_complexity": "low|medium|high",
|
|
||||||
"userMessage": "User-friendly message explaining what this task will accomplish in user's request language"
|
|
||||||
}}
|
|
||||||
]
|
|
||||||
}}
|
|
||||||
|
|
||||||
EXAMPLES OF GOOD TASK OBJECTIVES (COMBINING RELATED ACTIVITIES):
|
|
||||||
- "Analyze documents and extract key insights for business communication"
|
|
||||||
- "Create professional business communication incorporating analyzed information"
|
|
||||||
- "Execute business communication using specified channels and document outcomes"
|
|
||||||
- "Develop comprehensive business strategy with implementation roadmap and success metrics"
|
|
||||||
|
|
||||||
EXAMPLES OF WELL-FORMED SUCCESS CRITERIA (OUTPUT-FOCUSED):
|
|
||||||
- "Deliver a prioritized list of 10–20 candidates with justification"
|
|
||||||
- "Provide a structured JSON with fields: company, ticker, rationale, metrics"
|
|
||||||
- "Produce a presentation outline with 5 sections and bullet points per section"
|
|
||||||
- "Include data sources and date stamped references for traceability"
|
|
||||||
|
|
||||||
EXAMPLES OF GOOD SUCCESS CRITERIA:
|
|
||||||
- "Key insights extracted and ready for business use"
|
|
||||||
- "Professional communication created with clear business value"
|
|
||||||
- "Business communication successfully delivered and documented"
|
|
||||||
- "All outcomes properly documented and accessible"
|
|
||||||
|
|
||||||
EXAMPLES OF BAD TASK OBJECTIVES:
|
|
||||||
- "Read the PDF file" (too granular - should be "Analyze document content")
|
|
||||||
- "Convert data to CSV" (implementation detail - should be "Structure data for analysis")
|
|
||||||
- "Send email" (too specific - should be "Deliver business communication")
|
|
||||||
|
|
||||||
LANGUAGE DETECTION:
|
|
||||||
- Analyze the user request text to identify the language
|
|
||||||
- Use standard language codes: en (English), de (German), fr (French), it (Italian), es (Spanish), etc.
|
|
||||||
- If the language cannot be determined, use "en" as default
|
|
||||||
- Include the detected language in the languageUserDetected field
|
|
||||||
|
|
||||||
NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|
||||||
|
|
||||||
|
|
||||||
def createActionDefinitionPromptTemplate() -> str:
    """Create the action-definition prompt template with placeholders.

    The template instructs the model to plan concrete, executable actions for a
    task step and to answer with a strict JSON object containing an "actions"
    list (method, action, parameters, resultLabel, description, userMessage).

    Placeholders substituted later by the prompt builder:
        {{KEY:USER_PROMPT}}          - task objective
        {{KEY:AVAILABLE_DOCUMENTS}}  - documents usable by actions
        {{KEY:WORKFLOW_HISTORY}}     - previous round context
        {{KEY:AVAILABLE_METHODS}}    - method/action catalog
        {{KEY:USER_LANGUAGE}}        - language for user-facing messages

    Returns:
        str: the raw template text. Double braces ({{ }}) are literal braces
        surviving a later .format()-style pass; single-brace tokens such as
        {current_round} are filled in at that pass — TODO confirm against the
        prompt builder.
    """
    return """You are an action planning AI that generates specific, executable actions for task steps.

TASK OBJECTIVE: {{KEY:USER_PROMPT}}

AVAILABLE DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS}}

WORKFLOW HISTORY: {{KEY:WORKFLOW_HISTORY}}

AVAILABLE METHODS: {{KEY:AVAILABLE_METHODS}}

USER LANGUAGE: {{KEY:USER_LANGUAGE}}

INSTRUCTIONS:
- Generate actions to accomplish this task step using available documents, connections, and previous results
- Use docItem for single documents and docList for groups of documents as shown in AVAILABLE DOCUMENTS
- If there are no documents available, do not create document extraction actions. Select methods strictly based on the task objective; choose web actions when external information is required. Otherwise, generate a status/information report requesting needed inputs.
- Always pass documentList as a LIST of references (docItem and/or docList) - this list CANNOT be empty for document extraction actions
- For referencing documents from previous actions, use the format "round{current_round}_task{current_task}_action{action_number}_{descriptive_label}"
- Each action must be self-contained and executable with the provided parameters
- For document extraction, ensure prompts are specific and detailed
- Include validation steps in extraction prompts where relevant
- If this is a retry, learn from previous failures and improve the approach
- Address specific issues mentioned in previous review feedback
- When specifying expectedDocumentFormats, ensure AI prompts explicitly request pure data without markdown formatting
- Generate user-friendly messages for each action in the user's language

PARAMETER COMPLETENESS REQUIREMENTS:
- Every parameter must contain all information needed to execute without implicit context
- Use explicit, concrete values (units, languages, formats, limits, date ranges, IDs) when applicable
- For search-like parameters (if any method requires a query), derive the query from the task objective AND ALL success criteria dimensions. Include:
- Key entities and domain terms from the objective
- All distinct facets from success_criteria (e.g., valuation AND AI potential AND know-how needs)
- Geography/localization (e.g., Schweiz/Suisse/Switzerland; use multilingual synonyms when helpful)
- Time horizon or recency if relevant
- Boolean operators and synonyms to increase precision (use AND/OR, quotes, parentheses)
- Avoid single-topic or generic queries focused only on one facet (e.g., pure valuation metrics)
- When facets are truly distinct, create 1–3 focused actions with precise queries rather than one vague catch-all
- Document list parameters must reference only existing labels or prior action outputs; do not reference future outputs

DOCUMENT ROUTING GUIDANCE:
- Each action should produce documents with a clear resultLabel for routing
- Use consistent naming: "round{current_round}_task{current_task}_action{action_number}_{descriptive_label}"
- Ensure document flow: Action A produces documents that Action B can consume
- Document labels should be descriptive of content, not just "results" or "output"
- Consider what subsequent actions will need and structure outputs accordingly

REQUIRED JSON STRUCTURE:
{{
"actions": [
{{
"method": "method_name",
"action": "action_name",
"parameters": {{}},
"resultLabel": "round{current_round}_task{current_task}_action{action_number}_{descriptive_label}",
"description": "Brief description of what this action accomplishes",
"userMessage": "User-friendly message explaining what this action will do in user's language"
}}
]
}}

IMPORTANT NOTES:
- Respond with ONLY the JSON object. Do not include any explanatory text.
- Before creating any document extraction action, verify that AVAILABLE DOCUMENTS contains actual document references.
- Always include a user-friendly userMessage for each action in the user's language.
- The examples above show German user messages as reference - adapt the language to match the USER LANGUAGE specified above."""
|
|
||||||
|
|
||||||
|
|
||||||
def createActionSelectionPromptTemplate() -> str:
    """Create the action-selection prompt template with placeholders.

    The template asks the model to pick exactly ONE method/action pair from a
    minimal tool catalog and answer with a single-key JSON object of the form
    {"action": {"method": ..., "name": ...}}.

    Placeholders substituted later by the prompt builder:
        {{KEY:USER_PROMPT}}          - task objective
        {{KEY:AVAILABLE_DOCUMENTS}}  - documents usable by actions
        {{KEY:USER_LANGUAGE}}        - preferred user language
        {{KEY:AVAILABLE_METHODS}}    - minimal method/action catalog

    Returns:
        str: the raw template text (double braces are literal braces).
    """
    return """Select exactly one action to advance the task.

OBJECTIVE: {{KEY:USER_PROMPT}}
AVAILABLE DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS}}
USER LANGUAGE: {{KEY:USER_LANGUAGE}}

MINIMAL TOOL CATALOG (method -> action -> [parameterNames]):
{{KEY:AVAILABLE_METHODS}}

BUSINESS RULES:
- Pick exactly one action per step.
- Derive choice from objective and success criteria.
- Prefer user language.
- Keep it minimal; avoid provider specifics.

RESPONSE FORMAT (JSON only):
{{"action":{{"method":"web","name":"search"}}}}"""
|
|
||||||
|
|
||||||
|
|
||||||
def createActionParameterPromptTemplate() -> str:
    """Create the action-parameter prompt template with placeholders.

    Follow-up to the action-selection step: given a chosen action and its
    signature, the model must answer with only the parameters object, i.e.
    {"parameters": {...}}.

    Placeholders substituted later by the prompt builder:
        {{KEY:SELECTED_ACTION}}      - chosen method/action pair
        {{KEY:ACTION_SIGNATURE}}     - parameter signature of that action
        {{KEY:USER_PROMPT}}          - task objective
        {{KEY:AVAILABLE_DOCUMENTS}}  - documents referencable by label
        {{KEY:USER_LANGUAGE}}        - preferred user language

    Returns:
        str: the raw template text (double braces are literal braces).
    """
    return """Provide only the required parameters for this action.

SELECTED ACTION: {{KEY:SELECTED_ACTION}}
ACTION SIGNATURE: {{KEY:ACTION_SIGNATURE}}
OBJECTIVE: {{KEY:USER_PROMPT}}
AVAILABLE DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS}}
USER LANGUAGE: {{KEY:USER_LANGUAGE}}

RULES:
- Return only the parameters object.
- Include user language if relevant.
- Reference documents only by exact labels available.
- Avoid unnecessary fields; host applies defaults.
- Use the ACTION SIGNATURE above to understand what parameters are required.
- Convert the objective into appropriate parameter values as needed.

RESPONSE FORMAT (JSON only):
{{"parameters":{{}}}}"""
|
|
||||||
|
|
||||||
|
|
||||||
def createRefinementPromptTemplate() -> str:
    """Create the refinement (continue/stop) prompt template with placeholders.

    The template asks the model to decide, from the latest observation,
    whether the iterative loop should continue or stop, answering with
    {"decision": "continue"|"stop", "reason": ...}.

    Placeholders substituted later by the prompt builder:
        {{KEY:USER_PROMPT}}     - task objective
        {{KEY:REVIEW_CONTENT}}  - latest observation/result summary

    Returns:
        str: the raw template text (double braces are literal braces).
    """
    return """Decide next step based on observation.

OBJECTIVE: {{KEY:USER_PROMPT}}
OBSERVATION:
{{KEY:REVIEW_CONTENT}}

RULES:
- If criteria are met or no further action helps, decide stop.
- Else decide continue.

RESPONSE FORMAT (JSON only):
{{"decision":"continue","reason":"Need more data"}}"""
|
|
||||||
|
|
||||||
|
|
||||||
def createResultReviewPromptTemplate() -> str:
    """Create the result-review prompt template with placeholders.

    The template asks the model to validate execution results against the
    task objective and answer with a JSON verdict carrying status
    (success|retry|failed), reason, improvements, quality_score,
    met/unmet criteria, confidence, and a user-facing message.

    Placeholders substituted later by the prompt builder:
        {{KEY:USER_PROMPT}}     - task objective
        {{KEY:REVIEW_CONTENT}}  - execution results summary

    Returns:
        str: the raw template text. NOTE(review): the JSON example contains
        ``//`` comments, so it is illustrative, not strictly valid JSON —
        the model is expected to omit them in its reply.
    """
    return """You are a result validation AI that reviews task execution outcomes and determines success, retry needs, or failure.

TASK OBJECTIVE: {{KEY:USER_PROMPT}}

EXECUTION RESULTS:
{{KEY:REVIEW_CONTENT}}

VALIDATION CRITERIA:
- Review each action's success/failure status
- Check if required documents were produced
- Validate document quality and completeness
- Assess if success criteria were met
- Identify any missing or incomplete outputs
- Determine if retry would help or if task should be marked as failed

REQUIRED JSON STRUCTURE:
{{
"status": "success|retry|failed",
"reason": "Detailed explanation of the validation decision",
"improvements": ["specific improvement 1", "specific improvement 2"],
"quality_score": 8, // 1-10 scale
"met_criteria": ["criteria1", "criteria2"],
"unmet_criteria": ["criteria3", "criteria4"],
"confidence": 0.85, // 0.0-1.0 scale
"userMessage": "User-friendly message explaining the validation result"
}}

VALIDATION PRINCIPLES:
- Be thorough but fair in assessment
- Focus on business value and outcomes
- Consider both technical execution and business results
- Provide specific, actionable improvement suggestions
- Use quality scores to track progress across retries
- Clearly identify which success criteria were met vs. unmet
- Set appropriate confidence levels based on evidence quality

NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|
||||||
|
|
||||||
|
|
||||||
# Helper functions to extract content for placeholders
|
|
||||||
|
|
||||||
def extractUserPrompt(context) -> str:
    """Return the task-step objective from *context*, or a fallback text.

    Falls back to 'No request specified' when the context carries no truthy
    task_step, or when the objective itself is empty.
    """
    fallback = 'No request specified'
    task_step = getattr(context, 'task_step', None)
    if not task_step:
        return fallback
    return task_step.objective or fallback
|
|
||||||
|
|
||||||
|
|
||||||
def extractAvailableDocuments(context) -> str:
    """Return the formatted listing of documents available to the workflow.

    Delegates to _getAvailableDocuments() when the context carries a truthy
    workflow; otherwise reports that no documents exist.
    """
    workflow = getattr(context, 'workflow', None)
    if not workflow:
        return "No documents available"
    return _getAvailableDocuments(workflow)
|
|
||||||
|
|
||||||
|
|
||||||
def extractWorkflowHistory(service, context) -> str:
    """Return summarized history of previous workflow rounds.

    Delegates to _getPreviousRoundContext() when the context carries a truthy
    workflow; a falsy history (or no workflow at all) collapses to the
    first-round message.
    """
    default = "No previous workflow rounds - this is the first round."
    workflow = getattr(context, 'workflow', None)
    if workflow:
        return _getPreviousRoundContext(service, workflow) or default
    return default
|
|
||||||
|
|
||||||
|
|
||||||
def _parseDocstringParameters(docstring: str) -> list:
    """Parse the 'Parameters:' section of an action docstring.

    Scanning starts after a line beginning with 'Parameters:' and stops at the
    next section header (Returns/Raises/Note/Example/Examples).  Bullet lines
    starting with '-' or '*' are skipped; every remaining 'name: description'
    line yields one entry.

    Parameters:
        docstring: the action function's __doc__ text.

    Returns:
        list[str]: entries of the form "name: description".
    """
    section_headers = ('Returns:', 'Raises:', 'Note:', 'Example:', 'Examples:')
    parameters = []
    in_parameters = False
    for raw_line in docstring.split('\n'):
        line = raw_line.strip()
        if line.startswith('Parameters:'):
            in_parameters = True
        elif line.startswith(section_headers):
            in_parameters = False
        elif in_parameters and line and not line.startswith(('-', '*')) and ':' in line:
            param_name, param_desc = line.split(':', 1)
            parameters.append(f"{param_name.strip()}: {param_desc.strip()}")
    return parameters


def extractAvailableMethods(service) -> str:
    """Extract available methods for action planning.

    Builds a JSON mapping {method: {action: [parameter descriptions]}} from
    the discovered method catalog.  Parameter descriptions come from each
    action function's docstring 'Parameters:' section; signatures without
    parentheses, or actions without a usable docstring, get an empty list.

    Parameters:
        service: service object forwarded to getMethodsList().

    Returns:
        str: pretty-printed JSON (non-ASCII preserved).
    """
    # NOTE(review): getMethodsList() in methodDiscovery returns a formatted
    # string; iterating it here yields single characters, none of which
    # contain '.', so method_actions stays empty — confirm which provider of
    # getMethodsList is bound here (a list-returning variant may exist).
    methodList = getMethodsList(service)
    method_actions = {}
    for sig in methodList:
        if '.' not in sig:
            continue
        method, rest = sig.split('.', 1)
        action = rest.split('(')[0]
        method_actions.setdefault(method, []).append((action, sig))

    # Structured JSON format for better AI parsing.
    available_methods_json = {}
    for method, actions in method_actions.items():
        available_methods_json[method] = {}
        # The live instance is needed to read each action's real docstring.
        method_instance = methods.get(method, {}).get('instance') if methods else None

        for action, sig in actions:
            parameters = []
            # Only signatures of the form name(...) can carry parameters.
            if '(' in sig and ')' in sig:
                func = getattr(method_instance, action, None) if method_instance else None
                docstring = getattr(func, '__doc__', None) if func else None
                if docstring:
                    parameters = _parseDocstringParameters(docstring)
            available_methods_json[method][action] = parameters

    return json.dumps(available_methods_json, indent=2, ensure_ascii=False)
|
|
||||||
|
|
||||||
|
|
||||||
def extractUserLanguage(service) -> str:
    """Return the user's language code, defaulting to English ('en')."""
    if not service or not service.user:
        return 'en'
    return service.user.language
|
|
||||||
|
|
||||||
|
|
||||||
def extractReviewContent(context) -> str:
    """Build a human-readable review summary from the execution context.

    Preference order:
    1. context.action_results -> per-result text (success flag, error,
       produced documents with name and MIME type)
    2. context.observation    -> JSON-encoded observation
    3. fallback message when neither is present
    """
    results = getattr(context, 'action_results', None)
    if results:
        parts = []
        for index, result in enumerate(results):
            parts.append(f"\nRESULT {index + 1}:\n")
            parts.append(f" Success: {result.success}\n")
            if result.error:
                parts.append(f" Error: {result.error}\n")

            if result.documents:
                parts.append(f" Documents: {len(result.documents)} document(s)\n")
                for doc in result.documents:
                    name = getattr(doc, 'documentName', 'Unknown')
                    mime = getattr(doc, 'mimeType', 'Unknown')
                    parts.append(f" - {name} ({mime})\n")
            else:
                parts.append(" Documents: None\n")
        return "".join(parts)

    observation = getattr(context, 'observation', None)
    if observation:
        return json.dumps(observation, ensure_ascii=False)
    return "No review content available"
|
|
||||||
1
modules/workflows/processing/shared/__init__.py
Normal file
1
modules/workflows/processing/shared/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
# Shared workflow utilities
|
||||||
|
|
@ -4,8 +4,8 @@
|
||||||
import logging
|
import logging
|
||||||
from typing import List
|
from typing import List
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
from modules.datamodels.datamodelWorkflow import TaskStep
|
from modules.datamodels.datamodelChat import TaskStep
|
||||||
from modules.datamodels.datamodelWorkflow import ActionResult
|
from modules.datamodels.datamodelChat import ActionResult
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -58,7 +58,7 @@ class TaskExecutionState:
|
||||||
patterns.append("permission_issues")
|
patterns.append("permission_issues")
|
||||||
return list(set(patterns))
|
return list(set(patterns))
|
||||||
|
|
||||||
def should_continue(observation, review=None, current_step: int = 0, max_steps: int = 5) -> bool:
|
def shouldContinue(observation, review=None, current_step: int = 0, max_steps: int = 5) -> bool:
|
||||||
"""Helper to decide if the iterative loop should continue
|
"""Helper to decide if the iterative loop should continue
|
||||||
- Stop if review indicates 'stop' or success criteria are met
|
- Stop if review indicates 'stop' or success criteria are met
|
||||||
- Stop on failure with no retry path
|
- Stop on failure with no retry path
|
||||||
131
modules/workflows/processing/shared/methodDiscovery.py
Normal file
131
modules/workflows/processing/shared/methodDiscovery.py
Normal file
|
|
@ -0,0 +1,131 @@
|
||||||
|
# methodDiscovery.py
|
||||||
|
# Method discovery and management for workflow execution
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import importlib
|
||||||
|
import pkgutil
|
||||||
|
import inspect
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
from modules.datamodels.datamodelChat import TaskContext, ReviewContext, DocumentExchange
|
||||||
|
from modules.workflows.methods.methodBase import MethodBase
|
||||||
|
|
||||||
|
# Set up logger
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Global methods catalog - moved from serviceCenter
|
||||||
|
methods = {}
|
||||||
|
|
||||||
|
def discoverMethods(serviceCenter):
    """Dynamically discover all method classes and their actions in modules methods package.

    Scans modules.workflows.methods for non-package modules named 'method*',
    imports each one, instantiates every class derived from MethodBase, and
    registers it in the module-level `methods` catalog.

    Side effects:
        Mutates the global `methods` dict.  Each discovered class is stored
        twice: under its class name (e.g. 'MethodAi') and under a derived
        short name (e.g. 'ai') — both keys point to the SAME info dict.

    Parameters:
        serviceCenter: service hub handed to each method-class constructor.

    Returns:
        None.  Failures are logged and swallowed so one broken method module
        cannot abort discovery of the others.
    """
    try:
        # Import the methods package
        methodsPackage = importlib.import_module('modules.workflows.methods')

        # Discover all modules in the package
        for _, name, isPkg in pkgutil.iter_modules(methodsPackage.__path__):
            if not isPkg and name.startswith('method'):
                try:
                    # Import the module
                    module = importlib.import_module(f'modules.workflows.methods.{name}')

                    # Find all classes in the module that inherit from MethodBase
                    for itemName, item in inspect.getmembers(module):
                        if (inspect.isclass(item) and
                            issubclass(item, MethodBase) and
                            item != MethodBase):
                            # Instantiate the method
                            methodInstance = item(serviceCenter)

                            # Use the actions property from MethodBase which handles @action decorator
                            actions = methodInstance.actions

                            # Create method info
                            methodInfo = {
                                'instance': methodInstance,
                                'actions': actions,
                                'description': item.__doc__ or f"Method {itemName}"
                            }

                            # Store the method with full class name
                            methods[itemName] = methodInfo

                            # Also store with short name for action executor access
                            # (e.g. 'MethodAi' -> 'ai'); this doubles the entry
                            # count reported below.
                            shortName = itemName.replace('Method', '').lower()
                            methods[shortName] = methodInfo

                            logger.info(f"Discovered method {itemName} (short: {shortName}) with {len(actions)} actions")

                except Exception as e:
                    # Best-effort: skip modules that fail to import/instantiate.
                    logger.error(f"Error discovering method {name}: {str(e)}")
                    continue

        logger.info(f"Discovered {len(methods)} method entries total")

    except Exception as e:
        logger.error(f"Error discovering methods: {str(e)}")
|
||||||
|
|
||||||
|
def getMethodsList(serviceCenter):
    """Return a markdown-style catalog of all discovered methods.

    Each catalog entry lists the method name, its description, and one line
    per action carrying a Python-like parameter signature (required
    parameters as 'name: type', optional ones with their default appended).
    Discovery runs lazily when the global catalog is still empty.
    """
    if not methods:
        discoverMethods(serviceCenter)

    blocks = []
    for methodName, methodInfo in methods.items():
        actionLines = []

        for actionName, actionInfo in methodInfo['actions'].items():
            # Build the parameter signature for this action.
            signatureParts = []
            for paramName, paramInfo in actionInfo['parameters'].items():
                piece = f"{paramName}: {paramInfo['type']}"
                if not paramInfo['required']:
                    default = paramInfo['default']
                    piece += f" = {default}" if default is not None else " = None"
                signatureParts.append(piece)

            signature = f"({', '.join(signatureParts)})" if signatureParts else "()"
            actionLines.append(f"- {actionName}{signature}: {actionInfo['description']}")

        blocks.append(f"**{methodName}**: {methodInfo['description']}\n" + "\n".join(actionLines))

    return "\n\n".join(blocks)
|
||||||
|
|
||||||
|
def getActionParameterList(methodName: str, actionName: str, methods: Dict[str, Any]) -> str:
    """Render one action's documented parameters as bullet lines.

    Looks the action up on the registered method instance and formats every
    docstring-documented parameter as '- name (type): description'; the
    description part is omitted when empty.  Returns '' for unknown
    methods/actions and on any unexpected error (which is logged).
    """
    try:
        entry = (methods or {}).get(methodName)
        if entry is None:
            return ""

        instance = entry['instance']
        actionInfo = instance.actions.get(actionName)
        if actionInfo is None:
            return ""

        # Parameter descriptions/types are parsed out of the action docstring.
        docstring = actionInfo.get('description', '')
        descriptions, types = instance._extractParameterDetails(docstring)

        bullets = []
        for name, description in descriptions.items():
            bullet = f"- {name} ({types.get(name, 'Any')})"
            if description:
                bullet += f": {description}"
            bullets.append(bullet)

        # List only, without leading headings or trailing text.
        return "\n".join(bullets)
    except Exception as e:
        logger.error(f"Error getting action parameter signature for {methodName}.{actionName}: {str(e)}")
        return ""
|
||||||
|
|
||||||
411
modules/workflows/processing/shared/placeholderFactory.py
Normal file
411
modules/workflows/processing/shared/placeholderFactory.py
Normal file
|
|
@ -0,0 +1,411 @@
|
||||||
|
"""
|
||||||
|
Placeholder Factory
|
||||||
|
Centralized placeholder extraction functions for all workflow modes.
|
||||||
|
Each function corresponds to a {{KEY:PLACEHOLDER_NAME}} in prompt templates.
|
||||||
|
|
||||||
|
NAMING CONVENTION:
|
||||||
|
- All functions follow pattern: extract{PlaceholderName}()
|
||||||
|
- Placeholder names are in UPPER_CASE with underscores
|
||||||
|
- Function names are in camelCase
|
||||||
|
|
||||||
|
MAPPING TABLE (keys → function) with usage [taskplan | actionplan | react]:
|
||||||
|
{{KEY:USER_PROMPT}} -> extractUserPrompt() [taskplan, actionplan, react]
|
||||||
|
{{KEY:USER_LANGUAGE}} -> extractUserLanguage() [actionplan, react]
|
||||||
|
{{KEY:WORKFLOW_HISTORY}} -> extractWorkflowHistory() [taskplan, actionplan, react]
|
||||||
|
{{KEY:AVAILABLE_CONNECTIONS_INDEX}} -> extractAvailableConnectionsIndex() [actionplan, react]
|
||||||
|
{{KEY:AVAILABLE_CONNECTIONS_SUMMARY}} -> extractAvailableConnectionsSummary() []
|
||||||
|
{{KEY:AVAILABLE_DOCUMENTS_SUMMARY}} -> extractAvailableDocumentsSummary() [taskplan, actionplan, react]
|
||||||
|
{{KEY:AVAILABLE_DOCUMENTS_INDEX}} -> extractAvailableDocumentsIndex() [react]
|
||||||
|
{{KEY:AVAILABLE_METHODS}} -> extractAvailableMethods() [actionplan, react]
|
||||||
|
{{KEY:REVIEW_CONTENT}} -> extractReviewContent() [actionplan, react]
|
||||||
|
{{KEY:PREVIOUS_ACTION_RESULTS}} -> extractPreviousActionResults() [react]
|
||||||
|
{{KEY:LEARNINGS_AND_IMPROVEMENTS}} -> extractLearningsAndImprovements() [react]
|
||||||
|
{{KEY:LATEST_REFINEMENT_FEEDBACK}} -> extractLatestRefinementFeedback() [react]
|
||||||
|
|
||||||
|
The following placeholders are populated directly by the prompt builders with the corresponding context in the promptGenerationActionsReact module:
|
||||||
|
- ACTION_OBJECTIVE,
|
||||||
|
- SELECTED_ACTION,
|
||||||
|
- ACTION_SIGNATURE
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
from modules.datamodels.datamodelChat import ChatDocument
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
from modules.workflows.processing.shared.methodDiscovery import (methods, discoverMethods)
|
||||||
|
|
||||||
|
def extractUserPrompt(context: Any) -> str:
    """Extract the user prompt for {{KEY:USER_PROMPT}}.

    Preference order:
    1. services.currentUserPrompt (the cleaned intent), when reachable
       through the context
    2. context.task_step.objective
    3. literal fallback 'No request specified'
    """
    fallback = 'No request specified'
    try:
        prompt = getattr(getattr(context, 'services', None), 'currentUserPrompt', None)
        if prompt:
            return prompt
    except Exception:
        pass

    task_step = getattr(context, 'task_step', None)
    return (task_step.objective or fallback) if task_step else fallback
|
||||||
|
|
||||||
|
def extractWorkflowHistory(service: Any, context: Any) -> str:
    """Extract workflow history for {{KEY:WORKFLOW_HISTORY}}.

    Prefers the workflow attached to the context, then the one on the
    service.  The enriched, reverse-chronological summary is produced by
    getPreviousRoundContext(); a default message covers the no-history case.
    """
    default = "No previous workflow rounds available"
    try:
        workflow = getattr(context, 'workflow', None) or getattr(service, 'workflow', None)
    except Exception:
        workflow = None

    if not workflow:
        return default
    return getPreviousRoundContext(service, workflow) or default
|
||||||
|
|
||||||
|
def extractAvailableMethods(service: Any) -> str:
    """Extract the action catalog for {{KEY:AVAILABLE_METHODS}}.

    Produces a flat JSON object mapping 'method.action' compound names
    (e.g. 'ai.generate', derived from 'MethodAi' + action name) to their
    descriptions.  The catalog is discovered lazily on first use; any
    failure degrades to an empty JSON object.
    """
    try:
        if not methods:
            discoverMethods(service)

        # Flat {"<shortMethod>.<action>": description} mapping for the model.
        catalog = {
            f"{methodName.replace('Method', '').lower()}.{actionName}":
                actionInfo.get('description', f"Execute {actionName} action")
            for methodName, methodInfo in methods.items()
            for actionName, actionInfo in methodInfo['actions'].items()
        }
        return json.dumps(catalog, indent=2, ensure_ascii=False)
    except Exception as e:
        logger.error(f"Error extracting available methods: {str(e)}")
        return json.dumps({}, indent=2, ensure_ascii=False)
|
||||||
|
|
||||||
|
def extractUserLanguage(service: Any) -> str:
    """Extract the user's language code for {{KEY:USER_LANGUAGE}}.

    Defaults to 'en' when no service or user is available.
    """
    if not service or not service.user:
        return 'en'
    return service.user.language
|
||||||
|
|
||||||
|
|
||||||
|
def _computeMessageSummary(msg) -> str:
    """Create a concise one-line summary for a document-carrying ChatMessage.

    Messages without documents yield "" (they are not summarized).  The
    summary line packs: round number, document count, all document names,
    the documentsLabel (when set), the success flag, a learnings marker,
    and the full message text with newlines flattened to spaces.

    Returns:
        str: the summary line, or "" when there are no documents or any
        unexpected error occurs (errors are swallowed deliberately —
        summarization must never break the caller).
    """
    try:
        docs = getattr(msg, 'documents', []) or []
        if not docs:
            return ""  # Only summarize messages that contain documents
        document_count = len(docs)
        round_number = getattr(msg, 'roundNumber', None) or 0
        label = getattr(msg, 'documentsLabel', None) or ""
        # Collect ALL document names (supports ChatDocument objects and dicts)
        doc_names = []
        for d in docs:
            name = None
            try:
                if isinstance(d, dict):
                    # For dict objects, try multiple possible field names
                    name = d.get('fileName') or d.get('documentName') or d.get('name') or d.get('filename')
                else:
                    # For ChatDocument objects, use fileName field
                    name = getattr(d, 'fileName', None) or getattr(d, 'documentName', None) or getattr(d, 'name', None) or getattr(d, 'filename', None)
            except Exception:
                name = None
            doc_names.append(name or "(unnamed)")
        # Format document names in brackets
        if doc_names:
            names_part = f"({', '.join(doc_names)})"
        else:
            # NOTE(review): unreachable — docs is non-empty and every doc
            # appends an entry above, so doc_names is never empty here.
            names_part = "(no documents)"

        # Don't truncate the message - show full content
        user_message = (getattr(msg, 'message', '') or '').strip().replace("\n", " ")
        # Read success from ChatMessage.success field
        success_flag = getattr(msg, 'success', None)
        success_text = "success=True" if success_flag is True else ("success=False" if success_flag is False else "success=Unknown")
        label_part = f" label='{label}'" if label else ""

        # Add learning/feedback if available
        learning_part = ""
        if hasattr(msg, 'summary') and msg.summary and 'learnings' in msg.summary.lower():
            learning_part = " | learnings available"

        return f"Round {round_number}: {document_count} docs {names_part}{label_part} | {success_text}{learning_part} | msg='{user_message}'"
    except Exception:
        return ""
|
||||||
|
|
||||||
|
def getMessageSummary(msg) -> str:
    """Return the cached message summary, computing and caching it if absent.

    Only document-carrying messages produce a non-empty summary (see
    _computeMessageSummary).  The computed summary is cached on the message
    in-memory on a best-effort basis; persisting it is left to the caller.
    Any unexpected failure yields "".
    """
    try:
        cached = getattr(msg, 'summary', None)
        if cached:
            return cached

        fresh = _computeMessageSummary(msg)
        if fresh:
            try:
                msg.summary = fresh
            except Exception:
                pass  # read-only messages simply skip caching
        return fresh
    except Exception:
        return ""
|
||||||
|
|
||||||
|
def getPreviousRoundContext(services, workflow: Any) -> str:
    """Get enriched previous-round context for prompt building.

    Output characteristics:
    - Reverse-chronological ordering: current round first (newest -> oldest),
      then older rounds
    - Only messages that carry documents are summarized (getMessageSummary
      returns "" for the rest)
    - Ends with an available-documents count snapshot when obtainable

    Parameters:
        services: service hub; services.workflow.getAvailableDocuments() is
            used for the snapshot when present.
        workflow: workflow object carrying `messages` and `currentRound`.

    Returns:
        str: newline-joined bullet lines, or a default/error message.
    """
    try:
        if not workflow:
            return "No previous round context available"

        lines: List[str] = []

        # Reverse-chronological, current round first.  Best-effort: any
        # failure here just leaves `lines` partially filled.
        try:
            msgs = getattr(workflow, 'messages', []) or []
            current_round = getattr(workflow, 'currentRound', None)
            current_round_msgs: List[Any] = []
            previous_round_msgs: List[Any] = []
            # Messages with no/unknown round number fall into the
            # "previous" bucket.
            for m in msgs:
                if current_round is not None and getattr(m, 'roundNumber', None) == current_round:
                    current_round_msgs.append(m)
                else:
                    previous_round_msgs.append(m)

            for m in reversed(current_round_msgs):
                s = getMessageSummary(m)
                if s:
                    lines.append(f"- {s}")
            for m in reversed(previous_round_msgs):
                s = getMessageSummary(m)
                if s:
                    lines.append(f"- {s}")
        except Exception:
            pass

        # Include available documents snapshot at end (best-effort).
        try:
            if hasattr(services, 'workflow'):
                docs_index = services.workflow.getAvailableDocuments(workflow)
                if docs_index and docs_index != "No documents available":
                    doc_count = docs_index.count("docItem:")  # Only count actual documents, not document list labels
                    lines.append(f"Available documents: {doc_count}")
        except Exception:
            pass

        if not lines:
            return "No previous round context available"
        return "\n".join(lines)
    except Exception as e:
        logger.error(f"Error getting previous round context: {str(e)}")
        return "Error retrieving previous round context"
|
||||||
|
|
||||||
|
def extractReviewContent(context: Any) -> str:
    """Extract review content for result validation. Maps to {{KEY:REVIEW_CONTENT}}

    Source priority: ``context.action_results`` (per-result metadata summary),
    then ``context.observation``, then ``context.step_result['observation']``.
    Document contents and preview snippets are never emitted verbatim; only
    metadata and content-length indicators are included, keeping the review
    prompt compact.

    Args:
        context: Processing context; attribute shapes are assumed from the
            accesses below (action_results items expose success/error/documents
            — TODO confirm against the caller).

    Returns:
        Summary text, or "No review content available" when nothing usable
        is present or an error occurs.
    """

    def _summarizeObservation(observation: Any) -> str:
        """Serialize an observation, replacing preview snippets with a length
        indicator.  Fresh dicts are built so the caller's observation is left
        untouched — the previous shallow-copy approach mutated the shared
        preview dicts, destroying the original snippet text.  Also removes
        the duplication between the observation and step_result branches."""
        if not isinstance(observation, dict):
            return json.dumps(observation, ensure_ascii=False)
        obs_copy = dict(observation)
        previews = obs_copy.get('previews')
        if isinstance(previews, list):
            redacted = []
            for preview in previews:
                if isinstance(preview, dict) and 'snippet' in preview:
                    preview = dict(preview)  # copy before mutating
                    preview['snippet'] = f"[Content: {len(preview.get('snippet', ''))} characters]"
                redacted.append(preview)
            obs_copy['previews'] = redacted
        return json.dumps(obs_copy, indent=2, ensure_ascii=False)

    try:
        if hasattr(context, 'action_results') and context.action_results:
            # Build a metadata-only summary of each executed action result.
            result_summary = ""
            for i, result in enumerate(context.action_results):
                result_summary += f"\nRESULT {i+1}:\n"
                result_summary += f" Success: {result.success}\n"
                if result.error:
                    result_summary += f" Error: {result.error}\n"

                if result.documents:
                    result_summary += f" Documents: {len(result.documents)} document(s)\n"
                    for doc in result.documents:
                        # Extract all available metadata without content
                        doc_metadata = {
                            "name": getattr(doc, 'fileName', None) or getattr(doc, 'documentName', 'Unknown'),
                            "mimeType": getattr(doc, 'mimeType', 'Unknown'),
                            "size": getattr(doc, 'size', 'Unknown'),
                            "created": getattr(doc, 'created', 'Unknown'),
                            "modified": getattr(doc, 'modified', 'Unknown'),
                            "typeGroup": getattr(doc, 'typeGroup', 'Unknown'),
                            "documentId": getattr(doc, 'documentId', 'Unknown'),
                            "reference": getattr(doc, 'reference', 'Unknown')
                        }
                        # Remove 'Unknown' values to keep it clean
                        doc_metadata = {k: v for k, v in doc_metadata.items() if v != 'Unknown'}
                        result_summary += f" - {json.dumps(doc_metadata, indent=6, ensure_ascii=False)}\n"
                else:
                    result_summary += " Documents: None\n"

            return result_summary
        elif hasattr(context, 'observation') and context.observation:
            # Direct observation payload (dict or other JSON-serializable value).
            return _summarizeObservation(context.observation)
        elif hasattr(context, 'step_result') and context.step_result and 'observation' in context.step_result:
            # Observation nested inside a step_result mapping.
            return _summarizeObservation(context.step_result['observation'])
        else:
            return "No review content available"
    except Exception as e:
        logger.error(f"Error extracting review content: {str(e)}")
        return "No review content available"
|
||||||
|
|
||||||
|
def extractPreviousActionResults(context: Any) -> str:
    """Extract previous action results for learning context. Maps to {{KEY:PREVIOUS_ACTION_RESULTS}}

    Renders up to the five most recent action results as one line per
    action ("Action N: <label> - SUCCESS|FAILED"), with an error line
    appended when an error is present.
    """
    fallback = "No previous actions executed yet"
    try:
        history = getattr(context, 'previous_action_results', None)
        if not history:
            return fallback

        lines = []
        for index, entry in enumerate(history[-5:], 1):  # only the last 5 results
            if hasattr(entry, 'resultLabel') and hasattr(entry, 'status'):
                outcome = "SUCCESS" if entry.status == "completed" else "FAILED"
                lines.append(f"Action {index}: {entry.resultLabel} - {outcome}")
                if getattr(entry, 'error', None):
                    lines.append(f" Error: {entry.error}")

        return "\n".join(lines) if lines else fallback
    except Exception as e:
        logger.error(f"Error extracting previous action results: {str(e)}")
        return fallback
|
||||||
|
|
||||||
|
def extractLearningsAndImprovements(context: Any) -> str:
    """Extract learnings and improvements from previous actions. Maps to {{KEY:LEARNINGS_AND_IMPROVEMENTS}}

    Collects up to the last three entries from each of three optional
    context lists (improvements, failure patterns, successful actions),
    rendering each present section under its own header.
    """
    try:
        sections = (
            ('improvements', "IMPROVEMENTS:"),
            ('failure_patterns', "FAILURE PATTERNS TO AVOID:"),
            ('successful_actions', "SUCCESSFUL APPROACHES:"),
        )
        output: List[str] = []
        for attr, header in sections:
            values = getattr(context, attr, None)
            if values and isinstance(values, list):
                output.append(header)
                output.extend(f"- {item}" for item in values[-3:])  # last 3 only
        return "\n".join(output) if output else "No learnings available yet"
    except Exception as e:
        logger.error(f"Error extracting learnings and improvements: {str(e)}")
        return "No learnings available yet"
|
||||||
|
|
||||||
|
def extractLatestRefinementFeedback(context: Any) -> str:
    """Extract the latest refinement feedback. Maps to {{KEY:LATEST_REFINEMENT_FEEDBACK}}

    Reads the most recent entry of ``context.previous_review_result`` (a
    list of decision dicts) and renders its decision, reason, and any
    optional feedback/suggestions fields.
    """
    default = "No previous refinement feedback available"
    try:
        history = getattr(context, 'previous_review_result', None)
        if not history or not isinstance(history, list):
            return default

        latest = history[-1]  # most recent refinement decision
        if not isinstance(latest, dict):
            return default

        parts = [
            f"Latest Decision: {latest.get('decision', 'unknown')}",
            f"Reason: {latest.get('reason', 'No reason provided')}",
        ]
        # Optional extras, included only when the reviewer supplied them.
        for key, label in (('feedback', 'Feedback'), ('suggestions', 'Suggestions')):
            if key in latest:
                parts.append(f"{label}: {latest[key]}")

        return "\n".join(parts)
    except Exception as e:
        logger.error(f"Error extracting latest refinement feedback: {str(e)}")
        return default
|
||||||
|
|
||||||
|
def extractAvailableDocumentsSummary(service: Any, context: Any) -> str:
    """Summary of available documents (count only).

    Counts docList:/docItem: references in the workflow's document index
    text instead of returning the full index, keeping prompts compact.
    """
    try:
        index_text = service.workflow.getAvailableDocuments(context.workflow)
        if not index_text or index_text == "No documents available":
            return "No documents available"
        # Both list-level and item-level references count as documents here.
        total = sum(index_text.count(marker) for marker in ("docList:", "docItem:"))
        return f"{total} documents available from previous tasks"
    except Exception as e:
        logger.error(f"Error getting document summary: {str(e)}")
        return "No documents available"
|
||||||
|
|
||||||
|
def extractAvailableDocumentsIndex(service: Any, context: Any) -> str:
    """Index of available documents with detailed references for parameter generation.

    Returns the workflow's document index text verbatim; falls back to a
    fixed message when the lookup fails for any reason.
    """
    try:
        documentIndex = service.workflow.getAvailableDocuments(context.workflow)
        return documentIndex
    except Exception as e:
        logger.error(f"Error getting document index: {str(e)}")
        return "No documents available"
|
||||||
|
|
||||||
|
def extractAvailableConnectionsSummary(service: Any) -> str:
    """Summary of available connections (count only)."""
    try:
        available = service.workflow.getConnectionReferenceList()
        # Report only the count; the full index has its own extractor.
        return f"{len(available)} connections available" if available else "No connections available"
    except Exception as e:
        logger.error(f"Error getting connection summary: {str(e)}")
        return "No connections available"
|
||||||
|
|
||||||
|
def extractAvailableConnectionsIndex(service: Any) -> str:
    """Index of available connections with detailed references for parameter generation."""
    try:
        refs = service.workflow.getConnectionReferenceList()
        if not refs:
            return "No connections available"
        # One bullet per connection reference label.
        return "\n".join(f"- {ref}" for ref in refs)
    except Exception as e:
        logger.error(f"Error getting connection index: {str(e)}")
        return "No connections available"
|
||||||
|
|
@ -0,0 +1,236 @@
|
||||||
|
"""
|
||||||
|
Actionplan Mode Prompt Generation
|
||||||
|
Handles prompt templates and extraction functions for actionplan mode action handling.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
from modules.datamodels.datamodelChat import PromptBundle, PromptPlaceholder
|
||||||
|
from modules.workflows.processing.shared.placeholderFactory import (
|
||||||
|
extractUserPrompt,
|
||||||
|
extractAvailableDocumentsSummary,
|
||||||
|
extractWorkflowHistory,
|
||||||
|
extractAvailableMethods,
|
||||||
|
extractUserLanguage,
|
||||||
|
extractAvailableConnectionsIndex,
|
||||||
|
extractReviewContent,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def generateActionDefinitionPrompt(services, context: Any) -> PromptBundle:
    """Define placeholders first, then the template; return PromptBundle.

    Builds the "Action Definition" prompt that asks the model for the next
    executable action as a JSON object.  Placeholder content is extracted
    eagerly from the live services/context; the template references it via
    {{KEY:...}} markers substituted later by the host.

    Args:
        services: Service container used by the extract* placeholder helpers.
        context: Current processing context (supplies the user prompt etc.).

    Returns:
        PromptBundle: the template text plus its PromptPlaceholder list.
    """
    # Placeholders are computed up front so the bundle is self-contained.
    placeholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
        PromptPlaceholder(label="AVAILABLE_DOCUMENTS_SUMMARY", content=extractAvailableDocumentsSummary(services, context), summaryAllowed=True),
        PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False),
        PromptPlaceholder(label="WORKFLOW_HISTORY", content=extractWorkflowHistory(services, context), summaryAllowed=True),
        PromptPlaceholder(label="AVAILABLE_METHODS", content=extractAvailableMethods(services), summaryAllowed=False),
        PromptPlaceholder(label="USER_LANGUAGE", content=extractUserLanguage(services), summaryAllowed=False),
    ]

    # NOTE(review): the template's Parameter Guidelines reference
    # AVAILABLE_DOCUMENTS_INDEX, but only AVAILABLE_DOCUMENTS_SUMMARY is
    # provided as a placeholder — confirm whether the index should be added.
    template = """# Action Definition

Generate the next action to advance toward completing the task objective.

## 📋 Context

### Task Objective
{{KEY:USER_PROMPT}}

### Available Documents
{{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}

### Available Connections
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}

### User Language
{{KEY:USER_LANGUAGE}}

### Workflow History
{{KEY:WORKFLOW_HISTORY}}

### Available Methods
{{KEY:AVAILABLE_METHODS}}

## ⚠️ RULES

### Action Names
- **Use EXACT compound action names** from AVAILABLE_METHODS (e.g., "ai.process", "document.extract", "web.search")
- **DO NOT create** new action names - only use those listed in AVAILABLE_METHODS
- **DO NOT separate** method and action names - use the full compound name

### Parameter Guidelines
- **Use exact document references** from AVAILABLE_DOCUMENTS_INDEX
- **Use exact connection references** from AVAILABLE_CONNECTIONS_INDEX
- **Include user language** if relevant
- **Avoid unnecessary fields** - host applies defaults

## 📊 Required JSON Structure

```json
{
    "actions": [
        {
            "action": "method.action_name",
            "parameters": {},
            "resultLabel": "round{current_round}_task{current_task}_action{action_number}_{descriptive_label}",
            "description": "What this action accomplishes",
            "userMessage": "User-friendly message in {{KEY:USER_LANGUAGE}}"
        }
    ]
}
```

## ✅ Correct Example

```json
{
    "actions": [
        {
            "action": "document.extract",
            "parameters": {"documentList": ["docList:msg_123:results"]},
            "resultLabel": "round1_task1_action1_extract_results",
            "description": "Extract data from documents",
            "userMessage": "Extracting data from documents"
        }
    ]
}
```


## 🎯 Action Planning Guidelines

### Method Selection
- **Choose appropriate method** based on task requirements
- **Consider available resources** (documents, connections)
- **Match method capabilities** to task objectives

### Parameter Design
- **Use ACTION SIGNATURE** to understand required parameters
- **Convert objective** into appropriate parameter values
- **Include all required parameters** for the action

### Result Labeling
- **Use descriptive labels** that explain what the action produces
- **Follow naming convention**: `round{round}_task{task}_action{action}_{label}`
- **Make labels meaningful** for future reference

### User Messages
- **Write in user language** ({{KEY:USER_LANGUAGE}})
- **Explain what's happening** in user-friendly terms
- **Keep messages concise** but informative

## 🚀 Response Format
Return ONLY the JSON object."""

    return PromptBundle(prompt=template, placeholders=placeholders)
|
||||||
|
|
||||||
|
def generateResultReviewPrompt(context: Any) -> PromptBundle:
    """Define placeholders first, then the template; return PromptBundle.

    Builds the "Result Review & Validation" prompt that asks the model to
    judge execution results against the task objective and reply with a
    success/retry/failed decision as a JSON object.

    Args:
        context: Current processing context; supplies the user prompt and
            the review content (action results / observations).

    Returns:
        PromptBundle: the template text plus its PromptPlaceholder list.
    """
    placeholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
        PromptPlaceholder(label="REVIEW_CONTENT", content=extractReviewContent(context), summaryAllowed=True),
    ]

    template = """# Result Review & Validation

Review task execution outcomes and determine success, retry needs, or failure.

## 📋 Context

### Task Objective
{{KEY:USER_PROMPT}}

### Execution Results
{{KEY:REVIEW_CONTENT}}

## 🔍 Validation Criteria

### Action Assessment
- **Review each action's success/failure status**
- **Check if required documents were produced**
- **Validate document quality and completeness**
- **Assess if success criteria were met**
- **Identify any missing or incomplete outputs**

### Decision Making
- **Determine if retry would help** or if task should be marked as failed
- **Consider business value** and user satisfaction
- **Evaluate technical execution** and results quality

## 📊 Required JSON Structure

```json
{
    "status": "success|retry|failed",
    "reason": "Detailed explanation of the validation decision",
    "improvements": ["specific improvement 1", "specific improvement 2"],
    "quality_score": 8,
    "met_criteria": ["criteria1", "criteria2"],
    "unmet_criteria": ["criteria3", "criteria4"],
    "confidence": 0.85,
    "userMessage": "User-friendly message explaining the validation result"
}
```

## 🎯 Validation Principles

### Assessment Approach
- **Be thorough but fair** in assessment
- **Focus on business value** and outcomes
- **Consider both technical execution** and business results
- **Provide specific, actionable** improvement suggestions

### Quality Scoring
- **Use quality scores** to track progress across retries
- **Scale 1-10**: 1 = Poor, 5 = Average, 10 = Excellent
- **Consider completeness, accuracy, and usefulness**

### Criteria Evaluation
- **Clearly identify** which success criteria were met vs. unmet
- **List specific criteria** that were achieved
- **Note missing requirements** that need attention

### Confidence Levels
- **Set appropriate confidence levels** based on evidence quality
- **Scale 0.0-1.0**: 0.0 = No confidence, 1.0 = Complete confidence
- **Consider data quality** and result reliability

## 📝 Status Definitions

### Success
- **All objectives met** - User got what they asked for
- **Quality standards met** - Results are complete and accurate
- **No retry needed** - Task is fully complete

### Retry
- **Partial success** - Some but not all objectives met
- **Improvement possible** - Retry could lead to better results
- **Technical issues** - Action failures that can be resolved

### Failed
- **No progress made** - Objectives not achieved
- **Technical limitations** - Cannot be resolved with retry
- **Resource constraints** - Missing required inputs

## 💡 Improvement Suggestions

### Actionable Improvements
- **Be specific** - Don't just say "improve quality"
- **Focus on process** - How to do better next time
- **Consider resources** - What additional inputs might help
- **Technical fixes** - Address specific technical issues

### Examples
- "Use more specific document references from AVAILABLE_DOCUMENTS_INDEX"
- "Include user language parameter for better localization"
- "Break down complex objective into smaller, focused actions"
- "Verify document references before processing"

## 🚀 Response Format
Return ONLY the JSON object. Do not include any explanatory text."""

    return PromptBundle(prompt=template, placeholders=placeholders)
|
||||||
|
|
||||||
|
|
@ -0,0 +1,237 @@
|
||||||
|
"""
|
||||||
|
React Mode Prompt Generation
|
||||||
|
Handles prompt templates for react mode action handling.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Any, List
|
||||||
|
from modules.datamodels.datamodelChat import PromptBundle, PromptPlaceholder
|
||||||
|
from modules.workflows.processing.shared.placeholderFactory import (
|
||||||
|
extractUserPrompt,
|
||||||
|
extractUserLanguage,
|
||||||
|
extractAvailableMethods,
|
||||||
|
extractAvailableDocumentsSummary,
|
||||||
|
extractAvailableDocumentsIndex,
|
||||||
|
extractAvailableConnectionsIndex,
|
||||||
|
extractPreviousActionResults,
|
||||||
|
extractLearningsAndImprovements,
|
||||||
|
extractLatestRefinementFeedback,
|
||||||
|
extractWorkflowHistory,
|
||||||
|
)
|
||||||
|
from modules.workflows.processing.shared.methodDiscovery import methods, getActionParameterList
|
||||||
|
|
||||||
|
def generateReactPlanSelectionPrompt(services, context: Any) -> PromptBundle:
    """Define placeholders first, then the template; return PromptBundle.

    Stage 1 of the ReAct flow: asks the model to select exactly one action
    (no parameters yet) plus a short parametersContext that Stage 2 uses to
    fill in business parameters.

    Args:
        services: Service container used by the extract* placeholder helpers.
        context: Current processing context (user prompt, workflow, ...).

    Returns:
        PromptBundle: the template text plus its PromptPlaceholder list.
    """
    placeholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
        PromptPlaceholder(label="AVAILABLE_DOCUMENTS_SUMMARY", content=extractAvailableDocumentsSummary(services, context), summaryAllowed=True),
        PromptPlaceholder(label="AVAILABLE_METHODS", content=extractAvailableMethods(services), summaryAllowed=False),
        # Provide enriched history context for Stage 1 to craft parametersContext
        PromptPlaceholder(label="WORKFLOW_HISTORY", content=extractWorkflowHistory(services, context), summaryAllowed=True),
        # Provide deterministic indexes so the planner can choose exact labels
        PromptPlaceholder(label="AVAILABLE_DOCUMENTS_INDEX", content=extractAvailableDocumentsIndex(services, context), summaryAllowed=True),
        PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False),
    ]

    # Braces in the reply schema are doubled ({{ }}) — presumably so a later
    # str.format pass leaves them as literal braces; confirm against the host.
    template = """Select exactly one action to advance the task.

OBJECTIVE:
{{KEY:USER_PROMPT}}

AVAILABLE_DOCUMENTS_SUMMARY:
{{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}

AVAILABLE_METHODS:
{{KEY:AVAILABLE_METHODS}}

WORKFLOW_HISTORY (reverse-chronological, enriched):
{{KEY:WORKFLOW_HISTORY}}

AVAILABLE_DOCUMENTS_INDEX:
{{KEY:AVAILABLE_DOCUMENTS_INDEX}}

AVAILABLE_CONNECTIONS_INDEX:
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}

REPLY: Return ONLY a JSON object with the following structure (no comments, no extra text):
{{
    "action": "method.action_name",
    "actionObjective": "...",
    "learnings": ["..."],
    "requiredInputDocuments": ["docList:..."],
    "requiredConnection": "connection:..." | null,
    "parametersContext": "concise text that Stage 2 will use to set business parameters"
}}

EXAMPLE how to assign references from AVAILABLE_DOCUMENTS_INDEX and AVAILABLE_CONNECTIONS_INDEX:
"requiredInputDocuments": ["docList:msg_47a7a578-e8f2-4ba8-ac66-0dbff40605e0:round8_task1_action1_results","docItem:5d8b7aee-b546-4487-b6a8-835c86f7b186:AI_Generated_Document_20251006-104256.docx"],
"requiredConnection": "connection:msft:p.motsch@valueon.ch:1ae8b8e5-128b-49b8-b1cb-7c632669eeae",

RULES:
1. Use EXACT action names from AVAILABLE_METHODS
2. Do NOT output a "parameters" object
3. parametersContext must be short and sufficient for Stage 2
4. Return ONLY JSON - no markdown, no explanations
5. For requiredInputDocuments, use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
6. For requiredConnection, use ONLY an exact label from AVAILABLE_CONNECTIONS_INDEX
"""

    return PromptBundle(prompt=template, placeholders=placeholders)
|
||||||
|
|
||||||
|
def generateReactParametersPrompt(services, context: Any, compoundActionName: str) -> PromptBundle:
    """Define placeholders first, then the template; return PromptBundle.

    Minimal Stage 2 (no fallback): consumes actionObjective, selectedAction, parametersContext only.
    Excludes documents/connections/history entirely.

    Args:
        services: Service container (not used directly in this function body).
        context: Processing context; may carry action_objective, task_step,
            parameters_context, and learnings produced by Stage 1.
        compoundActionName: Full "method.action" name selected by Stage 1.

    Returns:
        PromptBundle: the parameter-generation template plus placeholders.
    """
    # derive method/action and parameter list
    methodName, actionName = (compoundActionName.split('.', 1) if '.' in compoundActionName else (compoundActionName, ''))
    actionParameterList = getActionParameterList(methodName, actionName, methods)

    def _formatBusinessParameters(params: Any) -> str:
        """Render business parameters as "- name (type, required|optional): desc"
        lines, dropping infrastructure parameters the model must not set."""
        excluded = {"documentList", "connectionReference"}
        # Case 1: params is a list of dicts or objects with 'name'
        if isinstance(params, (list, tuple)):
            entries = []
            for p in params:
                try:
                    if isinstance(p, dict):
                        name = p.get("name")
                        if not name or name in excluded:
                            continue
                        ptype = p.get("type") or p.get("dataType") or ""
                        req = p.get("required")
                        # 'required' may arrive as a bool or the string "true"
                        reqTxt = "required" if (req is True or str(req).lower() == "true") else "optional"
                        desc = p.get("description") or p.get("desc") or ""
                        entry = f"- {name} ({ptype}, {reqTxt})" + (f": {desc}" if desc else "")
                        entries.append(entry)
                    else:
                        # Try attribute access
                        name = getattr(p, "name", None)
                        if not name or name in excluded:
                            continue
                        ptype = getattr(p, "type", "") or getattr(p, "dataType", "")
                        req = getattr(p, "required", False)
                        reqTxt = "required" if (req is True or str(req).lower() == "true") else "optional"
                        desc = getattr(p, "description", None) or getattr(p, "desc", None) or ""
                        entry = f"- {name} ({ptype}, {reqTxt})" + (f": {desc}" if desc else "")
                        entries.append(entry)
                except Exception:
                    # Skip malformed parameter entries rather than failing the prompt
                    continue
            return "\n".join(entries)
        # Case 2: params is a string description: filter out lines mentioning excluded names
        if isinstance(params, str):
            lines = [ln for ln in params.splitlines() if not any(ex in ln for ex in excluded)]
            return "\n".join(lines).strip()
        # Fallback: plain string
        try:
            return str(params)
        except Exception:
            return ""

    actionParametersText = _formatBusinessParameters(actionParameterList)

    # determine action objective if available, else fall back to user prompt
    if hasattr(context, 'action_objective') and context.action_objective:
        actionObjective = context.action_objective
    elif hasattr(context, 'task_step') and context.task_step and getattr(context.task_step, 'objective', None):
        actionObjective = context.task_step.objective
    else:
        actionObjective = extractUserPrompt(context)

    # Minimal Stage 2 (no fallback)
    parametersContext = getattr(context, 'parameters_context', None)
    learningsText = ""
    try:
        # If Stage 1 learnings were attached to context, pass them textually
        if hasattr(context, 'learnings') and context.learnings:
            if isinstance(context.learnings, (list, tuple)):
                learningsText = "\n".join(f"- {str(x)}" for x in context.learnings)
            else:
                learningsText = str(context.learnings)
    except Exception:
        learningsText = ""

    placeholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="ACTION_OBJECTIVE", content=actionObjective, summaryAllowed=False),
        PromptPlaceholder(label="SELECTED_ACTION", content=compoundActionName, summaryAllowed=False),
        PromptPlaceholder(label="PARAMETERS_CONTEXT", content=(parametersContext or ""), summaryAllowed=True),
        PromptPlaceholder(label="ACTION_PARAMETERS", content=actionParametersText, summaryAllowed=False),
        PromptPlaceholder(label="LEARNINGS", content=learningsText, summaryAllowed=True),
    ]

    # Braces in the reply schema are doubled ({{ }}) — presumably so a later
    # str.format pass leaves them as literal braces; confirm against the host.
    template = """You are a parameter generator. Set the parameters for this specific action.

CONTEXT AND OBJECTIVE:
-----------------
{{KEY:ACTION_OBJECTIVE}}
-----------------

SELECTED_ACTION:
{{KEY:SELECTED_ACTION}}


REPLY (ONLY JSON):
{{
    "schema": "parameters_v1",
    "parameters": {{
        "paramName": "value"
    }}
}}


CONTEXT FOR PARAMETER VALUES:
-----------------
{{KEY:PARAMETERS_CONTEXT}}
-----------------

LEARNINGS (from prior attempts, if any):
{{KEY:LEARNINGS}}

REQUIRED PARAMETERS FOR THIS ACTION (use these exact parameter names):
{{KEY:ACTION_PARAMETERS}}

INSTRUCTIONS:
- Use ONLY the parameter names listed in section REQUIRED PARAMETERS FOR THIS ACTION
- Fill in appropriate values based on the context and objective
- Do NOT invent new parameters
- Do NOT include: documentList, connectionReference, history, documents, connections

RULES:
- Return ONLY JSON (no markdown, no prose)
- Use ONLY the exact parameter names listed in REQUIRED PARAMETERS FOR THIS ACTION
- Do NOT add any parameters not listed above
- Do NOT add nested objects or custom fields
"""

    return PromptBundle(prompt=template, placeholders=placeholders)
|
||||||
|
|
||||||
|
def generateReactRefinementPrompt(services, context: Any, reviewContent: str) -> PromptBundle:
    """Build the ReAct refinement (continue/stop) prompt as a PromptBundle.

    The template asks the model to compare the latest observation against
    the task objective and reply with a minimal JSON decision.

    Args:
        services: Service container (kept for signature parity with siblings).
        context: Current processing context; supplies the user prompt.
        reviewContent: Pre-rendered observation text to judge against.

    Returns:
        PromptBundle: the template text plus its PromptPlaceholder list.
    """
    template = """Decide the next step based on the observation.

OBJECTIVE:
{{KEY:USER_PROMPT}}

OBSERVATION:
{{KEY:REVIEW_CONTENT}}

REPLY: Return only a JSON object with your decision:
{{
    "decision": "continue|stop",
    "reason": "brief explanation"
}}

RULES:
1. Use "continue" if objective NOT fulfilled
2. Use "stop" if objective fulfilled
3. Return ONLY JSON - no other text
4. Do NOT use markdown code blocks
5. Do NOT add explanations
"""

    bundlePlaceholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
        PromptPlaceholder(label="REVIEW_CONTENT", content=reviewContent, summaryAllowed=True),
    ]

    return PromptBundle(prompt=template, placeholders=bundlePlaceholders)
|
||||||
121
modules/workflows/processing/shared/promptGenerationTaskplan.py
Normal file
121
modules/workflows/processing/shared/promptGenerationTaskplan.py
Normal file
|
|
@ -0,0 +1,121 @@
|
||||||
|
"""
|
||||||
|
Task Planning Prompt Generation
|
||||||
|
Handles prompt templates and extraction functions for task planning phase.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
from modules.datamodels.datamodelChat import PromptBundle, PromptPlaceholder
|
||||||
|
from modules.workflows.processing.shared.placeholderFactory import (
|
||||||
|
extractUserPrompt,
|
||||||
|
extractAvailableDocumentsSummary,
|
||||||
|
extractWorkflowHistory,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def generateTaskPlanningPrompt(services, context: Any) -> PromptBundle:
    """Build the task-planning prompt: placeholders first, then template."""
    placeholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
        PromptPlaceholder(label="AVAILABLE_DOCUMENTS_SUMMARY", content=extractAvailableDocumentsSummary(services, context), summaryAllowed=True),
        PromptPlaceholder(label="WORKFLOW_HISTORY", content=extractWorkflowHistory(services, context), summaryAllowed=True),
    ]

    template = """# Task Planning

Break down user requests into logical, executable task steps.

## 📋 Context

### User Request
{{KEY:USER_PROMPT}}

### Available Documents
{{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}

### Previous Workflow Rounds
{{KEY:WORKFLOW_HISTORY}}

## 📝 Task Planning Rules

### Strategic Task Grouping
- **GROUP RELATED ACTIONS** - Combine all actions for the same business topic into ONE task
- **ONE TOPIC PER TASK** - Each task should handle one complete business objective
- **HIGH-LEVEL FOCUS** - Plan strategic outcomes, not implementation steps
- **AVOID MICRO-TASKS** - Don't create separate tasks for each small action

### Task Grouping Examples
- **Research + Analysis + Report** → ONE task: "Web research report"
- **Data Collection + Processing + Visualization** → ONE task: "Collect and present data"
- **Different topics** (email + flowers) → SEPARATE tasks: "Send formal email..." + "Order flowers from Fleurop for delivery to 123 Main St, include card message"

### Retry Handling
- **If retry request**: Analyze previous rounds to understand what failed
- **Learn from mistakes**: Improve the plan based on previous failures

## 📊 Required JSON Structure

```json
{
"overview": "Brief description of the overall plan",
"languageUserDetected": "en",
"userMessage": "User-friendly message explaining the task plan",
"tasks": [
{
"id": "task_1",
"objective": "Clear business objective focusing on what to deliver",
"dependencies": ["task_0"],
"success_criteria": ["measurable criteria 1", "measurable criteria 2"],
"estimated_complexity": "low|medium|high",
"userMessage": "What this task will accomplish"
}
]
}
```

## 🎯 Task Structure Guidelines

### Task ID Format
- Use sequential numbering: `task_1`, `task_2`, `task_3`
- Keep IDs simple and clear

### Objective Writing
- **Be VERY SPECIFIC** - Include exact details needed for action planning
- **Include all requirements** - recipient, attachments, format, recipients, etc.
- **State the complete deliverable** - What exactly will be produced
- **Include context and constraints** - When, where, how, with what
- **Make it actionable** - Clear enough to plan specific actions

### Specific Objective Examples
- **Good**: "Send formal email to ceo and board of directors with annual report as attachment"
- **Bad**: "Handle email communication"
- **Good**: "Order flowers from Fleurop for delivery to 123 Main St, include card message 'Happy Birthday', deliver on March 15th"
- **Bad**: "Order flowers"

### Action Planning Requirements
- **Include all necessary details** - The objective must contain everything needed to plan actions
- **Specify recipients and destinations** - Who should receive what
- **Include file names and formats** - What documents to use/create
- **State timing and deadlines** - When things need to be done
- **Include context and constraints** - Any special requirements or limitations

### Success Criteria
- **Make them measurable** - specific, quantifiable outcomes
- **Focus on deliverables** - what the user will receive
- **Keep criteria realistic** - achievable within the task scope
- **Include all related actions** - success means completing the entire business objective
- **Be specific about requirements** - Include exact details like recipients, formats, deadlines
- **State clear completion criteria** - How to know the task is fully done

### Complexity Estimation
- **Low**: Simple, single-action tasks (1-2 actions)
- **Medium**: Multi-action tasks for one topic (3-5 actions)
- **High**: Complex strategic tasks (6+ actions)

## 🚀 Response Format
Return ONLY the JSON object."""

    # Bundle the static template with its placeholder substitutions.
    return PromptBundle(prompt=template, placeholders=placeholders)
|
||||||
216
modules/workflows/processing/shared/securityUtils.py
Normal file
216
modules/workflows/processing/shared/securityUtils.py
Normal file
|
|
@ -0,0 +1,216 @@
|
||||||
|
"""
|
||||||
|
Security utilities for AI prompt construction.
|
||||||
|
Provides secure content escaping to prevent prompt injection attacks.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Any, Union, List, Dict
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def _escapeForAiPrompt(content: str) -> str:
|
||||||
|
"""
|
||||||
|
Securely escape content for AI prompts to prevent injection attacks.
|
||||||
|
|
||||||
|
This function:
|
||||||
|
1. Escapes all special characters that could break prompt structure
|
||||||
|
2. Wraps content in secure delimiters
|
||||||
|
3. Handles multi-line content safely
|
||||||
|
4. Prevents quote injection and context breaking
|
||||||
|
|
||||||
|
Args:
|
||||||
|
content: The content to escape
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Safely escaped content wrapped in secure delimiters
|
||||||
|
"""
|
||||||
|
if not content:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
# Convert to string if not already
|
||||||
|
content_str = str(content)
|
||||||
|
|
||||||
|
# Remove or escape dangerous characters that could break prompt structure
|
||||||
|
# This includes quotes, backslashes, and other special characters
|
||||||
|
escaped = content_str
|
||||||
|
|
||||||
|
# Escape backslashes first (order matters)
|
||||||
|
escaped = escaped.replace('\\', '\\\\')
|
||||||
|
|
||||||
|
# Escape quotes and other special characters
|
||||||
|
escaped = escaped.replace('"', '\\"')
|
||||||
|
escaped = escaped.replace("'", "\\'")
|
||||||
|
escaped = escaped.replace('\n', '\\n')
|
||||||
|
escaped = escaped.replace('\r', '\\r')
|
||||||
|
escaped = escaped.replace('\t', '\\t')
|
||||||
|
|
||||||
|
# Remove or escape other potentially dangerous characters
|
||||||
|
# Remove control characters except newlines (already handled above)
|
||||||
|
escaped = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', escaped)
|
||||||
|
|
||||||
|
# Wrap in secure delimiters with clear boundaries
|
||||||
|
# Using a unique delimiter pattern that's unlikely to appear in user content
|
||||||
|
secure_delimiter_start = "===USER_CONTENT_START==="
|
||||||
|
secure_delimiter_end = "===USER_CONTENT_END==="
|
||||||
|
|
||||||
|
return f"{secure_delimiter_start}\n{escaped}\n{secure_delimiter_end}"
|
||||||
|
|
||||||
|
def _escapeForJsonPrompt(content: Any) -> str:
|
||||||
|
"""
|
||||||
|
Securely escape content for JSON-based AI prompts.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
content: The content to escape (can be any type)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Safely escaped JSON string
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Convert to JSON string with proper escaping
|
||||||
|
json_str = json.dumps(content, ensure_ascii=False, separators=(',', ':'))
|
||||||
|
return json_str
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to escape content as JSON: {str(e)}")
|
||||||
|
# Fallback to string escaping
|
||||||
|
return _escapeForAiPrompt(str(content))
|
||||||
|
|
||||||
|
def _escapeForListPrompt(items: List[Any]) -> str:
|
||||||
|
"""
|
||||||
|
Securely escape a list of items for AI prompts.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
items: List of items to escape
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Safely escaped list representation
|
||||||
|
"""
|
||||||
|
if not items:
|
||||||
|
return "[]"
|
||||||
|
|
||||||
|
try:
|
||||||
|
escaped_items = []
|
||||||
|
for item in items:
|
||||||
|
if isinstance(item, (dict, list)):
|
||||||
|
escaped_items.append(_escapeForJsonPrompt(item))
|
||||||
|
else:
|
||||||
|
escaped_items.append(_escapeForAiPrompt(str(item)))
|
||||||
|
|
||||||
|
return f"[{', '.join(escaped_items)}]"
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to escape list content: {str(e)}")
|
||||||
|
return "[]"
|
||||||
|
|
||||||
|
def securePromptContent(content: Any, content_type: str = "text") -> str:
    """
    Main entry point to securely escape content for AI prompts.

    Args:
        content: The content to escape
        content_type: One of "text", "json", "list", "user_prompt",
            "document_content"; selects the escaping strategy.

    Returns:
        Safely escaped content ready for AI prompt insertion ("" for None).
    """
    if content is None:
        return ""

    try:
        if content_type == "json":
            return _escapeForJsonPrompt(content)
        if content_type == "list":
            # Non-list values requested as "list" degrade to plain escaping.
            if isinstance(content, list):
                return _escapeForListPrompt(content)
            return _escapeForAiPrompt(str(content))
        if content_type in ["user_prompt", "document_content"]:
            # User-controlled content gets an explicit warning marker for the AI.
            escaped = _escapeForAiPrompt(str(content))
            return f"⚠️ USER_CONTROLLED_CONTENT: {escaped}"
        # Default ("text" or anything unrecognized): plain escaping.
        return _escapeForAiPrompt(str(content))
    except Exception as e:
        logger.error(f"Error escaping content for AI prompt: {str(e)}")
        # Safe fallback rather than propagating into prompt construction.
        return "[ERROR: Content could not be safely escaped]"
|
||||||
|
|
||||||
|
def buildSecurePrompt(template: str, **kwargs) -> str:
    """
    Build a secure AI prompt by escaping each value and substituting it into
    a {key}-style template.

    Keys ending in "_json"/"_list" select the matching escape mode; a few
    well-known keys are treated as user-controlled content; everything else
    is escaped as plain text.

    Args:
        template: The prompt template with {key} placeholders
        **kwargs: Key-value pairs for template substitution

    Returns:
        The formatted prompt, or the template unchanged if escaping or
        formatting fails.
    """
    try:
        escaped_kwargs = {}
        for key, value in kwargs.items():
            if key.endswith('_json'):
                mode = "json"
            elif key.endswith('_list'):
                mode = "list"
            elif key in ['user_prompt', 'context', 'document_content', 'user_input']:
                mode = "user_prompt"
            else:
                mode = "text"
            escaped_kwargs[key] = securePromptContent(value, mode)

        # NOTE(review): str.format raises on templates containing literal
        # braces (e.g. embedded JSON), which falls into the handler below
        # and silently returns the raw template — confirm this is intended.
        return template.format(**escaped_kwargs)
    except Exception as e:
        logger.error(f"Error building secure prompt: {str(e)}")
        return template  # Return original template if escaping fails
|
||||||
|
|
||||||
|
def validatePromptSecurity(prompt: str) -> Dict[str, Any]:
    """
    Heuristically validate a prompt for injection risks.

    Args:
        prompt: The fully assembled prompt text

    Returns:
        Dict with keys: is_secure (bool), issues (list of findings),
        prompt_length (int), has_user_content_delimiters (bool)
    """
    issues = []

    # Quote heuristic: only runs when no escaped quote appears anywhere, so
    # a single '\"' in the prompt skips this check entirely (kept as-is).
    if '"' in prompt and '\\"' not in prompt:
        if re.findall(r'(?<!\\)"', prompt):
            issues.append("Unescaped quotes detected")

    # Common jailbreak / role-confusion phrasings.
    injection_patterns = (
        r'ignore\s+previous\s+instructions',
        r'forget\s+everything',
        r'you\s+are\s+now',
        r'system\s*:',
        r'assistant\s*:',
        r'user\s*:',
        r'<\|.*\|>',  # Special tokens
    )
    for pattern in injection_patterns:
        if re.search(pattern, prompt, re.IGNORECASE):
            issues.append(f"Potential injection pattern detected: {pattern}")

    # Prompts that mention user/context content should wrap it in delimiters.
    if "===USER_CONTENT_START===" not in prompt and "===USER_CONTENT_END===" not in prompt:
        if any(keyword in prompt.lower() for keyword in ('context', 'user', 'input', 'prompt')):
            issues.append("User content may not be properly delimited")

    return {
        "is_secure": not issues,
        "issues": issues,
        "prompt_length": len(prompt),
        "has_user_content_delimiters": "===USER_CONTENT_START===" in prompt,
    }
|
||||||
335
modules/workflows/processing/workflowProcessor.py
Normal file
335
modules/workflows/processing/workflowProcessor.py
Normal file
|
|
@ -0,0 +1,335 @@
|
||||||
|
# workflowProcessor.py
|
||||||
|
# Main workflow processor with delegation pattern
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, Optional, List
|
||||||
|
from modules.datamodels.datamodelChat import TaskStep, TaskContext, TaskPlan, TaskResult, ReviewResult
|
||||||
|
from modules.datamodels.datamodelChat import ChatWorkflow
|
||||||
|
from modules.workflows.processing.modes.modeBase import BaseMode
|
||||||
|
from modules.workflows.processing.modes.modeActionplan import ActionplanMode
|
||||||
|
from modules.workflows.processing.modes.modeReact import ReactMode
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class WorkflowStoppedException(Exception):
    """Raised to abort processing when the user has stopped the workflow."""
    pass
|
||||||
|
|
||||||
|
class WorkflowProcessor:
    """Main workflow processor that delegates to appropriate mode implementations.

    Holds the shared services container and the current workflow, instantiates
    the matching mode implementation (React or Actionplan), keeps the
    workflow's task/action progress counters in sync with the database, and
    provides trace-logging and task-handover utilities.
    """

    def __init__(self, services, workflow=None):
        self.services = services
        self.workflow = workflow
        # Default to "Actionplan" when no workflow is supplied.
        self.mode = self._createMode(workflow.workflowMode if workflow else "Actionplan")

    def _createMode(self, workflowMode: str) -> BaseMode:
        """Create the appropriate mode implementation based on workflow mode."""
        if workflowMode == "React":
            return ReactMode(self.services, self.workflow)
        return ActionplanMode(self.services, self.workflow)

    def _checkWorkflowStopped(self, workflow):
        """Raise WorkflowStoppedException if the user has stopped the workflow.

        Reads the status fresh from the database to avoid acting on stale
        in-memory state; falls back to the in-memory object when the database
        lookup fails.
        """
        try:
            # Get the current workflow status from the database to avoid stale data
            current_workflow = self.services.interfaceDbChat.getWorkflow(workflow.id)
            if current_workflow and current_workflow.status == "stopped":
                logger.info("Workflow stopped by user, aborting processing")
                raise WorkflowStoppedException("Workflow was stopped by user")
        except WorkflowStoppedException:
            # BUGFIX: re-raise the stop signal before the generic handler —
            # previously it was swallowed by `except Exception` below, and a
            # DB-confirmed stop was silently ignored whenever the in-memory
            # object still said "running".
            raise
        except Exception as e:
            # If the DB status is unavailable, fall back to the in-memory object
            logger.warning(f"Could not check current workflow status from database: {str(e)}")
            if workflow and workflow.status == "stopped":
                logger.info("Workflow stopped by user (from in-memory object), aborting processing")
                raise WorkflowStoppedException("Workflow was stopped by user")

    async def generateTaskPlan(self, userInput: str, workflow: ChatWorkflow) -> TaskPlan:
        """Generate a high-level task plan for the workflow and post it as a message."""
        try:
            # Abort early if the user already stopped this workflow
            self._checkWorkflowStopped(workflow)

            logger.info("=== STARTING TASK PLAN GENERATION ===")
            logger.info(f"Workflow ID: {workflow.id}")
            logger.info(f"User Input: {userInput}")
            logger.info(f"Workflow Mode: {workflow.workflowMode}")

            # Delegate plan generation to the active mode implementation
            taskPlan = await self.mode.generateTaskPlan(userInput, workflow)

            # Surface the plan to the user as a chat message
            await self.mode.createTaskPlanMessage(taskPlan, workflow)

            return taskPlan
        except Exception as e:
            logger.error(f"Error in generateTaskPlan: {str(e)}")
            raise

    async def executeTask(self, taskStep: TaskStep, workflow: ChatWorkflow, context: TaskContext,
                          taskIndex: int = None, totalTasks: int = None) -> TaskResult:
        """Execute a task step using the appropriate mode."""
        try:
            self._checkWorkflowStopped(workflow)

            logger.info("=== STARTING TASK EXECUTION ===")
            logger.info(f"Task: {taskStep.objective}")
            logger.info(f"Mode: {workflow.workflowMode}")

            # Delegate execution to the active mode implementation
            return await self.mode.executeTask(taskStep, workflow, context, taskIndex, totalTasks)
        except Exception as e:
            logger.error(f"Error in executeTask: {str(e)}")
            raise

    async def generateActionItems(self, taskStep: TaskStep, workflow: ChatWorkflow,
                                  previousResults: List = None, enhancedContext: TaskContext = None) -> List:
        """Generate actions for a task step using the appropriate mode."""
        try:
            self._checkWorkflowStopped(workflow)

            logger.info("=== STARTING ACTION GENERATION ===")
            logger.info(f"Task: {taskStep.objective}")
            logger.info(f"Mode: {workflow.workflowMode}")

            # Delegate action planning to the active mode implementation
            return await self.mode.generateActionItems(taskStep, workflow, previousResults, enhancedContext)
        except Exception as e:
            logger.error(f"Error in generateActionItems: {str(e)}")
            raise

    def updateWorkflowAfterTaskPlanCreated(self, totalTasks: int):
        """Reset progress counters and record the task count after plan creation."""
        try:
            updateData = {
                "totalTasks": totalTasks,
                "currentTask": 0,
                "currentAction": 0,
                "totalActions": 0
            }

            # Keep the in-memory object consistent with the database row
            self.workflow.totalTasks = totalTasks
            self.workflow.currentTask = 0
            self.workflow.currentAction = 0
            self.workflow.totalActions = 0

            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} after task plan creation: {updateData}")
        except Exception as e:
            logger.error(f"Error updating workflow after task plan creation: {str(e)}")

    def updateWorkflowBeforeExecutingTask(self, taskNumber: int):
        """Advance the current-task counter and reset action counters."""
        try:
            updateData = {
                "currentTask": taskNumber,
                "currentAction": 0,
                "totalActions": 0
            }

            # Keep the in-memory object consistent with the database row
            self.workflow.currentTask = taskNumber
            self.workflow.currentAction = 0
            self.workflow.totalActions = 0

            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} before executing task {taskNumber}: {updateData}")
        except Exception as e:
            logger.error(f"Error updating workflow before executing task: {str(e)}")

    def updateWorkflowAfterActionPlanning(self, totalActions: int):
        """Record the number of planned actions for the current task."""
        try:
            updateData = {"totalActions": totalActions}

            self.workflow.totalActions = totalActions

            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} after action planning: {updateData}")
        except Exception as e:
            logger.error(f"Error updating workflow after action planning: {str(e)}")

    def updateWorkflowBeforeExecutingAction(self, actionNumber: int):
        """Advance the current-action counter."""
        try:
            updateData = {"currentAction": actionNumber}

            self.workflow.currentAction = actionNumber

            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} before executing action {actionNumber}: {updateData}")
        except Exception as e:
            logger.error(f"Error updating workflow before executing action: {str(e)}")

    def setWorkflowTotals(self, totalTasks: int = None, totalActions: int = None):
        """Set total counts for workflow progress tracking and persist them.

        Only the counters that are explicitly provided are touched.
        """
        try:
            updateData = {}

            if totalTasks is not None:
                self.workflow.totalTasks = totalTasks
                updateData["totalTasks"] = totalTasks

            if totalActions is not None:
                self.workflow.totalActions = totalActions
                updateData["totalActions"] = totalActions

            # Only hit the database when something actually changed
            if updateData:
                self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
                logger.info(f"Updated workflow {self.workflow.id} totals in database: {updateData}")

            logger.debug(f"Updated workflow totals: Tasks {self.workflow.totalTasks if hasattr(self.workflow, 'totalTasks') else 'N/A'}, Actions {self.workflow.totalActions if hasattr(self.workflow, 'totalActions') else 'N/A'}")
        except Exception as e:
            logger.error(f"Error setting workflow totals: {str(e)}")

    def resetWorkflowForNewSession(self):
        """Zero all progress counters for a fresh session."""
        try:
            updateData = {
                "currentTask": 0,
                "currentAction": 0,
                "totalTasks": 0,
                "totalActions": 0
            }

            self.workflow.currentTask = 0
            self.workflow.currentAction = 0
            self.workflow.totalTasks = 0
            self.workflow.totalActions = 0

            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Reset workflow {self.workflow.id} for new session: {updateData}")
        except Exception as e:
            logger.error(f"Error resetting workflow for new session: {str(e)}")

    def writeTraceLog(self, contextText: str, data: Any) -> None:
        """Append a trace entry to the configured trace file (debug mode only)."""
        try:
            import os
            import json
            from datetime import datetime, UTC

            # Tracing is only active when the logger is at DEBUG level
            if logger.level > logging.DEBUG:
                return

            logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
            if not os.path.isabs(logDir):
                # Relative paths are resolved against the gateway directory
                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
                logDir = os.path.join(gatewayDir, logDir)

            os.makedirs(logDir, exist_ok=True)
            traceFile = os.path.join(logDir, "log_trace.log")

            timestamp = datetime.fromtimestamp(self.services.utils.getUtcTimestamp(), UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
            traceEntry = f"[{timestamp}] {contextText}\n"

            # Full, untruncated payload dump for debugging
            if data is not None:
                if isinstance(data, (dict, list)):
                    # ensure_ascii=False preserves Unicode; indent for readability
                    traceEntry += f"Data: {json.dumps(data, indent=2, default=str, ensure_ascii=False)}\n"
                else:
                    traceEntry += f"Data: {str(data)}\n"

            traceEntry += "-" * 80 + "\n\n"

            with open(traceFile, "a", encoding="utf-8") as f:
                f.write(traceEntry)
        except Exception:
            # Intentionally silent: logging a trace failure could recurse
            pass

    def clearTraceLog(self) -> None:
        """Truncate the trace log file if it exists."""
        try:
            import os

            logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
            if not os.path.isabs(logDir):
                # Relative paths are resolved against the gateway directory
                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
                logDir = os.path.join(gatewayDir, logDir)

            traceFile = os.path.join(logDir, "log_trace.log")

            if os.path.exists(traceFile):
                with open(traceFile, "w", encoding="utf-8") as f:
                    f.write("")
                logger.info("Trace log cleared")
            else:
                logger.info("Trace log file does not exist, nothing to clear")
        except Exception as e:
            logger.error(f"Error clearing trace log: {str(e)}")

    async def prepareTaskHandover(self, taskStep, taskActions, taskResult, workflow):
        """Prepare task handover data for workflow coordination.

        Accepts either a ReviewResult (has `met_criteria`) or a TaskResult;
        returns a dict describing the completed task, or {'error': ...} on
        failure.
        """
        try:
            self._checkWorkflowStopped(workflow)

            # Handle both TaskResult and ReviewResult objects
            if hasattr(taskResult, 'met_criteria'):
                # ReviewResult carries its own serialization
                reviewResult = taskResult.to_dict()
            else:
                # TaskResult: build a minimal review summary by hand
                reviewResult = {
                    'status': taskResult.status if taskResult else 'unknown',
                    'reason': taskResult.error if taskResult and hasattr(taskResult, 'error') else None,
                    'success': taskResult.success if taskResult else False
                }

            handoverData = {
                'task_id': taskStep.id,
                'task_description': taskStep.objective,
                'actions': [action.to_dict() for action in taskActions] if taskActions else [],
                'review_result': reviewResult,
                'workflow_id': workflow.id,
                'handover_time': self.services.utils.getUtcTimestamp()
            }
            logger.info(f"Prepared handover for task {taskStep.id} in workflow {workflow.id}")
            return handoverData
        except Exception as e:
            logger.error(f"Error in prepareTaskHandover: {str(e)}")
            return {'error': str(e)}
|
||||||
|
|
@ -8,11 +8,11 @@ from modules.datamodels.datamodelChat import (
|
||||||
UserInputRequest,
|
UserInputRequest,
|
||||||
ChatMessage,
|
ChatMessage,
|
||||||
ChatWorkflow,
|
ChatWorkflow,
|
||||||
ChatDocument,
|
ChatDocument
|
||||||
WorkflowResult
|
|
||||||
)
|
)
|
||||||
from modules.datamodels.datamodelWorkflow import TaskItem, TaskStatus, TaskContext
|
from modules.datamodels.datamodelChat import TaskItem, TaskStatus, TaskContext
|
||||||
from modules.workflows.processing.handlingTasks import HandlingTasks, WorkflowStoppedException
|
from modules.workflows.processing.workflowProcessor import WorkflowProcessor, WorkflowStoppedException
|
||||||
|
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -22,11 +22,11 @@ class WorkflowManager:
|
||||||
|
|
||||||
def __init__(self, services):
|
def __init__(self, services):
|
||||||
self.services = services
|
self.services = services
|
||||||
self.handlingTasks = None
|
self.workflowProcessor = None
|
||||||
|
|
||||||
# Exported functions
|
# Exported functions
|
||||||
|
|
||||||
async def workflowStart(self, userInput: UserInputRequest, workflowId: Optional[str] = None, workflowMode: str = "Actionplan") -> ChatWorkflow:
|
async def workflowStart(self, userInput: UserInputRequest, workflowId: Optional[str] = None, workflowMode: str = "React") -> ChatWorkflow:
|
||||||
"""Starts a new workflow or continues an existing one, then launches processing."""
|
"""Starts a new workflow or continues an existing one, then launches processing."""
|
||||||
try:
|
try:
|
||||||
# Debug log to check workflowMode parameter
|
# Debug log to check workflowMode parameter
|
||||||
|
|
@ -38,8 +38,8 @@ class WorkflowManager:
|
||||||
if not workflow:
|
if not workflow:
|
||||||
raise ValueError(f"Workflow {workflowId} not found")
|
raise ValueError(f"Workflow {workflowId} not found")
|
||||||
|
|
||||||
# Add workflow to services
|
# Store workflow in services for reference (don't overwrite the workflow service)
|
||||||
self.services.workflow = workflow
|
self.services.currentWorkflow = workflow
|
||||||
|
|
||||||
if workflow.status == "running":
|
if workflow.status == "running":
|
||||||
logger.info(f"Stopping running workflow {workflowId} before processing new prompt")
|
logger.info(f"Stopping running workflow {workflowId} before processing new prompt")
|
||||||
|
|
@ -62,7 +62,8 @@ class WorkflowManager:
|
||||||
self.services.workflow.updateWorkflow(workflowId, {
|
self.services.workflow.updateWorkflow(workflowId, {
|
||||||
"status": "running",
|
"status": "running",
|
||||||
"lastActivity": currentTime,
|
"lastActivity": currentTime,
|
||||||
"currentRound": newRound
|
"currentRound": newRound,
|
||||||
|
"workflowMode": workflowMode # Update workflow mode for existing workflows
|
||||||
})
|
})
|
||||||
|
|
||||||
workflow = self.services.workflow.getWorkflow(workflowId)
|
workflow = self.services.workflow.getWorkflow(workflowId)
|
||||||
|
|
@ -71,11 +72,14 @@ class WorkflowManager:
|
||||||
|
|
||||||
self.services.workflow.createLog({
|
self.services.workflow.createLog({
|
||||||
"workflowId": workflowId,
|
"workflowId": workflowId,
|
||||||
"message": f"Workflow resumed (round {workflow.currentRound})",
|
"message": f"Workflow resumed (round {workflow.currentRound}) with mode: {workflowMode}",
|
||||||
"type": "info",
|
"type": "info",
|
||||||
"status": "running",
|
"status": "running",
|
||||||
"progress": 0
|
"progress": 0
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# CRITICAL: Update the workflow object's workflowMode attribute for immediate use
|
||||||
|
workflow.workflowMode = workflowMode
|
||||||
else:
|
else:
|
||||||
workflowData = {
|
workflowData = {
|
||||||
"name": "New Workflow",
|
"name": "New Workflow",
|
||||||
|
|
@ -108,8 +112,8 @@ class WorkflowManager:
|
||||||
self.services.workflow.updateWorkflow(workflow.id, {"currentRound": 1})
|
self.services.workflow.updateWorkflow(workflow.id, {"currentRound": 1})
|
||||||
self.services.workflow.updateWorkflowStats(workflow.id, bytesSent=0, bytesReceived=0)
|
self.services.workflow.updateWorkflowStats(workflow.id, bytesSent=0, bytesReceived=0)
|
||||||
|
|
||||||
# Add workflow to services
|
# Store workflow in services for reference (don't overwrite the workflow service)
|
||||||
self.services.workflow = workflow
|
self.services.currentWorkflow = workflow
|
||||||
|
|
||||||
# Start workflow processing asynchronously
|
# Start workflow processing asynchronously
|
||||||
asyncio.create_task(self._workflowProcess(userInput, workflow))
|
asyncio.create_task(self._workflowProcess(userInput, workflow))
|
||||||
|
|
@ -149,11 +153,14 @@ class WorkflowManager:
|
||||||
async def _workflowProcess(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> None:
|
async def _workflowProcess(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> None:
|
||||||
"""Process a workflow with user input"""
|
"""Process a workflow with user input"""
|
||||||
try:
|
try:
|
||||||
self.handlingTasks = HandlingTasks(self.services, workflow)
|
# Store the current user prompt in services for easy access throughout the workflow
|
||||||
|
self.services.rawUserPrompt = userInput.prompt
|
||||||
|
self.services.currentUserPrompt = userInput.prompt
|
||||||
|
self.workflowProcessor = WorkflowProcessor(self.services, workflow)
|
||||||
message = await self._sendFirstMessage(userInput, workflow)
|
message = await self._sendFirstMessage(userInput, workflow)
|
||||||
task_plan = await self._planTasks(userInput, workflow)
|
task_plan = await self._planTasks(userInput, workflow)
|
||||||
workflow_result = await self._executeTasks(task_plan, workflow)
|
await self._executeTasks(task_plan, workflow)
|
||||||
await self._processWorkflowResults(workflow, workflow_result, message)
|
await self._processWorkflowResults(workflow, message)
|
||||||
|
|
||||||
except WorkflowStoppedException:
|
except WorkflowStoppedException:
|
||||||
self._handleWorkflowStop(workflow)
|
self._handleWorkflowStop(workflow)
|
||||||
|
|
@ -166,14 +173,14 @@ class WorkflowManager:
|
||||||
async def _sendFirstMessage(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> ChatMessage:
|
async def _sendFirstMessage(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> ChatMessage:
|
||||||
"""Send first message to start workflow"""
|
"""Send first message to start workflow"""
|
||||||
try:
|
try:
|
||||||
self.handlingTasks._checkWorkflowStopped()
|
self.workflowProcessor._checkWorkflowStopped(workflow)
|
||||||
|
|
||||||
# Create initial message using interface
|
# Create initial message using interface
|
||||||
# Generate the correct documentsLabel that matches what getDocumentReferenceString will create
|
# For first user message, include round info in the user context label
|
||||||
round_num = workflow.currentRound
|
round_num = workflow.currentRound
|
||||||
task_num = 0
|
task_num = 0
|
||||||
action_num = 0
|
action_num = 0
|
||||||
context_label = f"round{round_num}_task{task_num}_action{action_num}_context"
|
context_label = f"round{round_num}_usercontext"
|
||||||
|
|
||||||
messageData = {
|
messageData = {
|
||||||
"workflowId": workflow.id,
|
"workflowId": workflow.id,
|
||||||
|
|
@ -199,7 +206,7 @@ class WorkflowManager:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
|
|
||||||
# Clear trace log for new workflow session
|
# Clear trace log for new workflow session
|
||||||
self.handlingTasks.clearTraceLog()
|
self.workflowProcessor.clearTraceLog()
|
||||||
|
|
||||||
# Add documents if any, now with messageId
|
# Add documents if any, now with messageId
|
||||||
if userInput.listFileId:
|
if userInput.listFileId:
|
||||||
|
|
@ -208,6 +215,128 @@ class WorkflowManager:
|
||||||
message.documents = documents
|
message.documents = documents
|
||||||
# Update the message with documents in database
|
# Update the message with documents in database
|
||||||
self.services.workflow.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})
|
self.services.workflow.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})
|
||||||
|
|
||||||
|
# Analyze the user's input to extract intent and offload bulky context into documents
|
||||||
|
try:
|
||||||
|
analyzerPrompt = (
|
||||||
|
"You are an input analyzer. Split the user's message into:\n"
|
||||||
|
"1) intent: the user's core request in one concise paragraph, normalized to the user's language.\n"
|
||||||
|
"2) contextItems: supportive data to attach as separate documents if significantly larger than the intent. "
|
||||||
|
"Include large literal data blocks, long lists/tables, code/JSON blocks, quoted transcripts, CSV fragments, or detailed specs. "
|
||||||
|
"Keep URLs in the intent unless they include large pasted content.\n\n"
|
||||||
|
"Rules:\n"
|
||||||
|
"- If total content length (intent + data) is less than 10% of the model's max tokens, do not extract; "
|
||||||
|
"return an empty contextItems and keep a compact, self-contained intent.\n"
|
||||||
|
"- If content exceeds that, move bulky parts into contextItems, keeping the intent short and clear.\n"
|
||||||
|
"- Preserve critical references (URLs, filenames) in the intent.\n"
|
||||||
|
"- Normalize the intent to the detected language. If mixed-language, use the primary detected language and normalize.\n\n"
|
||||||
|
"Output JSON only (no markdown):\n"
|
||||||
|
"{\n"
|
||||||
|
" \"detectedLanguage\": \"en\",\n"
|
||||||
|
" \"intent\": \"Concise normalized request...\",\n"
|
||||||
|
" \"contextItems\": [\n"
|
||||||
|
" {\n"
|
||||||
|
" \"title\": \"User context 1\",\n"
|
||||||
|
" \"mimeType\": \"text/plain\",\n"
|
||||||
|
" \"content\": \"Full extracted content block here\"\n"
|
||||||
|
" }\n"
|
||||||
|
" ]\n"
|
||||||
|
"}\n\n"
|
||||||
|
f"User message:\n{userInput.prompt}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Call AI analyzer
|
||||||
|
aiResponse = await self.services.ai.callAi(prompt=analyzerPrompt)
|
||||||
|
|
||||||
|
detectedLanguage = None
|
||||||
|
intentText = userInput.prompt
|
||||||
|
contextItems = []
|
||||||
|
|
||||||
|
# Parse analyzer response (JSON expected)
|
||||||
|
try:
|
||||||
|
import json
|
||||||
|
jsonStart = aiResponse.find('{') if aiResponse else -1
|
||||||
|
jsonEnd = aiResponse.rfind('}') + 1 if aiResponse else 0
|
||||||
|
if jsonStart != -1 and jsonEnd > jsonStart:
|
||||||
|
parsed = json.loads(aiResponse[jsonStart:jsonEnd])
|
||||||
|
detectedLanguage = parsed.get('detectedLanguage') or None
|
||||||
|
if parsed.get('intent'):
|
||||||
|
intentText = parsed.get('intent')
|
||||||
|
contextItems = parsed.get('contextItems') or []
|
||||||
|
except Exception:
|
||||||
|
contextItems = []
|
||||||
|
|
||||||
|
# Update services state
|
||||||
|
if detectedLanguage and isinstance(detectedLanguage, str):
|
||||||
|
self._setUserLanguage(detectedLanguage)
|
||||||
|
self.services.currentUserPrompt = intentText or userInput.prompt
|
||||||
|
|
||||||
|
# Telemetry (sizes and counts)
|
||||||
|
try:
|
||||||
|
inputSize = len(userInput.prompt.encode('utf-8')) if userInput and userInput.prompt else 0
|
||||||
|
outputSize = len(aiResponse.encode('utf-8')) if aiResponse else 0
|
||||||
|
self.services.workflow.createLog({
|
||||||
|
"workflowId": workflow.id,
|
||||||
|
"message": f"User prompt analyzed (input {inputSize} bytes, output {outputSize} bytes, items {len(contextItems)})",
|
||||||
|
"type": "info",
|
||||||
|
"status": "running",
|
||||||
|
"progress": 0
|
||||||
|
})
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Create and attach documents for context items
|
||||||
|
if contextItems and isinstance(contextItems, list):
|
||||||
|
created_docs = []
|
||||||
|
for idx, item in enumerate(contextItems):
|
||||||
|
try:
|
||||||
|
title = item.get('title') if isinstance(item, dict) else None
|
||||||
|
mime = item.get('mimeType') if isinstance(item, dict) else None
|
||||||
|
content = item.get('content') if isinstance(item, dict) else None
|
||||||
|
if not content:
|
||||||
|
continue
|
||||||
|
fileName = (title or f"user_context_{idx+1}.txt").strip()
|
||||||
|
mimeType = (mime or "text/plain").strip()
|
||||||
|
|
||||||
|
# Create file in component storage
|
||||||
|
content_bytes = content.encode('utf-8')
|
||||||
|
file_item = self.services.interfaceDbComponent.createFile(
|
||||||
|
name=fileName,
|
||||||
|
mimeType=mimeType,
|
||||||
|
content=content_bytes
|
||||||
|
)
|
||||||
|
# Persist file data
|
||||||
|
self.services.interfaceDbComponent.createFileData(file_item.id, content_bytes)
|
||||||
|
|
||||||
|
# Collect file info
|
||||||
|
file_info = self.services.workflow.getFileInfo(file_item.id)
|
||||||
|
from modules.datamodels.datamodelChat import ChatDocument as _ChatDocument
|
||||||
|
doc = _ChatDocument(
|
||||||
|
messageId=message.id,
|
||||||
|
fileId=file_item.id,
|
||||||
|
fileName=file_info.get("fileName", fileName) if file_info else fileName,
|
||||||
|
fileSize=file_info.get("size", len(content_bytes)) if file_info else len(content_bytes),
|
||||||
|
mimeType=file_info.get("mimeType", mimeType) if file_info else mimeType
|
||||||
|
)
|
||||||
|
# Persist document record
|
||||||
|
self.services.interfaceDbChat.createDocument(doc.to_dict())
|
||||||
|
created_docs.append(doc)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if created_docs:
|
||||||
|
# Attach to message and persist
|
||||||
|
if not message.documents:
|
||||||
|
message.documents = []
|
||||||
|
message.documents.extend(created_docs)
|
||||||
|
# Ensure label is user_context for discoverability
|
||||||
|
message.documentsLabel = context_label
|
||||||
|
self.services.workflow.updateMessage(message.id, {
|
||||||
|
"documents": [d.to_dict() for d in message.documents],
|
||||||
|
"documentsLabel": context_label
|
||||||
|
})
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Prompt analysis failed or skipped: {str(e)}")
|
||||||
|
|
||||||
return message
|
return message
|
||||||
else:
|
else:
|
||||||
|
|
@ -219,7 +348,7 @@ class WorkflowManager:
|
||||||
|
|
||||||
async def _planTasks(self, userInput: UserInputRequest, workflow: ChatWorkflow):
|
async def _planTasks(self, userInput: UserInputRequest, workflow: ChatWorkflow):
|
||||||
"""Generate task plan for workflow execution"""
|
"""Generate task plan for workflow execution"""
|
||||||
handling = self.handlingTasks
|
handling = self.workflowProcessor
|
||||||
# Generate task plan first (shared for both modes)
|
# Generate task plan first (shared for both modes)
|
||||||
task_plan = await handling.generateTaskPlan(userInput.prompt, workflow)
|
task_plan = await handling.generateTaskPlan(userInput.prompt, workflow)
|
||||||
if not task_plan or not task_plan.tasks:
|
if not task_plan or not task_plan.tasks:
|
||||||
|
|
@ -229,9 +358,9 @@ class WorkflowManager:
|
||||||
logger.info(f"Executing workflow mode={workflow_mode} with {len(task_plan.tasks)} tasks")
|
logger.info(f"Executing workflow mode={workflow_mode} with {len(task_plan.tasks)} tasks")
|
||||||
return task_plan
|
return task_plan
|
||||||
|
|
||||||
async def _executeTasks(self, task_plan, workflow: ChatWorkflow) -> WorkflowResult:
|
async def _executeTasks(self, task_plan, workflow: ChatWorkflow) -> None:
|
||||||
"""Execute all tasks in the task plan"""
|
"""Execute all tasks in the task plan and update workflow status."""
|
||||||
handling = self.handlingTasks
|
handling = self.workflowProcessor
|
||||||
total_tasks = len(task_plan.tasks)
|
total_tasks = len(task_plan.tasks)
|
||||||
all_task_results: List = []
|
all_task_results: List = []
|
||||||
previous_results: List[str] = []
|
previous_results: List[str] = []
|
||||||
|
|
@ -240,7 +369,7 @@ class WorkflowManager:
|
||||||
current_task_index = idx + 1
|
current_task_index = idx + 1
|
||||||
logger.info(f"Task {current_task_index}/{total_tasks}: {task_step.objective}")
|
logger.info(f"Task {current_task_index}/{total_tasks}: {task_step.objective}")
|
||||||
|
|
||||||
# Build TaskContext (mode-specific behavior is inside HandlingTasks)
|
# Build TaskContext (mode-specific behavior is inside WorkflowProcessor)
|
||||||
task_context = TaskContext(
|
task_context = TaskContext(
|
||||||
task_step=task_step,
|
task_step=task_step,
|
||||||
workflow=workflow,
|
workflow=workflow,
|
||||||
|
|
@ -274,19 +403,15 @@ class WorkflowManager:
|
||||||
if task_result.success and task_result.feedback:
|
if task_result.success and task_result.feedback:
|
||||||
previous_results.append(task_result.feedback)
|
previous_results.append(task_result.feedback)
|
||||||
|
|
||||||
return WorkflowResult(
|
# Mark workflow as completed; error/stop cases update status elsewhere
|
||||||
status="completed",
|
workflow.status = "completed"
|
||||||
completed_tasks=len(all_task_results),
|
return None
|
||||||
total_tasks=total_tasks,
|
|
||||||
execution_time=0.0,
|
|
||||||
final_results_count=len(all_task_results)
|
|
||||||
)
|
|
||||||
|
|
||||||
async def _processWorkflowResults(self, workflow: ChatWorkflow, workflow_result: WorkflowResult, initial_message: ChatMessage) -> None:
|
async def _processWorkflowResults(self, workflow: ChatWorkflow, initial_message: ChatMessage) -> None:
|
||||||
"""Process workflow results and create appropriate messages"""
|
"""Process workflow results based on workflow status and create appropriate messages"""
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
self.handlingTasks._checkWorkflowStopped()
|
self.workflowProcessor._checkWorkflowStopped(workflow)
|
||||||
except WorkflowStoppedException:
|
except WorkflowStoppedException:
|
||||||
logger.info(f"Workflow {workflow.id} was stopped during result processing")
|
logger.info(f"Workflow {workflow.id} was stopped during result processing")
|
||||||
|
|
||||||
|
|
@ -321,7 +446,7 @@ class WorkflowManager:
|
||||||
})
|
})
|
||||||
return
|
return
|
||||||
|
|
||||||
if workflow_result.status == 'stopped':
|
if workflow.status == 'stopped':
|
||||||
# Create stopped message
|
# Create stopped message
|
||||||
stopped_message = {
|
stopped_message = {
|
||||||
"workflowId": workflow.id,
|
"workflowId": workflow.id,
|
||||||
|
|
@ -363,12 +488,12 @@ class WorkflowManager:
|
||||||
"progress": 100
|
"progress": 100
|
||||||
})
|
})
|
||||||
return
|
return
|
||||||
elif workflow_result.status == 'failed':
|
elif workflow.status == 'failed':
|
||||||
# Create error message
|
# Create error message
|
||||||
error_message = {
|
error_message = {
|
||||||
"workflowId": workflow.id,
|
"workflowId": workflow.id,
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
"message": f"Workflow failed: {workflow_result.error or 'Unknown error'}",
|
"message": f"Workflow failed: {'Unknown error'}",
|
||||||
"status": "last",
|
"status": "last",
|
||||||
"sequenceNr": len(workflow.messages) + 1,
|
"sequenceNr": len(workflow.messages) + 1,
|
||||||
"publishedAt": self.services.utils.getUtcTimestamp(),
|
"publishedAt": self.services.utils.getUtcTimestamp(),
|
||||||
|
|
@ -399,7 +524,7 @@ class WorkflowManager:
|
||||||
# Add failed log entry
|
# Add failed log entry
|
||||||
self.services.workflow.createLog({
|
self.services.workflow.createLog({
|
||||||
"workflowId": workflow.id,
|
"workflowId": workflow.id,
|
||||||
"message": f"Workflow failed: {workflow_result.error or 'Unknown error'}",
|
"message": "Workflow failed: Unknown error",
|
||||||
"type": "error",
|
"type": "error",
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"progress": 100
|
"progress": 100
|
||||||
|
|
@ -504,7 +629,7 @@ class WorkflowManager:
|
||||||
async def _generateWorkflowFeedback(self, workflow: ChatWorkflow) -> str:
|
async def _generateWorkflowFeedback(self, workflow: ChatWorkflow) -> str:
|
||||||
"""Generate feedback message for workflow completion"""
|
"""Generate feedback message for workflow completion"""
|
||||||
try:
|
try:
|
||||||
self.handlingTasks._checkWorkflowStopped()
|
self.workflowProcessor._checkWorkflowStopped(workflow)
|
||||||
|
|
||||||
# Count messages by role
|
# Count messages by role
|
||||||
user_messages = [msg for msg in workflow.messages if msg.role == 'user']
|
user_messages = [msg for msg in workflow.messages if msg.role == 'user']
|
||||||
|
|
|
||||||
BIN
testdata/00Untitled.jpg
vendored
BIN
testdata/00Untitled.jpg
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 407 KiB |
BIN
testdata/Muster_Kundenliste_Test1.xlsx
vendored
BIN
testdata/Muster_Kundenliste_Test1.xlsx
vendored
Binary file not shown.
BIN
testdata/diagramm_komponenten.pdf
vendored
BIN
testdata/diagramm_komponenten.pdf
vendored
Binary file not shown.
Loading…
Reference in a new issue